From 8d987e5c75107ca7515fa19e857cfa24aab6ec8f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 9 Nov 2010 23:24:26 +0000 Subject: net: avoid limits overflow Robin Holt tried to boot a 16TB machine and found some limits were reached : sysctl_tcp_mem[2], sysctl_udp_mem[2] We can switch infrastructure to use long "instead" of "int", now atomic_long_t primitives are available for free. Signed-off-by: Eric Dumazet Reported-by: Robin Holt Reviewed-by: Robin Holt Signed-off-by: Andrew Morton Signed-off-by: David S. Miller --- include/net/dn.h | 2 +- include/net/sock.h | 4 ++-- include/net/tcp.h | 6 +++--- include/net/udp.h | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include/net') diff --git a/include/net/dn.h b/include/net/dn.h index e5469f7b67a3..a514a3cf4573 100644 --- a/include/net/dn.h +++ b/include/net/dn.h @@ -225,7 +225,7 @@ extern int decnet_di_count; extern int decnet_dr_count; extern int decnet_no_fc_max_cwnd; -extern int sysctl_decnet_mem[3]; +extern long sysctl_decnet_mem[3]; extern int sysctl_decnet_wmem[3]; extern int sysctl_decnet_rmem[3]; diff --git a/include/net/sock.h b/include/net/sock.h index c7a736228ca2..a6338d039857 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -762,7 +762,7 @@ struct proto { /* Memory pressure */ void (*enter_memory_pressure)(struct sock *sk); - atomic_t *memory_allocated; /* Current allocated memory. */ + atomic_long_t *memory_allocated; /* Current allocated memory. */ struct percpu_counter *sockets_allocated; /* Current number of sockets. */ /* * Pressure flag: try to collapse. @@ -771,7 +771,7 @@ struct proto { * is strict, actions are advisory and have some latency. 
*/ int *memory_pressure; - int *sysctl_mem; + long *sysctl_mem; int *sysctl_wmem; int *sysctl_rmem; int max_header; diff --git a/include/net/tcp.h b/include/net/tcp.h index 4fee0424af7e..e36c874c7fb1 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -224,7 +224,7 @@ extern int sysctl_tcp_fack; extern int sysctl_tcp_reordering; extern int sysctl_tcp_ecn; extern int sysctl_tcp_dsack; -extern int sysctl_tcp_mem[3]; +extern long sysctl_tcp_mem[3]; extern int sysctl_tcp_wmem[3]; extern int sysctl_tcp_rmem[3]; extern int sysctl_tcp_app_win; @@ -247,7 +247,7 @@ extern int sysctl_tcp_cookie_size; extern int sysctl_tcp_thin_linear_timeouts; extern int sysctl_tcp_thin_dupack; -extern atomic_t tcp_memory_allocated; +extern atomic_long_t tcp_memory_allocated; extern struct percpu_counter tcp_sockets_allocated; extern int tcp_memory_pressure; @@ -280,7 +280,7 @@ static inline bool tcp_too_many_orphans(struct sock *sk, int shift) } if (sk->sk_wmem_queued > SOCK_MIN_SNDBUF && - atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2]) + atomic_long_read(&tcp_memory_allocated) > sysctl_tcp_mem[2]) return true; return false; } diff --git a/include/net/udp.h b/include/net/udp.h index 200b82848c9a..bb967dd59bf7 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -105,10 +105,10 @@ static inline struct udp_hslot *udp_hashslot2(struct udp_table *table, extern struct proto udp_prot; -extern atomic_t udp_memory_allocated; +extern atomic_long_t udp_memory_allocated; /* sysctl variables for udp */ -extern int sysctl_udp_mem[3]; +extern long sysctl_udp_mem[3]; extern int sysctl_udp_rmem_min; extern int sysctl_udp_wmem_min; -- cgit v1.2.3-59-g8ed1b From 1d7138de878d1d4210727c1200193e69596f93b3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 12 Nov 2010 05:46:50 +0000 Subject: igmp: RCU conversion of in_dev->mc_list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit in_dev->mc_list is protected by one rwlock (in_dev->mc_list_lock). 
This can easily be converted to a RCU protection. Writers hold RTNL, so mc_list_lock is removed, not replaced by a spinlock. Signed-off-by: Eric Dumazet Cc: Cypher Wu Cc: Américo Wang Signed-off-by: David S. Miller --- include/linux/igmp.h | 12 ++- include/linux/inetdevice.h | 5 +- include/net/inet_sock.h | 2 +- net/ipv4/igmp.c | 223 +++++++++++++++++++++------------------------ 4 files changed, 115 insertions(+), 127 deletions(-) (limited to 'include/net') diff --git a/include/linux/igmp.h b/include/linux/igmp.h index 93fc2449af10..7d164670f264 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -167,10 +167,10 @@ struct ip_sf_socklist { */ struct ip_mc_socklist { - struct ip_mc_socklist *next; + struct ip_mc_socklist __rcu *next_rcu; struct ip_mreqn multi; unsigned int sfmode; /* MCAST_{INCLUDE,EXCLUDE} */ - struct ip_sf_socklist *sflist; + struct ip_sf_socklist __rcu *sflist; struct rcu_head rcu; }; @@ -186,11 +186,14 @@ struct ip_sf_list { struct ip_mc_list { struct in_device *interface; __be32 multiaddr; + unsigned int sfmode; struct ip_sf_list *sources; struct ip_sf_list *tomb; - unsigned int sfmode; unsigned long sfcount[2]; - struct ip_mc_list *next; + union { + struct ip_mc_list *next; + struct ip_mc_list __rcu *next_rcu; + }; struct timer_list timer; int users; atomic_t refcnt; @@ -201,6 +204,7 @@ struct ip_mc_list { char loaded; unsigned char gsquery; /* check source marks? 
*/ unsigned char crcount; + struct rcu_head rcu; }; /* V3 exponential field decoding */ diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index ccd5b07d678d..380ba6bc5db1 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -52,9 +52,8 @@ struct in_device { atomic_t refcnt; int dead; struct in_ifaddr *ifa_list; /* IP ifaddr chain */ - rwlock_t mc_list_lock; - struct ip_mc_list *mc_list; /* IP multicast filter chain */ - int mc_count; /* Number of installed mcasts */ + struct ip_mc_list __rcu *mc_list; /* IP multicast filter chain */ + int mc_count; /* Number of installed mcasts */ spinlock_t mc_tomb_lock; struct ip_mc_list *mc_tomb; unsigned long mr_v1_seen; diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 1989cfd7405f..8945f9fb192a 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -141,7 +141,7 @@ struct inet_sock { nodefrag:1; int mc_index; __be32 mc_addr; - struct ip_mc_socklist *mc_list; + struct ip_mc_socklist __rcu *mc_list; struct { unsigned int flags; unsigned int fragsize; diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 08d0d81ffc15..6f49d6c087da 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -149,11 +149,17 @@ static void ip_mc_clear_src(struct ip_mc_list *pmc); static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode, int sfcount, __be32 *psfsrc, int delta); + +static void ip_mc_list_reclaim(struct rcu_head *head) +{ + kfree(container_of(head, struct ip_mc_list, rcu)); +} + static void ip_ma_put(struct ip_mc_list *im) { if (atomic_dec_and_test(&im->refcnt)) { in_dev_put(im->interface); - kfree(im); + call_rcu(&im->rcu, ip_mc_list_reclaim); } } @@ -163,7 +169,7 @@ static void ip_ma_put(struct ip_mc_list *im) * Timer management */ -static __inline__ void igmp_stop_timer(struct ip_mc_list *im) +static void igmp_stop_timer(struct ip_mc_list *im) { spin_lock_bh(&im->lock); if (del_timer(&im->timer)) @@ -496,14 +502,24 @@ empty_source: return skb; } 
+#define for_each_pmc_rcu(in_dev, pmc) \ + for (pmc = rcu_dereference(in_dev->mc_list); \ + pmc != NULL; \ + pmc = rcu_dereference(pmc->next_rcu)) + +#define for_each_pmc_rtnl(in_dev, pmc) \ + for (pmc = rtnl_dereference(in_dev->mc_list); \ + pmc != NULL; \ + pmc = rtnl_dereference(pmc->next_rcu)) + static int igmpv3_send_report(struct in_device *in_dev, struct ip_mc_list *pmc) { struct sk_buff *skb = NULL; int type; if (!pmc) { - read_lock(&in_dev->mc_list_lock); - for (pmc=in_dev->mc_list; pmc; pmc=pmc->next) { + rcu_read_lock(); + for_each_pmc_rcu(in_dev, pmc) { if (pmc->multiaddr == IGMP_ALL_HOSTS) continue; spin_lock_bh(&pmc->lock); @@ -514,7 +530,7 @@ static int igmpv3_send_report(struct in_device *in_dev, struct ip_mc_list *pmc) skb = add_grec(skb, pmc, type, 0, 0); spin_unlock_bh(&pmc->lock); } - read_unlock(&in_dev->mc_list_lock); + rcu_read_unlock(); } else { spin_lock_bh(&pmc->lock); if (pmc->sfcount[MCAST_EXCLUDE]) @@ -556,7 +572,7 @@ static void igmpv3_send_cr(struct in_device *in_dev) struct sk_buff *skb = NULL; int type, dtype; - read_lock(&in_dev->mc_list_lock); + rcu_read_lock(); spin_lock_bh(&in_dev->mc_tomb_lock); /* deleted MCA's */ @@ -593,7 +609,7 @@ static void igmpv3_send_cr(struct in_device *in_dev) spin_unlock_bh(&in_dev->mc_tomb_lock); /* change recs */ - for (pmc=in_dev->mc_list; pmc; pmc=pmc->next) { + for_each_pmc_rcu(in_dev, pmc) { spin_lock_bh(&pmc->lock); if (pmc->sfcount[MCAST_EXCLUDE]) { type = IGMPV3_BLOCK_OLD_SOURCES; @@ -616,7 +632,7 @@ static void igmpv3_send_cr(struct in_device *in_dev) } spin_unlock_bh(&pmc->lock); } - read_unlock(&in_dev->mc_list_lock); + rcu_read_unlock(); if (!skb) return; @@ -813,14 +829,14 @@ static void igmp_heard_report(struct in_device *in_dev, __be32 group) if (group == IGMP_ALL_HOSTS) return; - read_lock(&in_dev->mc_list_lock); - for (im=in_dev->mc_list; im!=NULL; im=im->next) { + rcu_read_lock(); + for_each_pmc_rcu(in_dev, im) { if (im->multiaddr == group) { igmp_stop_timer(im); break; } } - 
read_unlock(&in_dev->mc_list_lock); + rcu_read_unlock(); } static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, @@ -906,8 +922,8 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, * - Use the igmp->igmp_code field as the maximum * delay possible */ - read_lock(&in_dev->mc_list_lock); - for (im=in_dev->mc_list; im!=NULL; im=im->next) { + rcu_read_lock(); + for_each_pmc_rcu(in_dev, im) { int changed; if (group && group != im->multiaddr) @@ -925,7 +941,7 @@ static void igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb, if (changed) igmp_mod_timer(im, max_delay); } - read_unlock(&in_dev->mc_list_lock); + rcu_read_unlock(); } /* called in rcu_read_lock() section */ @@ -1110,8 +1126,8 @@ static void igmpv3_clear_delrec(struct in_device *in_dev) kfree(pmc); } /* clear dead sources, too */ - read_lock(&in_dev->mc_list_lock); - for (pmc=in_dev->mc_list; pmc; pmc=pmc->next) { + rcu_read_lock(); + for_each_pmc_rcu(in_dev, pmc) { struct ip_sf_list *psf, *psf_next; spin_lock_bh(&pmc->lock); @@ -1123,7 +1139,7 @@ static void igmpv3_clear_delrec(struct in_device *in_dev) kfree(psf); } } - read_unlock(&in_dev->mc_list_lock); + rcu_read_unlock(); } #endif @@ -1209,7 +1225,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr) ASSERT_RTNL(); - for (im=in_dev->mc_list; im; im=im->next) { + for_each_pmc_rtnl(in_dev, im) { if (im->multiaddr == addr) { im->users++; ip_mc_add_src(in_dev, &addr, MCAST_EXCLUDE, 0, NULL, 0); @@ -1217,7 +1233,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr) } } - im = kmalloc(sizeof(*im), GFP_KERNEL); + im = kzalloc(sizeof(*im), GFP_KERNEL); if (!im) goto out; @@ -1227,26 +1243,18 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr) im->multiaddr = addr; /* initial mode is (EX, empty) */ im->sfmode = MCAST_EXCLUDE; - im->sfcount[MCAST_INCLUDE] = 0; im->sfcount[MCAST_EXCLUDE] = 1; - im->sources = NULL; - im->tomb = NULL; - im->crcount = 0; 
atomic_set(&im->refcnt, 1); spin_lock_init(&im->lock); #ifdef CONFIG_IP_MULTICAST - im->tm_running = 0; setup_timer(&im->timer, &igmp_timer_expire, (unsigned long)im); im->unsolicit_count = IGMP_Unsolicited_Report_Count; - im->reporter = 0; - im->gsquery = 0; #endif - im->loaded = 0; - write_lock_bh(&in_dev->mc_list_lock); - im->next = in_dev->mc_list; - in_dev->mc_list = im; + + im->next_rcu = in_dev->mc_list; in_dev->mc_count++; - write_unlock_bh(&in_dev->mc_list_lock); + rcu_assign_pointer(in_dev->mc_list, im); + #ifdef CONFIG_IP_MULTICAST igmpv3_del_delrec(in_dev, im->multiaddr); #endif @@ -1287,17 +1295,18 @@ EXPORT_SYMBOL(ip_mc_rejoin_group); void ip_mc_dec_group(struct in_device *in_dev, __be32 addr) { - struct ip_mc_list *i, **ip; + struct ip_mc_list *i; + struct ip_mc_list __rcu **ip; ASSERT_RTNL(); - for (ip=&in_dev->mc_list; (i=*ip)!=NULL; ip=&i->next) { + for (ip = &in_dev->mc_list; + (i = rtnl_dereference(*ip)) != NULL; + ip = &i->next_rcu) { if (i->multiaddr == addr) { if (--i->users == 0) { - write_lock_bh(&in_dev->mc_list_lock); - *ip = i->next; + *ip = i->next_rcu; in_dev->mc_count--; - write_unlock_bh(&in_dev->mc_list_lock); igmp_group_dropped(i); if (!in_dev->dead) @@ -1316,34 +1325,34 @@ EXPORT_SYMBOL(ip_mc_dec_group); void ip_mc_unmap(struct in_device *in_dev) { - struct ip_mc_list *i; + struct ip_mc_list *pmc; ASSERT_RTNL(); - for (i = in_dev->mc_list; i; i = i->next) - igmp_group_dropped(i); + for_each_pmc_rtnl(in_dev, pmc) + igmp_group_dropped(pmc); } void ip_mc_remap(struct in_device *in_dev) { - struct ip_mc_list *i; + struct ip_mc_list *pmc; ASSERT_RTNL(); - for (i = in_dev->mc_list; i; i = i->next) - igmp_group_added(i); + for_each_pmc_rtnl(in_dev, pmc) + igmp_group_added(pmc); } /* Device going down */ void ip_mc_down(struct in_device *in_dev) { - struct ip_mc_list *i; + struct ip_mc_list *pmc; ASSERT_RTNL(); - for (i=in_dev->mc_list; i; i=i->next) - igmp_group_dropped(i); + for_each_pmc_rtnl(in_dev, pmc) + igmp_group_dropped(pmc); 
#ifdef CONFIG_IP_MULTICAST in_dev->mr_ifc_count = 0; @@ -1374,7 +1383,6 @@ void ip_mc_init_dev(struct in_device *in_dev) in_dev->mr_qrv = IGMP_Unsolicited_Report_Count; #endif - rwlock_init(&in_dev->mc_list_lock); spin_lock_init(&in_dev->mc_tomb_lock); } @@ -1382,14 +1390,14 @@ void ip_mc_init_dev(struct in_device *in_dev) void ip_mc_up(struct in_device *in_dev) { - struct ip_mc_list *i; + struct ip_mc_list *pmc; ASSERT_RTNL(); ip_mc_inc_group(in_dev, IGMP_ALL_HOSTS); - for (i=in_dev->mc_list; i; i=i->next) - igmp_group_added(i); + for_each_pmc_rtnl(in_dev, pmc) + igmp_group_added(pmc); } /* @@ -1405,17 +1413,13 @@ void ip_mc_destroy_dev(struct in_device *in_dev) /* Deactivate timers */ ip_mc_down(in_dev); - write_lock_bh(&in_dev->mc_list_lock); - while ((i = in_dev->mc_list) != NULL) { - in_dev->mc_list = i->next; + while ((i = rtnl_dereference(in_dev->mc_list)) != NULL) { + in_dev->mc_list = i->next_rcu; in_dev->mc_count--; - write_unlock_bh(&in_dev->mc_list_lock); + igmp_group_dropped(i); ip_ma_put(i); - - write_lock_bh(&in_dev->mc_list_lock); } - write_unlock_bh(&in_dev->mc_list_lock); } /* RTNL is locked */ @@ -1513,18 +1517,18 @@ static int ip_mc_del_src(struct in_device *in_dev, __be32 *pmca, int sfmode, if (!in_dev) return -ENODEV; - read_lock(&in_dev->mc_list_lock); - for (pmc=in_dev->mc_list; pmc; pmc=pmc->next) { + rcu_read_lock(); + for_each_pmc_rcu(in_dev, pmc) { if (*pmca == pmc->multiaddr) break; } if (!pmc) { /* MCA not found?? 
bug */ - read_unlock(&in_dev->mc_list_lock); + rcu_read_unlock(); return -ESRCH; } spin_lock_bh(&pmc->lock); - read_unlock(&in_dev->mc_list_lock); + rcu_read_unlock(); #ifdef CONFIG_IP_MULTICAST sf_markstate(pmc); #endif @@ -1685,18 +1689,18 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode, if (!in_dev) return -ENODEV; - read_lock(&in_dev->mc_list_lock); - for (pmc=in_dev->mc_list; pmc; pmc=pmc->next) { + rcu_read_lock(); + for_each_pmc_rcu(in_dev, pmc) { if (*pmca == pmc->multiaddr) break; } if (!pmc) { /* MCA not found?? bug */ - read_unlock(&in_dev->mc_list_lock); + rcu_read_unlock(); return -ESRCH; } spin_lock_bh(&pmc->lock); - read_unlock(&in_dev->mc_list_lock); + rcu_read_unlock(); #ifdef CONFIG_IP_MULTICAST sf_markstate(pmc); @@ -1793,7 +1797,7 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr) err = -EADDRINUSE; ifindex = imr->imr_ifindex; - for (i = inet->mc_list; i; i = i->next) { + for_each_pmc_rtnl(inet, i) { if (i->multi.imr_multiaddr.s_addr == addr && i->multi.imr_ifindex == ifindex) goto done; @@ -1807,7 +1811,7 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr) goto done; memcpy(&iml->multi, imr, sizeof(*imr)); - iml->next = inet->mc_list; + iml->next_rcu = inet->mc_list; iml->sflist = NULL; iml->sfmode = MCAST_EXCLUDE; rcu_assign_pointer(inet->mc_list, iml); @@ -1821,17 +1825,14 @@ EXPORT_SYMBOL(ip_mc_join_group); static void ip_sf_socklist_reclaim(struct rcu_head *rp) { - struct ip_sf_socklist *psf; - - psf = container_of(rp, struct ip_sf_socklist, rcu); + kfree(container_of(rp, struct ip_sf_socklist, rcu)); /* sk_omem_alloc should have been decreased by the caller*/ - kfree(psf); } static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml, struct in_device *in_dev) { - struct ip_sf_socklist *psf = iml->sflist; + struct ip_sf_socklist *psf = rtnl_dereference(iml->sflist); int err; if (psf == NULL) { @@ -1851,11 +1852,8 @@ static int ip_mc_leave_src(struct sock *sk, struct 
ip_mc_socklist *iml, static void ip_mc_socklist_reclaim(struct rcu_head *rp) { - struct ip_mc_socklist *iml; - - iml = container_of(rp, struct ip_mc_socklist, rcu); + kfree(container_of(rp, struct ip_mc_socklist, rcu)); /* sk_omem_alloc should have been decreased by the caller*/ - kfree(iml); } @@ -1866,7 +1864,8 @@ static void ip_mc_socklist_reclaim(struct rcu_head *rp) int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr) { struct inet_sock *inet = inet_sk(sk); - struct ip_mc_socklist *iml, **imlp; + struct ip_mc_socklist *iml; + struct ip_mc_socklist __rcu **imlp; struct in_device *in_dev; struct net *net = sock_net(sk); __be32 group = imr->imr_multiaddr.s_addr; @@ -1876,7 +1875,9 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr) rtnl_lock(); in_dev = ip_mc_find_dev(net, imr); ifindex = imr->imr_ifindex; - for (imlp = &inet->mc_list; (iml = *imlp) != NULL; imlp = &iml->next) { + for (imlp = &inet->mc_list; + (iml = rtnl_dereference(*imlp)) != NULL; + imlp = &iml->next_rcu) { if (iml->multi.imr_multiaddr.s_addr != group) continue; if (ifindex) { @@ -1888,7 +1889,7 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr) (void) ip_mc_leave_src(sk, iml, in_dev); - rcu_assign_pointer(*imlp, iml->next); + *imlp = iml->next_rcu; if (in_dev) ip_mc_dec_group(in_dev, group); @@ -1934,7 +1935,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct } err = -EADDRNOTAVAIL; - for (pmc=inet->mc_list; pmc; pmc=pmc->next) { + for_each_pmc_rtnl(inet, pmc) { if ((pmc->multi.imr_multiaddr.s_addr == imr.imr_multiaddr.s_addr) && (pmc->multi.imr_ifindex == imr.imr_ifindex)) @@ -1958,7 +1959,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct pmc->sfmode = omode; } - psl = pmc->sflist; + psl = rtnl_dereference(pmc->sflist); if (!add) { if (!psl) goto done; /* err = -EADDRNOTAVAIL */ @@ -2077,7 +2078,7 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex) goto done; } - for (pmc=inet->mc_list; pmc; 
pmc=pmc->next) { + for_each_pmc_rtnl(inet, pmc) { if (pmc->multi.imr_multiaddr.s_addr == msf->imsf_multiaddr && pmc->multi.imr_ifindex == imr.imr_ifindex) break; @@ -2107,7 +2108,7 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex) (void) ip_mc_add_src(in_dev, &msf->imsf_multiaddr, msf->imsf_fmode, 0, NULL, 0); } - psl = pmc->sflist; + psl = rtnl_dereference(pmc->sflist); if (psl) { (void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode, psl->sl_count, psl->sl_addr, 0); @@ -2155,7 +2156,7 @@ int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf, } err = -EADDRNOTAVAIL; - for (pmc=inet->mc_list; pmc; pmc=pmc->next) { + for_each_pmc_rtnl(inet, pmc) { if (pmc->multi.imr_multiaddr.s_addr == msf->imsf_multiaddr && pmc->multi.imr_ifindex == imr.imr_ifindex) break; @@ -2163,7 +2164,7 @@ int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf, if (!pmc) /* must have a prior join */ goto done; msf->imsf_fmode = pmc->sfmode; - psl = pmc->sflist; + psl = rtnl_dereference(pmc->sflist); rtnl_unlock(); if (!psl) { len = 0; @@ -2208,7 +2209,7 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf, err = -EADDRNOTAVAIL; - for (pmc=inet->mc_list; pmc; pmc=pmc->next) { + for_each_pmc_rtnl(inet, pmc) { if (pmc->multi.imr_multiaddr.s_addr == addr && pmc->multi.imr_ifindex == gsf->gf_interface) break; @@ -2216,7 +2217,7 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf, if (!pmc) /* must have a prior join */ goto done; gsf->gf_fmode = pmc->sfmode; - psl = pmc->sflist; + psl = rtnl_dereference(pmc->sflist); rtnl_unlock(); count = psl ? psl->sl_count : 0; copycount = count < gsf->gf_numsrc ? 
count : gsf->gf_numsrc; @@ -2257,7 +2258,7 @@ int ip_mc_sf_allow(struct sock *sk, __be32 loc_addr, __be32 rmt_addr, int dif) goto out; rcu_read_lock(); - for (pmc=rcu_dereference(inet->mc_list); pmc; pmc=rcu_dereference(pmc->next)) { + for_each_pmc_rcu(inet, pmc) { if (pmc->multi.imr_multiaddr.s_addr == loc_addr && pmc->multi.imr_ifindex == dif) break; @@ -2265,7 +2266,7 @@ int ip_mc_sf_allow(struct sock *sk, __be32 loc_addr, __be32 rmt_addr, int dif) ret = inet->mc_all; if (!pmc) goto unlock; - psl = pmc->sflist; + psl = rcu_dereference(pmc->sflist); ret = (pmc->sfmode == MCAST_EXCLUDE); if (!psl) goto unlock; @@ -2300,10 +2301,10 @@ void ip_mc_drop_socket(struct sock *sk) return; rtnl_lock(); - while ((iml = inet->mc_list) != NULL) { + while ((iml = rtnl_dereference(inet->mc_list)) != NULL) { struct in_device *in_dev; - rcu_assign_pointer(inet->mc_list, iml->next); + inet->mc_list = iml->next_rcu; in_dev = inetdev_by_index(net, iml->multi.imr_ifindex); (void) ip_mc_leave_src(sk, iml, in_dev); if (in_dev != NULL) { @@ -2323,8 +2324,8 @@ int ip_check_mc(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u16 p struct ip_sf_list *psf; int rv = 0; - read_lock(&in_dev->mc_list_lock); - for (im=in_dev->mc_list; im; im=im->next) { + rcu_read_lock(); + for_each_pmc_rcu(in_dev, im) { if (im->multiaddr == mc_addr) break; } @@ -2345,7 +2346,7 @@ int ip_check_mc(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u16 p } else rv = 1; /* unspecified source; tentatively allow */ } - read_unlock(&in_dev->mc_list_lock); + rcu_read_unlock(); return rv; } @@ -2371,13 +2372,11 @@ static inline struct ip_mc_list *igmp_mc_get_first(struct seq_file *seq) in_dev = __in_dev_get_rcu(state->dev); if (!in_dev) continue; - read_lock(&in_dev->mc_list_lock); - im = in_dev->mc_list; + im = rcu_dereference(in_dev->mc_list); if (im) { state->in_dev = in_dev; break; } - read_unlock(&in_dev->mc_list_lock); } return im; } @@ -2385,11 +2384,9 @@ static inline struct ip_mc_list 
*igmp_mc_get_first(struct seq_file *seq) static struct ip_mc_list *igmp_mc_get_next(struct seq_file *seq, struct ip_mc_list *im) { struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq); - im = im->next; - while (!im) { - if (likely(state->in_dev != NULL)) - read_unlock(&state->in_dev->mc_list_lock); + im = rcu_dereference(im->next_rcu); + while (!im) { state->dev = next_net_device_rcu(state->dev); if (!state->dev) { state->in_dev = NULL; @@ -2398,8 +2395,7 @@ static struct ip_mc_list *igmp_mc_get_next(struct seq_file *seq, struct ip_mc_li state->in_dev = __in_dev_get_rcu(state->dev); if (!state->in_dev) continue; - read_lock(&state->in_dev->mc_list_lock); - im = state->in_dev->mc_list; + im = rcu_dereference(state->in_dev->mc_list); } return im; } @@ -2435,10 +2431,8 @@ static void igmp_mc_seq_stop(struct seq_file *seq, void *v) __releases(rcu) { struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq); - if (likely(state->in_dev != NULL)) { - read_unlock(&state->in_dev->mc_list_lock); - state->in_dev = NULL; - } + + state->in_dev = NULL; state->dev = NULL; rcu_read_unlock(); } @@ -2460,7 +2454,7 @@ static int igmp_mc_seq_show(struct seq_file *seq, void *v) querier = "NONE"; #endif - if (state->in_dev->mc_list == im) { + if (rcu_dereference(state->in_dev->mc_list) == im) { seq_printf(seq, "%d\t%-10s: %5d %7s\n", state->dev->ifindex, state->dev->name, state->in_dev->mc_count, querier); } @@ -2519,8 +2513,7 @@ static inline struct ip_sf_list *igmp_mcf_get_first(struct seq_file *seq) idev = __in_dev_get_rcu(state->dev); if (unlikely(idev == NULL)) continue; - read_lock(&idev->mc_list_lock); - im = idev->mc_list; + im = rcu_dereference(idev->mc_list); if (likely(im != NULL)) { spin_lock_bh(&im->lock); psf = im->sources; @@ -2531,7 +2524,6 @@ static inline struct ip_sf_list *igmp_mcf_get_first(struct seq_file *seq) } spin_unlock_bh(&im->lock); } - read_unlock(&idev->mc_list_lock); } return psf; } @@ -2545,9 +2537,6 @@ static struct ip_sf_list 
*igmp_mcf_get_next(struct seq_file *seq, struct ip_sf_l spin_unlock_bh(&state->im->lock); state->im = state->im->next; while (!state->im) { - if (likely(state->idev != NULL)) - read_unlock(&state->idev->mc_list_lock); - state->dev = next_net_device_rcu(state->dev); if (!state->dev) { state->idev = NULL; @@ -2556,8 +2545,7 @@ static struct ip_sf_list *igmp_mcf_get_next(struct seq_file *seq, struct ip_sf_l state->idev = __in_dev_get_rcu(state->dev); if (!state->idev) continue; - read_lock(&state->idev->mc_list_lock); - state->im = state->idev->mc_list; + state->im = rcu_dereference(state->idev->mc_list); } if (!state->im) break; @@ -2603,10 +2591,7 @@ static void igmp_mcf_seq_stop(struct seq_file *seq, void *v) spin_unlock_bh(&state->im->lock); state->im = NULL; } - if (likely(state->idev != NULL)) { - read_unlock(&state->idev->mc_list_lock); - state->idev = NULL; - } + state->idev = NULL; state->dev = NULL; rcu_read_unlock(); } -- cgit v1.2.3-59-g8ed1b From 749b527b21465fb079796c03ffb4302584dc31c1 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Wed, 20 Oct 2010 10:18:54 -0700 Subject: cfg80211: fix allowing country IEs for WIPHY_FLAG_STRICT_REGULATORY We should be enabling country IE hints for WIPHY_FLAG_STRICT_REGULATORY even if we haven't yet received regulatory domain hint for the driver if it needed one. Without this Country IEs are not passed on to drivers that have set WIPHY_FLAG_STRICT_REGULATORY, today this is just all Atheros chipset drivers: ath5k, ath9k, ar9170, carl9170. This was part of the original design, however it was completely overlooked... Cc: Easwar Krishnan Cc: stable@kernel.org Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. 
Linville --- include/net/cfg80211.h | 15 ++++++++------- net/wireless/reg.c | 1 + 2 files changed, 9 insertions(+), 7 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 2a7936d7851d..e5702f5ac57c 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1321,13 +1321,14 @@ struct cfg80211_ops { * initiator is %REGDOM_SET_BY_CORE). * @WIPHY_FLAG_STRICT_REGULATORY: tells us the driver for this device will * ignore regulatory domain settings until it gets its own regulatory - * domain via its regulatory_hint(). After its gets its own regulatory - * domain it will only allow further regulatory domain settings to - * further enhance compliance. For example if channel 13 and 14 are - * disabled by this regulatory domain no user regulatory domain can - * enable these channels at a later time. This can be used for devices - * which do not have calibration information gauranteed for frequencies - * or settings outside of its regulatory domain. + * domain via its regulatory_hint() unless the regulatory hint is + * from a country IE. After its gets its own regulatory domain it will + * only allow further regulatory domain settings to further enhance + * compliance. For example if channel 13 and 14 are disabled by this + * regulatory domain no user regulatory domain can enable these channels + * at a later time. This can be used for devices which do not have + * calibration information guaranteed for frequencies or settings + * outside of its regulatory domain. * @WIPHY_FLAG_DISABLE_BEACON_HINTS: enable this if your driver needs to ensure * that passive scan flags and beaconing flags may not be lifted by * cfg80211 due to regulatory beacon hints. 
For more information on beacon diff --git a/net/wireless/reg.c b/net/wireless/reg.c index b64596fe7363..1bc8131a5185 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -813,6 +813,7 @@ static bool ignore_reg_update(struct wiphy *wiphy, * desired regulatory domain set */ if (wiphy->flags & WIPHY_FLAG_STRICT_REGULATORY && !wiphy->regd && + initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE && !is_world_regdom(last_request->alpha2)) return true; return false; -- cgit v1.2.3-59-g8ed1b From cc9ff19da9bf76a2f70bcb80225a1c587c162e52 Mon Sep 17 00:00:00 2001 From: Timo Teräs Date: Wed, 3 Nov 2010 04:41:38 +0000 Subject: xfrm: use gre key as flow upper protocol info MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The GRE Key field is intended to be used for identifying an individual traffic flow within a tunnel. It is useful to be able to have XFRM policy selector matches to have different policies for different GRE tunnels. Signed-off-by: Timo Teräs Signed-off-by: David S. 
Miller --- include/net/flow.h | 2 ++ include/net/xfrm.h | 6 ++++++ net/ipv4/ip_gre.c | 12 +++++++----- net/ipv4/xfrm4_policy.c | 15 +++++++++++++++ 4 files changed, 30 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/flow.h b/include/net/flow.h index 0ac3fb5e0973..7196e6864b8d 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -67,6 +67,7 @@ struct flowi { } dnports; __be32 spi; + __be32 gre_key; struct { __u8 type; @@ -78,6 +79,7 @@ struct flowi { #define fl_icmp_code uli_u.icmpt.code #define fl_ipsec_spi uli_u.spi #define fl_mh_type uli_u.mht.type +#define fl_gre_key uli_u.gre_key __u32 secid; /* used by xfrm; see secid.txt */ } __attribute__((__aligned__(BITS_PER_LONG/8))); diff --git a/include/net/xfrm.h b/include/net/xfrm.h index bcfb6b24b019..54b283229488 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -805,6 +805,9 @@ __be16 xfrm_flowi_sport(struct flowi *fl) case IPPROTO_MH: port = htons(fl->fl_mh_type); break; + case IPPROTO_GRE: + port = htonl(fl->fl_gre_key) >> 16; + break; default: port = 0; /*XXX*/ } @@ -826,6 +829,9 @@ __be16 xfrm_flowi_dport(struct flowi *fl) case IPPROTO_ICMPV6: port = htons(fl->fl_icmp_code); break; + case IPPROTO_GRE: + port = htonl(fl->fl_gre_key) & 0xffff; + break; default: port = 0; /*XXX*/ } diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index cab2057d5430..aace653710f6 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -779,9 +779,9 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev .tos = RT_TOS(tos) } }, - .proto = IPPROTO_GRE - } -; + .proto = IPPROTO_GRE, + .fl_gre_key = tunnel->parms.o_key + }; if (ip_route_output_key(dev_net(dev), &rt, &fl)) { dev->stats.tx_carrier_errors++; goto tx_error; @@ -958,7 +958,8 @@ static int ipgre_tunnel_bind_dev(struct net_device *dev) .tos = RT_TOS(iph->tos) } }, - .proto = IPPROTO_GRE + .proto = IPPROTO_GRE, + .fl_gre_key = tunnel->parms.o_key }; struct rtable *rt; @@ -1223,7 +1224,8 @@ 
static int ipgre_open(struct net_device *dev) .tos = RT_TOS(t->parms.iph.tos) } }, - .proto = IPPROTO_GRE + .proto = IPPROTO_GRE, + .fl_gre_key = t->parms.o_key }; struct rtable *rt; diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index dd1fd8c473fc..4a8c5335770c 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -154,6 +155,20 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) fl->fl_ipsec_spi = htonl(ntohs(ipcomp_hdr[1])); } break; + + case IPPROTO_GRE: + if (pskb_may_pull(skb, xprth + 12 - skb->data)) { + __be16 *greflags = (__be16 *)xprth; + __be32 *gre_hdr = (__be32 *)xprth; + + if (greflags[0] & GRE_KEY) { + if (greflags[0] & GRE_CSUM) + gre_hdr++; + fl->fl_gre_key = gre_hdr[1]; + } + } + break; + default: fl->fl_ipsec_spi = 0; break; -- cgit v1.2.3-59-g8ed1b From d577f1ccdd8ae8bfbe6063eb2ba2a350259e9031 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Sun, 14 Nov 2010 17:04:58 +0000 Subject: include/net/caif/cfctrl.h: Remove unnecessary semicolons Signed-off-by: Joe Perches Acked-by: Sjur Braendeland Signed-off-by: David S. Miller --- include/net/caif/cfctrl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/caif/cfctrl.h b/include/net/caif/cfctrl.h index 9402543fc20d..e54f6396fa4c 100644 --- a/include/net/caif/cfctrl.h +++ b/include/net/caif/cfctrl.h @@ -51,7 +51,7 @@ struct cfctrl_rsp { void (*restart_rsp)(void); void (*radioset_rsp)(void); void (*reject_rsp)(struct cflayer *layer, u8 linkid, - struct cflayer *client_layer);; + struct cflayer *client_layer); }; /* Link Setup Parameters for CAIF-Links. 
*/ -- cgit v1.2.3-59-g8ed1b From 62370e2b9376ea7b76e0423de28ccb322c17e2da Mon Sep 17 00:00:00 2001 From: Arnd Hannemann Date: Thu, 11 Nov 2010 11:44:32 -0600 Subject: b43legacy: Fix compile on ARM architecture When b43legacy is compiled on the arm platform, the following errors are seen: CC [M] drivers/net/wireless/b43legacy/xmit.o In file included from include/net/dst.h:11, from drivers/net/wireless/b43legacy/xmit.c:31: include/net/dst_ops.h:28: error: expected ':', ',', ';', '}' or '__attribute__' before '____cacheline_aligned_in_smp' include/net/dst_ops.h: In function 'dst_entries_get_fast': include/net/dst_ops.h:33: error: 'struct dst_ops' has no member named 'pcpuc_entries' include/net/dst_ops.h: In function 'dst_entries_get_slow': include/net/dst_ops.h:41: error: 'struct dst_ops' has no member named 'pcpuc_entries' include/net/dst_ops.h: In function 'dst_entries_add': include/net/dst_ops.h:49: error: 'struct dst_ops' has no member named 'pcpuc_entries' include/net/dst_ops.h: In function 'dst_entries_init': include/net/dst_ops.h:55: error: 'struct dst_ops' has no member named 'pcpuc_entries' include/net/dst_ops.h: In function 'dst_entries_destroy': include/net/dst_ops.h:60: error: 'struct dst_ops' has no member named 'pcpuc_entries' make[4]: *** [drivers/net/wireless/b43legacy/xmit.o] Error 1 make[3]: *** [drivers/net/wireless/b43legacy] Error 2 make[2]: *** [drivers/net/wireless] Error 2 make[1]: *** [drivers/net] Error 2 make: *** [drivers] Error 2 The cause is a missing include of , which is present for i386 and x86_64 architectures, but not for arm. Signed-off-by: Arnd Hannemann Signed-off-by: Larry Finger Cc: Stable Signed-off-by: John W. 
Linville --- include/net/dst_ops.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h index 1fa5306e3e23..51665b3461b8 100644 --- a/include/net/dst_ops.h +++ b/include/net/dst_ops.h @@ -2,6 +2,7 @@ #define _NET_DST_OPS_H #include #include +#include struct dst_entry; struct kmem_cachep; -- cgit v1.2.3-59-g8ed1b From 309075cf08ed92a7d2c0e22b7653c5daabbd7ad1 Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Fri, 12 Nov 2010 08:53:56 +0200 Subject: cfg80211: fix WIPHY_FLAG_IBSS_RSN bit WIPHY_FLAG_IBSS_RSN is BIT(7) as is WIPHY_FLAG_CONTROL_PORT_PROTOCOL. Change to BIT(8). Signed-off-by: Jussi Kivilinna Acked-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/cfg80211.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 2a7936d7851d..97b8b7c9b63c 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1355,7 +1355,7 @@ enum wiphy_flags { WIPHY_FLAG_4ADDR_AP = BIT(5), WIPHY_FLAG_4ADDR_STATION = BIT(6), WIPHY_FLAG_CONTROL_PORT_PROTOCOL = BIT(7), - WIPHY_FLAG_IBSS_RSN = BIT(7), + WIPHY_FLAG_IBSS_RSN = BIT(8), }; struct mac_address { -- cgit v1.2.3-59-g8ed1b From 3654654f7aa79a37dde130afb7409c55b11807e7 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Tue, 16 Nov 2010 09:52:32 -0800 Subject: netlink: let nlmsg and nla functions take pointer-to-const args The changed functions do not modify the NL messages and/or attributes at all. They should use const (similar to strchr), so that callers which have a const nlmsg/nlattr around can make use of them without casting. While at it, constify a data array. Signed-off-by: Jan Engelhardt Signed-off-by: David S. 
Miller --- include/net/netlink.h | 21 +++++++++++++-------- lib/nlattr.c | 22 +++++++++++----------- 2 files changed, 24 insertions(+), 19 deletions(-) (limited to 'include/net') diff --git a/include/net/netlink.h b/include/net/netlink.h index 9801c55de5d6..373f1a900cf4 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -225,13 +225,15 @@ extern int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 pid, unsigned int group, int report, gfp_t flags); -extern int nla_validate(struct nlattr *head, int len, int maxtype, +extern int nla_validate(const struct nlattr *head, + int len, int maxtype, const struct nla_policy *policy); -extern int nla_parse(struct nlattr *tb[], int maxtype, - struct nlattr *head, int len, +extern int nla_parse(struct nlattr **tb, int maxtype, + const struct nlattr *head, int len, const struct nla_policy *policy); extern int nla_policy_len(const struct nla_policy *, int); -extern struct nlattr * nla_find(struct nlattr *head, int len, int attrtype); +extern struct nlattr * nla_find(const struct nlattr *head, + int len, int attrtype); extern size_t nla_strlcpy(char *dst, const struct nlattr *nla, size_t dstsize); extern int nla_memcpy(void *dest, const struct nlattr *src, int count); @@ -346,7 +348,8 @@ static inline int nlmsg_ok(const struct nlmsghdr *nlh, int remaining) * Returns the next netlink message in the message stream and * decrements remaining by the size of the current message. 
*/ -static inline struct nlmsghdr *nlmsg_next(struct nlmsghdr *nlh, int *remaining) +static inline struct nlmsghdr * +nlmsg_next(const struct nlmsghdr *nlh, int *remaining) { int totlen = NLMSG_ALIGN(nlh->nlmsg_len); @@ -398,7 +401,8 @@ static inline struct nlattr *nlmsg_find_attr(const struct nlmsghdr *nlh, * @maxtype: maximum attribute type to be expected * @policy: validation policy */ -static inline int nlmsg_validate(struct nlmsghdr *nlh, int hdrlen, int maxtype, +static inline int nlmsg_validate(const struct nlmsghdr *nlh, + int hdrlen, int maxtype, const struct nla_policy *policy) { if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen)) @@ -727,7 +731,8 @@ static inline struct nlattr *nla_next(const struct nlattr *nla, int *remaining) * * Returns the first attribute which matches the specified type. */ -static inline struct nlattr *nla_find_nested(struct nlattr *nla, int attrtype) +static inline struct nlattr * +nla_find_nested(const struct nlattr *nla, int attrtype) { return nla_find(nla_data(nla), nla_len(nla), attrtype); } @@ -1032,7 +1037,7 @@ static inline void nla_nest_cancel(struct sk_buff *skb, struct nlattr *start) * * Returns 0 on success or a negative error code. 
*/ -static inline int nla_validate_nested(struct nlattr *start, int maxtype, +static inline int nla_validate_nested(const struct nlattr *start, int maxtype, const struct nla_policy *policy) { return nla_validate(nla_data(start), nla_len(start), maxtype, policy); diff --git a/lib/nlattr.c b/lib/nlattr.c index c4706eb98d3d..00e8a02681a6 100644 --- a/lib/nlattr.c +++ b/lib/nlattr.c @@ -15,7 +15,7 @@ #include #include -static u16 nla_attr_minlen[NLA_TYPE_MAX+1] __read_mostly = { +static const u16 nla_attr_minlen[NLA_TYPE_MAX+1] = { [NLA_U8] = sizeof(u8), [NLA_U16] = sizeof(u16), [NLA_U32] = sizeof(u32), @@ -23,7 +23,7 @@ static u16 nla_attr_minlen[NLA_TYPE_MAX+1] __read_mostly = { [NLA_NESTED] = NLA_HDRLEN, }; -static int validate_nla(struct nlattr *nla, int maxtype, +static int validate_nla(const struct nlattr *nla, int maxtype, const struct nla_policy *policy) { const struct nla_policy *pt; @@ -115,10 +115,10 @@ static int validate_nla(struct nlattr *nla, int maxtype, * * Returns 0 on success or a negative error code. */ -int nla_validate(struct nlattr *head, int len, int maxtype, +int nla_validate(const struct nlattr *head, int len, int maxtype, const struct nla_policy *policy) { - struct nlattr *nla; + const struct nlattr *nla; int rem, err; nla_for_each_attr(nla, head, len, rem) { @@ -173,10 +173,10 @@ nla_policy_len(const struct nla_policy *p, int n) * * Returns 0 on success or a negative error code. 
*/ -int nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head, int len, - const struct nla_policy *policy) +int nla_parse(struct nlattr **tb, int maxtype, const struct nlattr *head, + int len, const struct nla_policy *policy) { - struct nlattr *nla; + const struct nlattr *nla; int rem, err; memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1)); @@ -191,7 +191,7 @@ int nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head, int len, goto errout; } - tb[type] = nla; + tb[type] = (struct nlattr *)nla; } } @@ -212,14 +212,14 @@ errout: * * Returns the first attribute in the stream matching the specified type. */ -struct nlattr *nla_find(struct nlattr *head, int len, int attrtype) +struct nlattr *nla_find(const struct nlattr *head, int len, int attrtype) { - struct nlattr *nla; + const struct nlattr *nla; int rem; nla_for_each_attr(nla, head, len, rem) if (nla_type(nla) == attrtype) - return nla; + return (struct nlattr *)nla; return NULL; } -- cgit v1.2.3-59-g8ed1b From c31504dc0d1dc853dcee509d9999169a9097a717 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 15 Nov 2010 19:58:26 +0000 Subject: udp: use atomic_inc_not_zero_hint UDP sockets refcount is usually 2, unless an incoming frame is going to be queued in receive or backlog queue. Using atomic_inc_not_zero_hint() permits to reduce latency, because processor issues less memory transactions. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/net/sock.h | 2 +- net/ipv4/udp.c | 4 ++-- net/ipv6/udp.c | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index a6338d039857..eb0c1f504678 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -57,7 +57,7 @@ #include #include -#include +#include #include #include diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 5e0a3a582a59..491ecd3f7a01 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -430,7 +430,7 @@ begin: if (result) { exact_match: - if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) + if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2))) result = NULL; else if (unlikely(compute_score2(result, net, saddr, sport, daddr, hnum, dif) < badness)) { @@ -500,7 +500,7 @@ begin: goto begin; if (result) { - if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) + if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2))) result = NULL; else if (unlikely(compute_score(result, net, saddr, hnum, sport, daddr, dport, dif) < badness)) { diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 91def93bec85..b541a4e009fb 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -227,7 +227,7 @@ begin: if (result) { exact_match: - if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) + if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2))) result = NULL; else if (unlikely(compute_score2(result, net, saddr, sport, daddr, hnum, dif) < badness)) { @@ -294,7 +294,7 @@ begin: goto begin; if (result) { - if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) + if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2))) result = NULL; else if (unlikely(compute_score(result, net, hnum, saddr, sport, daddr, dport, dif) < badness)) { -- cgit v1.2.3-59-g8ed1b From b178bb3dfc30d9555bdd2401e95af98e23e83e10 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 16 Nov 2010 05:56:04 +0000 Subject: net: reorder struct sock fields Right now, fields in 
struct sock are not optimally ordered, because each path (RX softirq, TX completion, RX user, TX user) has to touch fields that are contained in many different cache lines. The really critical thing is to shrink number of cache lines that are used at RX softirq time : CPU handling softirqs for a device can receive many frames per second for many sockets. If load is too big, we can drop frames at NIC level. RPS or multiqueue cards can help, but better reduce latency if possible. This patch starts with UDP protocol, then additional patches will try to reduce latencies of other ones as well. At RX softirq time, fields of interest for UDP protocol are : (not counting ones in inet struct for the lookup) Read/Written: sk_refcnt (atomic increment/decrement) sk_rmem_alloc & sk_backlog.len (to check if there is room in queues) sk_receive_queue sk_backlog (if socket locked by user program) sk_rxhash sk_forward_alloc sk_drops Read only: sk_rcvbuf (sk_rcvqueues_full()) sk_filter sk_wq sk_policy[0] sk_flags Additional notes : - sk_backlog has one hole on 64bit arches. We can fill it to save 8 bytes. - sk_backlog is used only if RX sofirq handler finds the socket while locked by user. - sk_rxhash is written only once per flow. - sk_drops is written only if queues are full Final layout : [1] One section grouping all read/write fields, but placing rxhash and sk_backlog at the end of this section. 
[2] One section grouping all read fields in RX handler (sk_filter, sk_rcv_buf, sk_wq) [3] Section used by other paths I'll post a patch on its own to put sk_refcnt at the end of struct sock_common so that it shares same cache line than section [1] New offsets on 64bit arch : sizeof(struct sock)=0x268 offsetof(struct sock, sk_refcnt) =0x10 offsetof(struct sock, sk_lock) =0x48 offsetof(struct sock, sk_receive_queue)=0x68 offsetof(struct sock, sk_backlog)=0x80 offsetof(struct sock, sk_rmem_alloc)=0x80 offsetof(struct sock, sk_forward_alloc)=0x98 offsetof(struct sock, sk_rxhash)=0x9c offsetof(struct sock, sk_rcvbuf)=0xa4 offsetof(struct sock, sk_drops) =0xa0 offsetof(struct sock, sk_filter)=0xa8 offsetof(struct sock, sk_wq)=0xb0 offsetof(struct sock, sk_policy)=0xd0 offsetof(struct sock, sk_flags) =0xe0 Instead of : sizeof(struct sock)=0x270 offsetof(struct sock, sk_refcnt) =0x10 offsetof(struct sock, sk_lock) =0x50 offsetof(struct sock, sk_receive_queue)=0xc0 offsetof(struct sock, sk_backlog)=0x70 offsetof(struct sock, sk_rmem_alloc)=0xac offsetof(struct sock, sk_forward_alloc)=0x10c offsetof(struct sock, sk_rxhash)=0x128 offsetof(struct sock, sk_rcvbuf)=0x4c offsetof(struct sock, sk_drops) =0x16c offsetof(struct sock, sk_filter)=0x198 offsetof(struct sock, sk_wq)=0x88 offsetof(struct sock, sk_policy)=0x98 offsetof(struct sock, sk_flags) =0x130 Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/net/sock.h | 55 ++++++++++++++++++++++++++++++------------------------ 1 file changed, 31 insertions(+), 24 deletions(-) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index eb0c1f504678..5557dfb3dd68 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -241,59 +241,67 @@ struct sock { #define sk_bind_node __sk_common.skc_bind_node #define sk_prot __sk_common.skc_prot #define sk_net __sk_common.skc_net - kmemcheck_bitfield_begin(flags); - unsigned int sk_shutdown : 2, - sk_no_check : 2, - sk_userlocks : 4, - sk_protocol : 8, - sk_type : 16; - kmemcheck_bitfield_end(flags); - int sk_rcvbuf; socket_lock_t sk_lock; + struct sk_buff_head sk_receive_queue; /* * The backlog queue is special, it is always used with * the per-socket spinlock held and requires low latency * access. Therefore we special case it's implementation. + * Note : rmem_alloc is in this structure to fill a hole + * on 64bit arches, not because its logically part of + * backlog. 
*/ struct { - struct sk_buff *head; - struct sk_buff *tail; - int len; + atomic_t rmem_alloc; + int len; + struct sk_buff *head; + struct sk_buff *tail; } sk_backlog; +#define sk_rmem_alloc sk_backlog.rmem_alloc + int sk_forward_alloc; +#ifdef CONFIG_RPS + __u32 sk_rxhash; +#endif + atomic_t sk_drops; + int sk_rcvbuf; + + struct sk_filter __rcu *sk_filter; struct socket_wq *sk_wq; - struct dst_entry *sk_dst_cache; + +#ifdef CONFIG_NET_DMA + struct sk_buff_head sk_async_wait_queue; +#endif + #ifdef CONFIG_XFRM struct xfrm_policy *sk_policy[2]; #endif + unsigned long sk_flags; + struct dst_entry *sk_dst_cache; spinlock_t sk_dst_lock; - atomic_t sk_rmem_alloc; atomic_t sk_wmem_alloc; atomic_t sk_omem_alloc; int sk_sndbuf; - struct sk_buff_head sk_receive_queue; struct sk_buff_head sk_write_queue; -#ifdef CONFIG_NET_DMA - struct sk_buff_head sk_async_wait_queue; -#endif + kmemcheck_bitfield_begin(flags); + unsigned int sk_shutdown : 2, + sk_no_check : 2, + sk_userlocks : 4, + sk_protocol : 8, + sk_type : 16; + kmemcheck_bitfield_end(flags); int sk_wmem_queued; - int sk_forward_alloc; gfp_t sk_allocation; int sk_route_caps; int sk_route_nocaps; int sk_gso_type; unsigned int sk_gso_max_size; int sk_rcvlowat; -#ifdef CONFIG_RPS - __u32 sk_rxhash; -#endif - unsigned long sk_flags; unsigned long sk_lingertime; struct sk_buff_head sk_error_queue; struct proto *sk_prot_creator; rwlock_t sk_callback_lock; int sk_err, sk_err_soft; - atomic_t sk_drops; unsigned short sk_ack_backlog; unsigned short sk_max_ack_backlog; __u32 sk_priority; @@ -301,7 +309,6 @@ struct sock { const struct cred *sk_peer_cred; long sk_rcvtimeo; long sk_sndtimeo; - struct sk_filter __rcu *sk_filter; void *sk_protinfo; struct timer_list sk_timer; ktime_t sk_stamp; -- cgit v1.2.3-59-g8ed1b From f23a478075659db8a4fd62fa6e264a8bb052cc5b Mon Sep 17 00:00:00 2001 From: Arik Nemtsov Date: Mon, 8 Nov 2010 11:51:06 +0200 Subject: mac80211: support hardware TX fragmentation offload The lower driver is notified when 
the fragmentation threshold changes and upon a reconfig of the interface. If the driver supports hardware TX fragmentation, don't fragment packets in the stack. Signed-off-by: Arik Nemtsov Signed-off-by: John W. Linville --- include/net/mac80211.h | 6 ++++++ net/mac80211/cfg.c | 7 +++++++ net/mac80211/driver-ops.h | 14 ++++++++++++++ net/mac80211/driver-trace.h | 21 +++++++++++++++++++++ net/mac80211/tx.c | 11 +++++++++-- net/mac80211/util.c | 3 +++ 6 files changed, 60 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 9fdf982d1286..6122e8a3297e 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1652,6 +1652,11 @@ enum ieee80211_ampdu_mlme_action { * and IV16) for the given key from hardware. * The callback must be atomic. * + * @set_frag_threshold: Configuration of fragmentation threshold. Assign this + * if the device does fragmentation by itself; if this callback is + * implemented then the stack will not do fragmentation. + * The callback can sleep. + * * @set_rts_threshold: Configuration of RTS threshold (if device needs it) * The callback can sleep. 
* @@ -1765,6 +1770,7 @@ struct ieee80211_ops { struct ieee80211_low_level_stats *stats); void (*get_tkip_seq)(struct ieee80211_hw *hw, u8 hw_key_idx, u32 *iv32, u16 *iv16); + int (*set_frag_threshold)(struct ieee80211_hw *hw, u32 value); int (*set_rts_threshold)(struct ieee80211_hw *hw, u32 value); int (*sta_add)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_sta *sta); diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 18bd0e550600..3df12f7d0cfe 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1299,6 +1299,13 @@ static int ieee80211_set_wiphy_params(struct wiphy *wiphy, u32 changed) struct ieee80211_local *local = wiphy_priv(wiphy); int err; + if (changed & WIPHY_PARAM_FRAG_THRESHOLD) { + err = drv_set_frag_threshold(local, wiphy->frag_threshold); + + if (err) + return err; + } + if (changed & WIPHY_PARAM_COVERAGE_CLASS) { err = drv_set_coverage_class(local, wiphy->coverage_class); diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 16983825f8e8..79019f94f621 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -233,6 +233,20 @@ static inline void drv_get_tkip_seq(struct ieee80211_local *local, trace_drv_get_tkip_seq(local, hw_key_idx, iv32, iv16); } +static inline int drv_set_frag_threshold(struct ieee80211_local *local, + u32 value) +{ + int ret = 0; + + might_sleep(); + + trace_drv_set_frag_threshold(local, value); + if (local->ops->set_frag_threshold) + ret = local->ops->set_frag_threshold(&local->hw, value); + trace_drv_return_int(local, ret); + return ret; +} + static inline int drv_set_rts_threshold(struct ieee80211_local *local, u32 value) { diff --git a/net/mac80211/driver-trace.h b/net/mac80211/driver-trace.h index 6831fb1641c8..431d65500d6a 100644 --- a/net/mac80211/driver-trace.h +++ b/net/mac80211/driver-trace.h @@ -531,6 +531,27 @@ TRACE_EVENT(drv_get_tkip_seq, ) ); +TRACE_EVENT(drv_set_frag_threshold, + TP_PROTO(struct ieee80211_local *local, u32 value), + + 
TP_ARGS(local, value), + + TP_STRUCT__entry( + LOCAL_ENTRY + __field(u32, value) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + __entry->value = value; + ), + + TP_printk( + LOCAL_PR_FMT " value:%d", + LOCAL_PR_ARG, __entry->value + ) +); + TRACE_EVENT(drv_set_rts_threshold, TP_PROTO(struct ieee80211_local *local, u32 value), diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 96c594309506..b392876af7d8 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1033,6 +1033,7 @@ static bool __ieee80211_parse_tx_radiotap(struct ieee80211_tx_data *tx, struct ieee80211_radiotap_header *rthdr = (struct ieee80211_radiotap_header *) skb->data; struct ieee80211_supported_band *sband; + bool hw_frag; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); int ret = ieee80211_radiotap_iterator_init(&iterator, rthdr, skb->len, NULL); @@ -1042,6 +1043,9 @@ static bool __ieee80211_parse_tx_radiotap(struct ieee80211_tx_data *tx, info->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; tx->flags &= ~IEEE80211_TX_FRAGMENTED; + /* packet is fragmented in HW if we have a non-NULL driver callback */ + hw_frag = (tx->local->ops->set_frag_threshold != NULL); + /* * for every radiotap entry that is present * (ieee80211_radiotap_iterator_next returns -ENOENT when no more @@ -1078,7 +1082,8 @@ static bool __ieee80211_parse_tx_radiotap(struct ieee80211_tx_data *tx, } if (*iterator.this_arg & IEEE80211_RADIOTAP_F_WEP) info->flags &= ~IEEE80211_TX_INTFL_DONT_ENCRYPT; - if (*iterator.this_arg & IEEE80211_RADIOTAP_F_FRAG) + if ((*iterator.this_arg & IEEE80211_RADIOTAP_F_FRAG) && + !hw_frag) tx->flags |= IEEE80211_TX_FRAGMENTED; break; @@ -1181,8 +1186,10 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata, /* * Set this flag (used below to indicate "automatic fragmentation"), * it will be cleared/left by radiotap as desired. + * Only valid when fragmentation is done by the stack. 
*/ - tx->flags |= IEEE80211_TX_FRAGMENTED; + if (!local->ops->set_frag_threshold) + tx->flags |= IEEE80211_TX_FRAGMENTED; /* process and remove the injection radiotap header */ if (unlikely(info->flags & IEEE80211_TX_INTFL_HAS_RADIOTAP)) { diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 0b6fc92bc0d7..e486286ebf1a 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1152,6 +1152,9 @@ int ieee80211_reconfig(struct ieee80211_local *local) } mutex_unlock(&local->sta_mtx); + /* setup fragmentation threshold */ + drv_set_frag_threshold(local, hw->wiphy->frag_threshold); + /* setup RTS threshold */ drv_set_rts_threshold(local, hw->wiphy->rts_threshold); -- cgit v1.2.3-59-g8ed1b From afe0cbf87500f0585d217deb8c6fd329793a7957 Mon Sep 17 00:00:00 2001 From: Bruno Randolf Date: Wed, 10 Nov 2010 12:50:50 +0900 Subject: cfg80211: Add nl80211 antenna configuration Allow setting of TX and RX antennas configuration via nl80211. The antenna configuration is defined as a bitmap of allowed antennas to use. This API can be used to mask out antennas which are not attached or should not be used for other reasons like regulatory concerns or special setups. Separate bitmaps are used for RX and TX to allow configuring different antennas for receiving and transmitting. Each bitmap is 32 bit long, each bit representing one antenna, starting with antenna 1 at the first bit. If an antenna bit is set, this means the driver is allowed to use this antenna for RX or TX respectively; if the bit is not set the hardware is not allowed to use this antenna. Using bitmaps has the benefit of allowing for a flexible configuration interface which can support many different configurations and which can be used for 802.11n as well as non-802.11n devices. Instead of relying on some hardware specific assumptions, drivers can use this information to know which antennas are actually attached to the system and derive their capabilities based on that. 
802.11n devices should enable or disable chains, based on which antennas are present (If all antennas belonging to a particular chain are disabled, the entire chain should be disabled). HT capabilities (like STBC, TX Beamforming, Antenna selection) should be calculated based on the available chains after applying the antenna masks. Should a 802.11n device have diversity antennas attached to one of their chains, diversity can be enabled or disabled based on the antenna information. Non-802.11n drivers can use the antenna masks to select RX and TX antennas and to enable or disable antenna diversity. While covering chainmasks for 802.11n and the standard "legacy" modes "fixed antenna 1", "fixed antenna 2" and "diversity" this API also allows more rare, but useful configurations as follows: 1) Send on antenna 1, receive on antenna 2 (or vice versa). This can be used to have a low gain antenna for TX in order to keep within the regulatory constraints and a high gain antenna for RX in order to receive weaker signals ("speak softly, but listen harder"). This can be useful for building long-shot outdoor links. Another usage of this setup is having a low-noise pre-amplifier on antenna 1 and a power amplifier on the other antenna. This way transmit noise is mostly kept out of the low noise receive channel. (This would be bitmaps: tx 1 rx 2). 2) Another similar setup is: Use RX diversity on both antennas, but always send on antenna 1. Again that would allow us to benefit from a higher gain RX antenna, while staying within the legal limits. (This would be: tx 0 rx 3). 3) And finally there can be special experimental setups in research and development even with pre 802.11n hardware where more than 2 antennas are available. It's good to keep the API simple, yet flexible. Signed-off-by: Bruno Randolf -- v7: Made bitmasks 32 bit wide and rebased to latest wireless-testing. Signed-off-by: John W. 
Linville --- include/linux/nl80211.h | 25 +++++++++++++++++++++++++ include/net/cfg80211.h | 3 +++ net/wireless/nl80211.c | 31 ++++++++++++++++++++++++++++++- 3 files changed, 58 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index fb877b5621b7..17c5c8849250 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -804,6 +804,28 @@ enum nl80211_commands { * @NL80211_ATTR_SUPPORT_IBSS_RSN: The device supports IBSS RSN, which mostly * means support for per-station GTKs. * + * @NL80211_ATTR_WIPHY_ANTENNA_TX: Bitmap of allowed antennas for transmitting. + * This can be used to mask out antennas which are not attached or should + * not be used for transmitting. If an antenna is not selected in this + * bitmap the hardware is not allowed to transmit on this antenna. + * + * Each bit represents one antenna, starting with antenna 1 at the first + * bit. Depending on which antennas are selected in the bitmap, 802.11n + * drivers can derive which chainmasks to use (if all antennas belonging to + * a particular chain are disabled this chain should be disabled) and if + * a chain has diversity antennas wether diversity should be used or not. + * HT capabilities (STBC, TX Beamforming, Antenna selection) can be + * derived from the available chains after applying the antenna mask. + * Non-802.11n drivers can derive wether to use diversity or not. + * Drivers may reject configurations or RX/TX mask combinations they cannot + * support by returning -EINVAL. + * + * @NL80211_ATTR_WIPHY_ANTENNA_RX: Bitmap of allowed antennas for receiving. + * This can be used to mask out antennas which are not attached or should + * not be used for receiving. If an antenna is not selected in this bitmap + * the hardware should not be configured to receive on this antenna. + * For a more detailed descripton see @NL80211_ATTR_WIPHY_ANTENNA_TX. 
+ * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -973,6 +995,9 @@ enum nl80211_attrs { NL80211_ATTR_SUPPORT_IBSS_RSN, + NL80211_ATTR_WIPHY_ANTENNA_TX, + NL80211_ATTR_WIPHY_ANTENNA_RX, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index e5702f5ac57c..07425e648a09 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1304,6 +1304,9 @@ struct cfg80211_ops { void (*mgmt_frame_register)(struct wiphy *wiphy, struct net_device *dev, u16 frame_type, bool reg); + + int (*set_antenna)(struct wiphy *wiphy, u32 tx_ant, u32 rx_ant); + int (*get_antenna)(struct wiphy *wiphy, u32 *tx_ant, u32 *rx_ant); }; /* diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index c506241f8637..5e4dda4c0fd3 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -166,7 +166,11 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_WIPHY_TX_POWER_SETTING] = { .type = NLA_U32 }, [NL80211_ATTR_WIPHY_TX_POWER_LEVEL] = { .type = NLA_U32 }, + [NL80211_ATTR_FRAME_TYPE] = { .type = NLA_U16 }, + + [NL80211_ATTR_WIPHY_ANTENNA_TX] = { .type = NLA_U32 }, + [NL80211_ATTR_WIPHY_ANTENNA_RX] = { .type = NLA_U32 }, }; /* policy for the key attributes */ @@ -526,7 +530,6 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, dev->wiphy.rts_threshold); NLA_PUT_U8(msg, NL80211_ATTR_WIPHY_COVERAGE_CLASS, dev->wiphy.coverage_class); - NLA_PUT_U8(msg, NL80211_ATTR_MAX_NUM_SCAN_SSIDS, dev->wiphy.max_scan_ssids); NLA_PUT_U16(msg, NL80211_ATTR_MAX_SCAN_IE_LEN, @@ -545,6 +548,16 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, if (dev->wiphy.flags & WIPHY_FLAG_CONTROL_PORT_PROTOCOL) NLA_PUT_FLAG(msg, NL80211_ATTR_CONTROL_PORT_ETHERTYPE); + if (dev->ops->get_antenna) { + u32 tx_ant = 0, rx_ant = 0; + int res; + res = 
dev->ops->get_antenna(&dev->wiphy, &tx_ant, &rx_ant); + if (!res) { + NLA_PUT_U32(msg, NL80211_ATTR_WIPHY_ANTENNA_TX, tx_ant); + NLA_PUT_U32(msg, NL80211_ATTR_WIPHY_ANTENNA_RX, rx_ant); + } + } + nl_modes = nla_nest_start(msg, NL80211_ATTR_SUPPORTED_IFTYPES); if (!nl_modes) goto nla_put_failure; @@ -1024,6 +1037,22 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) goto bad_res; } + if (info->attrs[NL80211_ATTR_WIPHY_ANTENNA_TX] && + info->attrs[NL80211_ATTR_WIPHY_ANTENNA_RX]) { + u32 tx_ant, rx_ant; + if (!rdev->ops->set_antenna) { + result = -EOPNOTSUPP; + goto bad_res; + } + + tx_ant = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_ANTENNA_TX]); + rx_ant = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_ANTENNA_RX]); + + result = rdev->ops->set_antenna(&rdev->wiphy, tx_ant, rx_ant); + if (result) + goto bad_res; + } + changed = 0; if (info->attrs[NL80211_ATTR_WIPHY_RETRY_SHORT]) { -- cgit v1.2.3-59-g8ed1b From 15d967532148a5fcda075282b82a271b6595a386 Mon Sep 17 00:00:00 2001 From: Bruno Randolf Date: Wed, 10 Nov 2010 12:50:56 +0900 Subject: mac80211: Add antenna configuration Allow antenna configuration by calling driver's function for it. We disallow antenna configuration if the wiphy is already running, mainly to make life easier for 802.11n drivers which need to recalculate HT capabilites. Signed-off-by: Bruno Randolf Signed-off-by: John W. 
Linville --- include/net/mac80211.h | 2 ++ net/mac80211/cfg.c | 19 +++++++++++++++++ net/mac80211/driver-ops.h | 23 +++++++++++++++++++++ net/mac80211/driver-trace.h | 50 +++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 94 insertions(+) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 6122e8a3297e..a7323eca08d1 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1799,6 +1799,8 @@ struct ieee80211_ops { void (*channel_switch)(struct ieee80211_hw *hw, struct ieee80211_channel_switch *ch_switch); int (*napi_poll)(struct ieee80211_hw *hw, int budget); + int (*set_antenna)(struct ieee80211_hw *hw, u32 tx_ant, u32 rx_ant); + int (*get_antenna)(struct ieee80211_hw *hw, u32 *tx_ant, u32 *rx_ant); }; /** diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 3df12f7d0cfe..0c544074479e 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1628,6 +1628,23 @@ static void ieee80211_mgmt_frame_register(struct wiphy *wiphy, ieee80211_queue_work(&local->hw, &local->reconfig_filter); } +static int ieee80211_set_antenna(struct wiphy *wiphy, u32 tx_ant, u32 rx_ant) +{ + struct ieee80211_local *local = wiphy_priv(wiphy); + + if (local->started) + return -EOPNOTSUPP; + + return drv_set_antenna(local, tx_ant, rx_ant); +} + +static int ieee80211_get_antenna(struct wiphy *wiphy, u32 *tx_ant, u32 *rx_ant) +{ + struct ieee80211_local *local = wiphy_priv(wiphy); + + return drv_get_antenna(local, tx_ant, rx_ant); +} + struct cfg80211_ops mac80211_config_ops = { .add_virtual_intf = ieee80211_add_iface, .del_virtual_intf = ieee80211_del_iface, @@ -1680,4 +1697,6 @@ struct cfg80211_ops mac80211_config_ops = { .mgmt_tx = ieee80211_mgmt_tx, .set_cqm_rssi_config = ieee80211_set_cqm_rssi_config, .mgmt_frame_register = ieee80211_mgmt_frame_register, + .set_antenna = ieee80211_set_antenna, + .get_antenna = ieee80211_get_antenna, }; diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 
79019f94f621..4244554d218a 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -442,4 +442,27 @@ static inline void drv_channel_switch(struct ieee80211_local *local, trace_drv_return_void(local); } + +static inline int drv_set_antenna(struct ieee80211_local *local, + u32 tx_ant, u32 rx_ant) +{ + int ret = -EOPNOTSUPP; + might_sleep(); + if (local->ops->set_antenna) + ret = local->ops->set_antenna(&local->hw, tx_ant, rx_ant); + trace_drv_set_antenna(local, tx_ant, rx_ant, ret); + return ret; +} + +static inline int drv_get_antenna(struct ieee80211_local *local, + u32 *tx_ant, u32 *rx_ant) +{ + int ret = -EOPNOTSUPP; + might_sleep(); + if (local->ops->get_antenna) + ret = local->ops->get_antenna(&local->hw, tx_ant, rx_ant); + trace_drv_get_antenna(local, *tx_ant, *rx_ant, ret); + return ret; +} + #endif /* __MAC80211_DRIVER_OPS */ diff --git a/net/mac80211/driver-trace.h b/net/mac80211/driver-trace.h index 431d65500d6a..c2772f23ac9c 100644 --- a/net/mac80211/driver-trace.h +++ b/net/mac80211/driver-trace.h @@ -883,6 +883,56 @@ TRACE_EVENT(drv_channel_switch, ) ); +TRACE_EVENT(drv_set_antenna, + TP_PROTO(struct ieee80211_local *local, u32 tx_ant, u32 rx_ant, int ret), + + TP_ARGS(local, tx_ant, rx_ant, ret), + + TP_STRUCT__entry( + LOCAL_ENTRY + __field(u32, tx_ant) + __field(u32, rx_ant) + __field(int, ret) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + __entry->tx_ant = tx_ant; + __entry->rx_ant = rx_ant; + __entry->ret = ret; + ), + + TP_printk( + LOCAL_PR_FMT " tx_ant:%d rx_ant:%d ret:%d", + LOCAL_PR_ARG, __entry->tx_ant, __entry->rx_ant, __entry->ret + ) +); + +TRACE_EVENT(drv_get_antenna, + TP_PROTO(struct ieee80211_local *local, u32 tx_ant, u32 rx_ant, int ret), + + TP_ARGS(local, tx_ant, rx_ant, ret), + + TP_STRUCT__entry( + LOCAL_ENTRY + __field(u32, tx_ant) + __field(u32, rx_ant) + __field(int, ret) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + __entry->tx_ant = tx_ant; + __entry->rx_ant = rx_ant; + __entry->ret = ret; + ), + + TP_printk( + 
LOCAL_PR_FMT " tx_ant:%d rx_ant:%d ret:%d", + LOCAL_PR_ARG, __entry->tx_ant, __entry->rx_ant, __entry->ret + ) +); + /* * Tracing for API calls that drivers call. */ -- cgit v1.2.3-59-g8ed1b From a619a4c0e1fd4e8c360c63d0df3fa0a401107d69 Mon Sep 17 00:00:00 2001 From: Juuso Oikarinen Date: Thu, 11 Nov 2010 08:50:18 +0200 Subject: mac80211: Add function to get probe request template for current AP Chipsets with hardware based connection monitoring need to autonomously send directed probe-request frames to the AP (in the event of beacon loss, for example.) For the hardware to be able to do this, it requires a template for the frame to transmit to the AP, filled in with the BSSID and SSID of the AP, but also the supported rate IEs. This patch adds a function to mac80211, which allows the hardware driver to fetch this template after association, so it can be configured to the hardware. Signed-off-by: Juuso Oikarinen Acked-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 15 +++++++++++++++ net/mac80211/ieee80211_i.h | 4 ++++ net/mac80211/mlme.c | 24 ++++++++++++++++++++++++ net/mac80211/util.c | 23 ++++++++++++++++++----- 4 files changed, 61 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index a7323eca08d1..af7e84199e62 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2508,6 +2508,21 @@ struct ieee80211_sta *ieee80211_find_sta_by_ifaddr(struct ieee80211_hw *hw, void ieee80211_sta_block_awake(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta, bool block); +/** + * ieee80211_ap_probereq_get - retrieve a Probe Request template + * @hw: pointer obtained from ieee80211_alloc_hw(). + * @vif: &struct ieee80211_vif pointer from the add_interface callback. + * + * Creates a Probe Request template which can, for example, be uploaded to + * hardware. The template is filled with bssid, ssid and supported rate + * information. 
This function must only be called from within the + * .bss_info_changed callback function and only in managed mode. The function + * is only useful when the interface is associated, otherwise it will return + * NULL. + */ +struct sk_buff *ieee80211_ap_probereq_get(struct ieee80211_hw *hw, + struct ieee80211_vif *vif); + /** * ieee80211_beacon_loss - inform hardware does not receive beacons * diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index b80c38689927..59a1d38212fd 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1287,6 +1287,10 @@ int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer, const u8 *ie, size_t ie_len, enum ieee80211_band band, u32 rate_mask, u8 channel); +struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata, + u8 *dst, + const u8 *ssid, size_t ssid_len, + const u8 *ie, size_t ie_len); void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst, const u8 *ssid, size_t ssid_len, const u8 *ie, size_t ie_len); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index a3a9421555af..dfc4a316ac1c 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1108,6 +1108,30 @@ static void ieee80211_mgd_probe_ap(struct ieee80211_sub_if_data *sdata, mutex_unlock(&ifmgd->mtx); } +struct sk_buff *ieee80211_ap_probereq_get(struct ieee80211_hw *hw, + struct ieee80211_vif *vif) +{ + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; + struct sk_buff *skb; + const u8 *ssid; + + if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_STATION)) + return NULL; + + ASSERT_MGD_MTX(ifmgd); + + if (!ifmgd->associated) + return NULL; + + ssid = ieee80211_bss_get_ie(ifmgd->associated, WLAN_EID_SSID); + skb = ieee80211_build_probe_req(sdata, ifmgd->associated->bssid, + ssid + 2, ssid[1], NULL, 0); + + return skb; +} +EXPORT_SYMBOL(ieee80211_ap_probereq_get); + static void __ieee80211_connection_loss(struct 
ieee80211_sub_if_data *sdata) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index e486286ebf1a..68d0518254dd 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1011,9 +1011,10 @@ int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer, return pos - buffer; } -void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst, - const u8 *ssid, size_t ssid_len, - const u8 *ie, size_t ie_len) +struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata, + u8 *dst, + const u8 *ssid, size_t ssid_len, + const u8 *ie, size_t ie_len) { struct ieee80211_local *local = sdata->local; struct sk_buff *skb; @@ -1027,7 +1028,7 @@ void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst, if (!buf) { printk(KERN_DEBUG "%s: failed to allocate temporary IE " "buffer\n", sdata->name); - return; + return NULL; } chan = ieee80211_frequency_to_channel( @@ -1050,8 +1051,20 @@ void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst, } IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; - ieee80211_tx_skb(sdata, skb); kfree(buf); + + return skb; +} + +void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst, + const u8 *ssid, size_t ssid_len, + const u8 *ie, size_t ie_len) +{ + struct sk_buff *skb; + + skb = ieee80211_build_probe_req(sdata, dst, ssid, ssid_len, ie, ie_len); + if (skb) + ieee80211_tx_skb(sdata, skb); } u32 ieee80211_sta_get_rates(struct ieee80211_local *local, -- cgit v1.2.3-59-g8ed1b From 885a46d0f7942d76c2f3860acb45f75237d3bb42 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Thu, 11 Nov 2010 15:07:22 +0100 Subject: cfg80211: add support for setting the ad-hoc multicast rate Signed-off-by: Felix Fietkau Signed-off-by: John W. 
Linville --- include/linux/nl80211.h | 4 ++++ include/net/cfg80211.h | 2 ++ net/wireless/nl80211.c | 5 +++++ 3 files changed, 11 insertions(+) (limited to 'include/net') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 17c5c8849250..037b4e498890 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -826,6 +826,8 @@ enum nl80211_commands { * the hardware should not be configured to receive on this antenna. * For a more detailed descripton see @NL80211_ATTR_WIPHY_ANTENNA_TX. * + * @NL80211_ATTR_MCAST_RATE: Multicast tx rate (in 100 kbps) for IBSS + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -998,6 +1000,8 @@ enum nl80211_attrs { NL80211_ATTR_WIPHY_ANTENNA_TX, NL80211_ATTR_WIPHY_ANTENNA_RX, + NL80211_ATTR_MCAST_RATE, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 07425e648a09..8fd9eebd0cc9 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -923,6 +923,7 @@ struct cfg80211_disassoc_request { * @privacy: this is a protected network, keys will be configured * after joining * @basic_rates: bitmap of basic rates to use when creating the IBSS + * @mcast_rate: multicast tx rate (in 100 kbps) */ struct cfg80211_ibss_params { u8 *ssid; @@ -934,6 +935,7 @@ struct cfg80211_ibss_params { u32 basic_rates; bool channel_fixed; bool privacy; + int mcast_rate; }; /** diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 5e4dda4c0fd3..605553842226 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -171,6 +171,8 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_WIPHY_ANTENNA_TX] = { .type = NLA_U32 }, [NL80211_ATTR_WIPHY_ANTENNA_RX] = { .type = NLA_U32 }, + + [NL80211_ATTR_MCAST_RATE] = { .type = NLA_U32 }, }; /* policy for the key attributes */ @@ -3681,6 +3683,9 @@ static int 
nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info) return -EINVAL; } } + if (info->attrs[NL80211_ATTR_MCAST_RATE]) + ibss.mcast_rate = + nla_get_u32(info->attrs[NL80211_ATTR_MCAST_RATE]); if (ibss.privacy && info->attrs[NL80211_ATTR_KEYS]) { connkeys = nl80211_parse_connkeys(rdev, -- cgit v1.2.3-59-g8ed1b From 8f0729b16ae354f9db89394fc1d2d65003455d56 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Thu, 11 Nov 2010 15:07:23 +0100 Subject: mac80211: add support for setting the ad-hoc multicast rate Signed-off-by: Felix Fietkau Signed-off-by: John W. Linville --- include/net/mac80211.h | 6 ++++-- net/mac80211/ibss.c | 1 + net/mac80211/rate.c | 19 +++++++++++++++---- net/mac80211/tx.c | 5 +++-- 4 files changed, 23 insertions(+), 8 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index af7e84199e62..1248369a7c30 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -205,6 +205,7 @@ enum ieee80211_bss_change { * @basic_rates: bitmap of basic rates, each bit stands for an * index into the rate table configured by the driver in * the current band. 
+ * @mcast_rate: multicast rate for AP and Ad-Hoc (in 100 kbps) * @bssid: The BSSID for this BSS * @enable_beacon: whether beaconing should be enabled or not * @channel_type: Channel type for this BSS -- the hardware might be @@ -244,6 +245,7 @@ struct ieee80211_bss_conf { u16 assoc_capability; u64 timestamp; u32 basic_rates; + u32 mcast_rate; u16 ht_operation_mode; s32 cqm_rssi_thold; u32 cqm_rssi_hyst; @@ -2663,7 +2665,7 @@ enum rate_control_changed { * @rate_idx_mask: user-requested rate mask (not MCS for now) * @skb: the skb that will be transmitted, the control information in it needs * to be filled in - * @ap: whether this frame is sent out in AP mode + * @bss: whether this frame is sent out in AP or IBSS mode */ struct ieee80211_tx_rate_control { struct ieee80211_hw *hw; @@ -2674,7 +2676,7 @@ struct ieee80211_tx_rate_control { bool rts, short_preamble; u8 max_rate_idx; u32 rate_idx_mask; - bool ap; + bool bss; }; struct rate_control_ops { diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 239c4836a946..6fe6837dc134 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -915,6 +915,7 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, sdata->u.ibss.privacy = params->privacy; sdata->u.ibss.basic_rates = params->basic_rates; + sdata->vif.bss_conf.mcast_rate = params->mcast_rate; sdata->vif.bss_conf.beacon_int = params->beacon_interval; diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index 33f76993da08..76de4f8d9327 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -211,10 +211,20 @@ static bool rc_no_data_or_no_ack(struct ieee80211_tx_rate_control *txrc) return (info->flags & IEEE80211_TX_CTL_NO_ACK) || !ieee80211_is_data(fc); } -static void rc_send_low_broadcast(s8 *idx, u32 basic_rates, u8 max_rate_idx) +static void rc_send_low_broadcast(s8 *idx, u32 basic_rates, u32 mcast_rate, + struct ieee80211_supported_band *sband) { u8 i; + if (mcast_rate) { + for (i = 0; i < sband->n_bitrates; i++) { + if 
(sband->bitrates[i].bitrate == mcast_rate) { + *idx = i; + return; + } + } + } + if (basic_rates == 0) return; /* assume basic rates unknown and accept rate */ if (*idx < 0) @@ -222,7 +232,7 @@ static void rc_send_low_broadcast(s8 *idx, u32 basic_rates, u8 max_rate_idx) if (basic_rates & (1 << *idx)) return; /* selected rate is a basic rate */ - for (i = *idx + 1; i <= max_rate_idx; i++) { + for (i = *idx + 1; i <= sband->n_bitrates; i++) { if (basic_rates & (1 << i)) { *idx = i; return; @@ -243,10 +253,11 @@ bool rate_control_send_low(struct ieee80211_sta *sta, info->control.rates[0].count = (info->flags & IEEE80211_TX_CTL_NO_ACK) ? 1 : txrc->hw->max_rate_tries; - if (!sta && txrc->ap) + if (!sta && txrc->bss) rc_send_low_broadcast(&info->control.rates[0].idx, txrc->bss_conf->basic_rates, - txrc->sband->n_bitrates); + txrc->bss_conf->mcast_rate, + txrc->sband); return true; } return false; diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index b392876af7d8..e69483647f33 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -622,7 +622,8 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) txrc.max_rate_idx = -1; else txrc.max_rate_idx = fls(txrc.rate_idx_mask) - 1; - txrc.ap = tx->sdata->vif.type == NL80211_IFTYPE_AP; + txrc.bss = (tx->sdata->vif.type == NL80211_IFTYPE_AP || + tx->sdata->vif.type == NL80211_IFTYPE_ADHOC); /* set up RTS protection if desired */ if (len > tx->local->hw.wiphy->rts_threshold) { @@ -2308,7 +2309,7 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, txrc.max_rate_idx = -1; else txrc.max_rate_idx = fls(txrc.rate_idx_mask) - 1; - txrc.ap = true; + txrc.bss = true; rate_control_get_rate(sdata, NULL, &txrc); info->control.vif = vif; -- cgit v1.2.3-59-g8ed1b From f8ff182c716c6f11ca3061961f5722f26a14e101 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 16 Nov 2010 04:30:14 +0000 Subject: rtnetlink: Link address family API Each net_device contains address family specific data such as per device settings and 
statistics. We already expose this data via procfs/sysfs and partially netlink. The netlink method requires the requester to send one RTM_GETLINK request for each address family it wishes to receive data of and then merge this data itself. This patch implements a new API which combines all address family specific link data in a new netlink attribute IFLA_AF_SPEC. IFLA_AF_SPEC contains a sequence of nested attributes, one for each address family which in turn defines the structure of its own attribute. Example: [IFLA_AF_SPEC] = { [AF_INET] = { [IFLA_INET_CONF] = ..., }, [AF_INET6] = { [IFLA_INET6_FLAGS] = ..., [IFLA_INET6_CONF] = ..., } } The API also allows for address families to implement a function which parses the IFLA_AF_SPEC attribute sent by userspace to implement address family specific link options. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/if_link.h | 19 +++++++ include/net/rtnetlink.h | 31 ++++++++++ net/core/rtnetlink.c | 147 +++++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 195 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/linux/if_link.h b/include/linux/if_link.h index 2fc66dd783ee..443d04a66a79 100644 --- a/include/linux/if_link.h +++ b/include/linux/if_link.h @@ -80,6 +80,24 @@ struct rtnl_link_ifmap { __u8 port; }; +/* + * IFLA_AF_SPEC + * Contains nested attributes for address family specific attributes. + * Each address family may create a attribute with the address family + * number as type and create its own attribute structure in it. 
+ * + * Example: + * [IFLA_AF_SPEC] = { + * [AF_INET] = { + * [IFLA_INET_CONF] = ..., + * }, + * [AF_INET6] = { + * [IFLA_INET6_FLAGS] = ..., + * [IFLA_INET6_CONF] = ..., + * } + * } + */ + enum { IFLA_UNSPEC, IFLA_ADDRESS, @@ -116,6 +134,7 @@ enum { IFLA_STATS64, IFLA_VF_PORTS, IFLA_PORT_SELF, + IFLA_AF_SPEC, __IFLA_MAX }; diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index e013c68bfb00..35be0bbcd7da 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -83,6 +83,37 @@ extern void __rtnl_link_unregister(struct rtnl_link_ops *ops); extern int rtnl_link_register(struct rtnl_link_ops *ops); extern void rtnl_link_unregister(struct rtnl_link_ops *ops); +/** + * struct rtnl_af_ops - rtnetlink address family operations + * + * @list: Used internally + * @family: Address family + * @fill_link_af: Function to fill IFLA_AF_SPEC with address family + * specific netlink attributes. + * @get_link_af_size: Function to calculate size of address family specific + * netlink attributes exlusive the container attribute. + * @parse_link_af: Function to parse a IFLA_AF_SPEC attribute and modify + * net_device accordingly. 
+ */ +struct rtnl_af_ops { + struct list_head list; + int family; + + int (*fill_link_af)(struct sk_buff *skb, + const struct net_device *dev); + size_t (*get_link_af_size)(const struct net_device *dev); + + int (*parse_link_af)(struct net_device *dev, + const struct nlattr *attr); +}; + +extern int __rtnl_af_register(struct rtnl_af_ops *ops); +extern void __rtnl_af_unregister(struct rtnl_af_ops *ops); + +extern int rtnl_af_register(struct rtnl_af_ops *ops); +extern void rtnl_af_unregister(struct rtnl_af_ops *ops); + + extern struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[]); extern struct net_device *rtnl_create_link(struct net *src_net, struct net *net, char *ifname, const struct rtnl_link_ops *ops, struct nlattr *tb[]); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 841c287ef40a..bf69e5871b1a 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -362,6 +362,95 @@ static size_t rtnl_link_get_size(const struct net_device *dev) return size; } +static LIST_HEAD(rtnl_af_ops); + +static const struct rtnl_af_ops *rtnl_af_lookup(const int family) +{ + const struct rtnl_af_ops *ops; + + list_for_each_entry(ops, &rtnl_af_ops, list) { + if (ops->family == family) + return ops; + } + + return NULL; +} + +/** + * __rtnl_af_register - Register rtnl_af_ops with rtnetlink. + * @ops: struct rtnl_af_ops * to register + * + * The caller must hold the rtnl_mutex. + * + * Returns 0 on success or a negative error code. + */ +int __rtnl_af_register(struct rtnl_af_ops *ops) +{ + list_add_tail(&ops->list, &rtnl_af_ops); + return 0; +} +EXPORT_SYMBOL_GPL(__rtnl_af_register); + +/** + * rtnl_af_register - Register rtnl_af_ops with rtnetlink. + * @ops: struct rtnl_af_ops * to register + * + * Returns 0 on success or a negative error code. 
+ */ +int rtnl_af_register(struct rtnl_af_ops *ops) +{ + int err; + + rtnl_lock(); + err = __rtnl_af_register(ops); + rtnl_unlock(); + return err; +} +EXPORT_SYMBOL_GPL(rtnl_af_register); + +/** + * __rtnl_af_unregister - Unregister rtnl_af_ops from rtnetlink. + * @ops: struct rtnl_af_ops * to unregister + * + * The caller must hold the rtnl_mutex. + */ +void __rtnl_af_unregister(struct rtnl_af_ops *ops) +{ + list_del(&ops->list); +} +EXPORT_SYMBOL_GPL(__rtnl_af_unregister); + +/** + * rtnl_af_unregister - Unregister rtnl_af_ops from rtnetlink. + * @ops: struct rtnl_af_ops * to unregister + */ +void rtnl_af_unregister(struct rtnl_af_ops *ops) +{ + rtnl_lock(); + __rtnl_af_unregister(ops); + rtnl_unlock(); +} +EXPORT_SYMBOL_GPL(rtnl_af_unregister); + +static size_t rtnl_link_get_af_size(const struct net_device *dev) +{ + struct rtnl_af_ops *af_ops; + size_t size; + + /* IFLA_AF_SPEC */ + size = nla_total_size(sizeof(struct nlattr)); + + list_for_each_entry(af_ops, &rtnl_af_ops, list) { + if (af_ops->get_link_af_size) { + /* AF_* + nested data */ + size += nla_total_size(sizeof(struct nlattr)) + + af_ops->get_link_af_size(dev); + } + } + + return size; +} + static int rtnl_link_fill(struct sk_buff *skb, const struct net_device *dev) { const struct rtnl_link_ops *ops = dev->rtnl_link_ops; @@ -671,7 +760,8 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev) + nla_total_size(4) /* IFLA_NUM_VF */ + rtnl_vfinfo_size(dev) /* IFLA_VFINFO_LIST */ + rtnl_port_size(dev) /* IFLA_VF_PORTS + IFLA_PORT_SELF */ - + rtnl_link_get_size(dev); /* IFLA_LINKINFO */ + + rtnl_link_get_size(dev) /* IFLA_LINKINFO */ + + rtnl_link_get_af_size(dev); /* IFLA_AF_SPEC */ } static int rtnl_vf_ports_fill(struct sk_buff *skb, struct net_device *dev) @@ -757,7 +847,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, struct nlmsghdr *nlh; struct rtnl_link_stats64 temp; const struct rtnl_link_stats64 *stats; - struct nlattr *attr; + struct nlattr *attr, 
*af_spec; + struct rtnl_af_ops *af_ops; nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags); if (nlh == NULL) @@ -866,6 +957,36 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, goto nla_put_failure; } + if (!(af_spec = nla_nest_start(skb, IFLA_AF_SPEC))) + goto nla_put_failure; + + list_for_each_entry(af_ops, &rtnl_af_ops, list) { + if (af_ops->fill_link_af) { + struct nlattr *af; + int err; + + if (!(af = nla_nest_start(skb, af_ops->family))) + goto nla_put_failure; + + err = af_ops->fill_link_af(skb, dev); + + /* + * Caller may return ENODATA to indicate that there + * was no data to be dumped. This is not an error, it + * means we should trim the attribute header and + * continue. + */ + if (err == -ENODATA) + nla_nest_cancel(skb, af); + else if (err < 0) + goto nla_put_failure; + + nla_nest_end(skb, af); + } + } + + nla_nest_end(skb, af_spec); + return nlmsg_end(skb, nlh); nla_put_failure: @@ -924,6 +1045,7 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_VFINFO_LIST] = {. type = NLA_NESTED }, [IFLA_VF_PORTS] = { .type = NLA_NESTED }, [IFLA_PORT_SELF] = { .type = NLA_NESTED }, + [IFLA_AF_SPEC] = { .type = NLA_NESTED }, }; EXPORT_SYMBOL(ifla_policy); @@ -1225,6 +1347,27 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, goto errout; modified = 1; } + + if (tb[IFLA_AF_SPEC]) { + struct nlattr *af; + int rem; + + nla_for_each_nested(af, tb[IFLA_AF_SPEC], rem) { + const struct rtnl_af_ops *af_ops; + + if (!(af_ops = rtnl_af_lookup(nla_type(af)))) + continue; + + if (!af_ops->parse_link_af) + continue; + + err = af_ops->parse_link_af(dev, af); + if (err < 0) + goto errout; + + modified = 1; + } + } err = 0; errout: -- cgit v1.2.3-59-g8ed1b From 5811662b15db018c740c57d037523683fd3e6123 Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Fri, 12 Nov 2010 18:43:55 +0000 Subject: net: use the macros defined for the members of flowi Use the macros defined for the members of flowi to clean the code up. 
Signed-off-by: Changli Gao Signed-off-by: David S. Miller --- include/net/route.h | 12 +++++------- net/atm/clip.c | 3 ++- net/bridge/br_netfilter.c | 9 ++------- net/dccp/ipv4.c | 13 +++++-------- net/decnet/dn_route.c | 22 +++++++++------------ net/decnet/dn_rules.c | 2 +- net/ipv4/af_inet.c | 18 +++++------------ net/ipv4/arp.c | 12 ++++++------ net/ipv4/fib_frontend.c | 28 ++++++++------------------- net/ipv4/fib_semantics.c | 8 ++------ net/ipv4/icmp.c | 28 +++++++++------------------ net/ipv4/igmp.c | 8 +++----- net/ipv4/inet_connection_sock.c | 15 ++++++-------- net/ipv4/ip_gre.c | 31 +++++++++-------------------- net/ipv4/ip_output.c | 25 ++++++++++-------------- net/ipv4/ipip.c | 20 ++++++------------- net/ipv4/ipmr.c | 18 +++++------------ net/ipv4/netfilter.c | 8 ++++---- net/ipv4/raw.c | 7 +++---- net/ipv4/route.c | 43 ++++++++++++++++------------------------- net/ipv4/syncookies.c | 15 ++++++-------- net/ipv4/udp.c | 12 +++++------- net/ipv4/xfrm4_policy.c | 8 ++------ net/ipv6/ip6mr.c | 4 +--- net/ipv6/netfilter.c | 6 ++---- net/ipv6/route.c | 24 ++++++----------------- net/ipv6/sit.c | 14 ++++++-------- net/l2tp/l2tp_ip.c | 12 +++++------- net/netfilter/ipvs/ip_vs_ctl.c | 6 ++---- net/netfilter/ipvs/ip_vs_xmit.c | 34 ++++++++------------------------ net/netfilter/xt_TEE.c | 12 ++++++------ net/rxrpc/ar-peer.c | 10 +++++----- 32 files changed, 171 insertions(+), 316 deletions(-) (limited to 'include/net') diff --git a/include/net/route.h b/include/net/route.h index 5cd46d1c0e14..b8c1f7703fc6 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -169,14 +169,12 @@ static inline int ip_route_connect(struct rtable **rp, __be32 dst, { struct flowi fl = { .oif = oif, .mark = sk->sk_mark, - .nl_u = { .ip4_u = { .daddr = dst, - .saddr = src, - .tos = tos } }, + .fl4_dst = dst, + .fl4_src = src, + .fl4_tos = tos, .proto = protocol, - .uli_u = { .ports = - { .sport = sport, - .dport = dport } } }; - + .fl_ip_sport = sport, + .fl_ip_dport = dport }; int 
err; struct net *net = sock_net(sk); diff --git a/net/atm/clip.c b/net/atm/clip.c index ff956d1115bc..d257da50fcfb 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -502,7 +502,8 @@ static int clip_setentry(struct atm_vcc *vcc, __be32 ip) struct atmarp_entry *entry; int error; struct clip_vcc *clip_vcc; - struct flowi fl = { .nl_u = { .ip4_u = { .daddr = ip, .tos = 1}} }; + struct flowi fl = { .fl4_dst = ip, + .fl4_tos = 1 }; struct rtable *rt; if (vcc->push != clip_push) { diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index ce8b2eed4e73..6e1392093911 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -413,13 +413,8 @@ static int br_nf_pre_routing_finish(struct sk_buff *skb) if (dnat_took_place(skb)) { if ((err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev))) { struct flowi fl = { - .nl_u = { - .ip4_u = { - .daddr = iph->daddr, - .saddr = 0, - .tos = RT_TOS(iph->tos) }, - }, - .proto = 0, + .fl4_dst = iph->daddr, + .fl4_tos = RT_TOS(iph->tos), }; struct in_device *in_dev = __in_dev_get_rcu(dev); diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 3f69ea114829..45a434f94169 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -462,15 +462,12 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk, { struct rtable *rt; struct flowi fl = { .oif = skb_rtable(skb)->rt_iif, - .nl_u = { .ip4_u = - { .daddr = ip_hdr(skb)->saddr, - .saddr = ip_hdr(skb)->daddr, - .tos = RT_CONN_FLAGS(sk) } }, + .fl4_dst = ip_hdr(skb)->saddr, + .fl4_src = ip_hdr(skb)->daddr, + .fl4_tos = RT_CONN_FLAGS(sk), .proto = sk->sk_protocol, - .uli_u = { .ports = - { .sport = dccp_hdr(skb)->dccph_dport, - .dport = dccp_hdr(skb)->dccph_sport } - } + .fl_ip_sport = dccp_hdr(skb)->dccph_dport, + .fl_ip_dport = dccp_hdr(skb)->dccph_sport }; security_skb_classify_flow(skb, &fl); diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 474d54dd08c2..8280e43c8861 100644 --- a/net/decnet/dn_route.c +++ 
b/net/decnet/dn_route.c @@ -271,10 +271,10 @@ static void dn_dst_link_failure(struct sk_buff *skb) static inline int compare_keys(struct flowi *fl1, struct flowi *fl2) { - return ((fl1->nl_u.dn_u.daddr ^ fl2->nl_u.dn_u.daddr) | - (fl1->nl_u.dn_u.saddr ^ fl2->nl_u.dn_u.saddr) | + return ((fl1->fld_dst ^ fl2->fld_dst) | + (fl1->fld_src ^ fl2->fld_src) | (fl1->mark ^ fl2->mark) | - (fl1->nl_u.dn_u.scope ^ fl2->nl_u.dn_u.scope) | + (fl1->fld_scope ^ fl2->fld_scope) | (fl1->oif ^ fl2->oif) | (fl1->iif ^ fl2->iif)) == 0; } @@ -882,11 +882,9 @@ static inline __le16 dn_fib_rules_map_destination(__le16 daddr, struct dn_fib_re static int dn_route_output_slow(struct dst_entry **pprt, const struct flowi *oldflp, int try_hard) { - struct flowi fl = { .nl_u = { .dn_u = - { .daddr = oldflp->fld_dst, - .saddr = oldflp->fld_src, - .scope = RT_SCOPE_UNIVERSE, - } }, + struct flowi fl = { .fld_dst = oldflp->fld_dst, + .fld_src = oldflp->fld_src, + .fld_scope = RT_SCOPE_UNIVERSE, .mark = oldflp->mark, .iif = init_net.loopback_dev->ifindex, .oif = oldflp->oif }; @@ -1230,11 +1228,9 @@ static int dn_route_input_slow(struct sk_buff *skb) int flags = 0; __le16 gateway = 0; __le16 local_src = 0; - struct flowi fl = { .nl_u = { .dn_u = - { .daddr = cb->dst, - .saddr = cb->src, - .scope = RT_SCOPE_UNIVERSE, - } }, + struct flowi fl = { .fld_dst = cb->dst, + .fld_src = cb->src, + .fld_scope = RT_SCOPE_UNIVERSE, .mark = skb->mark, .iif = skb->dev->ifindex }; struct dn_fib_res res = { .fi = NULL, .type = RTN_UNREACHABLE }; diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c index 48fdf10be7a1..6eb91df3c550 100644 --- a/net/decnet/dn_rules.c +++ b/net/decnet/dn_rules.c @@ -175,7 +175,7 @@ static int dn_fib_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, unsigned dnet_addr_type(__le16 addr) { - struct flowi fl = { .nl_u = { .dn_u = { .daddr = addr } } }; + struct flowi fl = { .fld_dst = addr }; struct dn_fib_res res; unsigned ret = RTN_UNICAST; struct dn_fib_table *tb = 
dn_fib_get_table(RT_TABLE_LOCAL, 0); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index f581f77d1097..f2b61107df6c 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1148,21 +1148,13 @@ int inet_sk_rebuild_header(struct sock *sk) struct flowi fl = { .oif = sk->sk_bound_dev_if, .mark = sk->sk_mark, - .nl_u = { - .ip4_u = { - .daddr = daddr, - .saddr = inet->inet_saddr, - .tos = RT_CONN_FLAGS(sk), - }, - }, + .fl4_dst = daddr, + .fl4_src = inet->inet_saddr, + .fl4_tos = RT_CONN_FLAGS(sk), .proto = sk->sk_protocol, .flags = inet_sk_flowi_flags(sk), - .uli_u = { - .ports = { - .sport = inet->inet_sport, - .dport = inet->inet_dport, - }, - }, + .fl_ip_sport = inet->inet_sport, + .fl_ip_dport = inet->inet_dport, }; security_sk_classify_flow(sk, &fl); diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index d8e540c5b071..7833f17b648a 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -433,8 +433,8 @@ static int arp_ignore(struct in_device *in_dev, __be32 sip, __be32 tip) static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev) { - struct flowi fl = { .nl_u = { .ip4_u = { .daddr = sip, - .saddr = tip } } }; + struct flowi fl = { .fl4_dst = sip, + .fl4_src = tip }; struct rtable *rt; int flag = 0; /*unsigned long now; */ @@ -1061,8 +1061,8 @@ static int arp_req_set(struct net *net, struct arpreq *r, if (r->arp_flags & ATF_PERM) r->arp_flags |= ATF_COM; if (dev == NULL) { - struct flowi fl = { .nl_u.ip4_u = { .daddr = ip, - .tos = RTO_ONLINK } }; + struct flowi fl = { .fl4_dst = ip, + .fl4_tos = RTO_ONLINK }; struct rtable *rt; err = ip_route_output_key(net, &rt, &fl); if (err != 0) @@ -1169,8 +1169,8 @@ static int arp_req_delete(struct net *net, struct arpreq *r, ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr; if (dev == NULL) { - struct flowi fl = { .nl_u.ip4_u = { .daddr = ip, - .tos = RTO_ONLINK } }; + struct flowi fl = { .fl4_dst = ip, + .fl4_tos = RTO_ONLINK }; struct rtable *rt; err = ip_route_output_key(net, &rt, &fl); if (err != 
0) diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index eb6f69a8f27a..d3a1112b9d9c 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -158,11 +158,7 @@ static void fib_flush(struct net *net) struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref) { struct flowi fl = { - .nl_u = { - .ip4_u = { - .daddr = addr - } - }, + .fl4_dst = addr, .flags = FLOWI_FLAG_MATCH_ANY_IIF }; struct fib_result res = { 0 }; @@ -193,7 +189,7 @@ static inline unsigned __inet_dev_addr_type(struct net *net, const struct net_device *dev, __be32 addr) { - struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } }; + struct flowi fl = { .fl4_dst = addr }; struct fib_result res; unsigned ret = RTN_BROADCAST; struct fib_table *local_table; @@ -247,13 +243,9 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, { struct in_device *in_dev; struct flowi fl = { - .nl_u = { - .ip4_u = { - .daddr = src, - .saddr = dst, - .tos = tos - } - }, + .fl4_dst = src, + .fl4_src = dst, + .fl4_tos = tos, .mark = mark, .iif = oif }; @@ -853,13 +845,9 @@ static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb) struct fib_result res; struct flowi fl = { .mark = frn->fl_mark, - .nl_u = { - .ip4_u = { - .daddr = frn->fl_addr, - .tos = frn->fl_tos, - .scope = frn->fl_scope - } - } + .fl4_dst = frn->fl_addr, + .fl4_tos = frn->fl_tos, + .fl4_scope = frn->fl_scope, }; #ifdef CONFIG_IP_MULTIPLE_TABLES diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 3e0da3ef6116..12d3dc3df1b7 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -563,12 +563,8 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, rcu_read_lock(); { struct flowi fl = { - .nl_u = { - .ip4_u = { - .daddr = nh->nh_gw, - .scope = cfg->fc_scope + 1, - }, - }, + .fl4_dst = nh->nh_gw, + .fl4_scope = cfg->fc_scope + 1, .oif = nh->nh_oif, }; diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index c6e2affafbd3..4daebd17b6ed 
100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -386,10 +386,9 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) daddr = icmp_param->replyopts.faddr; } { - struct flowi fl = { .nl_u = { .ip4_u = - { .daddr = daddr, - .saddr = rt->rt_spec_dst, - .tos = RT_TOS(ip_hdr(skb)->tos) } }, + struct flowi fl = { .fl4_dst= daddr, + .fl4_src = rt->rt_spec_dst, + .fl4_tos = RT_TOS(ip_hdr(skb)->tos), .proto = IPPROTO_ICMP }; security_skb_classify_flow(skb, &fl); if (ip_route_output_key(net, &rt, &fl)) @@ -542,22 +541,13 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) { struct flowi fl = { - .nl_u = { - .ip4_u = { - .daddr = icmp_param.replyopts.srr ? - icmp_param.replyopts.faddr : - iph->saddr, - .saddr = saddr, - .tos = RT_TOS(tos) - } - }, + .fl4_dst = icmp_param.replyopts.srr ? + icmp_param.replyopts.faddr : iph->saddr, + .fl4_src = saddr, + .fl4_tos = RT_TOS(tos), .proto = IPPROTO_ICMP, - .uli_u = { - .icmpt = { - .type = type, - .code = code - } - } + .fl_icmp_type = type, + .fl_icmp_code = code, }; int err; struct rtable *rt2; diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index a1bf2f49e716..afb1e82a59f9 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -314,8 +314,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size) { struct flowi fl = { .oif = dev->ifindex, - .nl_u = { .ip4_u = { - .daddr = IGMPV3_ALL_MCR } }, + .fl4_dst = IGMPV3_ALL_MCR, .proto = IPPROTO_IGMP }; if (ip_route_output_key(net, &rt, &fl)) { kfree_skb(skb); @@ -660,7 +659,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, { struct flowi fl = { .oif = dev->ifindex, - .nl_u = { .ip4_u = { .daddr = dst } }, + .fl4_dst = dst, .proto = IPPROTO_IGMP }; if (ip_route_output_key(net, &rt, &fl)) return -1; @@ -1425,8 +1424,7 @@ void ip_mc_destroy_dev(struct in_device *in_dev) /* RTNL is locked */ static struct in_device *ip_mc_find_dev(struct net *net, struct ip_mreqn *imr) { - struct flowi fl = { 
.nl_u = { .ip4_u = - { .daddr = imr->imr_multiaddr.s_addr } } }; + struct flowi fl = { .fl4_dst = imr->imr_multiaddr.s_addr }; struct rtable *rt; struct net_device *dev = NULL; struct in_device *idev = NULL; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 7174370b1195..06f5f8f482f0 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -358,17 +358,14 @@ struct dst_entry *inet_csk_route_req(struct sock *sk, struct ip_options *opt = inet_rsk(req)->opt; struct flowi fl = { .oif = sk->sk_bound_dev_if, .mark = sk->sk_mark, - .nl_u = { .ip4_u = - { .daddr = ((opt && opt->srr) ? - opt->faddr : - ireq->rmt_addr), - .saddr = ireq->loc_addr, - .tos = RT_CONN_FLAGS(sk) } }, + .fl4_dst = ((opt && opt->srr) ? + opt->faddr : ireq->rmt_addr), + .fl4_src = ireq->loc_addr, + .fl4_tos = RT_CONN_FLAGS(sk), .proto = sk->sk_protocol, .flags = inet_sk_flowi_flags(sk), - .uli_u = { .ports = - { .sport = inet_sk(sk)->inet_sport, - .dport = ireq->rmt_port } } }; + .fl_ip_sport = inet_sk(sk)->inet_sport, + .fl_ip_dport = ireq->rmt_port }; struct net *net = sock_net(sk); security_req_classify_flow(req, &fl); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index aace653710f6..897210adaa77 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -772,14 +772,9 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev { struct flowi fl = { .oif = tunnel->parms.link, - .nl_u = { - .ip4_u = { - .daddr = dst, - .saddr = tiph->saddr, - .tos = RT_TOS(tos) - } - }, - .proto = IPPROTO_GRE, + .fl4_dst = dst, + .fl4_src = tiph->saddr, + .fl4_tos = RT_TOS(tos), .fl_gre_key = tunnel->parms.o_key }; if (ip_route_output_key(dev_net(dev), &rt, &fl)) { @@ -951,13 +946,9 @@ static int ipgre_tunnel_bind_dev(struct net_device *dev) if (iph->daddr) { struct flowi fl = { .oif = tunnel->parms.link, - .nl_u = { - .ip4_u = { - .daddr = iph->daddr, - .saddr = iph->saddr, - .tos = RT_TOS(iph->tos) - } - }, + .fl4_dst = 
iph->daddr, + .fl4_src = iph->saddr, + .fl4_tos = RT_TOS(iph->tos), .proto = IPPROTO_GRE, .fl_gre_key = tunnel->parms.o_key }; @@ -1217,13 +1208,9 @@ static int ipgre_open(struct net_device *dev) if (ipv4_is_multicast(t->parms.iph.daddr)) { struct flowi fl = { .oif = t->parms.link, - .nl_u = { - .ip4_u = { - .daddr = t->parms.iph.daddr, - .saddr = t->parms.iph.saddr, - .tos = RT_TOS(t->parms.iph.tos) - } - }, + .fl4_dst = t->parms.iph.daddr, + .fl4_src = t->parms.iph.saddr, + .fl4_tos = RT_TOS(t->parms.iph.tos), .proto = IPPROTO_GRE, .fl_gre_key = t->parms.o_key }; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 439d2a34ee44..5090c7ff525e 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -341,15 +341,13 @@ int ip_queue_xmit(struct sk_buff *skb) { struct flowi fl = { .oif = sk->sk_bound_dev_if, .mark = sk->sk_mark, - .nl_u = { .ip4_u = - { .daddr = daddr, - .saddr = inet->inet_saddr, - .tos = RT_CONN_FLAGS(sk) } }, + .fl4_dst = daddr, + .fl4_src = inet->inet_saddr, + .fl4_tos = RT_CONN_FLAGS(sk), .proto = sk->sk_protocol, .flags = inet_sk_flowi_flags(sk), - .uli_u = { .ports = - { .sport = inet->inet_sport, - .dport = inet->inet_dport } } }; + .fl_ip_sport = inet->inet_sport, + .fl_ip_dport = inet->inet_dport }; /* If this fails, retransmit mechanism of transport layer will * keep trying until route appears or the connection times @@ -1404,14 +1402,11 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar { struct flowi fl = { .oif = arg->bound_dev_if, - .nl_u = { .ip4_u = - { .daddr = daddr, - .saddr = rt->rt_spec_dst, - .tos = RT_TOS(ip_hdr(skb)->tos) } }, - /* Not quite clean, but right. 
*/ - .uli_u = { .ports = - { .sport = tcp_hdr(skb)->dest, - .dport = tcp_hdr(skb)->source } }, + .fl4_dst = daddr, + .fl4_src = rt->rt_spec_dst, + .fl4_tos = RT_TOS(ip_hdr(skb)->tos), + .fl_ip_sport = tcp_hdr(skb)->dest, + .fl_ip_dport = tcp_hdr(skb)->source, .proto = sk->sk_protocol, .flags = ip_reply_arg_flowi_flags(arg) }; security_skb_classify_flow(skb, &fl); diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index cd300aaee78f..e70ad581398e 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -463,13 +463,9 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) { struct flowi fl = { .oif = tunnel->parms.link, - .nl_u = { - .ip4_u = { - .daddr = dst, - .saddr = tiph->saddr, - .tos = RT_TOS(tos) - } - }, + .fl4_dst = dst, + .fl4_src= tiph->saddr, + .fl4_tos = RT_TOS(tos), .proto = IPPROTO_IPIP }; @@ -589,13 +585,9 @@ static void ipip_tunnel_bind_dev(struct net_device *dev) if (iph->daddr) { struct flowi fl = { .oif = tunnel->parms.link, - .nl_u = { - .ip4_u = { - .daddr = iph->daddr, - .saddr = iph->saddr, - .tos = RT_TOS(iph->tos) - } - }, + .fl4_dst = iph->daddr, + .fl4_src = iph->saddr, + .fl4_tos = RT_TOS(iph->tos), .proto = IPPROTO_IPIP }; struct rtable *rt; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index ef2b0089e0ea..3f3a9afd73e0 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1537,13 +1537,9 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, if (vif->flags & VIFF_TUNNEL) { struct flowi fl = { .oif = vif->link, - .nl_u = { - .ip4_u = { - .daddr = vif->remote, - .saddr = vif->local, - .tos = RT_TOS(iph->tos) - } - }, + .fl4_dst = vif->remote, + .fl4_src = vif->local, + .fl4_tos = RT_TOS(iph->tos), .proto = IPPROTO_IPIP }; @@ -1553,12 +1549,8 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, } else { struct flowi fl = { .oif = vif->link, - .nl_u = { - .ip4_u = { - .daddr = iph->daddr, - .tos = RT_TOS(iph->tos) - } - }, + .fl4_dst = iph->daddr, + .fl4_tos = RT_TOS(iph->tos), .proto 
= IPPROTO_IPIP }; diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index d88a46c54fd1..994a1f29ebbc 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -31,10 +31,10 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) * packets with foreign saddr to appear on the NF_INET_LOCAL_OUT hook. */ if (addr_type == RTN_LOCAL) { - fl.nl_u.ip4_u.daddr = iph->daddr; + fl.fl4_dst = iph->daddr; if (type == RTN_LOCAL) - fl.nl_u.ip4_u.saddr = iph->saddr; - fl.nl_u.ip4_u.tos = RT_TOS(iph->tos); + fl.fl4_src = iph->saddr; + fl.fl4_tos = RT_TOS(iph->tos); fl.oif = skb->sk ? skb->sk->sk_bound_dev_if : 0; fl.mark = skb->mark; fl.flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : 0; @@ -47,7 +47,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) } else { /* non-local src, find valid iif to satisfy * rp-filter when calling ip_route_input. */ - fl.nl_u.ip4_u.daddr = iph->saddr; + fl.fl4_dst = iph->saddr; if (ip_route_output_key(net, &rt, &fl) != 0) return -1; diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 1f85ef289895..a3d5ab786e81 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -549,10 +549,9 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, { struct flowi fl = { .oif = ipc.oif, .mark = sk->sk_mark, - .nl_u = { .ip4_u = - { .daddr = daddr, - .saddr = saddr, - .tos = tos } }, + .fl4_dst = daddr, + .fl4_src = saddr, + .fl4_tos = tos, .proto = inet->hdrincl ? 
IPPROTO_RAW : sk->sk_protocol, }; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 66610ea3c87b..ec2333fb637e 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -684,17 +684,17 @@ static inline bool rt_caching(const struct net *net) static inline bool compare_hash_inputs(const struct flowi *fl1, const struct flowi *fl2) { - return ((((__force u32)fl1->nl_u.ip4_u.daddr ^ (__force u32)fl2->nl_u.ip4_u.daddr) | - ((__force u32)fl1->nl_u.ip4_u.saddr ^ (__force u32)fl2->nl_u.ip4_u.saddr) | + return ((((__force u32)fl1->fl4_dst ^ (__force u32)fl2->fl4_dst) | + ((__force u32)fl1->fl4_src ^ (__force u32)fl2->fl4_src) | (fl1->iif ^ fl2->iif)) == 0); } static inline int compare_keys(struct flowi *fl1, struct flowi *fl2) { - return (((__force u32)fl1->nl_u.ip4_u.daddr ^ (__force u32)fl2->nl_u.ip4_u.daddr) | - ((__force u32)fl1->nl_u.ip4_u.saddr ^ (__force u32)fl2->nl_u.ip4_u.saddr) | + return (((__force u32)fl1->fl4_dst ^ (__force u32)fl2->fl4_dst) | + ((__force u32)fl1->fl4_src ^ (__force u32)fl2->fl4_src) | (fl1->mark ^ fl2->mark) | - (*(u16 *)&fl1->nl_u.ip4_u.tos ^ *(u16 *)&fl2->nl_u.ip4_u.tos) | + (*(u16 *)&fl1->fl4_tos ^ *(u16 *)&fl2->fl4_tos) | (fl1->oif ^ fl2->oif) | (fl1->iif ^ fl2->iif)) == 0; } @@ -2089,12 +2089,10 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, { struct fib_result res; struct in_device *in_dev = __in_dev_get_rcu(dev); - struct flowi fl = { .nl_u = { .ip4_u = - { .daddr = daddr, - .saddr = saddr, - .tos = tos, - .scope = RT_SCOPE_UNIVERSE, - } }, + struct flowi fl = { .fl4_dst = daddr, + .fl4_src = saddr, + .fl4_tos = tos, + .fl4_scope = RT_SCOPE_UNIVERSE, .mark = skb->mark, .iif = dev->ifindex }; unsigned flags = 0; @@ -2480,14 +2478,11 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, const struct flowi *oldflp) { u32 tos = RT_FL_TOS(oldflp); - struct flowi fl = { .nl_u = { .ip4_u = - { .daddr = oldflp->fl4_dst, - .saddr = oldflp->fl4_src, - .tos = tos & IPTOS_RT_MASK, - .scope = 
((tos & RTO_ONLINK) ? - RT_SCOPE_LINK : - RT_SCOPE_UNIVERSE), - } }, + struct flowi fl = { .fl4_dst = oldflp->fl4_dst, + .fl4_src = oldflp->fl4_src, + .fl4_tos = tos & IPTOS_RT_MASK, + .fl4_scope = ((tos & RTO_ONLINK) ? + RT_SCOPE_LINK : RT_SCOPE_UNIVERSE), .mark = oldflp->mark, .iif = net->loopback_dev->ifindex, .oif = oldflp->oif }; @@ -2944,13 +2939,9 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void err = -rt->dst.error; } else { struct flowi fl = { - .nl_u = { - .ip4_u = { - .daddr = dst, - .saddr = src, - .tos = rtm->rtm_tos, - }, - }, + .fl4_dst = dst, + .fl4_src = src, + .fl4_tos = rtm->rtm_tos, .oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0, .mark = mark, }; diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 650cace2180d..47519205a014 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -346,17 +346,14 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, */ { struct flowi fl = { .mark = sk->sk_mark, - .nl_u = { .ip4_u = - { .daddr = ((opt && opt->srr) ? - opt->faddr : - ireq->rmt_addr), - .saddr = ireq->loc_addr, - .tos = RT_CONN_FLAGS(sk) } }, + .fl4_dst = ((opt && opt->srr) ? 
+ opt->faddr : ireq->rmt_addr), + .fl4_src = ireq->loc_addr, + .fl4_tos = RT_CONN_FLAGS(sk), .proto = IPPROTO_TCP, .flags = inet_sk_flowi_flags(sk), - .uli_u = { .ports = - { .sport = th->dest, - .dport = th->source } } }; + .fl_ip_sport = th->dest, + .fl_ip_dport = th->source }; security_req_classify_flow(req, &fl); if (ip_route_output_key(sock_net(sk), &rt, &fl)) { reqsk_free(req); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 491ecd3f7a01..b37181da487c 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -890,15 +890,13 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, if (rt == NULL) { struct flowi fl = { .oif = ipc.oif, .mark = sk->sk_mark, - .nl_u = { .ip4_u = - { .daddr = faddr, - .saddr = saddr, - .tos = tos } }, + .fl4_dst = faddr, + .fl4_src = saddr, + .fl4_tos = tos, .proto = sk->sk_protocol, .flags = inet_sk_flowi_flags(sk), - .uli_u = { .ports = - { .sport = inet->inet_sport, - .dport = dport } } }; + .fl_ip_sport = inet->inet_sport, + .fl_ip_dport = dport }; struct net *net = sock_net(sk); security_sk_classify_flow(sk, &fl); diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 4a8c5335770c..b057d40addec 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -23,12 +23,8 @@ static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos, xfrm_address_t *daddr) { struct flowi fl = { - .nl_u = { - .ip4_u = { - .tos = tos, - .daddr = daddr->a4, - }, - }, + .fl4_dst = daddr->a4, + .fl4_tos = tos, }; struct dst_entry *dst; struct rtable *rt; diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 6f32ffce7022..9fab274019c0 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1843,9 +1843,7 @@ static int ip6mr_forward2(struct net *net, struct mr6_table *mrt, fl = (struct flowi) { .oif = vif->link, - .nl_u = { .ip6_u = - { .daddr = ipv6h->daddr, } - } + .fl6_dst = ipv6h->daddr, }; dst = ip6_route_output(net, NULL, &fl); diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 
7155b2451d7c..35915e8617f0 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -18,10 +18,8 @@ int ip6_route_me_harder(struct sk_buff *skb) struct flowi fl = { .oif = skb->sk ? skb->sk->sk_bound_dev_if : 0, .mark = skb->mark, - .nl_u = - { .ip6_u = - { .daddr = iph->daddr, - .saddr = iph->saddr, } }, + .fl6_dst = iph->daddr, + .fl6_src = iph->saddr, }; dst = ip6_route_output(net, skb->sk, &fl); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 96455ffb76fb..c346ccf66ae1 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -558,11 +558,7 @@ struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr, { struct flowi fl = { .oif = oif, - .nl_u = { - .ip6_u = { - .daddr = *daddr, - }, - }, + .fl6_dst = *daddr, }; struct dst_entry *dst; int flags = strict ? RT6_LOOKUP_F_IFACE : 0; @@ -778,13 +774,9 @@ void ip6_route_input(struct sk_buff *skb) int flags = RT6_LOOKUP_F_HAS_SADDR; struct flowi fl = { .iif = skb->dev->ifindex, - .nl_u = { - .ip6_u = { - .daddr = iph->daddr, - .saddr = iph->saddr, - .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK, - }, - }, + .fl6_dst = iph->daddr, + .fl6_src = iph->saddr, + .fl6_flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK, .mark = skb->mark, .proto = iph->nexthdr, }; @@ -1463,12 +1455,8 @@ static struct rt6_info *ip6_route_redirect(struct in6_addr *dest, struct ip6rd_flowi rdfl = { .fl = { .oif = dev->ifindex, - .nl_u = { - .ip6_u = { - .daddr = *dest, - .saddr = *src, - }, - }, + .fl6_dst = *dest, + .fl6_src = *src, }, }; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index d6bfaec3bbbf..6e48a80d0f25 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -730,10 +730,9 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, } { - struct flowi fl = { .nl_u = { .ip4_u = - { .daddr = dst, - .saddr = tiph->saddr, - .tos = RT_TOS(tos) } }, + struct flowi fl = { .fl4_dst = dst, + .fl4_src = tiph->saddr, + .fl4_tos = RT_TOS(tos), .oif = tunnel->parms.link, .proto = IPPROTO_IPV6 }; if 
(ip_route_output_key(dev_net(dev), &rt, &fl)) { @@ -855,10 +854,9 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev) iph = &tunnel->parms.iph; if (iph->daddr) { - struct flowi fl = { .nl_u = { .ip4_u = - { .daddr = iph->daddr, - .saddr = iph->saddr, - .tos = RT_TOS(iph->tos) } }, + struct flowi fl = { .fl4_dst = iph->daddr, + .fl4_src = iph->saddr, + .fl4_tos = RT_TOS(iph->tos), .oif = tunnel->parms.link, .proto = IPPROTO_IPV6 }; struct rtable *rt; diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 0bf6a59545ab..04635e88e8ed 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -476,15 +476,13 @@ static int l2tp_ip_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m { struct flowi fl = { .oif = sk->sk_bound_dev_if, - .nl_u = { .ip4_u = { - .daddr = daddr, - .saddr = inet->inet_saddr, - .tos = RT_CONN_FLAGS(sk) } }, + .fl4_dst = daddr, + .fl4_src = inet->inet_saddr, + .fl4_tos = RT_CONN_FLAGS(sk), .proto = sk->sk_protocol, .flags = inet_sk_flowi_flags(sk), - .uli_u = { .ports = { - .sport = inet->inet_sport, - .dport = inet->inet_dport } } }; + .fl_ip_sport = inet->inet_sport, + .fl_ip_dport = inet->inet_dport }; /* If this fails, retransmit mechanism of transport layer will * keep trying until route appears or the connection times diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 5f5daa30b0af..c6f293639220 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -110,10 +110,8 @@ static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr) struct rt6_info *rt; struct flowi fl = { .oif = 0, - .nl_u = { - .ip6_u = { - .daddr = *addr, - .saddr = { .s6_addr32 = {0, 0, 0, 0} }, } }, + .fl6_dst = *addr, + .fl6_src = { .s6_addr32 = {0, 0, 0, 0} }, }; rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl); diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 10bd39c0ae2d..5325a3fbe4ac 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ 
b/net/netfilter/ipvs/ip_vs_xmit.c @@ -96,12 +96,8 @@ __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest, if (!(rt = (struct rtable *) __ip_vs_dst_check(dest, rtos))) { struct flowi fl = { - .oif = 0, - .nl_u = { - .ip4_u = { - .daddr = dest->addr.ip, - .saddr = 0, - .tos = rtos, } }, + .fl4_dst = dest->addr.ip, + .fl4_tos = rtos, }; if (ip_route_output_key(net, &rt, &fl)) { @@ -118,12 +114,8 @@ __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest, spin_unlock(&dest->dst_lock); } else { struct flowi fl = { - .oif = 0, - .nl_u = { - .ip4_u = { - .daddr = daddr, - .saddr = 0, - .tos = rtos, } }, + .fl4_dst = daddr, + .fl4_tos = rtos, }; if (ip_route_output_key(net, &rt, &fl)) { @@ -178,14 +170,9 @@ __ip_vs_reroute_locally(struct sk_buff *skb) refdst_drop(orefdst); } else { struct flowi fl = { - .oif = 0, - .nl_u = { - .ip4_u = { - .daddr = iph->daddr, - .saddr = iph->saddr, - .tos = RT_TOS(iph->tos), - } - }, + .fl4_dst = iph->daddr, + .fl4_src = iph->saddr, + .fl4_tos = RT_TOS(iph->tos), .mark = skb->mark, }; struct rtable *rt; @@ -216,12 +203,7 @@ __ip_vs_route_output_v6(struct net *net, struct in6_addr *daddr, { struct dst_entry *dst; struct flowi fl = { - .oif = 0, - .nl_u = { - .ip6_u = { - .daddr = *daddr, - }, - }, + .fl6_dst = *daddr, }; dst = ip6_route_output(net, NULL, &fl); diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c index 22a2d421e7eb..5128a6c4cb2c 100644 --- a/net/netfilter/xt_TEE.c +++ b/net/netfilter/xt_TEE.c @@ -70,9 +70,9 @@ tee_tg_route4(struct sk_buff *skb, const struct xt_tee_tginfo *info) return false; fl.oif = info->priv->oif; } - fl.nl_u.ip4_u.daddr = info->gw.ip; - fl.nl_u.ip4_u.tos = RT_TOS(iph->tos); - fl.nl_u.ip4_u.scope = RT_SCOPE_UNIVERSE; + fl.fl4_dst = info->gw.ip; + fl.fl4_tos = RT_TOS(iph->tos); + fl.fl4_scope = RT_SCOPE_UNIVERSE; if (ip_route_output_key(net, &rt, &fl) != 0) return false; @@ -150,9 +150,9 @@ tee_tg_route6(struct sk_buff *skb, const struct xt_tee_tginfo *info) return false; 
fl.oif = info->priv->oif; } - fl.nl_u.ip6_u.daddr = info->gw.in6; - fl.nl_u.ip6_u.flowlabel = ((iph->flow_lbl[0] & 0xF) << 16) | - (iph->flow_lbl[1] << 8) | iph->flow_lbl[2]; + fl.fl6_dst = info->gw.in6; + fl.fl6_flowlabel = ((iph->flow_lbl[0] & 0xF) << 16) | + (iph->flow_lbl[1] << 8) | iph->flow_lbl[2]; dst = ip6_route_output(net, NULL, &fl); if (dst == NULL) return false; diff --git a/net/rxrpc/ar-peer.c b/net/rxrpc/ar-peer.c index 9f1729bd60de..a53fb25a64ed 100644 --- a/net/rxrpc/ar-peer.c +++ b/net/rxrpc/ar-peer.c @@ -47,12 +47,12 @@ static void rxrpc_assess_MTU_size(struct rxrpc_peer *peer) case AF_INET: fl.oif = 0; fl.proto = IPPROTO_UDP, - fl.nl_u.ip4_u.saddr = 0; - fl.nl_u.ip4_u.daddr = peer->srx.transport.sin.sin_addr.s_addr; - fl.nl_u.ip4_u.tos = 0; + fl.fl4_dst = peer->srx.transport.sin.sin_addr.s_addr; + fl.fl4_src = 0; + fl.fl4_tos = 0; /* assume AFS.CM talking to AFS.FS */ - fl.uli_u.ports.sport = htons(7001); - fl.uli_u.ports.dport = htons(7000); + fl.fl_ip_sport = htons(7001); + fl.fl_ip_dport = htons(7000); break; default: BUG(); -- cgit v1.2.3-59-g8ed1b From 4bce22b9b84032c77c7e038b07b24fcc706dfc10 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 16 Nov 2010 11:49:58 -0800 Subject: mac80211: defines for AC numbers In many places we've just hardcoded the AC numbers -- which is a relic from the original mac80211 (d80211). Add constants for them so we know what we're talking about. Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- include/net/mac80211.h | 14 ++++++++++++++ net/mac80211/wme.c | 11 ++++++++++- 2 files changed, 24 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 1248369a7c30..5b0fff2178bb 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -96,6 +96,20 @@ enum ieee80211_max_queues { IEEE80211_MAX_QUEUES = 4, }; +/** + * enum ieee80211_ac_numbers - AC numbers as used in mac80211 + * @IEEE80211_AC_VO: voice + * @IEEE80211_AC_VI: video + * @IEEE80211_AC_BE: best effort + * @IEEE80211_AC_BK: background + */ +enum ieee80211_ac_numbers { + IEEE80211_AC_VO = 0, + IEEE80211_AC_VI = 1, + IEEE80211_AC_BE = 2, + IEEE80211_AC_BK = 3, +}; + /** * struct ieee80211_tx_queue_params - transmit queue configuration * diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c index 34e6d02da779..58e75bbc1f91 100644 --- a/net/mac80211/wme.c +++ b/net/mac80211/wme.c @@ -21,7 +21,16 @@ /* Default mapping in classifier to work with default * queue setup. */ -const int ieee802_1d_to_ac[8] = { 2, 3, 3, 2, 1, 1, 0, 0 }; +const int ieee802_1d_to_ac[8] = { + IEEE80211_AC_BE, + IEEE80211_AC_BK, + IEEE80211_AC_BK, + IEEE80211_AC_BE, + IEEE80211_AC_VI, + IEEE80211_AC_VI, + IEEE80211_AC_VO, + IEEE80211_AC_VO +}; static int wme_downgrade_ac(struct sk_buff *skb) { -- cgit v1.2.3-59-g8ed1b From ef22b7b65f0eda9015becc7bff225a399914a242 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Thu, 18 Nov 2010 09:40:04 -0800 Subject: net: Fix duplicate volatile warning. jiffies is defined as "volatile". extern unsigned long volatile __jiffy_data jiffies; ACCESS_ONCE() uses "volatile". As a result, some compilers warn duplicate `volatile' for ACCESS_ONCE(jiffies). Signed-off-by: Tetsuo Handa Signed-off-by: David S. 
Miller --- include/net/neighbour.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 55590ab16b3e..6beb1ffc2b7f 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -303,7 +303,7 @@ static inline void neigh_confirm(struct neighbour *neigh) static inline int neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) { - unsigned long now = ACCESS_ONCE(jiffies); + unsigned long now = jiffies; if (neigh->used != now) neigh->used = now; -- cgit v1.2.3-59-g8ed1b From 86107fd170bc379869250eb7e1bd393a3a70e8ae Mon Sep 17 00:00:00 2001 From: Bruno Randolf Date: Tue, 16 Nov 2010 10:58:48 +0900 Subject: nl80211/mac80211: Report signal average Extend nl80211 to report an exponential weighted moving average (EWMA) of the signal value. Since the signal value usually fluctuates between different packets, an average can be more useful than the value of the last packet. This uses the recently added generic EWMA library function. Signed-off-by: Bruno Randolf Signed-off-by: John W. 
Linville --- include/linux/nl80211.h | 2 ++ include/net/cfg80211.h | 4 ++++ net/mac80211/Kconfig | 1 + net/mac80211/cfg.c | 3 ++- net/mac80211/rx.c | 1 + net/mac80211/sta_info.c | 2 ++ net/mac80211/sta_info.h | 3 +++ net/wireless/nl80211.c | 3 +++ 8 files changed, 18 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 037b4e498890..1ce3775e9e26 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -1161,6 +1161,7 @@ enum nl80211_rate_info { * @__NL80211_STA_INFO_AFTER_LAST: internal * @NL80211_STA_INFO_MAX: highest possible station info attribute * @NL80211_STA_INFO_SIGNAL: signal strength of last received PPDU (u8, dBm) + * @NL80211_STA_INFO_SIGNAL_AVG: signal strength average (u8, dBm) * @NL80211_STA_INFO_TX_BITRATE: current unicast tx rate, nested attribute * containing info as possible, see &enum nl80211_sta_info_txrate. * @NL80211_STA_INFO_RX_PACKETS: total received packet (u32, from this station) @@ -1178,6 +1179,7 @@ enum nl80211_sta_info { NL80211_STA_INFO_PLID, NL80211_STA_INFO_PLINK_STATE, NL80211_STA_INFO_SIGNAL, + NL80211_STA_INFO_SIGNAL_AVG, NL80211_STA_INFO_TX_BITRATE, NL80211_STA_INFO_RX_PACKETS, NL80211_STA_INFO_TX_PACKETS, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 8fd9eebd0cc9..69e2364889f1 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -424,6 +424,7 @@ struct station_parameters { * @STATION_INFO_TX_RETRIES: @tx_retries filled * @STATION_INFO_TX_FAILED: @tx_failed filled * @STATION_INFO_RX_DROP_MISC: @rx_dropped_misc filled + * @STATION_INFO_SIGNAL_AVG: @signal_avg filled */ enum station_info_flags { STATION_INFO_INACTIVE_TIME = 1<<0, @@ -439,6 +440,7 @@ enum station_info_flags { STATION_INFO_TX_RETRIES = 1<<10, STATION_INFO_TX_FAILED = 1<<11, STATION_INFO_RX_DROP_MISC = 1<<12, + STATION_INFO_SIGNAL_AVG = 1<<13, }; /** @@ -485,6 +487,7 @@ struct rate_info { * @plid: mesh peer link id * @plink_state: mesh peer link 
state * @signal: signal strength of last received packet in dBm + * @signal_avg: signal strength average in dBm * @txrate: current unicast bitrate to this station * @rx_packets: packets received from this station * @tx_packets: packets transmitted to this station @@ -505,6 +508,7 @@ struct station_info { u16 plid; u8 plink_state; s8 signal; + s8 signal_avg; struct rate_info txrate; u32 rx_packets; u32 tx_packets; diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig index 4d6f8653ec88..798d9b9462e2 100644 --- a/net/mac80211/Kconfig +++ b/net/mac80211/Kconfig @@ -6,6 +6,7 @@ config MAC80211 select CRYPTO_ARC4 select CRYPTO_AES select CRC32 + select AVERAGE ---help--- This option enables the hardware independent IEEE 802.11 networking stack. diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 0c544074479e..92c9cf6a7d1c 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -343,8 +343,9 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) if ((sta->local->hw.flags & IEEE80211_HW_SIGNAL_DBM) || (sta->local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC)) { - sinfo->filled |= STATION_INFO_SIGNAL; + sinfo->filled |= STATION_INFO_SIGNAL | STATION_INFO_SIGNAL_AVG; sinfo->signal = (s8)sta->last_signal; + sinfo->signal_avg = (s8) -ewma_read(&sta->avg_signal); } sinfo->txrate.flags = 0; diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index d2fcd22ab06d..9dd60a74181f 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1156,6 +1156,7 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) sta->rx_fragments++; sta->rx_bytes += rx->skb->len; sta->last_signal = status->signal; + ewma_add(&sta->avg_signal, -status->signal); /* * Change STA power saving mode only at the end of a frame diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index eff58571fd7e..f43fca8907f7 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -244,6 +244,8 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, 
sta->local = local; sta->sdata = sdata; + ewma_init(&sta->avg_signal, 1000, 8); + if (sta_prepare_rate_control(local, sta, gfp)) { kfree(sta); return NULL; diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 9265acadef32..84062e2c782c 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -13,6 +13,7 @@ #include #include #include +#include #include "key.h" /** @@ -224,6 +225,7 @@ enum plink_state { * @rx_fragments: number of received MPDUs * @rx_dropped: number of dropped MPDUs from this STA * @last_signal: signal of last received frame from this STA + * @avg_signal: moving average of signal of received frames from this STA * @last_seq_ctrl: last received seq/frag number from this STA (per RX queue) * @tx_filtered_count: number of frames the hardware filtered for this STA * @tx_retry_failed: number of frames that failed retry @@ -291,6 +293,7 @@ struct sta_info { unsigned long rx_fragments; unsigned long rx_dropped; int last_signal; + struct ewma avg_signal; __le16 last_seq_ctrl[NUM_RX_DATA_QUEUES]; /* Updated from TX status path only, no locking requirements */ diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 605553842226..d06a40d17002 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1872,6 +1872,9 @@ static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq, if (sinfo->filled & STATION_INFO_SIGNAL) NLA_PUT_U8(msg, NL80211_STA_INFO_SIGNAL, sinfo->signal); + if (sinfo->filled & STATION_INFO_SIGNAL_AVG) + NLA_PUT_U8(msg, NL80211_STA_INFO_SIGNAL_AVG, + sinfo->signal_avg); if (sinfo->filled & STATION_INFO_TX_BITRATE) { txrate = nla_nest_start(msg, NL80211_STA_INFO_TX_BITRATE); if (!txrate) -- cgit v1.2.3-59-g8ed1b From 20a95a2169d1cd3da50cf65ba882d0e27a4a2d4f Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Sat, 20 Nov 2010 18:07:21 +0000 Subject: netns: let net_generic take pointer-to-const args This commit is same in nature as v2.6.37-rc1-755-g3654654; the network namespace itself 
is not modified when calling net_generic, so the parameter can be const. Signed-off-by: Jan Engelhardt Signed-off-by: David S. Miller --- include/net/netns/generic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/netns/generic.h b/include/net/netns/generic.h index 81a31c0db3e7..3419bf5cd154 100644 --- a/include/net/netns/generic.h +++ b/include/net/netns/generic.h @@ -30,7 +30,7 @@ struct net_generic { void *ptr[0]; }; -static inline void *net_generic(struct net *net, int id) +static inline void *net_generic(const struct net *net, int id) { struct net_generic *ng; void *ptr; -- cgit v1.2.3-59-g8ed1b From b2e253cf300c5e33f49b7dd8b593bfc722177401 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Wed, 17 Nov 2010 21:46:09 -0800 Subject: cfg80211: Fix regulatory bug with multiple cards and delays When two cards are connected with the same regulatory domain if CRDA had a delayed response then cfg80211's own set regulatory domain would still be the world regulatory domain. There was a bug on cfg80211's logic such that it assumed that once you pegged a request as the last request it was already the currently set regulatory domain. This would mean we would race setting a stale regulatory domain to secondary cards which had the same regulatory domain since the alpha2 would match. We fix this by processing each regulatory request atomically, and only move on to the next one once we get it fully processed. In the case CRDA is not present we will simply world roam. This issue is only present when you have a slow system and the CRDA processing is delayed. Because of this it is not a known regression. Without this fix when a delay is present with CRDA the second card would end up with an intersected regulatory domain and not allow it to use the channels it really is designed for. When two cards with two different regulatory domains were inserted you'd end up rejecting the second card's regulatory domain request. 
This fails with mac80211_hwsim's regtest=2 (two requests, same alpha2) and regtest=3 (two requests, different alpha2) module parameter options. This was reproduced and tested against mac80211_hwsim using this CRDA delayer: #!/bin/bash echo $COUNTRY >> /tmp/log sleep 2 /sbin/crda.orig And these regulatory tests: modprobe mac80211_hwsim regtest=2 modprobe mac80211_hwsim regtest=3 Reported-by: Mark Mentovai Signed-off-by: Luis R. Rodriguez Tested-by: Mark Mentovai Tested-by: Bruno Randolf Signed-off-by: John W. Linville --- include/net/regulatory.h | 7 +++++++ net/wireless/reg.c | 52 +++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 50 insertions(+), 9 deletions(-) (limited to 'include/net') diff --git a/include/net/regulatory.h b/include/net/regulatory.h index 9e103a4e91ee..356d6e3dc20a 100644 --- a/include/net/regulatory.h +++ b/include/net/regulatory.h @@ -43,6 +43,12 @@ enum environment_cap { * @intersect: indicates whether the wireless core should intersect * the requested regulatory domain with the presently set regulatory * domain. + * @processed: indicates whether or not this request has already been + * processed. When the last request is processed it means that the + * current regulatory domain set on cfg80211 is updated from + * CRDA and can be used by other regulatory requests. When + * the last request is not yet processed we must yield until it + * is processed before processing any new requests.
* @country_ie_checksum: checksum of the last processed and accepted * country IE * @country_ie_env: lets us know if the AP is telling us we are outdoor, @@ -54,6 +60,7 @@ struct regulatory_request { enum nl80211_reg_initiator initiator; char alpha2[2]; bool intersect; + bool processed; enum environment_cap country_ie_env; struct list_head list; }; diff --git a/net/wireless/reg.c b/net/wireless/reg.c index b522c46c4748..bc14caab19cd 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1320,6 +1320,21 @@ static int ignore_request(struct wiphy *wiphy, return -EINVAL; } +static void reg_set_request_processed(void) +{ + bool need_more_processing = false; + + last_request->processed = true; + + spin_lock(&reg_requests_lock); + if (!list_empty(&reg_requests_list)) + need_more_processing = true; + spin_unlock(&reg_requests_lock); + + if (need_more_processing) + schedule_work(&reg_work); +} + /** * __regulatory_hint - hint to the wireless core a regulatory domain * @wiphy: if the hint comes from country information from an AP, this @@ -1395,8 +1410,10 @@ new_request: * have applied the requested regulatory domain before we just * inform userspace we have processed the request */ - if (r == -EALREADY) + if (r == -EALREADY) { nl80211_send_reg_change_event(last_request); + reg_set_request_processed(); + } return r; } @@ -1428,7 +1445,11 @@ static void reg_process_hint(struct regulatory_request *reg_request) wiphy_update_regulatory(wiphy, initiator); } -/* Processes regulatory hints, this is all the NL80211_REGDOM_SET_BY_* */ +/* + * Processes regulatory hints, this is all the NL80211_REGDOM_SET_BY_* + * Regulatory hints come on a first come first serve basis and we + * must process each one atomically.
+ */ static void reg_process_pending_hints(void) { struct regulatory_request *reg_request; @@ -1436,19 +1457,30 @@ static void reg_process_pending_hints(void) mutex_lock(&cfg80211_mutex); mutex_lock(&reg_mutex); + /* When last_request->processed becomes true this will be rescheduled */ + if (last_request && !last_request->processed) { + REG_DBG_PRINT("Pending regulatory request, waiting " + "for it to be processed..."); + goto out; + } + spin_lock(&reg_requests_lock); - while (!list_empty(&reg_requests_list)) { - reg_request = list_first_entry(&reg_requests_list, - struct regulatory_request, - list); - list_del_init(&reg_request->list); + if (list_empty(&reg_requests_list)) { spin_unlock(&reg_requests_lock); - reg_process_hint(reg_request); - spin_lock(&reg_requests_lock); + goto out; } + + reg_request = list_first_entry(&reg_requests_list, + struct regulatory_request, + list); + list_del_init(&reg_request->list); + spin_unlock(&reg_requests_lock); + reg_process_hint(reg_request); + +out: mutex_unlock(&reg_mutex); mutex_unlock(&cfg80211_mutex); } @@ -2057,6 +2089,8 @@ int set_regdom(const struct ieee80211_regdomain *rd) nl80211_send_reg_change_event(last_request); + reg_set_request_processed(); + mutex_unlock(&reg_mutex); return r; -- cgit v1.2.3-59-g8ed1b From 456b61bca8ee324ab6c18b065e632c9a8c88aa39 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 23 Nov 2010 13:12:15 +0000 Subject: ipv6: mcast: RCU conversion ipv6_sk_mc_lock rwlock becomes a spinlock. readers (inet6_mc_check()) now take rcu_read_lock() instead of read lock. Writers don't need to disable BH anymore. struct ipv6_mc_socklist objects are reclaimed after one RCU grace period. Signed-off-by: Eric Dumazet Signed-off-by: David S.
Miller --- include/linux/ipv6.h | 2 +- include/net/if_inet6.h | 3 +- net/ipv6/mcast.c | 75 +++++++++++++++++++++++++++++--------------------- 3 files changed, 47 insertions(+), 33 deletions(-) (limited to 'include/net') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 8e429d0e0405..0c997767429a 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -364,7 +364,7 @@ struct ipv6_pinfo { __u32 dst_cookie; - struct ipv6_mc_socklist *ipv6_mc_list; + struct ipv6_mc_socklist __rcu *ipv6_mc_list; struct ipv6_ac_socklist *ipv6_ac_list; struct ipv6_fl_socklist *ipv6_fl_list; diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index f95ff8d9aa47..04977eefb0ee 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -89,10 +89,11 @@ struct ip6_sf_socklist { struct ipv6_mc_socklist { struct in6_addr addr; int ifindex; - struct ipv6_mc_socklist *next; + struct ipv6_mc_socklist __rcu *next; rwlock_t sflock; unsigned int sfmode; /* MCAST_{INCLUDE,EXCLUDE} */ struct ip6_sf_socklist *sflist; + struct rcu_head rcu; }; struct ip6_sf_list { diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 9c5074528a71..49f986d626a0 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -82,7 +82,7 @@ static void *__mld2_query_bugs[] __attribute__((__unused__)) = { static struct in6_addr mld2_all_mcr = MLD2_ALL_MCR_INIT; /* Big mc list lock for all the sockets */ -static DEFINE_RWLOCK(ipv6_sk_mc_lock); +static DEFINE_SPINLOCK(ipv6_sk_mc_lock); static void igmp6_join_group(struct ifmcaddr6 *ma); static void igmp6_leave_group(struct ifmcaddr6 *ma); @@ -123,6 +123,11 @@ int sysctl_mld_max_msf __read_mostly = IPV6_MLD_MAX_MSF; * socket join on multicast group */ +#define for_each_pmc_rcu(np, pmc) \ + for (pmc = rcu_dereference(np->ipv6_mc_list); \ + pmc != NULL; \ + pmc = rcu_dereference(pmc->next)) + int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr) { struct net_device *dev = NULL; @@ -134,15 +139,15 @@ int 
ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr) if (!ipv6_addr_is_multicast(addr)) return -EINVAL; - read_lock_bh(&ipv6_sk_mc_lock); - for (mc_lst=np->ipv6_mc_list; mc_lst; mc_lst=mc_lst->next) { + rcu_read_lock(); + for_each_pmc_rcu(np, mc_lst) { if ((ifindex == 0 || mc_lst->ifindex == ifindex) && ipv6_addr_equal(&mc_lst->addr, addr)) { - read_unlock_bh(&ipv6_sk_mc_lock); + rcu_read_unlock(); return -EADDRINUSE; } } - read_unlock_bh(&ipv6_sk_mc_lock); + rcu_read_unlock(); mc_lst = sock_kmalloc(sk, sizeof(struct ipv6_mc_socklist), GFP_KERNEL); @@ -186,33 +191,41 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr) return err; } - write_lock_bh(&ipv6_sk_mc_lock); + spin_lock(&ipv6_sk_mc_lock); mc_lst->next = np->ipv6_mc_list; - np->ipv6_mc_list = mc_lst; - write_unlock_bh(&ipv6_sk_mc_lock); + rcu_assign_pointer(np->ipv6_mc_list, mc_lst); + spin_unlock(&ipv6_sk_mc_lock); rcu_read_unlock(); return 0; } +static void ipv6_mc_socklist_reclaim(struct rcu_head *head) +{ + kfree(container_of(head, struct ipv6_mc_socklist, rcu)); +} /* * socket leave on multicast group */ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) { struct ipv6_pinfo *np = inet6_sk(sk); - struct ipv6_mc_socklist *mc_lst, **lnk; + struct ipv6_mc_socklist *mc_lst; + struct ipv6_mc_socklist __rcu **lnk; struct net *net = sock_net(sk); - write_lock_bh(&ipv6_sk_mc_lock); - for (lnk = &np->ipv6_mc_list; (mc_lst = *lnk) !=NULL ; lnk = &mc_lst->next) { + spin_lock(&ipv6_sk_mc_lock); + for (lnk = &np->ipv6_mc_list; + (mc_lst = rcu_dereference_protected(*lnk, + lockdep_is_held(&ipv6_sk_mc_lock))) !=NULL ; + lnk = &mc_lst->next) { if ((ifindex == 0 || mc_lst->ifindex == ifindex) && ipv6_addr_equal(&mc_lst->addr, addr)) { struct net_device *dev; *lnk = mc_lst->next; - write_unlock_bh(&ipv6_sk_mc_lock); + spin_unlock(&ipv6_sk_mc_lock); rcu_read_lock(); dev = dev_get_by_index_rcu(net, mc_lst->ifindex); @@ -225,11 +238,12 @@ 
int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) } else (void) ip6_mc_leave_src(sk, mc_lst, NULL); rcu_read_unlock(); - sock_kfree_s(sk, mc_lst, sizeof(*mc_lst)); + atomic_sub(sizeof(*mc_lst), &sk->sk_omem_alloc); + call_rcu(&mc_lst->rcu, ipv6_mc_socklist_reclaim); return 0; } } - write_unlock_bh(&ipv6_sk_mc_lock); + spin_unlock(&ipv6_sk_mc_lock); return -EADDRNOTAVAIL; } @@ -272,12 +286,13 @@ void ipv6_sock_mc_close(struct sock *sk) struct ipv6_mc_socklist *mc_lst; struct net *net = sock_net(sk); - write_lock_bh(&ipv6_sk_mc_lock); - while ((mc_lst = np->ipv6_mc_list) != NULL) { + spin_lock(&ipv6_sk_mc_lock); + while ((mc_lst = rcu_dereference_protected(np->ipv6_mc_list, + lockdep_is_held(&ipv6_sk_mc_lock))) != NULL) { struct net_device *dev; np->ipv6_mc_list = mc_lst->next; - write_unlock_bh(&ipv6_sk_mc_lock); + spin_unlock(&ipv6_sk_mc_lock); rcu_read_lock(); dev = dev_get_by_index_rcu(net, mc_lst->ifindex); @@ -290,11 +305,13 @@ void ipv6_sock_mc_close(struct sock *sk) } else (void) ip6_mc_leave_src(sk, mc_lst, NULL); rcu_read_unlock(); - sock_kfree_s(sk, mc_lst, sizeof(*mc_lst)); - write_lock_bh(&ipv6_sk_mc_lock); + atomic_sub(sizeof(*mc_lst), &sk->sk_omem_alloc); + call_rcu(&mc_lst->rcu, ipv6_mc_socklist_reclaim); + + spin_lock(&ipv6_sk_mc_lock); } - write_unlock_bh(&ipv6_sk_mc_lock); + spin_unlock(&ipv6_sk_mc_lock); } int ip6_mc_source(int add, int omode, struct sock *sk, @@ -328,8 +345,7 @@ int ip6_mc_source(int add, int omode, struct sock *sk, err = -EADDRNOTAVAIL; - read_lock(&ipv6_sk_mc_lock); - for (pmc=inet6->ipv6_mc_list; pmc; pmc=pmc->next) { + for_each_pmc_rcu(inet6, pmc) { if (pgsr->gsr_interface && pmc->ifindex != pgsr->gsr_interface) continue; if (ipv6_addr_equal(&pmc->addr, group)) @@ -428,7 +444,6 @@ int ip6_mc_source(int add, int omode, struct sock *sk, done: if (pmclocked) write_unlock(&pmc->sflock); - read_unlock(&ipv6_sk_mc_lock); read_unlock_bh(&idev->lock); rcu_read_unlock(); if (leavegroup) @@ -466,14 
+481,13 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf) dev = idev->dev; err = 0; - read_lock(&ipv6_sk_mc_lock); if (gsf->gf_fmode == MCAST_INCLUDE && gsf->gf_numsrc == 0) { leavegroup = 1; goto done; } - for (pmc=inet6->ipv6_mc_list; pmc; pmc=pmc->next) { + for_each_pmc_rcu(inet6, pmc) { if (pmc->ifindex != gsf->gf_interface) continue; if (ipv6_addr_equal(&pmc->addr, group)) @@ -521,7 +535,6 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf) write_unlock(&pmc->sflock); err = 0; done: - read_unlock(&ipv6_sk_mc_lock); read_unlock_bh(&idev->lock); rcu_read_unlock(); if (leavegroup) @@ -562,7 +575,7 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf, * so reading the list is safe. */ - for (pmc=inet6->ipv6_mc_list; pmc; pmc=pmc->next) { + for_each_pmc_rcu(inet6, pmc) { if (pmc->ifindex != gsf->gf_interface) continue; if (ipv6_addr_equal(group, &pmc->addr)) @@ -612,13 +625,13 @@ int inet6_mc_check(struct sock *sk, const struct in6_addr *mc_addr, struct ip6_sf_socklist *psl; int rv = 1; - read_lock(&ipv6_sk_mc_lock); - for (mc = np->ipv6_mc_list; mc; mc = mc->next) { + rcu_read_lock(); + for_each_pmc_rcu(np, mc) { if (ipv6_addr_equal(&mc->addr, mc_addr)) break; } if (!mc) { - read_unlock(&ipv6_sk_mc_lock); + rcu_read_unlock(); return 1; } read_lock(&mc->sflock); @@ -638,7 +651,7 @@ int inet6_mc_check(struct sock *sk, const struct in6_addr *mc_addr, rv = 0; } read_unlock(&mc->sflock); - read_unlock(&ipv6_sk_mc_lock); + rcu_read_unlock(); return rv; } -- cgit v1.2.3-59-g8ed1b From bba14de98753cb6599a2dae0e520714b2153522d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 23 Nov 2010 14:09:15 +0000 Subject: scm: lower SCM_MAX_FD Lower SCM_MAX_FD from 255 to 253 so that allocations for scm_fp_list are halved. (commit f8d570a4 added two pointers in this structure) scm_fp_dup() should not copy whole structure (and trigger kmemcheck warnings), but only the used part. While we are at it, only allocate needed size. 
Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/scm.h | 5 +++-- net/core/scm.c | 10 ++++++---- 2 files changed, 9 insertions(+), 6 deletions(-) (limited to 'include/net') diff --git a/include/net/scm.h b/include/net/scm.h index 31656506d967..745460fa2f02 100644 --- a/include/net/scm.h +++ b/include/net/scm.h @@ -10,11 +10,12 @@ /* Well, we should have at least one descriptor open * to accept passed FDs 8) */ -#define SCM_MAX_FD 255 +#define SCM_MAX_FD 253 struct scm_fp_list { struct list_head list; - int count; + short count; + short max; struct file *fp[SCM_MAX_FD]; }; diff --git a/net/core/scm.c b/net/core/scm.c index 413cab89017d..bbe454450801 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -79,10 +79,11 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp) return -ENOMEM; *fplp = fpl; fpl->count = 0; + fpl->max = SCM_MAX_FD; } fpp = &fpl->fp[fpl->count]; - if (fpl->count + num > SCM_MAX_FD) + if (fpl->count + num > fpl->max) return -EINVAL; /* @@ -331,11 +332,12 @@ struct scm_fp_list *scm_fp_dup(struct scm_fp_list *fpl) if (!fpl) return NULL; - new_fpl = kmalloc(sizeof(*fpl), GFP_KERNEL); + new_fpl = kmemdup(fpl, offsetof(struct scm_fp_list, fp[fpl->count]), + GFP_KERNEL); if (new_fpl) { - for (i=fpl->count-1; i>=0; i--) + for (i = 0; i < fpl->count; i++) get_file(fpl->fp[i]); - memcpy(new_fpl, fpl, sizeof(*fpl)); + new_fpl->max = new_fpl->count; } return new_fpl; } -- cgit v1.2.3-59-g8ed1b From ccb14354017272ddac002e859a2711610b6af174 Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Wed, 24 Nov 2010 16:18:36 -0500 Subject: Revert "nl80211/mac80211: Report signal average" This reverts commit 86107fd170bc379869250eb7e1bd393a3a70e8ae. This patch inadvertently changed the userland ABI. Signed-off-by: John W.
Linville --- include/linux/nl80211.h | 2 -- include/net/cfg80211.h | 4 ---- net/mac80211/Kconfig | 1 - net/mac80211/cfg.c | 3 +-- net/mac80211/rx.c | 1 - net/mac80211/sta_info.c | 2 -- net/mac80211/sta_info.h | 3 --- net/wireless/nl80211.c | 3 --- 8 files changed, 1 insertion(+), 18 deletions(-) (limited to 'include/net') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 1ce3775e9e26..037b4e498890 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -1161,7 +1161,6 @@ enum nl80211_rate_info { * @__NL80211_STA_INFO_AFTER_LAST: internal * @NL80211_STA_INFO_MAX: highest possible station info attribute * @NL80211_STA_INFO_SIGNAL: signal strength of last received PPDU (u8, dBm) - * @NL80211_STA_INFO_SIGNAL_AVG: signal strength average (u8, dBm) * @NL80211_STA_INFO_TX_BITRATE: current unicast tx rate, nested attribute * containing info as possible, see &enum nl80211_sta_info_txrate. * @NL80211_STA_INFO_RX_PACKETS: total received packet (u32, from this station) @@ -1179,7 +1178,6 @@ enum nl80211_sta_info { NL80211_STA_INFO_PLID, NL80211_STA_INFO_PLINK_STATE, NL80211_STA_INFO_SIGNAL, - NL80211_STA_INFO_SIGNAL_AVG, NL80211_STA_INFO_TX_BITRATE, NL80211_STA_INFO_RX_PACKETS, NL80211_STA_INFO_TX_PACKETS, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 69e2364889f1..8fd9eebd0cc9 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -424,7 +424,6 @@ struct station_parameters { * @STATION_INFO_TX_RETRIES: @tx_retries filled * @STATION_INFO_TX_FAILED: @tx_failed filled * @STATION_INFO_RX_DROP_MISC: @rx_dropped_misc filled - * @STATION_INFO_SIGNAL_AVG: @signal_avg filled */ enum station_info_flags { STATION_INFO_INACTIVE_TIME = 1<<0, @@ -440,7 +439,6 @@ enum station_info_flags { STATION_INFO_TX_RETRIES = 1<<10, STATION_INFO_TX_FAILED = 1<<11, STATION_INFO_RX_DROP_MISC = 1<<12, - STATION_INFO_SIGNAL_AVG = 1<<13, }; /** @@ -487,7 +485,6 @@ struct rate_info { * @plid: mesh peer link id * @plink_state: mesh peer link 
state * @signal: signal strength of last received packet in dBm - * @signal_avg: signal strength average in dBm * @txrate: current unicast bitrate to this station * @rx_packets: packets received from this station * @tx_packets: packets transmitted to this station @@ -508,7 +505,6 @@ struct station_info { u16 plid; u8 plink_state; s8 signal; - s8 signal_avg; struct rate_info txrate; u32 rx_packets; u32 tx_packets; diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig index 798d9b9462e2..4d6f8653ec88 100644 --- a/net/mac80211/Kconfig +++ b/net/mac80211/Kconfig @@ -6,7 +6,6 @@ config MAC80211 select CRYPTO_ARC4 select CRYPTO_AES select CRC32 - select AVERAGE ---help--- This option enables the hardware independent IEEE 802.11 networking stack. diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 92c9cf6a7d1c..0c544074479e 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -343,9 +343,8 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) if ((sta->local->hw.flags & IEEE80211_HW_SIGNAL_DBM) || (sta->local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC)) { - sinfo->filled |= STATION_INFO_SIGNAL | STATION_INFO_SIGNAL_AVG; + sinfo->filled |= STATION_INFO_SIGNAL; sinfo->signal = (s8)sta->last_signal; - sinfo->signal_avg = (s8) -ewma_read(&sta->avg_signal); } sinfo->txrate.flags = 0; diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 9dd60a74181f..d2fcd22ab06d 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1156,7 +1156,6 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) sta->rx_fragments++; sta->rx_bytes += rx->skb->len; sta->last_signal = status->signal; - ewma_add(&sta->avg_signal, -status->signal); /* * Change STA power saving mode only at the end of a frame diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index f43fca8907f7..eff58571fd7e 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -244,8 +244,6 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, 
sta->local = local; sta->sdata = sdata; - ewma_init(&sta->avg_signal, 1000, 8); - if (sta_prepare_rate_control(local, sta, gfp)) { kfree(sta); return NULL; diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 84062e2c782c..9265acadef32 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -13,7 +13,6 @@ #include #include #include -#include #include "key.h" /** @@ -225,7 +224,6 @@ enum plink_state { * @rx_fragments: number of received MPDUs * @rx_dropped: number of dropped MPDUs from this STA * @last_signal: signal of last received frame from this STA - * @avg_signal: moving average of signal of received frames from this STA * @last_seq_ctrl: last received seq/frag number from this STA (per RX queue) * @tx_filtered_count: number of frames the hardware filtered for this STA * @tx_retry_failed: number of frames that failed retry @@ -293,7 +291,6 @@ struct sta_info { unsigned long rx_fragments; unsigned long rx_dropped; int last_signal; - struct ewma avg_signal; __le16 last_seq_ctrl[NUM_RX_DATA_QUEUES]; /* Updated from TX status path only, no locking requirements */ diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index d06a40d17002..605553842226 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1872,9 +1872,6 @@ static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq, if (sinfo->filled & STATION_INFO_SIGNAL) NLA_PUT_U8(msg, NL80211_STA_INFO_SIGNAL, sinfo->signal); - if (sinfo->filled & STATION_INFO_SIGNAL_AVG) - NLA_PUT_U8(msg, NL80211_STA_INFO_SIGNAL_AVG, - sinfo->signal_avg); if (sinfo->filled & STATION_INFO_TX_BITRATE) { txrate = nla_nest_start(msg, NL80211_STA_INFO_TX_BITRATE); if (!txrate) -- cgit v1.2.3-59-g8ed1b From dd5b4cc71cd09c33e1579cc6d5720656e94e52de Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 22 Nov 2010 20:58:24 +0100 Subject: cfg80211/mac80211: improve ad-hoc multicast rate handling - store the multicast rate as an index instead of the rate value (reduces cpu 
overhead in a hotpath) - validate the rate values (must match a bitrate in at least one sband) Signed-off-by: Felix Fietkau Signed-off-by: John W. Linville --- include/net/cfg80211.h | 4 ++-- include/net/mac80211.h | 4 ++-- net/mac80211/ibss.c | 3 ++- net/mac80211/rate.c | 25 ++++++++++++------------- net/wireless/nl80211.c | 36 +++++++++++++++++++++++++++++++++--- 5 files changed, 51 insertions(+), 21 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 91f099556ac1..dd4c43f512e2 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -923,7 +923,7 @@ struct cfg80211_disassoc_request { * @privacy: this is a protected network, keys will be configured * after joining * @basic_rates: bitmap of basic rates to use when creating the IBSS - * @mcast_rate: multicast tx rate (in 100 kbps) + * @mcast_rate: per-band multicast rate index + 1 (0: disabled) */ struct cfg80211_ibss_params { u8 *ssid; @@ -935,7 +935,7 @@ struct cfg80211_ibss_params { u32 basic_rates; bool channel_fixed; bool privacy; - int mcast_rate; + int mcast_rate[IEEE80211_NUM_BANDS]; }; /** diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 5b0fff2178bb..08e97e5d03fd 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -219,7 +219,7 @@ enum ieee80211_bss_change { * @basic_rates: bitmap of basic rates, each bit stands for an * index into the rate table configured by the driver in * the current band. 
- * @mcast_rate: multicast rate for AP and Ad-Hoc (in 100 kbps) + * @mcast_rate: per-band multicast rate index + 1 (0: disabled) * @bssid: The BSSID for this BSS * @enable_beacon: whether beaconing should be enabled or not * @channel_type: Channel type for this BSS -- the hardware might be @@ -259,7 +259,7 @@ struct ieee80211_bss_conf { u16 assoc_capability; u64 timestamp; u32 basic_rates; - u32 mcast_rate; + int mcast_rate[IEEE80211_NUM_BANDS]; u16 ht_operation_mode; s32 cqm_rssi_thold; u32 cqm_rssi_hyst; diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 6fe6837dc134..410d104b1347 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -915,7 +915,8 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, sdata->u.ibss.privacy = params->privacy; sdata->u.ibss.basic_rates = params->basic_rates; - sdata->vif.bss_conf.mcast_rate = params->mcast_rate; + memcpy(sdata->vif.bss_conf.mcast_rate, params->mcast_rate, + sizeof(params->mcast_rate)); sdata->vif.bss_conf.beacon_int = params->beacon_interval; diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index 76de4f8d9327..3d5a2cb835c4 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -211,20 +211,11 @@ static bool rc_no_data_or_no_ack(struct ieee80211_tx_rate_control *txrc) return (info->flags & IEEE80211_TX_CTL_NO_ACK) || !ieee80211_is_data(fc); } -static void rc_send_low_broadcast(s8 *idx, u32 basic_rates, u32 mcast_rate, +static void rc_send_low_broadcast(s8 *idx, u32 basic_rates, struct ieee80211_supported_band *sband) { u8 i; - if (mcast_rate) { - for (i = 0; i < sband->n_bitrates; i++) { - if (sband->bitrates[i].bitrate == mcast_rate) { - *idx = i; - return; - } - } - } - if (basic_rates == 0) return; /* assume basic rates unknown and accept rate */ if (*idx < 0) @@ -247,17 +238,25 @@ bool rate_control_send_low(struct ieee80211_sta *sta, struct ieee80211_tx_rate_control *txrc) { struct ieee80211_tx_info *info = IEEE80211_SKB_CB(txrc->skb); + struct ieee80211_supported_band 
*sband = txrc->sband; + int mcast_rate; if (!sta || !priv_sta || rc_no_data_or_no_ack(txrc)) { info->control.rates[0].idx = rate_lowest_index(txrc->sband, sta); info->control.rates[0].count = (info->flags & IEEE80211_TX_CTL_NO_ACK) ? 1 : txrc->hw->max_rate_tries; - if (!sta && txrc->bss) + if (!sta && txrc->bss) { + mcast_rate = txrc->bss_conf->mcast_rate[sband->band]; + if (mcast_rate > 0) { + info->control.rates[0].idx = mcast_rate - 1; + return true; + } + rc_send_low_broadcast(&info->control.rates[0].idx, txrc->bss_conf->basic_rates, - txrc->bss_conf->mcast_rate, - txrc->sband); + sband); + } return true; } return false; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index b15eb77195d8..8734efa663d1 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3600,6 +3600,34 @@ static int nl80211_disassociate(struct sk_buff *skb, struct genl_info *info) local_state_change); } +static bool +nl80211_parse_mcast_rate(struct cfg80211_registered_device *rdev, + int mcast_rate[IEEE80211_NUM_BANDS], + int rateval) +{ + struct wiphy *wiphy = &rdev->wiphy; + bool found = false; + int band, i; + + for (band = 0; band < IEEE80211_NUM_BANDS; band++) { + struct ieee80211_supported_band *sband; + + sband = wiphy->bands[band]; + if (!sband) + continue; + + for (i = 0; i < sband->n_bitrates; i++) { + if (sband->bitrates[i].bitrate == rateval) { + mcast_rate[band] = i + 1; + found = true; + break; + } + } + } + + return found; +} + static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; @@ -3683,9 +3711,11 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info) return -EINVAL; } } - if (info->attrs[NL80211_ATTR_MCAST_RATE]) - ibss.mcast_rate = - nla_get_u32(info->attrs[NL80211_ATTR_MCAST_RATE]); + + if (info->attrs[NL80211_ATTR_MCAST_RATE] && + !nl80211_parse_mcast_rate(rdev, ibss.mcast_rate, + nla_get_u32(info->attrs[NL80211_ATTR_MCAST_RATE]))) + return 
-EINVAL; if (ibss.privacy && info->attrs[NL80211_ATTR_KEYS]) { connkeys = nl80211_parse_connkeys(rdev, -- cgit v1.2.3-59-g8ed1b From 79b1c460a0b55e55981c25c56597c4d5d2872de3 Mon Sep 17 00:00:00 2001 From: Bruno Randolf Date: Wed, 24 Nov 2010 14:34:41 +0900 Subject: cfg80211: Add documentation for antenna ops Signed-off-by: Bruno Randolf Signed-off-by: John W. Linville --- include/net/mac80211.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 08e97e5d03fd..eaa4affd40cd 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1745,6 +1745,13 @@ enum ieee80211_ampdu_mlme_action { * completion of the channel switch. * * @napi_poll: Poll Rx queue for incoming data frames. + * + * @set_antenna: Set antenna configuration (tx_ant, rx_ant) on the device. + * Parameters are bitmaps of allowed antennas to use for TX/RX. Drivers may + * reject TX/RX mask combinations they cannot support by returning -EINVAL + * (also see nl80211.h @NL80211_ATTR_WIPHY_ANTENNA_TX). + * + * @get_antenna: Get current antenna configuration from device (tx_ant, rx_ant). */ struct ieee80211_ops { int (*tx)(struct ieee80211_hw *hw, struct sk_buff *skb); -- cgit v1.2.3-59-g8ed1b From c063dbf52b998b852122dff07a8b8dd430b38437 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 24 Nov 2010 08:10:05 +0100 Subject: cfg80211: allow using CQM event to notify packet loss This adds the ability for drivers to use CQM events to notify about packet loss for specific stations (which could be the AP for the managed mode case). Since the threshold might be determined by the driver (it isn't passed in right now) it will be passed out of the driver to userspace in the event. Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- include/linux/nl80211.h | 3 +++ include/net/cfg80211.h | 12 ++++++++++++ net/wireless/mlme.c | 12 ++++++++++++ net/wireless/nl80211.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ net/wireless/nl80211.h | 4 ++++ 5 files changed, 76 insertions(+) (limited to 'include/net') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 037b4e498890..d706bf3badc8 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -1819,6 +1819,8 @@ enum nl80211_ps_state { * the minimum amount the RSSI level must change after an event before a * new event may be issued (to reduce effects of RSSI oscillation). * @NL80211_ATTR_CQM_RSSI_THRESHOLD_EVENT: RSSI threshold event + * @NL80211_ATTR_CQM_PKT_LOSS_EVENT: a u32 value indicating that this many + * consecutive packets were not acknowledged by the peer * @__NL80211_ATTR_CQM_AFTER_LAST: internal * @NL80211_ATTR_CQM_MAX: highest key attribute */ @@ -1827,6 +1829,7 @@ enum nl80211_attr_cqm { NL80211_ATTR_CQM_RSSI_THOLD, NL80211_ATTR_CQM_RSSI_HYST, NL80211_ATTR_CQM_RSSI_THRESHOLD_EVENT, + NL80211_ATTR_CQM_PKT_LOSS_EVENT, /* keep last */ __NL80211_ATTR_CQM_AFTER_LAST, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index dd4c43f512e2..0663945cfa48 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2601,6 +2601,18 @@ void cfg80211_cqm_rssi_notify(struct net_device *dev, enum nl80211_cqm_rssi_threshold_event rssi_event, gfp_t gfp); +/** + * cfg80211_cqm_pktloss_notify - notify userspace about packetloss to peer + * @dev: network device + * @peer: peer's MAC address + * @num_packets: how many packets were lost -- should be a fixed threshold + * but probably no less than maybe 50, or maybe a throughput dependent + * threshold (to account for temporary interference) + * @gfp: context flags + */ +void cfg80211_cqm_pktloss_notify(struct net_device *dev, + const u8 *peer, u32 num_packets, gfp_t gfp); + /* Logging, debugging and troubleshooting/diagnostic helpers. 
*/ /* wiphy_printk helpers, similar to dev_printk */ diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 26838d903b9a..6980a0c315b2 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -1028,3 +1028,15 @@ void cfg80211_cqm_rssi_notify(struct net_device *dev, nl80211_send_cqm_rssi_notify(rdev, dev, rssi_event, gfp); } EXPORT_SYMBOL(cfg80211_cqm_rssi_notify); + +void cfg80211_cqm_pktloss_notify(struct net_device *dev, + const u8 *peer, u32 num_packets, gfp_t gfp) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct wiphy *wiphy = wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + + /* Indicate roaming trigger event to user space */ + nl80211_send_cqm_pktloss_notify(rdev, dev, peer, num_packets, gfp); +} +EXPORT_SYMBOL(cfg80211_cqm_pktloss_notify); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 8734efa663d1..67ff7e92cb99 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -5715,6 +5715,51 @@ nl80211_send_cqm_rssi_notify(struct cfg80211_registered_device *rdev, nlmsg_free(msg); } +void +nl80211_send_cqm_pktloss_notify(struct cfg80211_registered_device *rdev, + struct net_device *netdev, const u8 *peer, + u32 num_packets, gfp_t gfp) +{ + struct sk_buff *msg; + struct nlattr *pinfoattr; + void *hdr; + + msg = nlmsg_new(NLMSG_GOODSIZE, gfp); + if (!msg) + return; + + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_NOTIFY_CQM); + if (!hdr) { + nlmsg_free(msg); + return; + } + + NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx); + NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex); + NLA_PUT(msg, NL80211_ATTR_MAC, ETH_ALEN, peer); + + pinfoattr = nla_nest_start(msg, NL80211_ATTR_CQM); + if (!pinfoattr) + goto nla_put_failure; + + NLA_PUT_U32(msg, NL80211_ATTR_CQM_PKT_LOSS_EVENT, num_packets); + + nla_nest_end(msg, pinfoattr); + + if (genlmsg_end(msg, hdr) < 0) { + nlmsg_free(msg); + return; + } + + genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, + 
nl80211_mlme_mcgrp.id, gfp); + return; + + nla_put_failure: + genlmsg_cancel(msg, hdr); + nlmsg_free(msg); +} + static int nl80211_netlink_notify(struct notifier_block * nb, unsigned long state, void *_notify) diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index 30d2f939150d..16c2f7190768 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -87,5 +87,9 @@ nl80211_send_cqm_rssi_notify(struct cfg80211_registered_device *rdev, struct net_device *netdev, enum nl80211_cqm_rssi_threshold_event rssi_event, gfp_t gfp); +void +nl80211_send_cqm_pktloss_notify(struct cfg80211_registered_device *rdev, + struct net_device *netdev, const u8 *peer, + u32 num_packets, gfp_t gfp); #endif /* __NET_WIRELESS_NL80211_H */ -- cgit v1.2.3-59-g8ed1b From cf7afbfeb8ceb0187348d0a1a0db61305e25f05f Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Mon, 22 Nov 2010 01:31:54 +0000 Subject: rtnl: make link af-specific updates atomic As David pointed out correctly, updates to af-specific attributes are currently not atomic. If multiple changes are requested and one of them fails, previous updates may have been applied already leaving the link behind in an undefined state. This patch splits the function parse_link_af() into two functions validate_link_af() and set_link_af(). validate_link_af() is called from validate_linkmsg() to check for errors as early as possible before any changes to the link have been made. set_link_af() is called to commit the changes later. This method is not fail-proof: while it is currently sufficient to make set_link_af() inerrable and thus 100% atomic, the validation function method will not be able to detect all error scenarios in the future, there will likely always be errors depending on states which are e.g. not protected by rtnl_mutex and thus may change between validation and setting. 
Also, instead of silently ignoring unknown address families and config blocks for address families which did not register a set function, the errors EAFNOSUPPORT and EOPNOTSUPP, respectively, are returned to avoid committing 4 out of 5 update requests without notifying the user. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/net/rtnetlink.h | 12 ++++++++---- net/core/rtnetlink.c | 29 ++++++++++++++++++++++++----- net/ipv4/devinet.c | 26 +++++++++++++++++++++----- net/ipv6/addrconf.c | 6 ------ 4 files changed, 53 insertions(+), 20 deletions(-) (limited to 'include/net') diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index 35be0bbcd7da..4093ca78cf60 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -92,8 +92,10 @@ extern void rtnl_link_unregister(struct rtnl_link_ops *ops); * specific netlink attributes. * @get_link_af_size: Function to calculate size of address family specific * netlink attributes exlusive the container attribute. - * @parse_link_af: Function to parse a IFLA_AF_SPEC attribute and modify - * net_device accordingly. + * @validate_link_af: Validate a IFLA_AF_SPEC attribute, must check attr + * for invalid configuration settings. + * @set_link_af: Function to parse a IFLA_AF_SPEC attribute and modify + * net_device accordingly. 
*/ struct rtnl_af_ops { struct list_head list; @@ -103,8 +105,10 @@ struct rtnl_af_ops { const struct net_device *dev); size_t (*get_link_af_size)(const struct net_device *dev); - int (*parse_link_af)(struct net_device *dev, - const struct nlattr *attr); + int (*validate_link_af)(const struct net_device *dev, + const struct nlattr *attr); + int (*set_link_af)(struct net_device *dev, + const struct nlattr *attr); }; extern int __rtnl_af_register(struct rtnl_af_ops *ops); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index bf69e5871b1a..750db57f3bb3 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1107,6 +1107,28 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[]) return -EINVAL; } + if (tb[IFLA_AF_SPEC]) { + struct nlattr *af; + int rem, err; + + nla_for_each_nested(af, tb[IFLA_AF_SPEC], rem) { + const struct rtnl_af_ops *af_ops; + + if (!(af_ops = rtnl_af_lookup(nla_type(af)))) + return -EAFNOSUPPORT; + + if (!af_ops->set_link_af) + return -EOPNOTSUPP; + + if (af_ops->validate_link_af) { + err = af_ops->validate_link_af(dev, + tb[IFLA_AF_SPEC]); + if (err < 0) + return err; + } + } + } + return 0; } @@ -1356,12 +1378,9 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, const struct rtnl_af_ops *af_ops; if (!(af_ops = rtnl_af_lookup(nla_type(af)))) - continue; - - if (!af_ops->parse_link_af) - continue; + BUG(); - err = af_ops->parse_link_af(dev, af); + err = af_ops->set_link_af(dev, af); if (err < 0) goto errout; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 71afc26c2df8..d9f71bae45c4 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1289,14 +1289,14 @@ static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = { [IFLA_INET_CONF] = { .type = NLA_NESTED }, }; -static int inet_parse_link_af(struct net_device *dev, const struct nlattr *nla) +static int inet_validate_link_af(const struct net_device *dev, + const struct nlattr *nla) { - struct in_device *in_dev = 
__in_dev_get_rcu(dev); struct nlattr *a, *tb[IFLA_INET_MAX+1]; int err, rem; - if (!in_dev) - return -EOPNOTSUPP; + if (dev && !__in_dev_get_rcu(dev)) + return -EAFNOSUPPORT; err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy); if (err < 0) @@ -1314,6 +1314,21 @@ static int inet_parse_link_af(struct net_device *dev, const struct nlattr *nla) } } + return 0; +} + +static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla) +{ + struct in_device *in_dev = __in_dev_get_rcu(dev); + struct nlattr *a, *tb[IFLA_INET_MAX+1]; + int rem; + + if (!in_dev) + return -EAFNOSUPPORT; + + if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0) + BUG(); + if (tb[IFLA_INET_CONF]) { nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a)); @@ -1689,7 +1704,8 @@ static struct rtnl_af_ops inet_af_ops = { .family = AF_INET, .fill_link_af = inet_fill_link_af, .get_link_af_size = inet_get_link_af_size, - .parse_link_af = inet_parse_link_af, + .validate_link_af = inet_validate_link_af, + .set_link_af = inet_set_link_af, }; void __init devinet_init(void) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 4cf760598c2a..1023ad0d2b15 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3956,11 +3956,6 @@ static int inet6_fill_link_af(struct sk_buff *skb, const struct net_device *dev) return 0; } -static int inet6_parse_link_af(struct net_device *dev, const struct nlattr *nla) -{ - return -EOPNOTSUPP; -} - static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev, u32 pid, u32 seq, int event, unsigned int flags) { @@ -4670,7 +4665,6 @@ static struct rtnl_af_ops inet6_ops = { .family = AF_INET6, .fill_link_af = inet6_fill_link_af, .get_link_af_size = inet6_get_link_af_size, - .parse_link_af = inet6_parse_link_af, }; /* -- cgit v1.2.3-59-g8ed1b From 5584b8078a60e34ec7d37c9b67a0f3d389a1a2f6 Mon Sep 17 00:00:00 2001 From: Shan Wei Date: Mon, 22 Nov 2010 23:00:42 +0000 Subject: sctp: kill 
unused macro definition These macros have existed for several years, since v2.6.12-rc2, but they have never been used. So remove them now. Signed-off-by: Shan Wei Signed-off-by: David S. Miller --- include/net/sctp/constants.h | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index 63908840eef0..c70d8ccc55cb 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -61,7 +61,6 @@ enum { SCTP_DEFAULT_INSTREAMS = SCTP_MAX_STREAM }; * symbols. CIDs are dense through SCTP_CID_BASE_MAX. */ #define SCTP_CID_BASE_MAX SCTP_CID_SHUTDOWN_COMPLETE -#define SCTP_CID_MAX SCTP_CID_ASCONF_ACK #define SCTP_NUM_BASE_CHUNK_TYPES (SCTP_CID_BASE_MAX + 1) @@ -86,9 +85,6 @@ typedef enum { } sctp_event_t; -#define SCTP_EVENT_T_MAX SCTP_EVENT_T_PRIMITIVE -#define SCTP_EVENT_T_NUM (SCTP_EVENT_T_MAX + 1) - /* As a convenience for the state machine, we append SCTP_EVENT_* and * SCTP_ULP_* to the list of possible chunks. */ @@ -162,9 +158,6 @@ SCTP_SUBTYPE_CONSTRUCTOR(PRIMITIVE, sctp_event_primitive_t, primitive) - (unsigned long)(c->chunk_hdr)\ - sizeof(sctp_data_chunk_t))) -#define SCTP_MAX_ERROR_CAUSE SCTP_ERROR_NONEXIST_IP -#define SCTP_NUM_ERROR_CAUSE 10 - /* Internal error codes */ typedef enum { @@ -266,7 +259,6 @@ enum { SCTP_ARBITRARY_COOKIE_ECHO_LEN = 200 }; #define SCTP_TSN_MAP_INITIAL BITS_PER_LONG #define SCTP_TSN_MAP_INCREMENT SCTP_TSN_MAP_INITIAL #define SCTP_TSN_MAP_SIZE 4096 -#define SCTP_TSN_MAX_GAP 65535 /* We will not record more than this many duplicate TSNs between two * SACKs. The minimum PMTU is 576. 
Remove all the headers and there @@ -301,9 +293,6 @@ enum { SCTP_MAX_GABS = 16 }; #define SCTP_CLOCK_GRANULARITY 1 /* 1 jiffy */ -#define SCTP_DEF_MAX_INIT 6 -#define SCTP_DEF_MAX_SEND 10 - #define SCTP_DEFAULT_COOKIE_LIFE (60 * 1000) /* 60 seconds */ #define SCTP_DEFAULT_MINWINDOW 1500 /* default minimum rwnd size */ @@ -317,9 +306,6 @@ enum { SCTP_MAX_GABS = 16 }; */ #define SCTP_DEFAULT_MINSEGMENT 512 /* MTU size ... if no mtu disc */ #define SCTP_HOW_MANY_SECRETS 2 /* How many secrets I keep */ -#define SCTP_HOW_LONG_COOKIE_LIVE 3600 /* How many seconds the current - * secret will live? - */ #define SCTP_SECRET_SIZE 32 /* Number of octets in a 256 bits. */ #define SCTP_SIGNATURE_SIZE 20 /* size of a SLA-1 signature */ -- cgit v1.2.3-59-g8ed1b From 5595a1a5997953dbd8c5df7c2f7d4b3a2eb2be4b Mon Sep 17 00:00:00 2001 From: andrew hendry Date: Thu, 25 Nov 2010 02:18:15 +0000 Subject: X25 remove bkl in subscription ioctls Signed-off-by: Andrew Hendry Signed-off-by: David S. Miller --- include/net/x25.h | 2 ++ net/x25/af_x25.c | 12 ++++-------- net/x25/x25_link.c | 8 ++++++-- 3 files changed, 12 insertions(+), 10 deletions(-) (limited to 'include/net') diff --git a/include/net/x25.h b/include/net/x25.h index 1479cb4a41fc..a06119a05129 100644 --- a/include/net/x25.h +++ b/include/net/x25.h @@ -315,6 +315,8 @@ extern struct list_head x25_route_list; extern rwlock_t x25_route_list_lock; extern struct list_head x25_forward_list; extern rwlock_t x25_forward_list_lock; +extern struct list_head x25_neigh_list; +extern rwlock_t x25_neigh_list_lock; extern int x25_proc_init(void); extern void x25_proc_exit(void); diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 2351aceb296d..45be72c3f940 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -1415,17 +1415,13 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) rc = x25_route_ioctl(cmd, argp); break; case SIOCX25GSUBSCRIP: - lock_kernel(); rc = x25_subscr_ioctl(cmd, argp); - unlock_kernel(); 
break; case SIOCX25SSUBSCRIP: rc = -EPERM; if (!capable(CAP_NET_ADMIN)) break; - lock_kernel(); rc = x25_subscr_ioctl(cmd, argp); - unlock_kernel(); break; case SIOCX25GFACILITIES: { struct x25_facilities fac = x25->facilities; @@ -1646,16 +1642,20 @@ static int compat_x25_subscr_ioctl(unsigned int cmd, dev_put(dev); if (cmd == SIOCX25GSUBSCRIP) { + read_lock_bh(&x25_neigh_list_lock); x25_subscr.extended = nb->extended; x25_subscr.global_facil_mask = nb->global_facil_mask; + read_unlock_bh(&x25_neigh_list_lock); rc = copy_to_user(x25_subscr32, &x25_subscr, sizeof(*x25_subscr32)) ? -EFAULT : 0; } else { rc = -EINVAL; if (x25_subscr.extended == 0 || x25_subscr.extended == 1) { rc = 0; + write_lock_bh(&x25_neigh_list_lock); nb->extended = x25_subscr.extended; nb->global_facil_mask = x25_subscr.global_facil_mask; + write_unlock_bh(&x25_neigh_list_lock); } } x25_neigh_put(nb); @@ -1711,17 +1711,13 @@ static int compat_x25_ioctl(struct socket *sock, unsigned int cmd, rc = x25_route_ioctl(cmd, argp); break; case SIOCX25GSUBSCRIP: - lock_kernel(); rc = compat_x25_subscr_ioctl(cmd, argp); - unlock_kernel(); break; case SIOCX25SSUBSCRIP: rc = -EPERM; if (!capable(CAP_NET_ADMIN)) break; - lock_kernel(); rc = compat_x25_subscr_ioctl(cmd, argp); - unlock_kernel(); break; case SIOCX25GFACILITIES: case SIOCX25SFACILITIES: diff --git a/net/x25/x25_link.c b/net/x25/x25_link.c index 73e7b954ad28..4c81f6abb65b 100644 --- a/net/x25/x25_link.c +++ b/net/x25/x25_link.c @@ -31,8 +31,8 @@ #include #include -static LIST_HEAD(x25_neigh_list); -static DEFINE_RWLOCK(x25_neigh_list_lock); +LIST_HEAD(x25_neigh_list); +DEFINE_RWLOCK(x25_neigh_list_lock); static void x25_t20timer_expiry(unsigned long); @@ -360,16 +360,20 @@ int x25_subscr_ioctl(unsigned int cmd, void __user *arg) dev_put(dev); if (cmd == SIOCX25GSUBSCRIP) { + read_lock_bh(&x25_neigh_list_lock); x25_subscr.extended = nb->extended; x25_subscr.global_facil_mask = nb->global_facil_mask; + read_unlock_bh(&x25_neigh_list_lock); rc = 
copy_to_user(arg, &x25_subscr, sizeof(x25_subscr)) ? -EFAULT : 0; } else { rc = -EINVAL; if (!(x25_subscr.extended && x25_subscr.extended != 1)) { rc = 0; + write_lock_bh(&x25_neigh_list_lock); nb->extended = x25_subscr.extended; nb->global_facil_mask = x25_subscr.global_facil_mask; + write_unlock_bh(&x25_neigh_list_lock); } } x25_neigh_put(nb); -- cgit v1.2.3-59-g8ed1b From aa285b1740f5b13e5a2606a927f3129954583d78 Mon Sep 17 00:00:00 2001 From: Timo Teräs Date: Tue, 23 Nov 2010 04:03:45 +0000 Subject: xfrm: fix gre key endianess MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fl->fl_gre_key is network byte order contrary to fl->fl_icmp_*. Make xfrm_flowi_{s|d}port return network byte order values for gre key too. Signed-off-by: Timo Teräs Signed-off-by: David S. Miller --- include/net/xfrm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 54b283229488..7fa5b005893e 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -806,7 +806,7 @@ __be16 xfrm_flowi_sport(struct flowi *fl) port = htons(fl->fl_mh_type); break; case IPPROTO_GRE: - port = htonl(fl->fl_gre_key) >> 16; + port = htons(ntohl(fl->fl_gre_key) >> 16); break; default: port = 0; /*XXX*/ @@ -830,7 +830,7 @@ __be16 xfrm_flowi_dport(struct flowi *fl) port = htons(fl->fl_icmp_code); break; case IPPROTO_GRE: - port = htonl(fl->fl_gre_key) & 0xffff; + port = htons(ntohl(fl->fl_gre_key) & 0xffff); break; default: port = 0; /*XXX*/ -- cgit v1.2.3-59-g8ed1b From 49b4a6546fac02f58784f0744e0f99a6562ccc03 Mon Sep 17 00:00:00 2001 From: Shan Wei Date: Mon, 29 Nov 2010 00:14:58 +0000 Subject: sctp: kill unused macros in head file 1. SCTP_CMD_NUM_VERBS,SCTP_CMD_MAX These two macros have never been used for several years since v2.6.12-rc2. 
2.sctp_port_rover,sctp_port_alloc_lock The commit 063930 abandoned global variables of port_rover and port_alloc_lock, but still keep two macros to refer to them. So, remove them now. commit 06393009000779b00a558fd2f280882cc7dc2008 Author: Stephen Hemminger Date: Wed Oct 10 17:30:18 2007 -0700 [SCTP]: port randomization Signed-off-by: Shan Wei Signed-off-by: David S. Miller --- include/net/sctp/command.h | 3 --- include/net/sctp/structs.h | 2 -- 2 files changed, 5 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/command.h b/include/net/sctp/command.h index 2c55a7ea20af..c01dc99def07 100644 --- a/include/net/sctp/command.h +++ b/include/net/sctp/command.h @@ -111,9 +111,6 @@ typedef enum { SCTP_CMD_LAST } sctp_verb_t; -#define SCTP_CMD_MAX (SCTP_CMD_LAST - 1) -#define SCTP_CMD_NUM_VERBS (SCTP_CMD_MAX + 1) - /* How many commands can you put in an sctp_cmd_seq_t? * This is a rather arbitrary number, ideally derived from a careful * analysis of the state functions, but in reality just taken from diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 69fef4fb79c0..cc9185ca8fd1 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -261,8 +261,6 @@ extern struct sctp_globals { #define sctp_assoc_hashsize (sctp_globals.assoc_hashsize) #define sctp_assoc_hashtable (sctp_globals.assoc_hashtable) #define sctp_port_hashsize (sctp_globals.port_hashsize) -#define sctp_port_rover (sctp_globals.port_rover) -#define sctp_port_alloc_lock (sctp_globals.port_alloc_lock) #define sctp_port_hashtable (sctp_globals.port_hashtable) #define sctp_local_addr_list (sctp_globals.local_addr_list) #define sctp_local_addr_lock (sctp_globals.addr_list_lock) -- cgit v1.2.3-59-g8ed1b From 25888e30319f8896fc656fc68643e6a078263060 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 25 Nov 2010 04:11:39 +0000 Subject: af_unix: limit recursion level MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 
It's easy to eat all kernel memory and trigger NMI watchdog, using an exploit program that queues unix sockets on top of others. lkml ref : http://lkml.org/lkml/2010/11/25/8 This mechanism is used in applications; one choice we have is to have a recursion limit. Other limits might be needed as well (if we queue other types of files), since the passfd mechanism is currently limited by socket receive queue sizes only. Add a recursion_level to unix socket, allowing up to 4 levels. Each time we send a unix socket through sendfd mechanism, we copy its recursion level (plus one) to receiver. This recursion level is cleared when socket receive queue is emptied. Reported-by: Марк Коренберг Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/af_unix.h | 2 ++ net/unix/af_unix.c | 37 ++++++++++++++++++++++++++++++++----- net/unix/garbage.c | 2 +- 3 files changed, 35 insertions(+), 6 deletions(-) (limited to 'include/net') diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 90c9e2872f27..18e5c3f67580 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -10,6 +10,7 @@ extern void unix_inflight(struct file *fp); extern void unix_notinflight(struct file *fp); extern void unix_gc(void); extern void wait_for_unix_gc(void); +extern struct sock *unix_get_socket(struct file *filp); #define UNIX_HASH_SIZE 256 @@ -56,6 +57,7 @@ struct unix_sock { spinlock_t lock; unsigned int gc_candidate : 1; unsigned int gc_maybe_cycle : 1; + unsigned char recursion_level; struct socket_wq peer_wq; }; #define unix_sk(__sk) ((struct unix_sock *)__sk) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 3c95304a0817..2268e6798124 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1343,9 +1343,25 @@ static void unix_destruct_scm(struct sk_buff *skb) sock_wfree(skb); } +#define MAX_RECURSION_LEVEL 4 + static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) { int i; + unsigned char max_level = 0; + int unix_sock_count = 0; + + 
for (i = scm->fp->count - 1; i >= 0; i--) { + struct sock *sk = unix_get_socket(scm->fp->fp[i]); + + if (sk) { + unix_sock_count++; + max_level = max(max_level, + unix_sk(sk)->recursion_level); + } + } + if (unlikely(max_level > MAX_RECURSION_LEVEL)) + return -ETOOMANYREFS; /* * Need to duplicate file references for the sake of garbage @@ -1356,9 +1372,11 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) if (!UNIXCB(skb).fp) return -ENOMEM; - for (i = scm->fp->count-1; i >= 0; i--) - unix_inflight(scm->fp->fp[i]); - return 0; + if (unix_sock_count) { + for (i = scm->fp->count - 1; i >= 0; i--) + unix_inflight(scm->fp->fp[i]); + } + return max_level; } static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds) @@ -1393,6 +1411,7 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, struct sk_buff *skb; long timeo; struct scm_cookie tmp_scm; + int max_level; if (NULL == siocb->scm) siocb->scm = &tmp_scm; @@ -1431,8 +1450,9 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, goto out; err = unix_scm_to_skb(siocb->scm, skb, true); - if (err) + if (err < 0) goto out_free; + max_level = err + 1; unix_get_secdata(siocb->scm, skb); skb_reset_transport_header(skb); @@ -1514,6 +1534,8 @@ restart: if (sock_flag(other, SOCK_RCVTSTAMP)) __net_timestamp(skb); skb_queue_tail(&other->sk_receive_queue, skb); + if (max_level > unix_sk(other)->recursion_level) + unix_sk(other)->recursion_level = max_level; unix_state_unlock(other); other->sk_data_ready(other, len); sock_put(other); @@ -1544,6 +1566,7 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, int sent = 0; struct scm_cookie tmp_scm; bool fds_sent = false; + int max_level; if (NULL == siocb->scm) siocb->scm = &tmp_scm; @@ -1607,10 +1630,11 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, /* Only send the fds in the first buffer */ err = unix_scm_to_skb(siocb->scm, skb, !fds_sent); 
- if (err) { + if (err < 0) { kfree_skb(skb); goto out_err; } + max_level = err + 1; fds_sent = true; err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size); @@ -1626,6 +1650,8 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, goto pipe_err_free; skb_queue_tail(&other->sk_receive_queue, skb); + if (max_level > unix_sk(other)->recursion_level) + unix_sk(other)->recursion_level = max_level; unix_state_unlock(other); other->sk_data_ready(other, size); sent += size; @@ -1845,6 +1871,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, unix_state_lock(sk); skb = skb_dequeue(&sk->sk_receive_queue); if (skb == NULL) { + unix_sk(sk)->recursion_level = 0; if (copied >= target) goto unlock; diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 40df93d1cf35..f89f83bf828e 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -96,7 +96,7 @@ static DECLARE_WAIT_QUEUE_HEAD(unix_gc_wait); unsigned int unix_tot_inflight; -static struct sock *unix_get_socket(struct file *filp) +struct sock *unix_get_socket(struct file *filp) { struct sock *u_sock = NULL; struct inode *inode = filp->f_path.dentry->d_inode; -- cgit v1.2.3-59-g8ed1b From f7ca38dfe58c20cb1aa2ed9643187e8b194b5bae Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 25 Nov 2010 10:02:29 +0100 Subject: nl80211/cfg80211: extend mgmt-tx API for off-channel With p2p, it is sometimes necessary to transmit a frame (typically an action frame) on another channel than the current channel. Enable this through the CMD_FRAME API, and allow it to wait for a response. A new command allows that wait to be aborted. However, allow userspace to specify whether or not it wants to allow off-channel TX, it may actually want to use the same channel only. Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- include/linux/nl80211.h | 25 +++++++++++++++++----- include/net/cfg80211.h | 11 +++++++--- net/mac80211/cfg.c | 7 ++++-- net/wireless/core.h | 4 ++-- net/wireless/mlme.c | 9 ++++---- net/wireless/nl80211.c | 57 +++++++++++++++++++++++++++++++++++++++++++------ 6 files changed, 91 insertions(+), 22 deletions(-) (limited to 'include/net') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index d706bf3badc8..5cfa579df476 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -358,11 +358,16 @@ * user space application). %NL80211_ATTR_FRAME is used to specify the * frame contents (including header). %NL80211_ATTR_WIPHY_FREQ (and * optionally %NL80211_ATTR_WIPHY_CHANNEL_TYPE) is used to indicate on - * which channel the frame is to be transmitted or was received. This - * channel has to be the current channel (remain-on-channel or the - * operational channel). When called, this operation returns a cookie - * (%NL80211_ATTR_COOKIE) that will be included with the TX status event - * pertaining to the TX request. + * which channel the frame is to be transmitted or was received. If this + * channel is not the current channel (remain-on-channel or the + * operational channel) the device will switch to the given channel and + * transmit the frame, optionally waiting for a response for the time + * specified using %NL80211_ATTR_DURATION. When called, this operation + * returns a cookie (%NL80211_ATTR_COOKIE) that will be included with the + * TX status event pertaining to the TX request. + * @NL80211_CMD_FRAME_WAIT_CANCEL: When an off-channel TX was requested, this + * command may be used with the corresponding cookie to cancel the wait + * time if it is known that it is no longer necessary. * @NL80211_CMD_ACTION: Alias for @NL80211_CMD_FRAME for backward compatibility. * @NL80211_CMD_FRAME_TX_STATUS: Report TX status of a management frame * transmitted with %NL80211_CMD_FRAME. 
%NL80211_ATTR_COOKIE identifies @@ -493,6 +498,8 @@ enum nl80211_commands { NL80211_CMD_SET_CHANNEL, NL80211_CMD_SET_WDS_PEER, + NL80211_CMD_FRAME_WAIT_CANCEL, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -828,6 +835,12 @@ enum nl80211_commands { * * @NL80211_ATTR_MCAST_RATE: Multicast tx rate (in 100 kbps) for IBSS * + * @NL80211_ATTR_OFFCHANNEL_TX_OK: For management frame TX, the frame may be + * transmitted on another channel when the channel given doesn't match + * the current channel. If the current channel doesn't match and this + * flag isn't set, the frame will be rejected. This is also used as an + * nl80211 capability flag. + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -1002,6 +1015,8 @@ enum nl80211_attrs { NL80211_ATTR_MCAST_RATE, + NL80211_ATTR_OFFCHANNEL_TX_OK, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 0663945cfa48..49a7c53a48ca 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1134,7 +1134,9 @@ struct cfg80211_pmksa { * @cancel_remain_on_channel: Cancel an on-going remain-on-channel operation. * This allows the operation to be terminated prior to timeout based on * the duration value. - * @mgmt_tx: Transmit a management frame + * @mgmt_tx: Transmit a management frame. 
+ * @mgmt_tx_cancel_wait: Cancel the wait time from transmitting a management + * frame on another channel * * @testmode_cmd: run a test mode command * @@ -1291,10 +1293,13 @@ struct cfg80211_ops { u64 cookie); int (*mgmt_tx)(struct wiphy *wiphy, struct net_device *dev, - struct ieee80211_channel *chan, + struct ieee80211_channel *chan, bool offchan, enum nl80211_channel_type channel_type, - bool channel_type_valid, + bool channel_type_valid, unsigned int wait, const u8 *buf, size_t len, u64 *cookie); + int (*mgmt_tx_cancel_wait)(struct wiphy *wiphy, + struct net_device *dev, + u64 cookie); int (*set_power_mgmt)(struct wiphy *wiphy, struct net_device *dev, bool enabled, int timeout); diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 0c544074479e..aac2d7de828e 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1552,9 +1552,9 @@ static int ieee80211_cancel_remain_on_channel(struct wiphy *wiphy, } static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct net_device *dev, - struct ieee80211_channel *chan, + struct ieee80211_channel *chan, bool offchan, enum nl80211_channel_type channel_type, - bool channel_type_valid, + bool channel_type_valid, unsigned int wait, const u8 *buf, size_t len, u64 *cookie) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); @@ -1565,6 +1565,9 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct net_device *dev, u32 flags = IEEE80211_TX_INTFL_NL80211_FRAME_TX | IEEE80211_TX_CTL_REQ_TX_STATUS; + if (offchan) + return -EOPNOTSUPP; + /* Check that we are on the requested channel for transmission */ if (chan != local->tmp_channel && chan != local->oper_channel) diff --git a/net/wireless/core.h b/net/wireless/core.h index 6583cca0e2ee..ee80ad8dc655 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -341,9 +341,9 @@ void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlpid); void cfg80211_mlme_purge_registrations(struct wireless_dev *wdev); int cfg80211_mlme_mgmt_tx(struct 
cfg80211_registered_device *rdev, struct net_device *dev, - struct ieee80211_channel *chan, + struct ieee80211_channel *chan, bool offchan, enum nl80211_channel_type channel_type, - bool channel_type_valid, + bool channel_type_valid, unsigned int wait, const u8 *buf, size_t len, u64 *cookie); /* SME */ diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 6980a0c315b2..d7680f2a4c5b 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -864,9 +864,9 @@ void cfg80211_mlme_purge_registrations(struct wireless_dev *wdev) int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, struct net_device *dev, - struct ieee80211_channel *chan, + struct ieee80211_channel *chan, bool offchan, enum nl80211_channel_type channel_type, - bool channel_type_valid, + bool channel_type_valid, unsigned int wait, const u8 *buf, size_t len, u64 *cookie) { struct wireless_dev *wdev = dev->ieee80211_ptr; @@ -946,8 +946,9 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, return -EINVAL; /* Transmit the Action frame as requested by user space */ - return rdev->ops->mgmt_tx(&rdev->wiphy, dev, chan, channel_type, - channel_type_valid, buf, len, cookie); + return rdev->ops->mgmt_tx(&rdev->wiphy, dev, chan, offchan, + channel_type, channel_type_valid, + wait, buf, len, cookie); } bool cfg80211_rx_mgmt(struct net_device *dev, int freq, const u8 *buf, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 67ff7e92cb99..960be4e650f0 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -163,16 +163,13 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_CQM] = { .type = NLA_NESTED, }, [NL80211_ATTR_LOCAL_STATE_CHANGE] = { .type = NLA_FLAG }, [NL80211_ATTR_AP_ISOLATE] = { .type = NLA_U8 }, - [NL80211_ATTR_WIPHY_TX_POWER_SETTING] = { .type = NLA_U32 }, [NL80211_ATTR_WIPHY_TX_POWER_LEVEL] = { .type = NLA_U32 }, - [NL80211_ATTR_FRAME_TYPE] = { .type = NLA_U16 }, - [NL80211_ATTR_WIPHY_ANTENNA_TX] = { 
.type = NLA_U32 }, [NL80211_ATTR_WIPHY_ANTENNA_RX] = { .type = NLA_U32 }, - [NL80211_ATTR_MCAST_RATE] = { .type = NLA_U32 }, + [NL80211_ATTR_OFFCHANNEL_TX_OK] = { .type = NLA_FLAG }, }; /* policy for the key attributes */ @@ -677,6 +674,7 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, CMD(remain_on_channel, REMAIN_ON_CHANNEL); CMD(set_bitrate_mask, SET_TX_BITRATE_MASK); CMD(mgmt_tx, FRAME); + CMD(mgmt_tx_cancel_wait, FRAME_WAIT_CANCEL); if (dev->wiphy.flags & WIPHY_FLAG_NETNS_OK) { i++; NLA_PUT_U32(msg, i, NL80211_CMD_SET_WIPHY_NETNS); @@ -698,6 +696,10 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, nla_nest_end(msg, nl_cmds); + /* for now at least assume all drivers have it */ + if (dev->ops->mgmt_tx) + NLA_PUT_FLAG(msg, NL80211_ATTR_OFFCHANNEL_TX_OK); + if (mgmt_stypes) { u16 stypes; struct nlattr *nl_ftypes, *nl_ifs; @@ -4244,6 +4246,8 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) void *hdr; u64 cookie; struct sk_buff *msg; + unsigned int wait = 0; + bool offchan; if (!info->attrs[NL80211_ATTR_FRAME] || !info->attrs[NL80211_ATTR_WIPHY_FREQ]) @@ -4260,6 +4264,12 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) return -EOPNOTSUPP; + if (info->attrs[NL80211_ATTR_DURATION]) { + if (!rdev->ops->mgmt_tx_cancel_wait) + return -EINVAL; + wait = nla_get_u32(info->attrs[NL80211_ATTR_DURATION]); + } + if (info->attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE]) { channel_type = nla_get_u32( info->attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE]); @@ -4271,6 +4281,8 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) channel_type_valid = true; } + offchan = info->attrs[NL80211_ATTR_OFFCHANNEL_TX_OK]; + freq = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]); chan = rdev_freq_to_chan(rdev, freq, channel_type); if (chan == NULL) @@ -4287,8 +4299,8 @@ static int nl80211_tx_mgmt(struct sk_buff 
*skb, struct genl_info *info) err = PTR_ERR(hdr); goto free_msg; } - err = cfg80211_mlme_mgmt_tx(rdev, dev, chan, channel_type, - channel_type_valid, + err = cfg80211_mlme_mgmt_tx(rdev, dev, chan, offchan, channel_type, + channel_type_valid, wait, nla_data(info->attrs[NL80211_ATTR_FRAME]), nla_len(info->attrs[NL80211_ATTR_FRAME]), &cookie); @@ -4307,6 +4319,31 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) return err; } +static int nl80211_tx_mgmt_cancel_wait(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct net_device *dev = info->user_ptr[1]; + u64 cookie; + + if (!info->attrs[NL80211_ATTR_COOKIE]) + return -EINVAL; + + if (!rdev->ops->mgmt_tx_cancel_wait) + return -EOPNOTSUPP; + + if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION && + dev->ieee80211_ptr->iftype != NL80211_IFTYPE_ADHOC && + dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT && + dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && + dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN && + dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) + return -EOPNOTSUPP; + + cookie = nla_get_u64(info->attrs[NL80211_ATTR_COOKIE]); + + return rdev->ops->mgmt_tx_cancel_wait(&rdev->wiphy, dev, cookie); +} + static int nl80211_set_power_save(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; @@ -4879,6 +4916,14 @@ static struct genl_ops nl80211_ops[] = { .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, + { + .cmd = NL80211_CMD_FRAME_WAIT_CANCEL, + .doit = nl80211_tx_mgmt_cancel_wait, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | + NL80211_FLAG_NEED_RTNL, + }, { .cmd = NL80211_CMD_SET_POWER_SAVE, .doit = nl80211_set_power_save, -- cgit v1.2.3-59-g8ed1b From 20ed3166c84d145589a89d8cde12aa32cf2d17f4 Mon Sep 17 00:00:00 2001 From: Johannes Stezenbach Date: Tue, 30 
Nov 2010 16:49:23 +0100 Subject: mac80211/rt2x00: add ieee80211_tx_status_ni() All rt2x00 drivers except rt2800pci call ieee80211_tx_status() from a workqueue, which causes "NOHZ: local_softirq_pending 08" messages. To fix it, add ieee80211_tx_status_ni() similar to ieee80211_rx_ni() which can be called from process context, and call it from rt2x00lib_txdone(). For the rt2800pci special case a driver flag is introduced. Signed-off-by: Johannes Stezenbach Signed-off-by: John W. Linville --- drivers/net/wireless/rt2x00/rt2800pci.c | 1 + drivers/net/wireless/rt2x00/rt2x00.h | 1 + drivers/net/wireless/rt2x00/rt2x00dev.c | 9 ++++++--- include/net/mac80211.h | 28 ++++++++++++++++++++++++---- 4 files changed, 32 insertions(+), 7 deletions(-) (limited to 'include/net') diff --git a/drivers/net/wireless/rt2x00/rt2800pci.c b/drivers/net/wireless/rt2x00/rt2800pci.c index 433c7f3ef837..b989b0d3ed49 100644 --- a/drivers/net/wireless/rt2x00/rt2800pci.c +++ b/drivers/net/wireless/rt2x00/rt2800pci.c @@ -911,6 +911,7 @@ static int rt2800pci_probe_hw(struct rt2x00_dev *rt2x00dev) __set_bit(DRIVER_REQUIRE_DMA, &rt2x00dev->flags); __set_bit(DRIVER_REQUIRE_L2PAD, &rt2x00dev->flags); __set_bit(DRIVER_REQUIRE_TXSTATUS_FIFO, &rt2x00dev->flags); + __set_bit(DRIVER_REQUIRE_TASKLET_CONTEXT, &rt2x00dev->flags); if (!modparam_nohwcrypt) __set_bit(CONFIG_SUPPORT_HW_CRYPTO, &rt2x00dev->flags); __set_bit(DRIVER_SUPPORT_LINK_TUNING, &rt2x00dev->flags); diff --git a/drivers/net/wireless/rt2x00/rt2x00.h b/drivers/net/wireless/rt2x00/rt2x00.h index 0a55eeff871e..e72117f3fdf5 100644 --- a/drivers/net/wireless/rt2x00/rt2x00.h +++ b/drivers/net/wireless/rt2x00/rt2x00.h @@ -664,6 +664,7 @@ enum rt2x00_flags { DRIVER_REQUIRE_COPY_IV, DRIVER_REQUIRE_L2PAD, DRIVER_REQUIRE_TXSTATUS_FIFO, + DRIVER_REQUIRE_TASKLET_CONTEXT, /* * Driver features diff --git a/drivers/net/wireless/rt2x00/rt2x00dev.c b/drivers/net/wireless/rt2x00/rt2x00dev.c index c879f9a7037c..bd3afc92f434 100644 --- 
a/drivers/net/wireless/rt2x00/rt2x00dev.c +++ b/drivers/net/wireless/rt2x00/rt2x00dev.c @@ -379,9 +379,12 @@ void rt2x00lib_txdone(struct queue_entry *entry, * through a mac80211 library call (RTS/CTS) then we should not * send the status report back. */ - if (!(skbdesc_flags & SKBDESC_NOT_MAC80211)) - ieee80211_tx_status(rt2x00dev->hw, entry->skb); - else + if (!(skbdesc_flags & SKBDESC_NOT_MAC80211)) { + if (test_bit(DRIVER_REQUIRE_TASKLET_CONTEXT, &rt2x00dev->flags)) + ieee80211_tx_status(rt2x00dev->hw, entry->skb); + else + ieee80211_tx_status_ni(rt2x00dev->hw, entry->skb); + } else dev_kfree_skb_any(entry->skb); /* diff --git a/include/net/mac80211.h b/include/net/mac80211.h index eaa4affd40cd..e411cf87fb41 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2055,8 +2055,8 @@ static inline void ieee80211_rx_ni(struct ieee80211_hw *hw, * * This function may not be called in IRQ context. Calls to this function * for a single hardware must be synchronized against each other. Calls - * to this function and ieee80211_tx_status_irqsafe() may not be mixed - * for a single hardware. + * to this function, ieee80211_tx_status_ni() and ieee80211_tx_status_irqsafe() + * may not be mixed for a single hardware. * * @hw: the hardware the frame was transmitted by * @skb: the frame that was transmitted, owned by mac80211 after this call @@ -2064,14 +2064,34 @@ static inline void ieee80211_rx_ni(struct ieee80211_hw *hw, void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb); +/** + * ieee80211_tx_status_ni - transmit status callback (in process context) + * + * Like ieee80211_tx_status() but can be called in process context. + * + * Calls to this function, ieee80211_tx_status() and + * ieee80211_tx_status_irqsafe() may not be mixed + * for a single hardware. 
+ * + * @hw: the hardware the frame was transmitted by + * @skb: the frame that was transmitted, owned by mac80211 after this call + */ +static inline void ieee80211_tx_status_ni(struct ieee80211_hw *hw, + struct sk_buff *skb) +{ + local_bh_disable(); + ieee80211_tx_status(hw, skb); + local_bh_enable(); +} + /** * ieee80211_tx_status_irqsafe - IRQ-safe transmit status callback * * Like ieee80211_tx_status() but can be called in IRQ context * (internally defers to a tasklet.) * - * Calls to this function and ieee80211_tx_status() may not be mixed for a - * single hardware. + * Calls to this function, ieee80211_tx_status() and + * ieee80211_tx_status_ni() may not be mixed for a single hardware. * * @hw: the hardware the frame was transmitted by * @skb: the frame that was transmitted, owned by mac80211 after this call -- cgit v1.2.3-59-g8ed1b From 582a72da9a41be9227dc931d728ae2906880a589 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 30 Nov 2010 11:53:55 -0800 Subject: inetpeer: Introduce inet_peer_address_t. Currently only the v4 aspect is used, but this will change. Signed-off-by: David S. 
Miller --- include/net/inetpeer.h | 10 +++++++++- net/ipv4/inetpeer.c | 16 ++++++++-------- net/ipv4/tcp_ipv4.c | 2 +- 3 files changed, 18 insertions(+), 10 deletions(-) (limited to 'include/net') diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h index fe239bfe5f7f..d7e60792d76e 100644 --- a/include/net/inetpeer.h +++ b/include/net/inetpeer.h @@ -13,10 +13,18 @@ #include #include +typedef struct { + union { + __be32 a4; + __be32 a6[4]; + }; + __u16 family; +} inet_peer_address_t; + struct inet_peer { /* group together avl_left,avl_right,v4daddr to speedup lookups */ struct inet_peer __rcu *avl_left, *avl_right; - __be32 v4daddr; /* peer's address */ + inet_peer_address_t daddr; __u32 avl_height; struct list_head unused; __u32 dtime; /* the time of last use of not diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index f94400848921..893f998efdbb 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -63,7 +63,7 @@ * refcnt: atomically against modifications on other CPU; * usually under some other lock to prevent node disappearing * dtime: unused node list lock - * v4daddr: unchangeable + * daddr: unchangeable * ip_id_count: atomic value (no lock needed) */ @@ -165,9 +165,9 @@ static void unlink_from_unused(struct inet_peer *p) for (u = rcu_dereference_protected(_base->root, \ lockdep_is_held(&_base->lock)); \ u != peer_avl_empty; ) { \ - if (_daddr == u->v4daddr) \ + if (_daddr == u->daddr.a4) \ break; \ - if ((__force __u32)_daddr < (__force __u32)u->v4daddr) \ + if ((__force __u32)_daddr < (__force __u32)u->daddr.a4) \ v = &u->avl_left; \ else \ v = &u->avl_right; \ @@ -191,7 +191,7 @@ static struct inet_peer *lookup_rcu_bh(__be32 daddr, struct inet_peer_base *base int count = 0; while (u != peer_avl_empty) { - if (daddr == u->v4daddr) { + if (daddr == u->daddr.a4) { /* Before taking a reference, check if this entry was * deleted, unlink_from_pool() sets refcnt=-1 to make * distinction between an unused entry (refcnt=0) and @@ -201,7 +201,7 
@@ static struct inet_peer *lookup_rcu_bh(__be32 daddr, struct inet_peer_base *base u = NULL; return u; } - if ((__force __u32)daddr < (__force __u32)u->v4daddr) + if ((__force __u32)daddr < (__force __u32)u->daddr.a4) u = rcu_dereference_bh(u->avl_left); else u = rcu_dereference_bh(u->avl_right); @@ -354,7 +354,7 @@ static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base) if (atomic_cmpxchg(&p->refcnt, 1, -1) == 1) { struct inet_peer __rcu **stack[PEER_MAXDEPTH]; struct inet_peer __rcu ***stackptr, ***delp; - if (lookup(p->v4daddr, stack, base) != p) + if (lookup(p->daddr.a4, stack, base) != p) BUG(); delp = stackptr - 1; /* *delp[0] == p */ if (p->avl_left == peer_avl_empty_rcu) { @@ -367,7 +367,7 @@ static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base) BUG_ON(rcu_dereference_protected(*stackptr[-1], lockdep_is_held(&base->lock)) != t); **--stackptr = t->avl_left; - /* t is removed, t->v4daddr > x->v4daddr for any + /* t is removed, t->daddr > x->daddr for any * x in p->avl_left subtree. * Put t in the old place of p. */ RCU_INIT_POINTER(*delp[0], t); @@ -479,7 +479,7 @@ struct inet_peer *inet_getpeer(__be32 daddr, int create) } p = create ? 
kmem_cache_alloc(peer_cachep, GFP_ATOMIC) : NULL; if (p) { - p->v4daddr = daddr; + p->daddr.a4 = daddr; atomic_set(&p->refcnt, 1); atomic_set(&p->rid, 0); atomic_set(&p->ip_id_count, secure_ip_id(daddr)); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 69ccbc1dde9c..b8bbf89409b0 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1347,7 +1347,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) tcp_death_row.sysctl_tw_recycle && (dst = inet_csk_route_req(sk, req)) != NULL && (peer = rt_get_peer((struct rtable *)dst)) != NULL && - peer->v4daddr == saddr) { + peer->daddr.a4 == saddr) { inet_peer_refcheck(peer); if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL && (s32)(peer->tcp_ts - req->ts_recent) > -- cgit v1.2.3-59-g8ed1b From b534ecf1cd26f094497da6ae28a6ab64cdbe1617 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 30 Nov 2010 11:54:19 -0800 Subject: inetpeer: Make inet_getpeer() take an inet_peer_adress_t pointer. And make an inet_getpeer_v4() helper, update callers. Signed-off-by: David S. 
Miller --- include/net/inetpeer.h | 11 ++++++++++- net/ipv4/inetpeer.c | 10 +++++----- net/ipv4/ip_fragment.c | 2 +- net/ipv4/route.c | 2 +- net/ipv4/tcp_ipv4.c | 4 ++-- 5 files changed, 19 insertions(+), 10 deletions(-) (limited to 'include/net') diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h index d7e60792d76e..834f0456c87e 100644 --- a/include/net/inetpeer.h +++ b/include/net/inetpeer.h @@ -50,7 +50,16 @@ struct inet_peer { void inet_initpeers(void) __init; /* can be called with or without local BH being disabled */ -struct inet_peer *inet_getpeer(__be32 daddr, int create); +struct inet_peer *inet_getpeer(inet_peer_address_t *daddr, int create); + +static inline struct inet_peer *inet_getpeer_v4(__be32 v4daddr, int create) +{ + inet_peer_address_t daddr; + + daddr.a4 = v4daddr; + daddr.family = AF_INET; + return inet_getpeer(&daddr, create); +} /* can be called from BH context or outside */ extern void inet_putpeer(struct inet_peer *p); diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index 893f998efdbb..9aa76b8dd490 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -444,7 +444,7 @@ static struct inet_peer_base *family_to_base(int family) } /* Called with or without local BH being disabled. */ -struct inet_peer *inet_getpeer(__be32 daddr, int create) +struct inet_peer *inet_getpeer(inet_peer_address_t *daddr, int create) { struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr; struct inet_peer_base *base = family_to_base(AF_INET); @@ -454,7 +454,7 @@ struct inet_peer *inet_getpeer(__be32 daddr, int create) * Because of a concurrent writer, we might not find an existing entry. */ rcu_read_lock_bh(); - p = lookup_rcu_bh(daddr, base); + p = lookup_rcu_bh(daddr->a4, base); rcu_read_unlock_bh(); if (p) { @@ -469,7 +469,7 @@ struct inet_peer *inet_getpeer(__be32 daddr, int create) * At least, nodes should be hot in our cache. 
*/ spin_lock_bh(&base->lock); - p = lookup(daddr, stack, base); + p = lookup(daddr->a4, stack, base); if (p != peer_avl_empty) { atomic_inc(&p->refcnt); spin_unlock_bh(&base->lock); @@ -479,10 +479,10 @@ struct inet_peer *inet_getpeer(__be32 daddr, int create) } p = create ? kmem_cache_alloc(peer_cachep, GFP_ATOMIC) : NULL; if (p) { - p->daddr.a4 = daddr; + p->daddr = *daddr; atomic_set(&p->refcnt, 1); atomic_set(&p->rid, 0); - atomic_set(&p->ip_id_count, secure_ip_id(daddr)); + atomic_set(&p->ip_id_count, secure_ip_id(daddr->a4)); p->tcp_ts_stamp = 0; INIT_LIST_HEAD(&p->unused); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 168440834ade..e6215bdd96c0 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -141,7 +141,7 @@ static void ip4_frag_init(struct inet_frag_queue *q, void *a) qp->daddr = arg->iph->daddr; qp->user = arg->user; qp->peer = sysctl_ipfrag_max_dist ? - inet_getpeer(arg->iph->saddr, 1) : NULL; + inet_getpeer_v4(arg->iph->saddr, 1) : NULL; } static __inline__ void ip4_frag_free(struct inet_frag_queue *q) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index ec2333fb637e..3843c2dfde82 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1289,7 +1289,7 @@ void rt_bind_peer(struct rtable *rt, int create) { struct inet_peer *peer; - peer = inet_getpeer(rt->rt_dst, create); + peer = inet_getpeer_v4(rt->rt_dst, create); if (peer && cmpxchg(&rt->peer, NULL, peer) != NULL) inet_putpeer(peer); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index b8bbf89409b0..00285fcf6788 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1778,7 +1778,7 @@ int tcp_v4_remember_stamp(struct sock *sk) int release_it = 0; if (!rt || rt->rt_dst != inet->inet_daddr) { - peer = inet_getpeer(inet->inet_daddr, 1); + peer = inet_getpeer_v4(inet->inet_daddr, 1); release_it = 1; } else { if (!rt->peer) @@ -1804,7 +1804,7 @@ EXPORT_SYMBOL(tcp_v4_remember_stamp); int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw) { - 
struct inet_peer *peer = inet_getpeer(tw->tw_daddr, 1); + struct inet_peer *peer = inet_getpeer_v4(tw->tw_daddr, 1); if (peer) { const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); -- cgit v1.2.3-59-g8ed1b From 672f007d65f50468a4a1e55825fe58e5b035324d Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 30 Nov 2010 12:20:00 -0800 Subject: inetpeer: Add inet_getpeer_v6() Now that all of the infrastructure is in place, we can add the ipv6 shorthand for peer creation. Signed-off-by: David S. Miller --- include/net/inetpeer.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/net') diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h index 834f0456c87e..fb8aeb1fd23f 100644 --- a/include/net/inetpeer.h +++ b/include/net/inetpeer.h @@ -11,6 +11,7 @@ #include #include #include +#include #include typedef struct { @@ -61,6 +62,15 @@ static inline struct inet_peer *inet_getpeer_v4(__be32 v4daddr, int create) return inet_getpeer(&daddr, create); } +static inline struct inet_peer *inet_getpeer_v6(struct in6_addr *v6daddr, int create) +{ + inet_peer_address_t daddr; + + ipv6_addr_copy((struct in6_addr *)daddr.a6, v6daddr); + daddr.family = AF_INET6; + return inet_getpeer(&daddr, create); +} + /* can be called from BH context or outside */ extern void inet_putpeer(struct inet_peer *p); -- cgit v1.2.3-59-g8ed1b From b3419363808f2481b24a817f491878e1795db4c7 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 30 Nov 2010 12:27:11 -0800 Subject: ipv6: Add infrastructure to bind inet_peer objects to routes. They are only allowed on cached ipv6 routes. Signed-off-by: David S. 
Miller --- include/net/ip6_fib.h | 2 ++ include/net/ip6_route.h | 3 +++ net/ipv4/inetpeer.c | 2 ++ net/ipv6/route.c | 18 ++++++++++++++++++ 4 files changed, 25 insertions(+) (limited to 'include/net') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 062a823d311c..708ff7cb8806 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -21,6 +21,7 @@ #include #include #include +#include #ifdef CONFIG_IPV6_MULTIPLE_TABLES #define FIB6_TABLE_HASHSZ 256 @@ -109,6 +110,7 @@ struct rt6_info { u32 rt6i_metric; struct inet6_dev *rt6i_idev; + struct inet_peer *rt6i_peer; #ifdef CONFIG_XFRM u32 rt6i_flow_cache_genid; diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 278312c95f96..23fed28db4bb 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -56,6 +56,9 @@ static inline unsigned int rt6_flags2srcprefs(int flags) return (flags >> 3) & 7; } +extern void rt6_bind_peer(struct rt6_info *rt, + int create); + extern void ip6_route_input(struct sk_buff *skb); extern struct dst_entry * ip6_route_output(struct net *net, diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index 1c1335b0d401..f95b89f3916d 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -532,6 +532,7 @@ static int compute_total(void) { return v4_peers.total + v6_peers.total; } +EXPORT_SYMBOL_GPL(inet_getpeer); /* Called with local BH disabled. 
*/ static void peer_check_expire(unsigned long dummy) @@ -577,3 +578,4 @@ void inet_putpeer(struct inet_peer *p) local_bh_enable(); } +EXPORT_SYMBOL_GPL(inet_putpeer); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index a0c4ad109c63..026caef0326c 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -188,11 +188,29 @@ static void ip6_dst_destroy(struct dst_entry *dst) { struct rt6_info *rt = (struct rt6_info *)dst; struct inet6_dev *idev = rt->rt6i_idev; + struct inet_peer *peer = rt->rt6i_peer; if (idev != NULL) { rt->rt6i_idev = NULL; in6_dev_put(idev); } + if (peer) { + BUG_ON(!(rt->rt6i_flags & RTF_CACHE)); + rt->rt6i_peer = NULL; + inet_putpeer(peer); + } +} + +void rt6_bind_peer(struct rt6_info *rt, int create) +{ + struct inet_peer *peer; + + if (WARN_ON(!(rt->rt6i_flags & RTF_CACHE))) + return; + + peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create); + if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL) + inet_putpeer(peer); } static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, -- cgit v1.2.3-59-g8ed1b From 3f419d2d487821093ee46e898b5f8747f9edc9cd Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 29 Nov 2010 13:37:14 -0800 Subject: inet: Turn ->remember_stamp into ->get_peer in connection AF ops. Then we can make a completely generic tcp_remember_stamp() that uses ->get_peer() as a helper, minimizing the AF specific code and minimizing the eventual code duplication when we implement the ipv6 side of TW recycling. Signed-off-by: David S. 
Miller --- include/net/inet_connection_sock.h | 2 +- include/net/tcp.h | 2 +- net/ipv4/tcp_ipv4.c | 35 ++++++++--------------------------- net/ipv4/tcp_minisocks.c | 31 ++++++++++++++++++++++++++++++- net/ipv6/tcp_ipv6.c | 8 ++++---- 5 files changed, 44 insertions(+), 34 deletions(-) (limited to 'include/net') diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index e4f494b42e06..6c93a56cc958 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -43,7 +43,7 @@ struct inet_connection_sock_af_ops { struct sock *(*syn_recv_sock)(struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct dst_entry *dst); - int (*remember_stamp)(struct sock *sk); + struct inet_peer *(*get_peer)(struct sock *sk, bool *release_it); u16 net_header_len; u16 sockaddr_len; int (*setsockopt)(struct sock *sk, int level, int optname, diff --git a/include/net/tcp.h b/include/net/tcp.h index e36c874c7fb1..3e239641d4ee 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -312,7 +312,7 @@ extern void tcp_shutdown (struct sock *sk, int how); extern int tcp_v4_rcv(struct sk_buff *skb); -extern int tcp_v4_remember_stamp(struct sock *sk); +extern struct inet_peer *tcp_v4_get_peer(struct sock *sk, bool *release_it); extern int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw); extern int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t size); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 00285fcf6788..0ddf819cfb5d 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1763,44 +1763,25 @@ do_time_wait: goto discard_it; } -/* VJ's idea. Save last timestamp seen from this destination - * and hold it at least for normal timewait interval to use for duplicate - * segment detection in subsequent connections, before they enter synchronized - * state. 
- */ - -int tcp_v4_remember_stamp(struct sock *sk) +struct inet_peer *tcp_v4_get_peer(struct sock *sk, bool *release_it) { + struct rtable *rt = (struct rtable *) __sk_dst_get(sk); struct inet_sock *inet = inet_sk(sk); - struct tcp_sock *tp = tcp_sk(sk); - struct rtable *rt = (struct rtable *)__sk_dst_get(sk); - struct inet_peer *peer = NULL; - int release_it = 0; + struct inet_peer *peer; if (!rt || rt->rt_dst != inet->inet_daddr) { peer = inet_getpeer_v4(inet->inet_daddr, 1); - release_it = 1; + *release_it = true; } else { if (!rt->peer) rt_bind_peer(rt, 1); peer = rt->peer; + *release_it = false; } - if (peer) { - if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 || - ((u32)get_seconds() - peer->tcp_ts_stamp > TCP_PAWS_MSL && - peer->tcp_ts_stamp <= (u32)tp->rx_opt.ts_recent_stamp)) { - peer->tcp_ts_stamp = (u32)tp->rx_opt.ts_recent_stamp; - peer->tcp_ts = tp->rx_opt.ts_recent; - } - if (release_it) - inet_putpeer(peer); - return 1; - } - - return 0; + return peer; } -EXPORT_SYMBOL(tcp_v4_remember_stamp); +EXPORT_SYMBOL(tcp_v4_get_peer); int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw) { @@ -1828,7 +1809,7 @@ const struct inet_connection_sock_af_ops ipv4_specific = { .rebuild_header = inet_sk_rebuild_header, .conn_request = tcp_v4_conn_request, .syn_recv_sock = tcp_v4_syn_recv_sock, - .remember_stamp = tcp_v4_remember_stamp, + .get_peer = tcp_v4_get_peer, .net_header_len = sizeof(struct iphdr), .setsockopt = ip_setsockopt, .getsockopt = ip_getsockopt, diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 43cf901d7659..059082c873cf 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -49,6 +49,35 @@ struct inet_timewait_death_row tcp_death_row = { }; EXPORT_SYMBOL_GPL(tcp_death_row); +/* VJ's idea. Save last timestamp seen from this destination + * and hold it at least for normal timewait interval to use for duplicate + * segment detection in subsequent connections, before they enter synchronized + * state. 
+ */ + +static int tcp_remember_stamp(struct sock *sk) +{ + const struct inet_connection_sock *icsk = inet_csk(sk); + struct tcp_sock *tp = tcp_sk(sk); + struct inet_peer *peer; + bool release_it; + + peer = icsk->icsk_af_ops->get_peer(sk, &release_it); + if (peer) { + if ((s32)(peer->tcp_ts - tp->rx_opt.ts_recent) <= 0 || + ((u32)get_seconds() - peer->tcp_ts_stamp > TCP_PAWS_MSL && + peer->tcp_ts_stamp <= (u32)tp->rx_opt.ts_recent_stamp)) { + peer->tcp_ts_stamp = (u32)tp->rx_opt.ts_recent_stamp; + peer->tcp_ts = tp->rx_opt.ts_recent; + } + if (release_it) + inet_putpeer(peer); + return 1; + } + + return 0; +} + static __inline__ int tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win) { if (seq == s_win) @@ -274,7 +303,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) int recycle_ok = 0; if (tcp_death_row.sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp) - recycle_ok = icsk->icsk_af_ops->remember_stamp(sk); + recycle_ok = tcp_remember_stamp(sk); if (tcp_death_row.tw_count < tcp_death_row.sysctl_max_tw_buckets) tw = inet_twsk_alloc(sk, state); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 7e41e2cbb85e..e394d0029d8d 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1818,10 +1818,10 @@ do_time_wait: goto discard_it; } -static int tcp_v6_remember_stamp(struct sock *sk) +struct inet_peer *tcp_v6_get_peer(struct sock *sk, bool *release_it) { /* Alas, not yet... 
*/ - return 0; + return NULL; } static const struct inet_connection_sock_af_ops ipv6_specific = { @@ -1830,7 +1830,7 @@ static const struct inet_connection_sock_af_ops ipv6_specific = { .rebuild_header = inet6_sk_rebuild_header, .conn_request = tcp_v6_conn_request, .syn_recv_sock = tcp_v6_syn_recv_sock, - .remember_stamp = tcp_v6_remember_stamp, + .get_peer = tcp_v6_get_peer, .net_header_len = sizeof(struct ipv6hdr), .setsockopt = ipv6_setsockopt, .getsockopt = ipv6_getsockopt, @@ -1862,7 +1862,7 @@ static const struct inet_connection_sock_af_ops ipv6_mapped = { .rebuild_header = inet_sk_rebuild_header, .conn_request = tcp_v6_conn_request, .syn_recv_sock = tcp_v6_syn_recv_sock, - .remember_stamp = tcp_v4_remember_stamp, + .get_peer = tcp_v4_get_peer, .net_header_len = sizeof(struct iphdr), .setsockopt = ipv6_setsockopt, .getsockopt = ipv6_getsockopt, -- cgit v1.2.3-59-g8ed1b From 735cbc4784a084b7a76c43c69f9dba683bb3b48b Mon Sep 17 00:00:00 2001 From: Andrei Emeltchenko Date: Wed, 1 Dec 2010 16:58:22 +0200 Subject: Bluetooth: clean up sco code Do not use assignments in IF condition, remove extra spaces Signed-off-by: Andrei Emeltchenko Signed-off-by: Gustavo F. 
Padovan --- include/net/bluetooth/sco.h | 8 ++++---- net/bluetooth/sco.c | 22 +++++++++++++--------- 2 files changed, 17 insertions(+), 13 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/sco.h b/include/net/bluetooth/sco.h index e28a2a771471..ea5c6641243f 100644 --- a/include/net/bluetooth/sco.h +++ b/include/net/bluetooth/sco.h @@ -55,11 +55,11 @@ struct sco_conninfo { struct sco_conn { struct hci_conn *hcon; - bdaddr_t *dst; - bdaddr_t *src; - + bdaddr_t *dst; + bdaddr_t *src; + spinlock_t lock; - struct sock *sk; + struct sock *sk; unsigned int mtu; }; diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 66b9e5c0523a..960c6d1637da 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -44,7 +44,7 @@ #include #include -#include +#include #include #include @@ -52,7 +52,7 @@ #define VERSION "0.6" -static int disable_esco = 0; +static int disable_esco; static const struct proto_ops sco_sock_ops; @@ -138,16 +138,17 @@ static inline struct sock *sco_chan_get(struct sco_conn *conn) static int sco_conn_del(struct hci_conn *hcon, int err) { - struct sco_conn *conn; + struct sco_conn *conn = hcon->sco_data; struct sock *sk; - if (!(conn = hcon->sco_data)) + if (!conn) return 0; BT_DBG("hcon %p conn %p, err %d", hcon, conn, err); /* Kill socket */ - if ((sk = sco_chan_get(conn))) { + sk = sco_chan_get(conn); + if (sk) { bh_lock_sock(sk); sco_sock_clear_timer(sk); sco_chan_del(sk, err); @@ -185,7 +186,8 @@ static int sco_connect(struct sock *sk) BT_DBG("%s -> %s", batostr(src), batostr(dst)); - if (!(hdev = hci_get_route(dst, src))) + hdev = hci_get_route(dst, src); + if (!hdev) return -EHOSTUNREACH; hci_dev_lock_bh(hdev); @@ -510,7 +512,8 @@ static int sco_sock_connect(struct socket *sock, struct sockaddr *addr, int alen /* Set destination address and psm */ bacpy(&bt_sk(sk)->dst, &sa->sco_bdaddr); - if ((err = sco_connect(sk))) + err = sco_connect(sk); + if (err) goto done; err = bt_sock_wait_state(sk, BT_CONNECTED, @@ -828,13 
+831,14 @@ static void sco_chan_del(struct sock *sk, int err) static void sco_conn_ready(struct sco_conn *conn) { - struct sock *parent, *sk; + struct sock *parent; + struct sock *sk = conn->sk; BT_DBG("conn %p", conn); sco_conn_lock(conn); - if ((sk = conn->sk)) { + if (sk) { sco_sock_clear_timer(sk); bh_lock_sock(sk); sk->sk_state = BT_CONNECTED; -- cgit v1.2.3-59-g8ed1b From 285b4e90318dcf421a00b2ac3fe8ab713f3281e3 Mon Sep 17 00:00:00 2001 From: Andrei Emeltchenko Date: Wed, 1 Dec 2010 16:58:23 +0200 Subject: Bluetooth: clean up rfcomm code Remove extra spaces, assignments in if statement, zeroing static variables, extra braces. Fix includes. Signed-off-by: Andrei Emeltchenko Signed-off-by: Gustavo F. Padovan --- include/net/bluetooth/rfcomm.h | 4 ++-- net/bluetooth/rfcomm/core.c | 8 ++++---- net/bluetooth/rfcomm/sock.c | 5 +++-- net/bluetooth/rfcomm/tty.c | 28 ++++++++++++++++------------ 4 files changed, 25 insertions(+), 20 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/rfcomm.h b/include/net/bluetooth/rfcomm.h index 71047bc0af84..2e7875691f0a 100644 --- a/include/net/bluetooth/rfcomm.h +++ b/include/net/bluetooth/rfcomm.h @@ -105,7 +105,7 @@ struct rfcomm_hdr { u8 addr; u8 ctrl; - u8 len; // Actual size can be 2 bytes + u8 len; /* Actual size can be 2 bytes */ } __packed; struct rfcomm_cmd { @@ -228,7 +228,7 @@ struct rfcomm_dlc { /* ---- RFCOMM SEND RPN ---- */ int rfcomm_send_rpn(struct rfcomm_session *s, int cr, u8 dlci, u8 bit_rate, u8 data_bits, u8 stop_bits, - u8 parity, u8 flow_ctrl_settings, + u8 parity, u8 flow_ctrl_settings, u8 xon_char, u8 xoff_char, u16 param_mask); /* ---- RFCOMM DLCs (channels) ---- */ diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index fa642aa652bd..c1e2bbafb549 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -41,7 +41,7 @@ #include #include -#include +#include #include #include @@ -51,10 +51,10 @@ #define VERSION "1.11" -static int disable_cfc 
= 0; +static int disable_cfc; +static int l2cap_ertm; static int channel_mtu = -1; static unsigned int l2cap_mtu = RFCOMM_MAX_L2CAP_MTU; -static int l2cap_ertm = 0; static struct task_struct *rfcomm_thread; @@ -1901,7 +1901,7 @@ static inline void rfcomm_check_connection(struct rfcomm_session *s) BT_DBG("%p state %ld", s, s->state); - switch(sk->sk_state) { + switch (sk->sk_state) { case BT_CONNECTED: s->state = BT_CONNECT; diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 0207bd6dbfc5..66cc1f0c3df8 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -45,7 +45,7 @@ #include #include -#include +#include #include #include @@ -888,7 +888,8 @@ static int rfcomm_sock_shutdown(struct socket *sock, int how) BT_DBG("sock %p, sk %p", sock, sk); - if (!sk) return 0; + if (!sk) + return 0; lock_sock(sk); if (!sk->sk_shutdown) { diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index a9b81f5dacd1..2575c2db6404 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c @@ -58,9 +58,9 @@ struct rfcomm_dev { bdaddr_t src; bdaddr_t dst; - u8 channel; + u8 channel; - uint modem_status; + uint modem_status; struct rfcomm_dlc *dlc; struct tty_struct *tty; @@ -69,7 +69,7 @@ struct rfcomm_dev { struct device *tty_dev; - atomic_t wmem_alloc; + atomic_t wmem_alloc; struct sk_buff_head pending; }; @@ -431,7 +431,8 @@ static int rfcomm_release_dev(void __user *arg) BT_DBG("dev_id %d flags 0x%x", req.dev_id, req.flags); - if (!(dev = rfcomm_dev_get(req.dev_id))) + dev = rfcomm_dev_get(req.dev_id); + if (!dev) return -ENODEV; if (dev->flags != NOCAP_FLAGS && !capable(CAP_NET_ADMIN)) { @@ -470,7 +471,8 @@ static int rfcomm_get_dev_list(void __user *arg) size = sizeof(*dl) + dev_num * sizeof(*di); - if (!(dl = kmalloc(size, GFP_KERNEL))) + dl = kmalloc(size, GFP_KERNEL); + if (!dl) return -ENOMEM; di = dl->dev_info; @@ -513,7 +515,8 @@ static int rfcomm_get_dev_info(void __user *arg) if (copy_from_user(&di, 
arg, sizeof(di))) return -EFAULT; - if (!(dev = rfcomm_dev_get(di.id))) + dev = rfcomm_dev_get(di.id); + if (!dev) return -ENODEV; di.flags = dev->flags; @@ -561,7 +564,8 @@ static void rfcomm_dev_data_ready(struct rfcomm_dlc *dlc, struct sk_buff *skb) return; } - if (!(tty = dev->tty) || !skb_queue_empty(&dev->pending)) { + tty = dev->tty; + if (!tty || !skb_queue_empty(&dev->pending)) { skb_queue_tail(&dev->pending, skb); return; } @@ -796,7 +800,8 @@ static int rfcomm_tty_write(struct tty_struct *tty, const unsigned char *buf, in memcpy(skb_put(skb, size), buf + sent, size); - if ((err = rfcomm_dlc_send(dlc, skb)) < 0) { + err = rfcomm_dlc_send(dlc, skb); + if (err < 0) { kfree_skb(skb); break; } @@ -892,7 +897,7 @@ static void rfcomm_tty_set_termios(struct tty_struct *tty, struct ktermios *old) /* Parity on/off and when on, odd/even */ if (((old->c_cflag & PARENB) != (new->c_cflag & PARENB)) || - ((old->c_cflag & PARODD) != (new->c_cflag & PARODD)) ) { + ((old->c_cflag & PARODD) != (new->c_cflag & PARODD))) { changes |= RFCOMM_RPN_PM_PARITY; BT_DBG("Parity change detected."); } @@ -937,11 +942,10 @@ static void rfcomm_tty_set_termios(struct tty_struct *tty, struct ktermios *old) /* POSIX does not support 1.5 stop bits and RFCOMM does not * support 2 stop bits. So a request for 2 stop bits gets * translated to 1.5 stop bits */ - if (new->c_cflag & CSTOPB) { + if (new->c_cflag & CSTOPB) stop_bits = RFCOMM_RPN_STOP_15; - } else { + else stop_bits = RFCOMM_RPN_STOP_1; - } /* Handle number of data bits [5-8] */ if ((old->c_cflag & CSIZE) != (new->c_cflag & CSIZE)) -- cgit v1.2.3-59-g8ed1b From 894718a6be69d8cfd191dc291b42be32a1e4851b Mon Sep 17 00:00:00 2001 From: Andrei Emeltchenko Date: Wed, 1 Dec 2010 16:58:24 +0200 Subject: Bluetooth: clean up l2cap code Do not initialize static vars to zero, macros with complex values shall be enclosed with (), remove unneeded braces. Signed-off-by: Andrei Emeltchenko Signed-off-by: Gustavo F. 
Padovan --- include/net/bluetooth/l2cap.h | 10 +++++----- net/bluetooth/l2cap.c | 7 +++---- 2 files changed, 8 insertions(+), 9 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index c819c8bf9b68..217bb91a7345 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -417,11 +417,11 @@ static inline int l2cap_tx_window_full(struct sock *sk) return sub == pi->remote_tx_win; } -#define __get_txseq(ctrl) ((ctrl) & L2CAP_CTRL_TXSEQ) >> 1 -#define __get_reqseq(ctrl) ((ctrl) & L2CAP_CTRL_REQSEQ) >> 8 -#define __is_iframe(ctrl) !((ctrl) & L2CAP_CTRL_FRAME_TYPE) -#define __is_sframe(ctrl) (ctrl) & L2CAP_CTRL_FRAME_TYPE -#define __is_sar_start(ctrl) ((ctrl) & L2CAP_CTRL_SAR) == L2CAP_SDU_START +#define __get_txseq(ctrl) (((ctrl) & L2CAP_CTRL_TXSEQ) >> 1) +#define __get_reqseq(ctrl) (((ctrl) & L2CAP_CTRL_REQSEQ) >> 8) +#define __is_iframe(ctrl) (!((ctrl) & L2CAP_CTRL_FRAME_TYPE)) +#define __is_sframe(ctrl) ((ctrl) & L2CAP_CTRL_FRAME_TYPE) +#define __is_sar_start(ctrl) (((ctrl) & L2CAP_CTRL_SAR) == L2CAP_SDU_START) void l2cap_load(void); diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index a1c7ae88dd1f..c12eccfdfe01 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -57,7 +57,7 @@ #define VERSION "2.15" -static int disable_ertm = 0; +static int disable_ertm; static u32 l2cap_feat_mask = L2CAP_FEAT_FIXED_CHAN; static u8 l2cap_fixed_chan[8] = { 0x02, }; @@ -4162,11 +4162,10 @@ static inline void l2cap_data_channel_rrframe(struct sock *sk, u16 rx_control) __mod_retrans_timer(); pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY; - if (pi->conn_state & L2CAP_CONN_SREJ_SENT) { + if (pi->conn_state & L2CAP_CONN_SREJ_SENT) l2cap_send_ack(pi); - } else { + else l2cap_ertm_send(sk); - } } } -- cgit v1.2.3-59-g8ed1b From 70f23020e6d89155504b5b39f22505f4aec6fa6f Mon Sep 17 00:00:00 2001 From: Andrei Emeltchenko Date: Wed, 1 Dec 2010 16:58:25 +0200 Subject: Bluetooth: clean up hci 
code Do not use assignment in IF condition, remove extra spaces, fixing typos, simplify code. Signed-off-by: Andrei Emeltchenko Signed-off-by: Gustavo F. Padovan --- include/net/bluetooth/hci.h | 4 +-- include/net/bluetooth/hci_core.h | 14 ++++----- net/bluetooth/hci_conn.c | 23 +++++++++----- net/bluetooth/hci_core.c | 66 +++++++++++++++++++++++++--------------- net/bluetooth/hci_event.c | 8 +++-- net/bluetooth/hci_sock.c | 17 +++++++---- 6 files changed, 82 insertions(+), 50 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index e30e00834340..e9527c512345 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -489,7 +489,7 @@ struct hci_rp_read_local_name { #define HCI_OP_WRITE_PG_TIMEOUT 0x0c18 -#define HCI_OP_WRITE_SCAN_ENABLE 0x0c1a +#define HCI_OP_WRITE_SCAN_ENABLE 0x0c1a #define SCAN_DISABLED 0x00 #define SCAN_INQUIRY 0x01 #define SCAN_PAGE 0x02 @@ -874,7 +874,7 @@ struct hci_ev_si_security { struct hci_command_hdr { __le16 opcode; /* OCF & OGF */ - __u8 plen; + __u8 plen; } __packed; struct hci_event_hdr { diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index ebec8c9a929d..9c08625617a1 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -44,15 +44,15 @@ struct inquiry_data { }; struct inquiry_entry { - struct inquiry_entry *next; + struct inquiry_entry *next; __u32 timestamp; struct inquiry_data data; }; struct inquiry_cache { - spinlock_t lock; + spinlock_t lock; __u32 timestamp; - struct inquiry_entry *list; + struct inquiry_entry *list; }; struct hci_conn_hash { @@ -141,7 +141,7 @@ struct hci_dev { void *driver_data; void *core_data; - atomic_t promisc; + atomic_t promisc; struct dentry *debugfs; @@ -150,7 +150,7 @@ struct hci_dev { struct rfkill *rfkill; - struct module *owner; + struct module *owner; int (*open)(struct hci_dev *hdev); int (*close)(struct hci_dev *hdev); @@ -215,8 +215,8 @@ extern 
rwlock_t hci_dev_list_lock; extern rwlock_t hci_cb_list_lock; /* ----- Inquiry cache ----- */ -#define INQUIRY_CACHE_AGE_MAX (HZ*30) // 30 seconds -#define INQUIRY_ENTRY_AGE_MAX (HZ*60) // 60 seconds +#define INQUIRY_CACHE_AGE_MAX (HZ*30) /* 30 seconds */ +#define INQUIRY_ENTRY_AGE_MAX (HZ*60) /* 60 seconds */ #define inquiry_cache_lock(c) spin_lock(&c->lock) #define inquiry_cache_unlock(c) spin_unlock(&c->lock) diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 0b1e460fe440..6b90a4191734 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -39,7 +39,7 @@ #include #include -#include +#include #include #include @@ -66,7 +66,8 @@ void hci_acl_connect(struct hci_conn *conn) bacpy(&cp.bdaddr, &conn->dst); cp.pscan_rep_mode = 0x02; - if ((ie = hci_inquiry_cache_lookup(hdev, &conn->dst))) { + ie = hci_inquiry_cache_lookup(hdev, &conn->dst); + if (ie) { if (inquiry_entry_age(ie) <= INQUIRY_ENTRY_AGE_MAX) { cp.pscan_rep_mode = ie->data.pscan_rep_mode; cp.pscan_mode = ie->data.pscan_mode; @@ -368,8 +369,10 @@ struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, __u8 BT_DBG("%s dst %s", hdev->name, batostr(dst)); - if (!(acl = hci_conn_hash_lookup_ba(hdev, ACL_LINK, dst))) { - if (!(acl = hci_conn_add(hdev, ACL_LINK, dst))) + acl = hci_conn_hash_lookup_ba(hdev, ACL_LINK, dst); + if (!acl) { + acl = hci_conn_add(hdev, ACL_LINK, dst); + if (!acl) return NULL; } @@ -389,8 +392,10 @@ struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, __u8 if (type == ACL_LINK) return acl; - if (!(sco = hci_conn_hash_lookup_ba(hdev, type, dst))) { - if (!(sco = hci_conn_add(hdev, type, dst))) { + sco = hci_conn_hash_lookup_ba(hdev, type, dst); + if (!sco) { + sco = hci_conn_add(hdev, type, dst); + if (!sco) { hci_conn_put(acl); return NULL; } @@ -647,10 +652,12 @@ int hci_get_conn_list(void __user *arg) size = sizeof(req) + req.conn_num * sizeof(*ci); - if (!(cl = kmalloc(size, GFP_KERNEL))) + cl = 
kmalloc(size, GFP_KERNEL); + if (!cl) return -ENOMEM; - if (!(hdev = hci_dev_get(req.dev_id))) { + hdev = hci_dev_get(req.dev_id); + if (!hdev) { kfree(cl); return -ENODEV; } diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index bc2a052e518b..51c61f75a797 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -44,7 +44,7 @@ #include #include -#include +#include #include #include @@ -349,20 +349,23 @@ struct inquiry_entry *hci_inquiry_cache_lookup(struct hci_dev *hdev, bdaddr_t *b void hci_inquiry_cache_update(struct hci_dev *hdev, struct inquiry_data *data) { struct inquiry_cache *cache = &hdev->inq_cache; - struct inquiry_entry *e; + struct inquiry_entry *ie; BT_DBG("cache %p, %s", cache, batostr(&data->bdaddr)); - if (!(e = hci_inquiry_cache_lookup(hdev, &data->bdaddr))) { + ie = hci_inquiry_cache_lookup(hdev, &data->bdaddr); + if (!ie) { /* Entry not in the cache. Add new one. */ - if (!(e = kzalloc(sizeof(struct inquiry_entry), GFP_ATOMIC))) + ie = kzalloc(sizeof(struct inquiry_entry), GFP_ATOMIC); + if (!ie) return; - e->next = cache->list; - cache->list = e; + + ie->next = cache->list; + cache->list = ie; } - memcpy(&e->data, data, sizeof(*data)); - e->timestamp = jiffies; + memcpy(&ie->data, data, sizeof(*data)); + ie->timestamp = jiffies; cache->timestamp = jiffies; } @@ -422,16 +425,20 @@ int hci_inquiry(void __user *arg) hci_dev_lock_bh(hdev); if (inquiry_cache_age(hdev) > INQUIRY_CACHE_AGE_MAX || - inquiry_cache_empty(hdev) || - ir.flags & IREQ_CACHE_FLUSH) { + inquiry_cache_empty(hdev) || + ir.flags & IREQ_CACHE_FLUSH) { inquiry_cache_flush(hdev); do_inquiry = 1; } hci_dev_unlock_bh(hdev); timeo = ir.length * msecs_to_jiffies(2000); - if (do_inquiry && (err = hci_request(hdev, hci_inq_req, (unsigned long)&ir, timeo)) < 0) - goto done; + + if (do_inquiry) { + err = hci_request(hdev, hci_inq_req, (unsigned long)&ir, timeo); + if (err < 0) + goto done; + } /* for unlimited number of responses we will use buffer with 255 
entries */ max_rsp = (ir.num_rsp == 0) ? 255 : ir.num_rsp; @@ -439,7 +446,8 @@ int hci_inquiry(void __user *arg) /* cache_dump can't sleep. Therefore we allocate temp buffer and then * copy it to the user space. */ - if (!(buf = kmalloc(sizeof(struct inquiry_info) * max_rsp, GFP_KERNEL))) { + buf = kmalloc(sizeof(struct inquiry_info) *max_rsp, GFP_KERNEL); + if (!buf) { err = -ENOMEM; goto done; } @@ -611,7 +619,8 @@ int hci_dev_close(__u16 dev) struct hci_dev *hdev; int err; - if (!(hdev = hci_dev_get(dev))) + hdev = hci_dev_get(dev); + if (!hdev) return -ENODEV; err = hci_dev_do_close(hdev); hci_dev_put(hdev); @@ -623,7 +632,8 @@ int hci_dev_reset(__u16 dev) struct hci_dev *hdev; int ret = 0; - if (!(hdev = hci_dev_get(dev))) + hdev = hci_dev_get(dev); + if (!hdev) return -ENODEV; hci_req_lock(hdev); @@ -663,7 +673,8 @@ int hci_dev_reset_stat(__u16 dev) struct hci_dev *hdev; int ret = 0; - if (!(hdev = hci_dev_get(dev))) + hdev = hci_dev_get(dev); + if (!hdev) return -ENODEV; memset(&hdev->stat, 0, sizeof(struct hci_dev_stats)); @@ -682,7 +693,8 @@ int hci_dev_cmd(unsigned int cmd, void __user *arg) if (copy_from_user(&dr, arg, sizeof(dr))) return -EFAULT; - if (!(hdev = hci_dev_get(dr.dev_id))) + hdev = hci_dev_get(dr.dev_id); + if (!hdev) return -ENODEV; switch (cmd) { @@ -763,7 +775,8 @@ int hci_get_dev_list(void __user *arg) size = sizeof(*dl) + dev_num * sizeof(*dr); - if (!(dl = kzalloc(size, GFP_KERNEL))) + dl = kzalloc(size, GFP_KERNEL); + if (!dl) return -ENOMEM; dr = dl->dev_req; @@ -797,7 +810,8 @@ int hci_get_dev_info(void __user *arg) if (copy_from_user(&di, arg, sizeof(di))) return -EFAULT; - if (!(hdev = hci_dev_get(di.dev_id))) + hdev = hci_dev_get(di.dev_id); + if (!hdev) return -ENODEV; strcpy(di.name, hdev->name); @@ -905,7 +919,7 @@ int hci_register_dev(struct hci_dev *hdev) hdev->sniff_max_interval = 800; hdev->sniff_min_interval = 80; - tasklet_init(&hdev->cmd_task, hci_cmd_task,(unsigned long) hdev); + tasklet_init(&hdev->cmd_task, 
hci_cmd_task, (unsigned long) hdev); tasklet_init(&hdev->rx_task, hci_rx_task, (unsigned long) hdev); tasklet_init(&hdev->tx_task, hci_tx_task, (unsigned long) hdev); @@ -1368,7 +1382,8 @@ void hci_send_acl(struct hci_conn *conn, struct sk_buff *skb, __u16 flags) bt_cb(skb)->pkt_type = HCI_ACLDATA_PKT; hci_add_acl_hdr(skb, conn->handle, flags | ACL_START); - if (!(list = skb_shinfo(skb)->frag_list)) { + list = skb_shinfo(skb)->frag_list; + if (!list) { /* Non fragmented */ BT_DBG("%s nonfrag skb %p len %d", hdev->name, skb, skb->len); @@ -1609,7 +1624,8 @@ static inline void hci_acldata_packet(struct hci_dev *hdev, struct sk_buff *skb) hci_conn_enter_active_mode(conn); /* Send to upper protocol */ - if ((hp = hci_proto[HCI_PROTO_L2CAP]) && hp->recv_acldata) { + hp = hci_proto[HCI_PROTO_L2CAP]; + if (hp && hp->recv_acldata) { hp->recv_acldata(conn, skb, flags); return; } @@ -1644,7 +1660,8 @@ static inline void hci_scodata_packet(struct hci_dev *hdev, struct sk_buff *skb) register struct hci_proto *hp; /* Send to upper protocol */ - if ((hp = hci_proto[HCI_PROTO_SCO]) && hp->recv_scodata) { + hp = hci_proto[HCI_PROTO_SCO]; + if (hp && hp->recv_scodata) { hp->recv_scodata(conn, skb); return; } @@ -1727,7 +1744,8 @@ static void hci_cmd_task(unsigned long arg) if (atomic_read(&hdev->cmd_cnt) && (skb = skb_dequeue(&hdev->cmd_q))) { kfree_skb(hdev->sent_cmd); - if ((hdev->sent_cmd = skb_clone(skb, GFP_ATOMIC))) { + hdev->sent_cmd = skb_clone(skb, GFP_ATOMIC); + if (hdev->sent_cmd) { atomic_dec(&hdev->cmd_cnt); hci_send_frame(skb); hdev->cmd_last_tx = jiffies; diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 3c1957c82b61..8923b36a67a2 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -39,7 +39,7 @@ #include #include -#include +#include #include #include @@ -1512,10 +1512,12 @@ static inline void hci_num_comp_pkts_evt(struct hci_dev *hdev, struct sk_buff *s conn->sent -= count; if (conn->type == ACL_LINK) { - if 
((hdev->acl_cnt += count) > hdev->acl_pkts) + hdev->acl_cnt += count; + if (hdev->acl_cnt > hdev->acl_pkts) hdev->acl_cnt = hdev->acl_pkts; } else { - if ((hdev->sco_cnt += count) > hdev->sco_pkts) + hdev->sco_cnt += count; + if (hdev->sco_cnt > hdev->sco_pkts) hdev->sco_cnt = hdev->sco_pkts; } } diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 83acd164d39e..b3753bad2a55 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -43,7 +43,7 @@ #include #include -#include +#include #include #include @@ -125,7 +125,8 @@ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb) continue; } - if (!(nskb = skb_clone(skb, GFP_ATOMIC))) + nskb = skb_clone(skb, GFP_ATOMIC); + if (!nskb) continue; /* Put type byte before the data */ @@ -370,7 +371,8 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_le } if (haddr->hci_dev != HCI_DEV_NONE) { - if (!(hdev = hci_dev_get(haddr->hci_dev))) { + hdev = hci_dev_get(haddr->hci_dev); + if (!hdev) { err = -ENODEV; goto done; } @@ -457,7 +459,8 @@ static int hci_sock_recvmsg(struct kiocb *iocb, struct socket *sock, if (sk->sk_state == BT_CLOSED) return 0; - if (!(skb = skb_recv_datagram(sk, flags, noblock, &err))) + skb = skb_recv_datagram(sk, flags, noblock, &err); + if (!skb) return err; msg->msg_namelen = 0; @@ -499,7 +502,8 @@ static int hci_sock_sendmsg(struct kiocb *iocb, struct socket *sock, lock_sock(sk); - if (!(hdev = hci_pi(sk)->hdev)) { + hdev = hci_pi(sk)->hdev; + if (!hdev) { err = -EBADFD; goto done; } @@ -509,7 +513,8 @@ static int hci_sock_sendmsg(struct kiocb *iocb, struct socket *sock, goto done; } - if (!(skb = bt_skb_send_alloc(sk, len, msg->msg_flags & MSG_DONTWAIT, &err))) + skb = bt_skb_send_alloc(sk, len, msg->msg_flags & MSG_DONTWAIT, &err); + if (!skb) goto done; if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) { -- cgit v1.2.3-59-g8ed1b From be21871f24b0fcd8d0d09c8090385c9cec80efa3 Mon Sep 17 00:00:00 2001 From: Andrei 
Emeltchenko Date: Wed, 1 Dec 2010 16:58:26 +0200 Subject: Bluetooth: clean up legal text Remove extra spaces from legal text so that legal stuff looks the same for all bluetooth code. Signed-off-by: Andrei Emeltchenko Signed-off-by: Gustavo F. Padovan --- include/net/bluetooth/hci.h | 12 ++++++------ include/net/bluetooth/l2cap.h | 12 ++++++------ include/net/bluetooth/rfcomm.h | 14 +++++++------- include/net/bluetooth/sco.h | 12 ++++++------ 4 files changed, 25 insertions(+), 25 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index e9527c512345..f3c5ed6d7bda 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -1,4 +1,4 @@ -/* +/* BlueZ - Bluetooth protocol stack for Linux Copyright (C) 2000-2001 Qualcomm Incorporated @@ -12,13 +12,13 @@ OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY - CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES - WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, - COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS + ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, + COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS SOFTWARE IS DISCLAIMED. 
*/ diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 217bb91a7345..7ad25ca60ec0 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -1,4 +1,4 @@ -/* +/* BlueZ - Bluetooth protocol stack for Linux Copyright (C) 2000-2001 Qualcomm Incorporated Copyright (C) 2009-2010 Gustavo F. Padovan @@ -14,13 +14,13 @@ OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY - CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES - WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, - COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS + ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, + COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS SOFTWARE IS DISCLAIMED. */ diff --git a/include/net/bluetooth/rfcomm.h b/include/net/bluetooth/rfcomm.h index 2e7875691f0a..6eac4a760c3b 100644 --- a/include/net/bluetooth/rfcomm.h +++ b/include/net/bluetooth/rfcomm.h @@ -1,5 +1,5 @@ -/* - RFCOMM implementation for Linux Bluetooth stack (BlueZ). +/* + RFCOMM implementation for Linux Bluetooth stack (BlueZ) Copyright (C) 2002 Maxim Krasnyansky Copyright (C) 2002 Marcel Holtmann @@ -11,13 +11,13 @@ OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY - CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES - WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, - COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS + ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, + COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS SOFTWARE IS DISCLAIMED. */ diff --git a/include/net/bluetooth/sco.h b/include/net/bluetooth/sco.h index ea5c6641243f..1e35c43657c8 100644 --- a/include/net/bluetooth/sco.h +++ b/include/net/bluetooth/sco.h @@ -1,4 +1,4 @@ -/* +/* BlueZ - Bluetooth protocol stack for Linux Copyright (C) 2000-2001 Qualcomm Incorporated @@ -12,13 +12,13 @@ OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY - CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES - WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
- ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, - COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS + ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, + COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS SOFTWARE IS DISCLAIMED. */ -- cgit v1.2.3-59-g8ed1b From 8790ca172a1550949804a2ad59ccea310f680c9f Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 1 Dec 2010 17:28:18 -0800 Subject: inetpeer: Kill use of inet_peer_address_t typedef. They are verboten these days. Signed-off-by: David S. Miller --- include/net/inetpeer.h | 12 ++++++------ net/ipv4/inetpeer.c | 8 ++++---- 2 files changed, 10 insertions(+), 10 deletions(-) (limited to 'include/net') diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h index fb8aeb1fd23f..5161bfdf5a52 100644 --- a/include/net/inetpeer.h +++ b/include/net/inetpeer.h @@ -14,18 +14,18 @@ #include #include -typedef struct { +struct inetpeer_addr { union { __be32 a4; __be32 a6[4]; }; __u16 family; -} inet_peer_address_t; +}; struct inet_peer { /* group together avl_left,avl_right,v4daddr to speedup lookups */ struct inet_peer __rcu *avl_left, *avl_right; - inet_peer_address_t daddr; + struct inetpeer_addr daddr; __u32 avl_height; struct list_head unused; __u32 dtime; /* the time of last use of not @@ -51,11 +51,11 @@ struct inet_peer { void inet_initpeers(void) __init; /* can be called with or without local BH being disabled */ -struct inet_peer *inet_getpeer(inet_peer_address_t *daddr, int create); +struct inet_peer *inet_getpeer(struct inetpeer_addr *daddr, int create); static inline struct inet_peer *inet_getpeer_v4(__be32 v4daddr, int create) { - inet_peer_address_t daddr; + struct inetpeer_addr daddr; daddr.a4 = v4daddr; daddr.family = AF_INET; @@ -64,7 +64,7 @@ static inline struct inet_peer *inet_getpeer_v4(__be32 v4daddr, int create) static inline struct inet_peer *inet_getpeer_v6(struct in6_addr *v6daddr, int create) { - inet_peer_address_t daddr; 
+ struct inetpeer_addr daddr; ipv6_addr_copy((struct in6_addr *)daddr.a6, v6daddr); daddr.family = AF_INET6; diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index f95b89f3916d..d9bc85751c74 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -161,8 +161,8 @@ static void unlink_from_unused(struct inet_peer *p) } } -static int addr_compare(const inet_peer_address_t *a, - const inet_peer_address_t *b) +static int addr_compare(const struct inetpeer_addr *a, + const struct inetpeer_addr *b) { int i, n = (a->family == AF_INET ? 1 : 4); @@ -211,7 +211,7 @@ static int addr_compare(const inet_peer_address_t *a, * But every pointer we follow is guaranteed to be valid thanks to RCU. * We exit from this function if number of links exceeds PEER_MAXDEPTH */ -static struct inet_peer *lookup_rcu_bh(const inet_peer_address_t *daddr, +static struct inet_peer *lookup_rcu_bh(const struct inetpeer_addr *daddr, struct inet_peer_base *base) { struct inet_peer *u = rcu_dereference_bh(base->root); @@ -472,7 +472,7 @@ static int cleanup_once(unsigned long ttl) } /* Called with or without local BH being disabled. */ -struct inet_peer *inet_getpeer(inet_peer_address_t *daddr, int create) +struct inet_peer *inet_getpeer(struct inetpeer_addr *daddr, int create) { struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr; struct inet_peer_base *base = family_to_base(AF_INET); -- cgit v1.2.3-59-g8ed1b From 4399ce402c7c837dec80bf9fb40d079b39b9265a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 1 Dec 2010 17:29:08 -0800 Subject: inetpeer: Fix incorrect comment about inetpeer struct size. Now with ipv6 support it is no longer less than 64 bytes. Signed-off-by: David S. 
Miller --- include/net/inetpeer.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/net') diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h index 5161bfdf5a52..599d96e74114 100644 --- a/include/net/inetpeer.h +++ b/include/net/inetpeer.h @@ -35,7 +35,6 @@ struct inet_peer { * Once inet_peer is queued for deletion (refcnt == -1), following fields * are not available: rid, ip_id_count, tcp_ts, tcp_ts_stamp * We can share memory with rcu_head to keep inet_peer small - * (less then 64 bytes) */ union { struct { -- cgit v1.2.3-59-g8ed1b From ccb7c410ddc054b8c1ae780319bc98ae092d3854 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 1 Dec 2010 18:09:13 -0800 Subject: timewait_sock: Create and use getpeer op. The only thing AF-specific about remembering the timestamp for a time-wait TCP socket is getting the peer. Abstract that behind a new timewait_sock_ops vector. Support for real IPV6 sockets is not filled in yet, but curiously this makes timewait recycling start to work for v4-mapped ipv6 sockets. Signed-off-by: David S. 
Miller --- include/net/tcp.h | 1 + include/net/timewait_sock.h | 8 ++++++++ net/ipv4/tcp_ipv4.c | 33 +++++++++++---------------------- net/ipv4/tcp_minisocks.c | 32 ++++++++++++++++++++++++-------- net/ipv6/tcp_ipv6.c | 26 +++++++++++++++++++------- 5 files changed, 63 insertions(+), 37 deletions(-) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index 3e239641d4ee..4097320caa25 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -313,6 +313,7 @@ extern void tcp_shutdown (struct sock *sk, int how); extern int tcp_v4_rcv(struct sk_buff *skb); extern struct inet_peer *tcp_v4_get_peer(struct sock *sk, bool *release_it); +extern void *tcp_v4_tw_get_peer(struct sock *sk); extern int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw); extern int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t size); diff --git a/include/net/timewait_sock.h b/include/net/timewait_sock.h index 97c3b14da55d..053b3cf2c66a 100644 --- a/include/net/timewait_sock.h +++ b/include/net/timewait_sock.h @@ -21,6 +21,7 @@ struct timewait_sock_ops { int (*twsk_unique)(struct sock *sk, struct sock *sktw, void *twp); void (*twsk_destructor)(struct sock *sk); + void *(*twsk_getpeer)(struct sock *sk); }; static inline int twsk_unique(struct sock *sk, struct sock *sktw, void *twp) @@ -39,4 +40,11 @@ static inline void twsk_destructor(struct sock *sk) sk->sk_prot->twsk_prot->twsk_destructor(sk); } +static inline void *twsk_getpeer(struct sock *sk) +{ + if (sk->sk_prot->twsk_prot->twsk_getpeer) + return sk->sk_prot->twsk_prot->twsk_getpeer(sk); + return NULL; +} + #endif /* _TIMEWAIT_SOCK_H */ diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 0ddf819cfb5d..dd555051ec8b 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1210,12 +1210,6 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = { }; #endif -static struct timewait_sock_ops tcp_timewait_sock_ops = { - .twsk_obj_size = sizeof(struct 
tcp_timewait_sock), - .twsk_unique = tcp_twsk_unique, - .twsk_destructor= tcp_twsk_destructor, -}; - int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) { struct tcp_extend_values tmp_ext; @@ -1783,25 +1777,20 @@ struct inet_peer *tcp_v4_get_peer(struct sock *sk, bool *release_it) } EXPORT_SYMBOL(tcp_v4_get_peer); -int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw) +void *tcp_v4_tw_get_peer(struct sock *sk) { - struct inet_peer *peer = inet_getpeer_v4(tw->tw_daddr, 1); - - if (peer) { - const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); - - if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 || - ((u32)get_seconds() - peer->tcp_ts_stamp > TCP_PAWS_MSL && - peer->tcp_ts_stamp <= (u32)tcptw->tw_ts_recent_stamp)) { - peer->tcp_ts_stamp = (u32)tcptw->tw_ts_recent_stamp; - peer->tcp_ts = tcptw->tw_ts_recent; - } - inet_putpeer(peer); - return 1; - } + struct inet_timewait_sock *tw = inet_twsk(sk); - return 0; + return inet_getpeer_v4(tw->tw_daddr, 1); } +EXPORT_SYMBOL(tcp_v4_tw_get_peer); + +static struct timewait_sock_ops tcp_timewait_sock_ops = { + .twsk_obj_size = sizeof(struct tcp_timewait_sock), + .twsk_unique = tcp_twsk_unique, + .twsk_destructor= tcp_twsk_destructor, + .twsk_getpeer = tcp_v4_tw_get_peer, +}; const struct inet_connection_sock_af_ops ipv4_specific = { .queue_xmit = ip_queue_xmit, diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 059082c873cf..3527b51d6159 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -78,6 +78,27 @@ static int tcp_remember_stamp(struct sock *sk) return 0; } +static int tcp_tw_remember_stamp(struct inet_timewait_sock *tw) +{ + struct sock *sk = (struct sock *) tw; + struct inet_peer *peer; + + peer = twsk_getpeer(sk); + if (peer) { + const struct tcp_timewait_sock *tcptw = tcp_twsk(sk); + + if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 || + ((u32)get_seconds() - peer->tcp_ts_stamp > TCP_PAWS_MSL && + peer->tcp_ts_stamp <= 
(u32)tcptw->tw_ts_recent_stamp)) { + peer->tcp_ts_stamp = (u32)tcptw->tw_ts_recent_stamp; + peer->tcp_ts = tcptw->tw_ts_recent; + } + inet_putpeer(peer); + return 1; + } + return 0; +} + static __inline__ int tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win) { if (seq == s_win) @@ -178,14 +199,9 @@ kill_with_rst: tcptw->tw_ts_recent = tmp_opt.rcv_tsval; } - /* I am shamed, but failed to make it more elegant. - * Yes, it is direct reference to IP, which is impossible - * to generalize to IPv6. Taking into account that IPv6 - * do not understand recycling in any case, it not - * a big problem in practice. --ANK */ - if (tw->tw_family == AF_INET && - tcp_death_row.sysctl_tw_recycle && tcptw->tw_ts_recent_stamp && - tcp_v4_tw_remember_stamp(tw)) + if (tcp_death_row.sysctl_tw_recycle && + tcptw->tw_ts_recent_stamp && + tcp_tw_remember_stamp(tw)) inet_twsk_schedule(tw, &tcp_death_row, tw->tw_timeout, TCP_TIMEWAIT_LEN); else diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index e394d0029d8d..5f73a1808e36 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -906,12 +906,6 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { }; #endif -static struct timewait_sock_ops tcp6_timewait_sock_ops = { - .twsk_obj_size = sizeof(struct tcp6_timewait_sock), - .twsk_unique = tcp_twsk_unique, - .twsk_destructor= tcp_twsk_destructor, -}; - static void __tcp_v6_send_check(struct sk_buff *skb, struct in6_addr *saddr, struct in6_addr *daddr) { @@ -1818,12 +1812,30 @@ do_time_wait: goto discard_it; } -struct inet_peer *tcp_v6_get_peer(struct sock *sk, bool *release_it) +static struct inet_peer *tcp_v6_get_peer(struct sock *sk, bool *release_it) +{ + /* Alas, not yet... */ + return NULL; +} + +static void *tcp_v6_tw_get_peer(struct sock *sk) { + struct inet_timewait_sock *tw = inet_twsk(sk); + + if (tw->tw_family == AF_INET) + return tcp_v4_tw_get_peer(sk); + /* Alas, not yet... 
*/ return NULL; } +static struct timewait_sock_ops tcp6_timewait_sock_ops = { + .twsk_obj_size = sizeof(struct tcp6_timewait_sock), + .twsk_unique = tcp_twsk_unique, + .twsk_destructor= tcp_twsk_destructor, + .twsk_getpeer = tcp_v6_tw_get_peer, +}; + static const struct inet_connection_sock_af_ops ipv6_specific = { .queue_xmit = inet6_csk_xmit, .send_check = tcp_v6_send_check, -- cgit v1.2.3-59-g8ed1b From 15c054251ab84895ec043e90826612c1a3d6d4f1 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 2 Dec 2010 10:16:06 -0800 Subject: ipv6: Add rt6_get_peer() helper. To go along side ipv4's rt_get_peer(). Signed-off-by: David S. Miller --- include/net/ip6_route.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/net') diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 23fed28db4bb..67d154a3f31b 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -59,6 +59,15 @@ static inline unsigned int rt6_flags2srcprefs(int flags) extern void rt6_bind_peer(struct rt6_info *rt, int create); +static inline struct inet_peer *rt6_get_peer(struct rt6_info *rt) +{ + if (rt->rt6i_peer) + return rt->rt6i_peer; + + rt6_bind_peer(rt, 0); + return rt->rt6i_peer; +} + extern void ip6_route_input(struct sk_buff *skb); extern struct dst_entry * ip6_route_output(struct net *net, -- cgit v1.2.3-59-g8ed1b From ae4694b2d3e4c0f47c0e804a68417be57e5daf85 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 2 Dec 2010 10:59:22 -0800 Subject: ipv6: Create inet6_csk_route_req(). Brother of ipv4's inet_csk_route_req(). Signed-off-by: David S. 
Miller --- include/net/inet6_connection_sock.h | 3 +++ net/ipv6/inet6_connection_sock.c | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+) (limited to 'include/net') diff --git a/include/net/inet6_connection_sock.h b/include/net/inet6_connection_sock.h index aae08f686633..ff013505236b 100644 --- a/include/net/inet6_connection_sock.h +++ b/include/net/inet6_connection_sock.h @@ -25,6 +25,9 @@ struct sockaddr; extern int inet6_csk_bind_conflict(const struct sock *sk, const struct inet_bind_bucket *tb); +extern struct dst_entry* inet6_csk_route_req(struct sock *sk, + const struct request_sock *req); + extern struct request_sock *inet6_csk_search_req(const struct sock *sk, struct request_sock ***prevp, const __be16 rport, diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 861d252bd1ba..e46305d1815a 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -54,6 +54,38 @@ int inet6_csk_bind_conflict(const struct sock *sk, EXPORT_SYMBOL_GPL(inet6_csk_bind_conflict); +struct dst_entry *inet6_csk_route_req(struct sock *sk, + const struct request_sock *req) +{ + struct inet6_request_sock *treq = inet6_rsk(req); + struct ipv6_pinfo *np = inet6_sk(sk); + struct in6_addr *final_p, final; + struct dst_entry *dst; + struct flowi fl; + + memset(&fl, 0, sizeof(fl)); + fl.proto = IPPROTO_TCP; + ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr); + final_p = fl6_update_dst(&fl, np->opt, &final); + ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr); + fl.oif = sk->sk_bound_dev_if; + fl.mark = sk->sk_mark; + fl.fl_ip_dport = inet_rsk(req)->rmt_port; + fl.fl_ip_sport = inet_rsk(req)->loc_port; + security_req_classify_flow(req, &fl); + + if (ip6_dst_lookup(sk, &dst, &fl)) + return NULL; + + if (final_p) + ipv6_addr_copy(&fl.fl6_dst, final_p); + + if ((xfrm_lookup(sock_net(sk), &dst, &fl, sk, 0)) < 0) + return NULL; + + return dst; +} + /* * request_sock (formerly open request) hash tables. 
*/ -- cgit v1.2.3-59-g8ed1b From 547025d5d4d1056fb4b5a0c9c3c0d5c2fe22c082 Mon Sep 17 00:00:00 2001 From: Bruno Randolf Date: Thu, 2 Dec 2010 16:23:12 +0900 Subject: cfg80211: Add documentation for antenna ops The last patch with the same title was for mac80211 ops, accidentally. Signed-off-by: Bruno Randolf Signed-off-by: John W. Linville --- include/net/cfg80211.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 49a7c53a48ca..6b2af7aeddd3 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1154,6 +1154,13 @@ struct cfg80211_pmksa { * @mgmt_frame_register: Notify driver that a management frame type was * registered. Note that this callback may not sleep, and cannot run * concurrently with itself. + * + * @set_antenna: Set antenna configuration (tx_ant, rx_ant) on the device. + * Parameters are bitmaps of allowed antennas to use for TX/RX. Drivers may + * reject TX/RX mask combinations they cannot support by returning -EINVAL + * (also see nl80211.h @NL80211_ATTR_WIPHY_ANTENNA_TX). + * + * @get_antenna: Get current antenna configuration from device (tx_ant, rx_ant). */ struct cfg80211_ops { int (*suspend)(struct wiphy *wiphy); -- cgit v1.2.3-59-g8ed1b From a9527a3b621e507c85b639c183c3aa22afd4eb61 Mon Sep 17 00:00:00 2001 From: Shan Wei Date: Wed, 1 Dec 2010 18:04:43 +0000 Subject: net: snmp: fix the wrong ICMP_MIB_MAX value __ICMP_MIB_MAX is equal to the total number of icmp mib, So no need to add 1. This wastes 4/8 bytes memory. Change it to be same as ICMP6_MIB_MAX, TCP_MIB_MAX, UDP_MIB_MAX. Signed-off-by: Shan Wei Signed-off-by: David S. 
Miller --- include/net/snmp.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/snmp.h b/include/net/snmp.h index a0e61806d480..aebb55383c43 100644 --- a/include/net/snmp.h +++ b/include/net/snmp.h @@ -61,8 +61,7 @@ struct ipstats_mib { /* ICMP */ #define ICMP_MIB_DUMMY __ICMP_MIB_MAX -#define ICMP_MIB_MAX (__ICMP_MIB_MAX + 1) - +#define ICMP_MIB_MAX __ICMP_MIB_MAX struct icmp_mib { unsigned long mibs[ICMP_MIB_MAX]; }; -- cgit v1.2.3-59-g8ed1b From dca9b2404a6d6579828da2425c051462701efd3f Mon Sep 17 00:00:00 2001 From: Shan Wei Date: Wed, 1 Dec 2010 18:05:17 +0000 Subject: net: kill unused macros from head file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These macros have been defined for several years since v2.6.12-rc2(tracing by git), but never be used. So remove them. Signed-off-by: Shan Wei Signed-off-by: David S. Miller --- include/net/addrconf.h | 2 -- include/net/ip6_route.h | 1 - include/net/ndisc.h | 3 --- include/net/snmp.h | 1 - include/net/sock.h | 3 --- include/net/tcp.h | 6 ------ 6 files changed, 16 deletions(-) (limited to 'include/net') diff --git a/include/net/addrconf.h b/include/net/addrconf.h index a9441249306c..23710aa6a181 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -1,8 +1,6 @@ #ifndef _ADDRCONF_H #define _ADDRCONF_H -#define RETRANS_TIMER HZ - #define MAX_RTR_SOLICITATIONS 3 #define RTR_SOLICITATION_INTERVAL (4*HZ) diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 67d154a3f31b..e06e0ca1e91b 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -3,7 +3,6 @@ #define IP6_RT_PRIO_USER 1024 #define IP6_RT_PRIO_ADDRCONF 256 -#define IP6_RT_PRIO_KERN 512 struct route_info { __u8 type; diff --git a/include/net/ndisc.h b/include/net/ndisc.h index 895997bc2ead..e0e594f8e9d9 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -42,9 +42,6 @@ enum { #define ND_REACHABLE_TIME (30*HZ) 
#define ND_RETRANS_TIMER HZ -#define ND_MIN_RANDOM_FACTOR (1/2) -#define ND_MAX_RANDOM_FACTOR (3/2) - #ifdef __KERNEL__ #include diff --git a/include/net/snmp.h b/include/net/snmp.h index aebb55383c43..762e2abce889 100644 --- a/include/net/snmp.h +++ b/include/net/snmp.h @@ -60,7 +60,6 @@ struct ipstats_mib { }; /* ICMP */ -#define ICMP_MIB_DUMMY __ICMP_MIB_MAX #define ICMP_MIB_MAX __ICMP_MIB_MAX struct icmp_mib { unsigned long mibs[ICMP_MIB_MAX]; diff --git a/include/net/sock.h b/include/net/sock.h index 5557dfb3dd68..717cfbf649df 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -516,9 +516,6 @@ static __inline__ void sk_add_bind_node(struct sock *sk, #define sk_nulls_for_each_from(__sk, node) \ if (__sk && ({ node = &(__sk)->sk_nulls_node; 1; })) \ hlist_nulls_for_each_entry_from(__sk, node, sk_nulls_node) -#define sk_for_each_continue(__sk, node) \ - if (__sk && ({ node = &(__sk)->sk_node; 1; })) \ - hlist_for_each_entry_continue(__sk, node, sk_node) #define sk_for_each_safe(__sk, node, tmp, list) \ hlist_for_each_entry_safe(__sk, node, tmp, list, sk_node) #define sk_for_each_bound(__sk, node, list) \ diff --git a/include/net/tcp.h b/include/net/tcp.h index 4097320caa25..3f227baee4be 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -100,12 +100,6 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo); #define TCP_SYNACK_RETRIES 5 /* number of times to retry passive opening a * connection: ~180sec is RFC minimum */ - -#define TCP_ORPHAN_RETRIES 7 /* number of times to retry on an orphaned - * socket. 7 is ~50sec-16min. 
- */ - - #define TCP_TIMEWAIT_LEN (60*HZ) /* how long to wait to destroy TIME-WAIT * state, about 60 seconds */ #define TCP_FIN_TIMEOUT TCP_TIMEWAIT_LEN -- cgit v1.2.3-59-g8ed1b From d265fef6ddf9042195aae551e1fde211c2a1588b Mon Sep 17 00:00:00 2001 From: Allan Stephens Date: Tue, 30 Nov 2010 12:00:53 +0000 Subject: tipc: Remove obsolete native API files and exports As part of the removal of TIPC's native API support it is no longer necessary for TIPC to export symbols for routines that can be called by kernel-based applications, nor for it to have header files that kernel-based applications can include to access the declarations for those routines. This commit eliminates the exporting of symbols by TIPC and migrates the contents of each obsolete native API include file into its corresponding non-native API equivalent. The code which was migrated in this commit was migrated intact, in that there are no technical changes combined with the relocation. Signed-off-by: Allan Stephens Signed-off-by: Paul Gortmaker Signed-off-by: David S. 
Miller --- include/net/tipc/tipc.h | 186 ------------------------------------ include/net/tipc/tipc_bearer.h | 138 --------------------------- include/net/tipc/tipc_msg.h | 207 ----------------------------------------- include/net/tipc/tipc_port.h | 101 -------------------- net/tipc/bcast.c | 1 + net/tipc/bearer.h | 69 +++++++++++++- net/tipc/config.c | 2 +- net/tipc/core.c | 40 -------- net/tipc/core.h | 14 ++- net/tipc/eth_media.c | 6 +- net/tipc/msg.h | 168 +++++++++++++++++++++++++++++++-- net/tipc/port.h | 131 ++++++++++++++++++++++++++ net/tipc/socket.c | 3 +- net/tipc/subscr.c | 1 + net/tipc/user_reg.h | 5 + 15 files changed, 382 insertions(+), 690 deletions(-) delete mode 100644 include/net/tipc/tipc.h delete mode 100644 include/net/tipc/tipc_bearer.h delete mode 100644 include/net/tipc/tipc_msg.h delete mode 100644 include/net/tipc/tipc_port.h (limited to 'include/net') diff --git a/include/net/tipc/tipc.h b/include/net/tipc/tipc.h deleted file mode 100644 index 1e0645e1eed2..000000000000 --- a/include/net/tipc/tipc.h +++ /dev/null @@ -1,186 +0,0 @@ -/* - * include/net/tipc/tipc.h: Main include file for TIPC users - * - * Copyright (c) 2003-2006, Ericsson AB - * Copyright (c) 2005,2010 Wind River Systems - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the names of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. 
- * - * Alternatively, this software may be distributed under the terms of the - * GNU General Public License ("GPL") version 2 as published by the Free - * Software Foundation. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef _NET_TIPC_H_ -#define _NET_TIPC_H_ - -#ifdef __KERNEL__ - -#include -#include - -/* - * Native API - */ - -/* - * TIPC operating mode routines - */ - -#define TIPC_NOT_RUNNING 0 -#define TIPC_NODE_MODE 1 -#define TIPC_NET_MODE 2 - -typedef void (*tipc_mode_event)(void *usr_handle, int mode, u32 addr); - -int tipc_attach(unsigned int *userref, tipc_mode_event, void *usr_handle); - -void tipc_detach(unsigned int userref); - -/* - * TIPC port manipulation routines - */ - -typedef void (*tipc_msg_err_event) (void *usr_handle, - u32 portref, - struct sk_buff **buf, - unsigned char const *data, - unsigned int size, - int reason, - struct tipc_portid const *attmpt_destid); - -typedef void (*tipc_named_msg_err_event) (void *usr_handle, - u32 portref, - struct sk_buff **buf, - unsigned char const *data, - unsigned int size, - int reason, - struct tipc_name_seq const *attmpt_dest); - -typedef void (*tipc_conn_shutdown_event) (void *usr_handle, - u32 portref, - struct sk_buff **buf, - unsigned char 
const *data, - unsigned int size, - int reason); - -typedef void (*tipc_msg_event) (void *usr_handle, - u32 portref, - struct sk_buff **buf, - unsigned char const *data, - unsigned int size, - unsigned int importance, - struct tipc_portid const *origin); - -typedef void (*tipc_named_msg_event) (void *usr_handle, - u32 portref, - struct sk_buff **buf, - unsigned char const *data, - unsigned int size, - unsigned int importance, - struct tipc_portid const *orig, - struct tipc_name_seq const *dest); - -typedef void (*tipc_conn_msg_event) (void *usr_handle, - u32 portref, - struct sk_buff **buf, - unsigned char const *data, - unsigned int size); - -typedef void (*tipc_continue_event) (void *usr_handle, - u32 portref); - -int tipc_createport(unsigned int tipc_user, - void *usr_handle, - unsigned int importance, - tipc_msg_err_event error_cb, - tipc_named_msg_err_event named_error_cb, - tipc_conn_shutdown_event conn_error_cb, - tipc_msg_event message_cb, - tipc_named_msg_event named_message_cb, - tipc_conn_msg_event conn_message_cb, - tipc_continue_event continue_event_cb, - u32 *portref); - -int tipc_deleteport(u32 portref); - -int tipc_ownidentity(u32 portref, struct tipc_portid *port); - -int tipc_portimportance(u32 portref, unsigned int *importance); -int tipc_set_portimportance(u32 portref, unsigned int importance); - -int tipc_portunreliable(u32 portref, unsigned int *isunreliable); -int tipc_set_portunreliable(u32 portref, unsigned int isunreliable); - -int tipc_portunreturnable(u32 portref, unsigned int *isunreturnable); -int tipc_set_portunreturnable(u32 portref, unsigned int isunreturnable); - -int tipc_publish(u32 portref, unsigned int scope, - struct tipc_name_seq const *name_seq); -int tipc_withdraw(u32 portref, unsigned int scope, - struct tipc_name_seq const *name_seq); - -int tipc_connect2port(u32 portref, struct tipc_portid const *port); - -int tipc_disconnect(u32 portref); - -int tipc_shutdown(u32 ref); - -/* - * TIPC messaging routines - */ - -#define 
TIPC_PORT_IMPORTANCE 100 /* send using current port setting */ - - -int tipc_send(u32 portref, - unsigned int num_sect, - struct iovec const *msg_sect); - -int tipc_send2name(u32 portref, - struct tipc_name const *name, - u32 domain, - unsigned int num_sect, - struct iovec const *msg_sect); - -int tipc_send2port(u32 portref, - struct tipc_portid const *dest, - unsigned int num_sect, - struct iovec const *msg_sect); - -int tipc_send_buf2port(u32 portref, - struct tipc_portid const *dest, - struct sk_buff *buf, - unsigned int dsz); - -int tipc_multicast(u32 portref, - struct tipc_name_seq const *seq, - u32 domain, /* currently unused */ - unsigned int section_count, - struct iovec const *msg); -#endif - -#endif diff --git a/include/net/tipc/tipc_bearer.h b/include/net/tipc/tipc_bearer.h deleted file mode 100644 index ee2f304e4919..000000000000 --- a/include/net/tipc/tipc_bearer.h +++ /dev/null @@ -1,138 +0,0 @@ -/* - * include/net/tipc/tipc_bearer.h: Include file for privileged access to TIPC bearers - * - * Copyright (c) 2003-2006, Ericsson AB - * Copyright (c) 2005, Wind River Systems - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the names of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. 
- * - * Alternatively, this software may be distributed under the terms of the - * GNU General Public License ("GPL") version 2 as published by the Free - * Software Foundation. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef _NET_TIPC_BEARER_H_ -#define _NET_TIPC_BEARER_H_ - -#ifdef __KERNEL__ - -#include -#include -#include - -/* - * Identifiers of supported TIPC media types - */ - -#define TIPC_MEDIA_TYPE_ETH 1 - -/* - * Destination address structure used by TIPC bearers when sending messages - * - * IMPORTANT: The fields of this structure MUST be stored using the specified - * byte order indicated below, as the structure is exchanged between nodes - * as part of a link setup process. 
- */ - -struct tipc_media_addr { - __be32 type; /* bearer type (network byte order) */ - union { - __u8 eth_addr[6]; /* 48 bit Ethernet addr (byte array) */ -#if 0 - /* Prototypes for other possible bearer types */ - - struct { - __u16 sin_family; - __u16 sin_port; - struct { - __u32 s_addr; - } sin_addr; - char pad[4]; - } addr_in; /* IP-based bearer */ - __u16 sock_descr; /* generic socket bearer */ -#endif - } dev_addr; -}; - -/** - * struct tipc_bearer - TIPC bearer info available to privileged users - * @usr_handle: pointer to additional user-defined information about bearer - * @mtu: max packet size bearer can support - * @blocked: non-zero if bearer is blocked - * @lock: spinlock for controlling access to bearer - * @addr: media-specific address associated with bearer - * @name: bearer name (format = media:interface) - * - * Note: TIPC initializes "name" and "lock" fields; user is responsible for - * initialization all other fields when a bearer is enabled. - */ - -struct tipc_bearer { - void *usr_handle; - u32 mtu; - int blocked; - spinlock_t lock; - struct tipc_media_addr addr; - char name[TIPC_MAX_BEARER_NAME]; -}; - -/* - * TIPC routines available to supported media types - */ - -int tipc_register_media(u32 media_type, - char *media_name, - int (*enable)(struct tipc_bearer *), - void (*disable)(struct tipc_bearer *), - int (*send_msg)(struct sk_buff *, - struct tipc_bearer *, - struct tipc_media_addr *), - char *(*addr2str)(struct tipc_media_addr *a, - char *str_buf, - int str_size), - struct tipc_media_addr *bcast_addr, - const u32 bearer_priority, - const u32 link_tolerance, /* [ms] */ - const u32 send_window_limit); - -void tipc_recv_msg(struct sk_buff *buf, struct tipc_bearer *tb_ptr); - -int tipc_block_bearer(const char *name); -void tipc_continue(struct tipc_bearer *tb_ptr); - -int tipc_enable_bearer(const char *bearer_name, u32 bcast_scope, u32 priority); -int tipc_disable_bearer(const char *name); - -/* - * Routines made available to TIPC by 
supported media types - */ - -int tipc_eth_media_start(void); -void tipc_eth_media_stop(void); - -#endif - -#endif diff --git a/include/net/tipc/tipc_msg.h b/include/net/tipc/tipc_msg.h deleted file mode 100644 index ffe50b4e7b93..000000000000 --- a/include/net/tipc/tipc_msg.h +++ /dev/null @@ -1,207 +0,0 @@ -/* - * include/net/tipc/tipc_msg.h: Include file for privileged access to TIPC message headers - * - * Copyright (c) 2003-2006, Ericsson AB - * Copyright (c) 2005, Wind River Systems - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the names of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * Alternatively, this software may be distributed under the terms of the - * GNU General Public License ("GPL") version 2 as published by the Free - * Software Foundation. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef _NET_TIPC_MSG_H_ -#define _NET_TIPC_MSG_H_ - -#ifdef __KERNEL__ - -struct tipc_msg { - __be32 hdr[15]; -}; - - -/* - TIPC user data message header format, version 2: - - - 1 0 9 8 7 6 5 4|3 2 1 0 9 8 7 6|5 4 3 2 1 0 9 8|7 6 5 4 3 2 1 0 - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - w0:|vers | user |hdr sz |n|d|s|-| message size | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - w1:|mstyp| error |rer cnt|lsc|opt p| broadcast ack no | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - w2:| link level ack no | broadcast/link level seq no | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - w3:| previous node | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - w4:| originating port | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - w5:| destination port | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - w6:| originating node | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - w7:| destination node | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - w8:| name type / transport sequence number | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - w9:| name instance/multicast lower bound | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - wA:| multicast upper bound | - 
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - / / - \ options \ - / / - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - -*/ - -#define TIPC_CONN_MSG 0 -#define TIPC_MCAST_MSG 1 -#define TIPC_NAMED_MSG 2 -#define TIPC_DIRECT_MSG 3 - - -static inline u32 msg_word(struct tipc_msg *m, u32 pos) -{ - return ntohl(m->hdr[pos]); -} - -static inline u32 msg_bits(struct tipc_msg *m, u32 w, u32 pos, u32 mask) -{ - return (msg_word(m, w) >> pos) & mask; -} - -static inline u32 msg_importance(struct tipc_msg *m) -{ - return msg_bits(m, 0, 25, 0xf); -} - -static inline u32 msg_hdr_sz(struct tipc_msg *m) -{ - return msg_bits(m, 0, 21, 0xf) << 2; -} - -static inline int msg_short(struct tipc_msg *m) -{ - return msg_hdr_sz(m) == 24; -} - -static inline u32 msg_size(struct tipc_msg *m) -{ - return msg_bits(m, 0, 0, 0x1ffff); -} - -static inline u32 msg_data_sz(struct tipc_msg *m) -{ - return msg_size(m) - msg_hdr_sz(m); -} - -static inline unchar *msg_data(struct tipc_msg *m) -{ - return ((unchar *)m) + msg_hdr_sz(m); -} - -static inline u32 msg_type(struct tipc_msg *m) -{ - return msg_bits(m, 1, 29, 0x7); -} - -static inline u32 msg_named(struct tipc_msg *m) -{ - return msg_type(m) == TIPC_NAMED_MSG; -} - -static inline u32 msg_mcast(struct tipc_msg *m) -{ - return msg_type(m) == TIPC_MCAST_MSG; -} - -static inline u32 msg_connected(struct tipc_msg *m) -{ - return msg_type(m) == TIPC_CONN_MSG; -} - -static inline u32 msg_errcode(struct tipc_msg *m) -{ - return msg_bits(m, 1, 25, 0xf); -} - -static inline u32 msg_prevnode(struct tipc_msg *m) -{ - return msg_word(m, 3); -} - -static inline u32 msg_origport(struct tipc_msg *m) -{ - return msg_word(m, 4); -} - -static inline u32 msg_destport(struct tipc_msg *m) -{ - return msg_word(m, 5); -} - -static inline u32 msg_mc_netid(struct tipc_msg *m) -{ - return msg_word(m, 5); -} - -static inline u32 msg_orignode(struct tipc_msg *m) -{ - if (likely(msg_short(m))) - return msg_prevnode(m); - return 
msg_word(m, 6); -} - -static inline u32 msg_destnode(struct tipc_msg *m) -{ - return msg_word(m, 7); -} - -static inline u32 msg_nametype(struct tipc_msg *m) -{ - return msg_word(m, 8); -} - -static inline u32 msg_nameinst(struct tipc_msg *m) -{ - return msg_word(m, 9); -} - -static inline u32 msg_namelower(struct tipc_msg *m) -{ - return msg_nameinst(m); -} - -static inline u32 msg_nameupper(struct tipc_msg *m) -{ - return msg_word(m, 10); -} - -#endif - -#endif diff --git a/include/net/tipc/tipc_port.h b/include/net/tipc/tipc_port.h deleted file mode 100644 index 1893aaf49426..000000000000 --- a/include/net/tipc/tipc_port.h +++ /dev/null @@ -1,101 +0,0 @@ -/* - * include/net/tipc/tipc_port.h: Include file for privileged access to TIPC ports - * - * Copyright (c) 1994-2007, Ericsson AB - * Copyright (c) 2005-2008, Wind River Systems - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the names of the copyright holders nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * Alternatively, this software may be distributed under the terms of the - * GNU General Public License ("GPL") version 2 as published by the Free - * Software Foundation. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef _NET_TIPC_PORT_H_ -#define _NET_TIPC_PORT_H_ - -#ifdef __KERNEL__ - -#include -#include -#include - -#define TIPC_FLOW_CONTROL_WIN 512 - -/** - * struct tipc_port - native TIPC port info available to privileged users - * @usr_handle: pointer to additional user-defined information about port - * @lock: pointer to spinlock for controlling access to port - * @connected: non-zero if port is currently connected to a peer port - * @conn_type: TIPC type used when connection was established - * @conn_instance: TIPC instance used when connection was established - * @conn_unacked: number of unacknowledged messages received from peer port - * @published: non-zero if port has one or more associated names - * @congested: non-zero if cannot send because of link or port congestion - * @max_pkt: maximum packet size "hint" used when building messages sent by port - * @ref: unique reference to port in TIPC object registry - * @phdr: preformatted message header used when sending messages - */ - -struct tipc_port { - void *usr_handle; - spinlock_t *lock; - int connected; - u32 conn_type; - u32 conn_instance; - u32 conn_unacked; - int published; - u32 congested; - u32 max_pkt; - u32 
ref; - struct tipc_msg phdr; -}; - - -struct tipc_port *tipc_createport_raw(void *usr_handle, - u32 (*dispatcher)(struct tipc_port *, struct sk_buff *), - void (*wakeup)(struct tipc_port *), - const u32 importance); - -int tipc_reject_msg(struct sk_buff *buf, u32 err); - -int tipc_send_buf_fast(struct sk_buff *buf, u32 destnode); - -void tipc_acknowledge(u32 port_ref,u32 ack); - -struct tipc_port *tipc_get_port(const u32 ref); - -/* - * The following routines require that the port be locked on entry - */ - -int tipc_disconnect_port(struct tipc_port *tp_ptr); - - -#endif - -#endif - diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 22a60fc98392..7d449f03c385 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -47,6 +47,7 @@ #include "name_distr.h" #include "bearer.h" #include "name_table.h" +#include "port.h" #include "bcast.h" #define MAX_PKT_DEFAULT_MCAST 1500 /* bcast link max packet size (fixed) */ diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index a850b389663e..49af7fae8b5a 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -43,6 +43,45 @@ #define MAX_BEARERS 8 #define MAX_MEDIA 4 +/* + * Identifiers of supported TIPC media types + */ +#define TIPC_MEDIA_TYPE_ETH 1 + +/* + * Destination address structure used by TIPC bearers when sending messages + * + * IMPORTANT: The fields of this structure MUST be stored using the specified + * byte order indicated below, as the structure is exchanged between nodes + * as part of a link setup process. 
+ */ +struct tipc_media_addr { + __be32 type; /* bearer type (network byte order) */ + union { + __u8 eth_addr[6]; /* 48 bit Ethernet addr (byte array) */ + } dev_addr; +}; + +/** + * struct tipc_bearer - TIPC bearer info available to media code + * @usr_handle: pointer to additional media-specific information about bearer + * @mtu: max packet size bearer can support + * @blocked: non-zero if bearer is blocked + * @lock: spinlock for controlling access to bearer + * @addr: media-specific address associated with bearer + * @name: bearer name (format = media:interface) + * + * Note: TIPC initializes "name" and "lock" fields; media code is responsible + * for initialization all other fields when a bearer is enabled. + */ +struct tipc_bearer { + void *usr_handle; + u32 mtu; + int blocked; + spinlock_t lock; + struct tipc_media_addr addr; + char name[TIPC_MAX_BEARER_NAME]; +}; /** * struct media - TIPC media information available to internal users @@ -55,7 +94,7 @@ * @priority: default link (and bearer) priority * @tolerance: default time (in ms) before declaring link failure * @window: default window (in packets) before declaring link congestion - * @type_id: TIPC media identifier [defined in tipc_bearer.h] + * @type_id: TIPC media identifier * @name: media name */ @@ -116,6 +155,34 @@ struct link; extern struct bearer tipc_bearers[]; +/* + * TIPC routines available to supported media types + */ +int tipc_register_media(u32 media_type, + char *media_name, int (*enable)(struct tipc_bearer *), + void (*disable)(struct tipc_bearer *), + int (*send_msg)(struct sk_buff *, + struct tipc_bearer *, struct tipc_media_addr *), + char *(*addr2str)(struct tipc_media_addr *a, + char *str_buf, int str_size), + struct tipc_media_addr *bcast_addr, const u32 bearer_priority, + const u32 link_tolerance, /* [ms] */ + const u32 send_window_limit); + +void tipc_recv_msg(struct sk_buff *buf, struct tipc_bearer *tb_ptr); + +int tipc_block_bearer(const char *name); +void tipc_continue(struct 
tipc_bearer *tb_ptr); + +int tipc_enable_bearer(const char *bearer_name, u32 bcast_scope, u32 priority); +int tipc_disable_bearer(const char *name); + +/* + * Routines made available to TIPC by supported media types + */ +int tipc_eth_media_start(void); +void tipc_eth_media_stop(void); + void tipc_media_addr_printf(struct print_buf *pb, struct tipc_media_addr *a); struct sk_buff *tipc_media_get_names(void); diff --git a/net/tipc/config.c b/net/tipc/config.c index 50a6133a3668..82267f3cd3b7 100644 --- a/net/tipc/config.c +++ b/net/tipc/config.c @@ -43,8 +43,8 @@ #include "addr.h" #include "name_table.h" #include "node.h" +#include "user_reg.h" #include "config.h" -#include "discover.h" struct subscr_data { char usr_handle[8]; diff --git a/net/tipc/core.c b/net/tipc/core.c index e2a09eb8efd4..785362f6a411 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -236,43 +236,3 @@ module_exit(tipc_exit); MODULE_DESCRIPTION("TIPC: Transparent Inter Process Communication"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_VERSION(TIPC_MOD_VER); - -/* Native TIPC API for kernel-space applications (see tipc.h) */ - -EXPORT_SYMBOL(tipc_attach); -EXPORT_SYMBOL(tipc_detach); -EXPORT_SYMBOL(tipc_createport); -EXPORT_SYMBOL(tipc_deleteport); -EXPORT_SYMBOL(tipc_ownidentity); -EXPORT_SYMBOL(tipc_portimportance); -EXPORT_SYMBOL(tipc_set_portimportance); -EXPORT_SYMBOL(tipc_portunreliable); -EXPORT_SYMBOL(tipc_set_portunreliable); -EXPORT_SYMBOL(tipc_portunreturnable); -EXPORT_SYMBOL(tipc_set_portunreturnable); -EXPORT_SYMBOL(tipc_publish); -EXPORT_SYMBOL(tipc_withdraw); -EXPORT_SYMBOL(tipc_connect2port); -EXPORT_SYMBOL(tipc_disconnect); -EXPORT_SYMBOL(tipc_shutdown); -EXPORT_SYMBOL(tipc_send); -EXPORT_SYMBOL(tipc_send2name); -EXPORT_SYMBOL(tipc_send2port); -EXPORT_SYMBOL(tipc_multicast); - -/* TIPC API for external bearers (see tipc_bearer.h) */ - -EXPORT_SYMBOL(tipc_block_bearer); -EXPORT_SYMBOL(tipc_continue); -EXPORT_SYMBOL(tipc_disable_bearer); -EXPORT_SYMBOL(tipc_enable_bearer); 
-EXPORT_SYMBOL(tipc_recv_msg); -EXPORT_SYMBOL(tipc_register_media); - -/* TIPC API for external APIs (see tipc_port.h) */ - -EXPORT_SYMBOL(tipc_createport_raw); -EXPORT_SYMBOL(tipc_reject_msg); -EXPORT_SYMBOL(tipc_send_buf_fast); -EXPORT_SYMBOL(tipc_acknowledge); - diff --git a/net/tipc/core.h b/net/tipc/core.h index e19389e57227..ca7e171c1043 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -39,10 +39,6 @@ #include #include -#include -#include -#include -#include #include #include #include @@ -62,6 +58,9 @@ #define TIPC_MOD_VER "2.0.0" +struct tipc_msg; /* msg.h */ +struct print_buf; /* dbg.h */ + /* * TIPC sanity test macros */ @@ -173,6 +172,13 @@ void tipc_dump_dbg(struct print_buf *, const char *fmt, ...); #define ELINKCONG EAGAIN /* link congestion <=> resource unavailable */ +/* + * TIPC operating mode routines + */ +#define TIPC_NOT_RUNNING 0 +#define TIPC_NODE_MODE 1 +#define TIPC_NET_MODE 2 + /* * Global configuration variables */ diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c index 6e988ba485fd..ee683cc8f4b1 100644 --- a/net/tipc/eth_media.c +++ b/net/tipc/eth_media.c @@ -34,13 +34,13 @@ * POSSIBILITY OF SUCH DAMAGE. 
*/ -#include -#include -#include #include #include #include +#include "core.h" +#include "bearer.h" + #define MAX_ETH_BEARERS 2 #define ETH_LINK_PRIORITY TIPC_DEF_LINK_PRI #define ETH_LINK_TOLERANCE TIPC_DEF_LINK_TOL diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 031aad18efce..aee53864d7a0 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -37,10 +37,51 @@ #ifndef _TIPC_MSG_H #define _TIPC_MSG_H -#include "core.h" +#include "bearer.h" #define TIPC_VERSION 2 +/* + * TIPC user data message header format, version 2: + * + * + * 1 0 9 8 7 6 5 4|3 2 1 0 9 8 7 6|5 4 3 2 1 0 9 8|7 6 5 4 3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * w0:|vers | user |hdr sz |n|d|s|-| message size | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * w1:|mstyp| error |rer cnt|lsc|opt p| broadcast ack no | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * w2:| link level ack no | broadcast/link level seq no | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * w3:| previous node | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * w4:| originating port | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * w5:| destination port | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * w6:| originating node | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * w7:| destination node | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * w8:| name type / transport sequence number | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * w9:| name instance/multicast lower bound | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * wA:| multicast upper bound | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * / / + * \ options \ + * / / + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + */ + +#define 
TIPC_CONN_MSG 0 +#define TIPC_MCAST_MSG 1 +#define TIPC_NAMED_MSG 2 +#define TIPC_DIRECT_MSG 3 + + #define SHORT_H_SIZE 24 /* Connected, in-cluster messages */ #define DIR_MSG_H_SIZE 32 /* Directly addressed messages */ #define LONG_H_SIZE 40 /* Named messages */ @@ -52,20 +93,26 @@ #define MAX_MSG_SIZE (MAX_H_SIZE + TIPC_MAX_USER_MSG_SIZE) -/* - TIPC user data message header format, version 2 +struct tipc_msg { + __be32 hdr[15]; +}; - - Fundamental definitions available to privileged TIPC users - are located in tipc_msg.h. - - Remaining definitions available to TIPC internal users appear below. -*/ +static inline u32 msg_word(struct tipc_msg *m, u32 pos) +{ + return ntohl(m->hdr[pos]); +} static inline void msg_set_word(struct tipc_msg *m, u32 w, u32 val) { m->hdr[w] = htonl(val); } +static inline u32 msg_bits(struct tipc_msg *m, u32 w, u32 pos, u32 mask) +{ + return (msg_word(m, w) >> pos) & mask; +} + static inline void msg_set_bits(struct tipc_msg *m, u32 w, u32 pos, u32 mask, u32 val) { @@ -112,16 +159,36 @@ static inline void msg_set_user(struct tipc_msg *m, u32 n) msg_set_bits(m, 0, 25, 0xf, n); } +static inline u32 msg_importance(struct tipc_msg *m) +{ + return msg_bits(m, 0, 25, 0xf); +} + static inline void msg_set_importance(struct tipc_msg *m, u32 i) { msg_set_user(m, i); } +static inline u32 msg_hdr_sz(struct tipc_msg *m) +{ + return msg_bits(m, 0, 21, 0xf) << 2; +} + static inline void msg_set_hdr_sz(struct tipc_msg *m,u32 n) { msg_set_bits(m, 0, 21, 0xf, n>>2); } +static inline u32 msg_size(struct tipc_msg *m) +{ + return msg_bits(m, 0, 0, 0x1ffff); +} + +static inline u32 msg_data_sz(struct tipc_msg *m) +{ + return msg_size(m) - msg_hdr_sz(m); +} + static inline int msg_non_seq(struct tipc_msg *m) { return msg_bits(m, 0, 20, 1); @@ -162,11 +229,36 @@ static inline void msg_set_size(struct tipc_msg *m, u32 sz) * Word 1 */ +static inline u32 msg_type(struct tipc_msg *m) +{ + return msg_bits(m, 1, 29, 0x7); +} + static inline void msg_set_type(struct 
tipc_msg *m, u32 n) { msg_set_bits(m, 1, 29, 0x7, n); } +static inline u32 msg_named(struct tipc_msg *m) +{ + return msg_type(m) == TIPC_NAMED_MSG; +} + +static inline u32 msg_mcast(struct tipc_msg *m) +{ + return msg_type(m) == TIPC_MCAST_MSG; +} + +static inline u32 msg_connected(struct tipc_msg *m) +{ + return msg_type(m) == TIPC_CONN_MSG; +} + +static inline u32 msg_errcode(struct tipc_msg *m) +{ + return msg_bits(m, 1, 25, 0xf); +} + static inline void msg_set_errcode(struct tipc_msg *m, u32 err) { msg_set_bits(m, 1, 25, 0xf, err); @@ -257,31 +349,68 @@ static inline void msg_set_destnode_cache(struct tipc_msg *m, u32 dnode) */ +static inline u32 msg_prevnode(struct tipc_msg *m) +{ + return msg_word(m, 3); +} + static inline void msg_set_prevnode(struct tipc_msg *m, u32 a) { msg_set_word(m, 3, a); } +static inline u32 msg_origport(struct tipc_msg *m) +{ + return msg_word(m, 4); +} + static inline void msg_set_origport(struct tipc_msg *m, u32 p) { msg_set_word(m, 4, p); } +static inline u32 msg_destport(struct tipc_msg *m) +{ + return msg_word(m, 5); +} + static inline void msg_set_destport(struct tipc_msg *m, u32 p) { msg_set_word(m, 5, p); } +static inline u32 msg_mc_netid(struct tipc_msg *m) +{ + return msg_word(m, 5); +} + static inline void msg_set_mc_netid(struct tipc_msg *m, u32 p) { msg_set_word(m, 5, p); } +static inline int msg_short(struct tipc_msg *m) +{ + return msg_hdr_sz(m) == 24; +} + +static inline u32 msg_orignode(struct tipc_msg *m) +{ + if (likely(msg_short(m))) + return msg_prevnode(m); + return msg_word(m, 6); +} + static inline void msg_set_orignode(struct tipc_msg *m, u32 a) { msg_set_word(m, 6, a); } +static inline u32 msg_destnode(struct tipc_msg *m) +{ + return msg_word(m, 7); +} + static inline void msg_set_destnode(struct tipc_msg *m, u32 a) { msg_set_word(m, 7, a); @@ -299,6 +428,11 @@ static inline u32 msg_routed(struct tipc_msg *m) return(msg_destnode(m) ^ msg_orignode(m)) >> 11; } +static inline u32 msg_nametype(struct tipc_msg 
*m) +{ + return msg_word(m, 8); +} + static inline void msg_set_nametype(struct tipc_msg *m, u32 n) { msg_set_word(m, 8, n); @@ -324,6 +458,16 @@ static inline void msg_set_transp_seqno(struct tipc_msg *m, u32 n) msg_set_word(m, 8, n); } +static inline u32 msg_nameinst(struct tipc_msg *m) +{ + return msg_word(m, 9); +} + +static inline u32 msg_namelower(struct tipc_msg *m) +{ + return msg_nameinst(m); +} + static inline void msg_set_namelower(struct tipc_msg *m, u32 n) { msg_set_word(m, 9, n); @@ -334,11 +478,21 @@ static inline void msg_set_nameinst(struct tipc_msg *m, u32 n) msg_set_namelower(m, n); } +static inline u32 msg_nameupper(struct tipc_msg *m) +{ + return msg_word(m, 10); +} + static inline void msg_set_nameupper(struct tipc_msg *m, u32 n) { msg_set_word(m, 10, n); } +static inline unchar *msg_data(struct tipc_msg *m) +{ + return ((unchar *)m) + msg_hdr_sz(m); +} + static inline struct tipc_msg *msg_get_wrapped(struct tipc_msg *m) { return (struct tipc_msg *)msg_data(m); diff --git a/net/tipc/port.h b/net/tipc/port.h index 73bbf442b346..8b9d87a3efae 100644 --- a/net/tipc/port.h +++ b/net/tipc/port.h @@ -44,6 +44,39 @@ #include "dbg.h" #include "node_subscr.h" +#define TIPC_FLOW_CONTROL_WIN 512 + +typedef void (*tipc_msg_err_event) (void *usr_handle, u32 portref, + struct sk_buff **buf, unsigned char const *data, + unsigned int size, int reason, + struct tipc_portid const *attmpt_destid); + +typedef void (*tipc_named_msg_err_event) (void *usr_handle, u32 portref, + struct sk_buff **buf, unsigned char const *data, + unsigned int size, int reason, + struct tipc_name_seq const *attmpt_dest); + +typedef void (*tipc_conn_shutdown_event) (void *usr_handle, u32 portref, + struct sk_buff **buf, unsigned char const *data, + unsigned int size, int reason); + +typedef void (*tipc_msg_event) (void *usr_handle, u32 portref, + struct sk_buff **buf, unsigned char const *data, + unsigned int size, unsigned int importance, + struct tipc_portid const *origin); + +typedef 
void (*tipc_named_msg_event) (void *usr_handle, u32 portref, + struct sk_buff **buf, unsigned char const *data, + unsigned int size, unsigned int importance, + struct tipc_portid const *orig, + struct tipc_name_seq const *dest); + +typedef void (*tipc_conn_msg_event) (void *usr_handle, u32 portref, + struct sk_buff **buf, unsigned char const *data, + unsigned int size); + +typedef void (*tipc_continue_event) (void *usr_handle, u32 portref); + /** * struct user_port - TIPC user port (used with native API) * @user_ref: id of user who created user port @@ -67,6 +100,34 @@ struct user_port { struct list_head uport_list; }; +/** + * struct tipc_port - TIPC port info available to socket API + * @usr_handle: pointer to additional user-defined information about port + * @lock: pointer to spinlock for controlling access to port + * @connected: non-zero if port is currently connected to a peer port + * @conn_type: TIPC type used when connection was established + * @conn_instance: TIPC instance used when connection was established + * @conn_unacked: number of unacknowledged messages received from peer port + * @published: non-zero if port has one or more associated names + * @congested: non-zero if cannot send because of link or port congestion + * @max_pkt: maximum packet size "hint" used when building messages sent by port + * @ref: unique reference to port in TIPC object registry + * @phdr: preformatted message header used when sending messages + */ +struct tipc_port { + void *usr_handle; + spinlock_t *lock; + int connected; + u32 conn_type; + u32 conn_instance; + u32 conn_unacked; + int published; + u32 congested; + u32 max_pkt; + u32 ref; + struct tipc_msg phdr; +}; + /** * struct port - TIPC port structure * @publ: TIPC port info available to privileged users @@ -109,6 +170,76 @@ struct port { extern spinlock_t tipc_port_list_lock; struct port_list; +/* + * TIPC port manipulation routines + */ +struct tipc_port *tipc_createport_raw(void *usr_handle, + u32 
(*dispatcher)(struct tipc_port *, struct sk_buff *), + void (*wakeup)(struct tipc_port *), const u32 importance); + +int tipc_reject_msg(struct sk_buff *buf, u32 err); + +int tipc_send_buf_fast(struct sk_buff *buf, u32 destnode); + +void tipc_acknowledge(u32 port_ref, u32 ack); + +int tipc_createport(unsigned int tipc_user, void *usr_handle, + unsigned int importance, tipc_msg_err_event error_cb, + tipc_named_msg_err_event named_error_cb, + tipc_conn_shutdown_event conn_error_cb, tipc_msg_event msg_cb, + tipc_named_msg_event named_msg_cb, + tipc_conn_msg_event conn_msg_cb, + tipc_continue_event continue_event_cb, u32 *portref); + +int tipc_deleteport(u32 portref); + +int tipc_ownidentity(u32 portref, struct tipc_portid *port); + +int tipc_portimportance(u32 portref, unsigned int *importance); +int tipc_set_portimportance(u32 portref, unsigned int importance); + +int tipc_portunreliable(u32 portref, unsigned int *isunreliable); +int tipc_set_portunreliable(u32 portref, unsigned int isunreliable); + +int tipc_portunreturnable(u32 portref, unsigned int *isunreturnable); +int tipc_set_portunreturnable(u32 portref, unsigned int isunreturnable); + +int tipc_publish(u32 portref, unsigned int scope, + struct tipc_name_seq const *name_seq); +int tipc_withdraw(u32 portref, unsigned int scope, + struct tipc_name_seq const *name_seq); + +int tipc_connect2port(u32 portref, struct tipc_portid const *port); + +int tipc_disconnect(u32 portref); + +int tipc_shutdown(u32 ref); + + +/* + * The following routines require that the port be locked on entry + */ +int tipc_disconnect_port(struct tipc_port *tp_ptr); + +/* + * TIPC messaging routines + */ +#define TIPC_PORT_IMPORTANCE 100 /* send using current port setting */ + +int tipc_send(u32 portref, unsigned int num_sect, struct iovec const *msg_sect); + +int tipc_send2name(u32 portref, struct tipc_name const *name, u32 domain, + unsigned int num_sect, struct iovec const *msg_sect); + +int tipc_send2port(u32 portref, struct tipc_portid 
const *dest, + unsigned int num_sect, struct iovec const *msg_sect); + +int tipc_send_buf2port(u32 portref, struct tipc_portid const *dest, + struct sk_buff *buf, unsigned int dsz); + +int tipc_multicast(u32 portref, struct tipc_name_seq const *seq, u32 domain, + unsigned int section_count, struct iovec const *msg); + int tipc_port_reject_sections(struct port *p_ptr, struct tipc_msg *hdr, struct iovec const *msg_sect, u32 num_sect, int err); diff --git a/net/tipc/socket.c b/net/tipc/socket.c index e9f0d5004483..23a12e44347f 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -49,10 +49,9 @@ #include #include -#include -#include #include "core.h" +#include "port.h" #define SS_LISTENING -1 /* socket is listening */ #define SS_READY -2 /* socket is connectionless */ diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index 33313961d010..a857e6ea857e 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -37,6 +37,7 @@ #include "core.h" #include "dbg.h" #include "name_table.h" +#include "user_reg.h" #include "port.h" #include "ref.h" #include "subscr.h" diff --git a/net/tipc/user_reg.h b/net/tipc/user_reg.h index 81dc12e2882f..a05981fb9176 100644 --- a/net/tipc/user_reg.h +++ b/net/tipc/user_reg.h @@ -42,6 +42,11 @@ int tipc_reg_start(void); void tipc_reg_stop(void); +typedef void (*tipc_mode_event)(void *usr_handle, int mode, u32 addr); + +int tipc_attach(unsigned int *userref, tipc_mode_event, void *usr_handle); +void tipc_detach(unsigned int userref); + int tipc_reg_add_port(struct user_port *up_ptr); int tipc_reg_remove_port(struct user_port *up_ptr); -- cgit v1.2.3-59-g8ed1b From 46bcf14f44d8f31ecfdc8b6708ec15a3b33316d9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 6 Dec 2010 09:29:43 -0800 Subject: filter: fix sk_filter rcu handling Pavel Emelyanov tried to fix a race between sk_filter_(de|at)tach and sk_clone() in commit 47e958eac280c263397 Problem is we can have several clones sharing a common sk_filter, and these clones might want to 
sk_filter_attach() their own filters at the same time, and can overwrite old_filter->rcu, corrupting RCU queues. We can not use filter->rcu without being sure no other thread could do the same thing. Switch code to a more conventional ref-counting technique : Do the atomic decrement immediately and queue one rcu call back when last reference is released. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 4 +++- net/core/filter.c | 19 ++++++------------- 2 files changed, 9 insertions(+), 14 deletions(-) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index a6338d039857..659d968d95c5 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1155,6 +1155,8 @@ extern void sk_common_release(struct sock *sk); /* Initialise core socket variables */ extern void sock_init_data(struct socket *sock, struct sock *sk); +extern void sk_filter_release_rcu(struct rcu_head *rcu); + /** * sk_filter_release - release a socket filter * @fp: filter to remove @@ -1165,7 +1167,7 @@ extern void sock_init_data(struct socket *sock, struct sock *sk); static inline void sk_filter_release(struct sk_filter *fp) { if (atomic_dec_and_test(&fp->refcnt)) - kfree(fp); + call_rcu_bh(&fp->rcu, sk_filter_release_rcu); } static inline void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp) diff --git a/net/core/filter.c b/net/core/filter.c index c1ee800bc080..ae21a0d3c4a2 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -589,23 +589,16 @@ int sk_chk_filter(struct sock_filter *filter, int flen) EXPORT_SYMBOL(sk_chk_filter); /** - * sk_filter_rcu_release - Release a socket filter by rcu_head + * sk_filter_release_rcu - Release a socket filter by rcu_head * @rcu: rcu_head that contains the sk_filter to free */ -static void sk_filter_rcu_release(struct rcu_head *rcu) +void sk_filter_release_rcu(struct rcu_head *rcu) { struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu); - sk_filter_release(fp); -} - -static void 
sk_filter_delayed_uncharge(struct sock *sk, struct sk_filter *fp) -{ - unsigned int size = sk_filter_len(fp); - - atomic_sub(size, &sk->sk_omem_alloc); - call_rcu_bh(&fp->rcu, sk_filter_rcu_release); + kfree(fp); } +EXPORT_SYMBOL(sk_filter_release_rcu); /** * sk_attach_filter - attach a socket filter @@ -649,7 +642,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) rcu_assign_pointer(sk->sk_filter, fp); if (old_fp) - sk_filter_delayed_uncharge(sk, old_fp); + sk_filter_uncharge(sk, old_fp); return 0; } EXPORT_SYMBOL_GPL(sk_attach_filter); @@ -663,7 +656,7 @@ int sk_detach_filter(struct sock *sk) sock_owned_by_user(sk)); if (filter) { rcu_assign_pointer(sk->sk_filter, NULL); - sk_filter_delayed_uncharge(sk, filter); + sk_filter_uncharge(sk, filter); ret = 0; } return ret; -- cgit v1.2.3-59-g8ed1b From 45904f21655cf4f0ae7d0fab5906fe51bf56ecf4 Mon Sep 17 00:00:00 2001 From: Javier Cardona Date: Fri, 3 Dec 2010 09:20:40 +0100 Subject: nl80211/mac80211: define and allow configuring mesh element TTL The TTL in path selection information elements is different from the mesh ttl used in mesh data frames. Version 7.03 of the 11s draft calls this ttl 'Element TTL'. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 4 ++++ include/net/cfg80211.h | 2 ++ net/mac80211/cfg.c | 2 ++ net/mac80211/debugfs_netdev.c | 2 ++ net/mac80211/mesh.c | 1 + net/mac80211/mesh.h | 2 ++ net/mac80211/mesh_hwmp.c | 9 +++++---- net/mac80211/mesh_pathtbl.c | 7 ++++--- net/wireless/nl80211.c | 5 +++++ 9 files changed, 27 insertions(+), 7 deletions(-) (limited to 'include/net') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 5cfa579df476..9e541452d805 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -1547,6 +1547,9 @@ enum nl80211_mntr_flags { * @NL80211_MESHCONF_TTL: specifies the value of TTL field set at a source mesh * point. 
* + * @NL80211_MESHCONF_ELEMENT_TTL: specifies the value of TTL field set at a + * source mesh point for path selection elements. + * * @NL80211_MESHCONF_AUTO_OPEN_PLINKS: whether we should automatically * open peer links when we detect compatible mesh peers. * @@ -1593,6 +1596,7 @@ enum nl80211_meshconf_params { NL80211_MESHCONF_HWMP_PREQ_MIN_INTERVAL, NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME, NL80211_MESHCONF_HWMP_ROOTMODE, + NL80211_MESHCONF_ELEMENT_TTL, /* keep last */ __NL80211_MESHCONF_ATTR_AFTER_LAST, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 6b2af7aeddd3..93a4b2068334 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -624,6 +624,8 @@ struct mesh_config { u16 dot11MeshMaxPeerLinks; u8 dot11MeshMaxRetries; u8 dot11MeshTTL; + /* ttl used in path selection information elements */ + u8 element_ttl; bool auto_open_plinks; /* HWMP parameters */ u8 dot11MeshHWMPmaxPREQretries; diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index db134b500caa..ce6936890c26 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1024,6 +1024,8 @@ static int ieee80211_set_mesh_params(struct wiphy *wiphy, conf->dot11MeshMaxRetries = nconf->dot11MeshMaxRetries; if (_chg_mesh_attr(NL80211_MESHCONF_TTL, mask)) conf->dot11MeshTTL = nconf->dot11MeshTTL; + if (_chg_mesh_attr(NL80211_MESHCONF_ELEMENT_TTL, mask)) + conf->dot11MeshTTL = nconf->element_ttl; if (_chg_mesh_attr(NL80211_MESHCONF_AUTO_OPEN_PLINKS, mask)) conf->auto_open_plinks = nconf->auto_open_plinks; if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_MAX_PREQ_RETRIES, mask)) diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index cbdf36d7841c..2dabdf7680d0 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -251,6 +251,7 @@ IEEE80211_IF_FILE(dot11MeshConfirmTimeout, IEEE80211_IF_FILE(dot11MeshHoldingTimeout, u.mesh.mshcfg.dot11MeshHoldingTimeout, DEC); IEEE80211_IF_FILE(dot11MeshTTL, u.mesh.mshcfg.dot11MeshTTL, DEC); 
+IEEE80211_IF_FILE(element_ttl, u.mesh.mshcfg.element_ttl, DEC); IEEE80211_IF_FILE(auto_open_plinks, u.mesh.mshcfg.auto_open_plinks, DEC); IEEE80211_IF_FILE(dot11MeshMaxPeerLinks, u.mesh.mshcfg.dot11MeshMaxPeerLinks, DEC); @@ -355,6 +356,7 @@ static void add_mesh_config(struct ieee80211_sub_if_data *sdata) MESHPARAMS_ADD(dot11MeshConfirmTimeout); MESHPARAMS_ADD(dot11MeshHoldingTimeout); MESHPARAMS_ADD(dot11MeshTTL); + MESHPARAMS_ADD(element_ttl); MESHPARAMS_ADD(auto_open_plinks); MESHPARAMS_ADD(dot11MeshMaxPeerLinks); MESHPARAMS_ADD(dot11MeshHWMPactivePathTimeout); diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index c8a4f19ed13b..78a36c79bdcc 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -668,6 +668,7 @@ void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata) ifmsh->mshcfg.dot11MeshHoldingTimeout = MESH_HOLD_T; ifmsh->mshcfg.dot11MeshMaxRetries = MESH_MAX_RETR; ifmsh->mshcfg.dot11MeshTTL = MESH_TTL; + ifmsh->mshcfg.element_ttl = MESH_DEFAULT_ELEMENT_TTL; ifmsh->mshcfg.auto_open_plinks = true; ifmsh->mshcfg.dot11MeshMaxPeerLinks = MESH_MAX_ESTAB_PLINKS; diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index 58e741128968..182942eeac4d 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -216,6 +216,8 @@ struct mesh_rmc { #define PERR_RCODE_NO_ROUTE 12 #define PERR_RCODE_DEST_UNREACH 13 +#define MESH_DEFAULT_ELEMENT_TTL 31 + /* Public interfaces */ /* Various */ int ieee80211_fill_mesh_addresses(struct ieee80211_hdr *hdr, __le16 *fc, diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index 829e08a657d0..5bf64d7112b3 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -232,7 +232,7 @@ int mesh_path_error_tx(u8 ttl, u8 *target, __le32 target_sn, *pos++ = WLAN_EID_PERR; *pos++ = ie_len; /* ttl */ - *pos++ = MESH_TTL; + *pos++ = ttl; /* number of destinations */ *pos++ = 1; /* @@ -522,7 +522,7 @@ static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata, if (reply) { 
lifetime = PREQ_IE_LIFETIME(preq_elem); - ttl = ifmsh->mshcfg.dot11MeshTTL; + ttl = ifmsh->mshcfg.element_ttl; if (ttl != 0) { mhwmp_dbg("replying to the PREQ\n"); mesh_path_sel_frame_tx(MPATH_PREP, 0, target_addr, @@ -877,7 +877,7 @@ void mesh_path_start_discovery(struct ieee80211_sub_if_data *sdata) sdata->u.mesh.last_sn_update = jiffies; } lifetime = default_lifetime(sdata); - ttl = sdata->u.mesh.mshcfg.dot11MeshTTL; + ttl = sdata->u.mesh.mshcfg.element_ttl; if (ttl == 0) { sdata->u.mesh.mshstats.dropped_frames_ttl++; spin_unlock_bh(&mpath->state_lock); @@ -1013,5 +1013,6 @@ mesh_path_tx_root_frame(struct ieee80211_sub_if_data *sdata) mesh_path_sel_frame_tx(MPATH_RANN, 0, sdata->vif.addr, cpu_to_le32(++ifmsh->sn), 0, NULL, 0, broadcast_addr, - 0, MESH_TTL, 0, 0, 0, sdata); + 0, sdata->u.mesh.mshcfg.element_ttl, + 0, 0, 0, sdata); } diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index 349e466cf08b..8d65b47d9837 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -467,8 +467,8 @@ void mesh_plink_broken(struct sta_info *sta) mpath->flags &= ~MESH_PATH_ACTIVE; ++mpath->sn; spin_unlock_bh(&mpath->state_lock); - mesh_path_error_tx(MESH_TTL, mpath->dst, - cpu_to_le32(mpath->sn), + mesh_path_error_tx(sdata->u.mesh.mshcfg.element_ttl, + mpath->dst, cpu_to_le32(mpath->sn), cpu_to_le16(PERR_RCODE_DEST_UNREACH), bcast, sdata); } else @@ -614,7 +614,8 @@ void mesh_path_discard_frame(struct sk_buff *skb, mpath = mesh_path_lookup(da, sdata); if (mpath) sn = ++mpath->sn; - mesh_path_error_tx(MESH_TTL, skb->data, cpu_to_le32(sn), + mesh_path_error_tx(sdata->u.mesh.mshcfg.element_ttl, skb->data, + cpu_to_le32(sn), cpu_to_le16(PERR_RCODE_NO_ROUTE), ra, sdata); } diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 960be4e650f0..0b90cab5da2f 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -2582,6 +2582,8 @@ static int nl80211_get_mesh_params(struct sk_buff *skb, cur_params.dot11MeshMaxRetries); 
NLA_PUT_U8(msg, NL80211_MESHCONF_TTL, cur_params.dot11MeshTTL); + NLA_PUT_U8(msg, NL80211_MESHCONF_ELEMENT_TTL, + cur_params.element_ttl); NLA_PUT_U8(msg, NL80211_MESHCONF_AUTO_OPEN_PLINKS, cur_params.auto_open_plinks); NLA_PUT_U8(msg, NL80211_MESHCONF_HWMP_MAX_PREQ_RETRIES, @@ -2623,6 +2625,7 @@ static const struct nla_policy nl80211_meshconf_params_policy[NL80211_MESHCONF_A [NL80211_MESHCONF_MAX_PEER_LINKS] = { .type = NLA_U16 }, [NL80211_MESHCONF_MAX_RETRIES] = { .type = NLA_U8 }, [NL80211_MESHCONF_TTL] = { .type = NLA_U8 }, + [NL80211_MESHCONF_ELEMENT_TTL] = { .type = NLA_U8 }, [NL80211_MESHCONF_AUTO_OPEN_PLINKS] = { .type = NLA_U8 }, [NL80211_MESHCONF_HWMP_MAX_PREQ_RETRIES] = { .type = NLA_U8 }, @@ -2670,6 +2673,8 @@ static int nl80211_set_mesh_params(struct sk_buff *skb, struct genl_info *info) mask, NL80211_MESHCONF_MAX_RETRIES, nla_get_u8); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshTTL, mask, NL80211_MESHCONF_TTL, nla_get_u8); + FILL_IN_MESH_PARAM_IF_SET(tb, cfg, element_ttl, + mask, NL80211_MESHCONF_ELEMENT_TTL, nla_get_u8); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, auto_open_plinks, mask, NL80211_MESHCONF_AUTO_OPEN_PLINKS, nla_get_u8); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPmaxPREQretries, -- cgit v1.2.3-59-g8ed1b From f9e10ce4cf86945eb5efcab31284c971877ed012 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 3 Dec 2010 09:20:42 +0100 Subject: cfg80211: require add_virtual_intf to return new dev cfg80211 used to do all its bookkeeping in the notifier, but some new stuff will have to use local variables so make the callback return the netdev pointer. Tested-by: Javier Cardona Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- include/net/cfg80211.h | 11 +++++++---- net/mac80211/cfg.c | 20 ++++++++++++-------- net/wireless/nl80211.c | 7 +++++-- 3 files changed, 24 insertions(+), 14 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 93a4b2068334..902895dfbd49 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1033,7 +1033,8 @@ struct cfg80211_pmksa { * * @add_virtual_intf: create a new virtual interface with the given name, * must set the struct wireless_dev's iftype. Beware: You must create - * the new netdev in the wiphy's network namespace! + * the new netdev in the wiphy's network namespace! Returns the netdev, + * or an ERR_PTR. * * @del_virtual_intf: remove the virtual interface determined by ifindex. * @@ -1168,9 +1169,11 @@ struct cfg80211_ops { int (*suspend)(struct wiphy *wiphy); int (*resume)(struct wiphy *wiphy); - int (*add_virtual_intf)(struct wiphy *wiphy, char *name, - enum nl80211_iftype type, u32 *flags, - struct vif_params *params); + struct net_device * (*add_virtual_intf)(struct wiphy *wiphy, + char *name, + enum nl80211_iftype type, + u32 *flags, + struct vif_params *params); int (*del_virtual_intf)(struct wiphy *wiphy, struct net_device *dev); int (*change_virtual_intf)(struct wiphy *wiphy, struct net_device *dev, diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index ce6936890c26..d34c7c3dd762 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -19,9 +19,10 @@ #include "rate.h" #include "mesh.h" -static int ieee80211_add_iface(struct wiphy *wiphy, char *name, - enum nl80211_iftype type, u32 *flags, - struct vif_params *params) +static struct net_device *ieee80211_add_iface(struct wiphy *wiphy, char *name, + enum nl80211_iftype type, + u32 *flags, + struct vif_params *params) { struct ieee80211_local *local = wiphy_priv(wiphy); struct net_device *dev; @@ -29,12 +30,15 @@ static int ieee80211_add_iface(struct wiphy *wiphy, char *name, int err; err = 
ieee80211_if_add(local, name, &dev, type, params); - if (err || type != NL80211_IFTYPE_MONITOR || !flags) - return err; + if (err) + return ERR_PTR(err); - sdata = IEEE80211_DEV_TO_SUB_IF(dev); - sdata->u.mntr_flags = *flags; - return 0; + if (type == NL80211_IFTYPE_MONITOR && flags) { + sdata = IEEE80211_DEV_TO_SUB_IF(dev); + sdata->u.mntr_flags = *flags; + } + + return dev; } static int ieee80211_del_iface(struct wiphy *wiphy, struct net_device *dev) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 0b90cab5da2f..cc2e5d6163de 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1368,6 +1368,7 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct vif_params params; + struct net_device *dev; int err; enum nl80211_iftype type = NL80211_IFTYPE_UNSPECIFIED; u32 flags; @@ -1403,11 +1404,13 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) err = parse_monitor_flags(type == NL80211_IFTYPE_MONITOR ? info->attrs[NL80211_ATTR_MNTR_FLAGS] : NULL, &flags); - err = rdev->ops->add_virtual_intf(&rdev->wiphy, + dev = rdev->ops->add_virtual_intf(&rdev->wiphy, nla_data(info->attrs[NL80211_ATTR_IFNAME]), type, err ? NULL : &flags, &params); + if (IS_ERR(dev)) + return PTR_ERR(dev); - return err; + return 0; } static int nl80211_del_interface(struct sk_buff *skb, struct genl_info *info) -- cgit v1.2.3-59-g8ed1b From 29cbe68c516a48a9a88b3226878570c6cbd83c02 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 3 Dec 2010 09:20:44 +0100 Subject: cfg80211/mac80211: add mesh join/leave commands Instead of tying mesh activity to interface up, add join and leave commands for mesh. Since we must be backward compatible, let cfg80211 handle joining a mesh if a mesh ID was pre-configured when the device goes up. Note that this therefore must modify mac80211 as well since mac80211 needs to lose the logic to start the mesh on interface up.
We now allow querying mesh parameters before the mesh is connected, which simply returns defaults. Setting them (internally renamed to "update") is only allowed while connected. Specify them with the new mesh join command instead where needed. In mac80211, beaconing must now also follow the mesh enabled/not enabled state, which is done by testing the mesh ID. Signed-off-by: Javier Cardona Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 8 +++ include/net/cfg80211.h | 38 +++++++++--- net/mac80211/cfg.c | 39 ++++++++++--- net/mac80211/ieee80211_i.h | 13 ----- net/mac80211/iface.c | 14 +---- net/mac80211/main.c | 3 +- net/mac80211/mesh.c | 26 ++------- net/mac80211/mesh.h | 25 -------- net/wireless/Makefile | 2 +- net/wireless/core.c | 15 ++++- net/wireless/core.h | 13 +++++ net/wireless/mesh.c | 140 +++++++++++++++++++++++++++++++++++++++++++++ net/wireless/nl80211.c | 137 +++++++++++++++++++++++++++++++++++++------- net/wireless/util.c | 1 + 14 files changed, 359 insertions(+), 115 deletions(-) create mode 100644 net/wireless/mesh.c (limited to 'include/net') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 9e541452d805..410a06ea551b 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -394,6 +394,11 @@ * * @NL80211_CMD_SET_WDS_PEER: Set the MAC address of the peer on a WDS interface. * + * @NL80211_CMD_JOIN_MESH: Join a mesh. The mesh ID must be given, and initial + * mesh config parameters may be given. + * @NL80211_CMD_LEAVE_MESH: Leave the mesh network -- no special arguments, the + * network is determined by the network interface. 
+ * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -500,6 +505,9 @@ enum nl80211_commands { NL80211_CMD_FRAME_WAIT_CANCEL, + NL80211_CMD_JOIN_MESH, + NL80211_CMD_LEAVE_MESH, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 902895dfbd49..788c3989a9e8 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -258,13 +258,9 @@ struct ieee80211_supported_band { /** * struct vif_params - describes virtual interface parameters - * @mesh_id: mesh ID to use - * @mesh_id_len: length of the mesh ID * @use_4addr: use 4-address frames */ struct vif_params { - u8 *mesh_id; - int mesh_id_len; int use_4addr; }; @@ -615,6 +611,11 @@ struct bss_parameters { int ap_isolate; }; +/* + * struct mesh_config - 802.11s mesh configuration + * + * These parameters can be changed while the mesh is active. + */ struct mesh_config { /* Timeouts in ms */ /* Mesh plink management parameters */ @@ -637,6 +638,18 @@ struct mesh_config { u8 dot11MeshHWMPRootMode; }; +/** + * struct mesh_setup - 802.11s mesh setup configuration + * @mesh_id: the mesh ID + * @mesh_id_len: length of the mesh ID, at least 1 and at most 32 bytes + * + * These parameters are fixed when the mesh is created. + */ +struct mesh_setup { + const u8 *mesh_id; + u8 mesh_id_len; +}; + /** * struct ieee80211_txq_params - TX queue parameters * @queue: TX queue identifier (NL80211_TXQ_Q_*) @@ -1078,7 +1091,7 @@ struct cfg80211_pmksa { * * @get_mesh_params: Put the current mesh parameters into *params * - * @set_mesh_params: Set mesh parameters. + * @update_mesh_params: Update mesh parameters on a running mesh. * The mask is a bitfield which tells us which parameters to * set, and which to leave alone. 
* @@ -1229,9 +1242,14 @@ struct cfg80211_ops { int (*get_mesh_params)(struct wiphy *wiphy, struct net_device *dev, struct mesh_config *conf); - int (*set_mesh_params)(struct wiphy *wiphy, - struct net_device *dev, - const struct mesh_config *nconf, u32 mask); + int (*update_mesh_params)(struct wiphy *wiphy, + struct net_device *dev, u32 mask, + const struct mesh_config *nconf); + int (*join_mesh)(struct wiphy *wiphy, struct net_device *dev, + const struct mesh_config *conf, + const struct mesh_setup *setup); + int (*leave_mesh)(struct wiphy *wiphy, struct net_device *dev); + int (*change_bss)(struct wiphy *wiphy, struct net_device *dev, struct bss_parameters *params); @@ -1647,6 +1665,8 @@ struct cfg80211_cached_keys; * @bssid: (private) Used by the internal configuration code * @ssid: (private) Used by the internal configuration code * @ssid_len: (private) Used by the internal configuration code + * @mesh_id_len: (private) Used by the internal configuration code + * @mesh_id_up_len: (private) Used by the internal configuration code * @wext: (private) Used by the internal wireless extensions compat code * @use_4addr: indicates 4addr mode is used on this interface, must be * set by driver (if supported) on add_interface BEFORE registering the @@ -1676,7 +1696,7 @@ struct wireless_dev { /* currently used for IBSS and SME - might be rearranged later */ u8 ssid[IEEE80211_MAX_SSID_LEN]; - u8 ssid_len; + u8 ssid_len, mesh_id_len, mesh_id_up_len; enum { CFG80211_SME_IDLE, CFG80211_SME_CONNECTING, diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index d34c7c3dd762..68329d713c02 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -60,11 +60,6 @@ static int ieee80211_change_iface(struct wiphy *wiphy, if (ret) return ret; - if (ieee80211_vif_is_mesh(&sdata->vif) && params->mesh_id_len) - ieee80211_sdata_set_mesh_id(sdata, - params->mesh_id_len, - params->mesh_id); - if (type == NL80211_IFTYPE_AP_VLAN && params && params->use_4addr == 0) 
rcu_assign_pointer(sdata->u.vlan.sta, NULL); @@ -1003,9 +998,9 @@ static inline bool _chg_mesh_attr(enum nl80211_meshconf_params parm, u32 mask) return (mask >> (parm-1)) & 0x1; } -static int ieee80211_set_mesh_params(struct wiphy *wiphy, - struct net_device *dev, - const struct mesh_config *nconf, u32 mask) +static int ieee80211_update_mesh_params(struct wiphy *wiphy, + struct net_device *dev, u32 mask, + const struct mesh_config *nconf) { struct mesh_config *conf; struct ieee80211_sub_if_data *sdata; @@ -1056,6 +1051,30 @@ static int ieee80211_set_mesh_params(struct wiphy *wiphy, return 0; } +static int ieee80211_join_mesh(struct wiphy *wiphy, struct net_device *dev, + const struct mesh_config *conf, + const struct mesh_setup *setup) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; + + memcpy(&sdata->u.mesh.mshcfg, conf, sizeof(struct mesh_config)); + ifmsh->mesh_id_len = setup->mesh_id_len; + memcpy(ifmsh->mesh_id, setup->mesh_id, ifmsh->mesh_id_len); + + ieee80211_start_mesh(sdata); + + return 0; +} + +static int ieee80211_leave_mesh(struct wiphy *wiphy, struct net_device *dev) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + + ieee80211_stop_mesh(sdata); + + return 0; +} #endif static int ieee80211_change_bss(struct wiphy *wiphy, @@ -1760,8 +1779,10 @@ struct cfg80211_ops mac80211_config_ops = { .change_mpath = ieee80211_change_mpath, .get_mpath = ieee80211_get_mpath, .dump_mpath = ieee80211_dump_mpath, - .set_mesh_params = ieee80211_set_mesh_params, + .update_mesh_params = ieee80211_update_mesh_params, .get_mesh_params = ieee80211_get_mesh_params, + .join_mesh = ieee80211_join_mesh, + .leave_mesh = ieee80211_leave_mesh, #endif .change_bss = ieee80211_change_bss, .set_txq_params = ieee80211_set_txq_params, diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index e7c880725639..72499fe5fc36 100644 --- a/net/mac80211/ieee80211_i.h +++ 
b/net/mac80211/ieee80211_i.h @@ -609,19 +609,6 @@ struct ieee80211_sub_if_data *vif_to_sdata(struct ieee80211_vif *p) return container_of(p, struct ieee80211_sub_if_data, vif); } -static inline void -ieee80211_sdata_set_mesh_id(struct ieee80211_sub_if_data *sdata, - u8 mesh_id_len, u8 *mesh_id) -{ -#ifdef CONFIG_MAC80211_MESH - struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; - ifmsh->mesh_id_len = mesh_id_len; - memcpy(ifmsh->mesh_id, mesh_id, mesh_id_len); -#else - WARN_ON(1); -#endif -} - enum sdata_queue_type { IEEE80211_SDATA_QUEUE_TYPE_FRAME = 0, IEEE80211_SDATA_QUEUE_AGG_START = 1, diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 96e27f1e79fb..f0f11bb794af 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -268,9 +268,7 @@ static int ieee80211_do_open(struct net_device *dev, bool coming_up) goto err_stop; } - if (ieee80211_vif_is_mesh(&sdata->vif)) { - ieee80211_start_mesh(sdata); - } else if (sdata->vif.type == NL80211_IFTYPE_AP) { + if (sdata->vif.type == NL80211_IFTYPE_AP) { local->fif_pspoll++; local->fif_probe_req++; @@ -495,10 +493,6 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, ieee80211_adjust_monitor_flags(sdata, -1); ieee80211_configure_filter(local); break; - case NL80211_IFTYPE_MESH_POINT: - if (ieee80211_vif_is_mesh(&sdata->vif)) - ieee80211_stop_mesh(sdata); - /* fall through */ default: flush_work(&sdata->work); /* @@ -1188,12 +1182,6 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, if (ret) goto fail; - if (ieee80211_vif_is_mesh(&sdata->vif) && - params && params->mesh_id_len) - ieee80211_sdata_set_mesh_id(sdata, - params->mesh_id_len, - params->mesh_id); - mutex_lock(&local->iflist_mtx); list_add_tail_rcu(&sdata->list, &local->interfaces); mutex_unlock(&local->iflist_mtx); diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 107a0cbe52ac..2de69766c6aa 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -246,7 +246,8 @@ void 
ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata, !!sdata->u.ibss.presp; break; case NL80211_IFTYPE_MESH_POINT: - sdata->vif.bss_conf.enable_beacon = true; + sdata->vif.bss_conf.enable_beacon = + !!sdata->u.mesh.mesh_id_len; break; default: /* not reached */ diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 0d3234875ac5..63e1188d5062 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -530,6 +530,11 @@ void ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata) void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata) { struct ieee80211_local *local = sdata->local; + struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; + + ifmsh->mesh_id_len = 0; + ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED); + sta_info_flush(local, NULL); del_timer_sync(&sdata->u.mesh.housekeeping_timer); del_timer_sync(&sdata->u.mesh.mesh_path_root_timer); @@ -674,27 +679,6 @@ void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata) ieee80211_mesh_housekeeping_timer, (unsigned long) sdata); - ifmsh->mshcfg.dot11MeshRetryTimeout = MESH_RET_T; - ifmsh->mshcfg.dot11MeshConfirmTimeout = MESH_CONF_T; - ifmsh->mshcfg.dot11MeshHoldingTimeout = MESH_HOLD_T; - ifmsh->mshcfg.dot11MeshMaxRetries = MESH_MAX_RETR; - ifmsh->mshcfg.dot11MeshTTL = MESH_TTL; - ifmsh->mshcfg.element_ttl = MESH_DEFAULT_ELEMENT_TTL; - ifmsh->mshcfg.auto_open_plinks = true; - ifmsh->mshcfg.dot11MeshMaxPeerLinks = - MESH_MAX_ESTAB_PLINKS; - ifmsh->mshcfg.dot11MeshHWMPactivePathTimeout = - MESH_PATH_TIMEOUT; - ifmsh->mshcfg.dot11MeshHWMPpreqMinInterval = - MESH_PREQ_MIN_INT; - ifmsh->mshcfg.dot11MeshHWMPnetDiameterTraversalTime = - MESH_DIAM_TRAVERSAL_TIME; - ifmsh->mshcfg.dot11MeshHWMPmaxPREQretries = - MESH_MAX_PREQ_RETRIES; - ifmsh->mshcfg.path_refresh_time = - MESH_PATH_REFRESH_TIME; - ifmsh->mshcfg.min_discovery_timeout = - MESH_MIN_DISCOVERY_TIMEOUT; ifmsh->accepting_plinks = true; ifmsh->preq_id = 0; ifmsh->sn = 0; diff --git a/net/mac80211/mesh.h 
b/net/mac80211/mesh.h index 182942eeac4d..039d7fa0af74 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -175,33 +175,10 @@ struct mesh_rmc { */ #define MESH_CFG_CMP_LEN (IEEE80211_MESH_CONFIG_LEN - 2) -/* Default values, timeouts in ms */ -#define MESH_TTL 31 -#define MESH_MAX_RETR 3 -#define MESH_RET_T 100 -#define MESH_CONF_T 100 -#define MESH_HOLD_T 100 - -#define MESH_PATH_TIMEOUT 5000 -/* Minimum interval between two consecutive PREQs originated by the same - * interface - */ -#define MESH_PREQ_MIN_INT 10 -#define MESH_DIAM_TRAVERSAL_TIME 50 -/* A path will be refreshed if it is used PATH_REFRESH_TIME milliseconds before - * timing out. This way it will remain ACTIVE and no data frames will be - * unnecesarily held in the pending queue. - */ -#define MESH_PATH_REFRESH_TIME 1000 -#define MESH_MIN_DISCOVERY_TIMEOUT (2 * MESH_DIAM_TRAVERSAL_TIME) #define MESH_DEFAULT_BEACON_INTERVAL 1000 /* in 1024 us units */ -#define MESH_MAX_PREQ_RETRIES 4 #define MESH_PATH_EXPIRE (600 * HZ) -/* Default maximum number of established plinks per interface */ -#define MESH_MAX_ESTAB_PLINKS 32 - /* Default maximum number of plinks per interface */ #define MESH_MAX_PLINKS 256 @@ -216,8 +193,6 @@ struct mesh_rmc { #define PERR_RCODE_NO_ROUTE 12 #define PERR_RCODE_DEST_UNREACH 13 -#define MESH_DEFAULT_ELEMENT_TTL 31 - /* Public interfaces */ /* Various */ int ieee80211_fill_mesh_addresses(struct ieee80211_hdr *hdr, __le16 *fc, diff --git a/net/wireless/Makefile b/net/wireless/Makefile index e77e508126fa..55a28ab21db9 100644 --- a/net/wireless/Makefile +++ b/net/wireless/Makefile @@ -10,7 +10,7 @@ obj-$(CONFIG_WEXT_SPY) += wext-spy.o obj-$(CONFIG_WEXT_PRIV) += wext-priv.o cfg80211-y += core.o sysfs.o radiotap.o util.o reg.o scan.o nl80211.o -cfg80211-y += mlme.o ibss.o sme.o chan.o ethtool.o +cfg80211-y += mlme.o ibss.o sme.o chan.o ethtool.o mesh.o cfg80211-$(CONFIG_CFG80211_DEBUGFS) += debugfs.o cfg80211-$(CONFIG_CFG80211_WEXT) += wext-compat.o wext-sme.o 
cfg80211-$(CONFIG_CFG80211_INTERNAL_REGDB) += regdb.o diff --git a/net/wireless/core.c b/net/wireless/core.c index 630bcf0a2f04..79772fcc37bc 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -332,6 +332,7 @@ struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv) WARN_ON(ops->add_virtual_intf && !ops->del_virtual_intf); WARN_ON(ops->add_station && !ops->del_station); WARN_ON(ops->add_mpath && !ops->del_mpath); + WARN_ON(ops->join_mesh && !ops->leave_mesh); alloc_size = sizeof(*rdev) + sizeof_priv; @@ -752,6 +753,9 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, cfg80211_mlme_down(rdev, dev); wdev_unlock(wdev); break; + case NL80211_IFTYPE_MESH_POINT: + cfg80211_leave_mesh(rdev, dev); + break; default: break; } @@ -775,20 +779,27 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, } cfg80211_lock_rdev(rdev); mutex_lock(&rdev->devlist_mtx); -#ifdef CONFIG_CFG80211_WEXT wdev_lock(wdev); switch (wdev->iftype) { +#ifdef CONFIG_CFG80211_WEXT case NL80211_IFTYPE_ADHOC: cfg80211_ibss_wext_join(rdev, wdev); break; case NL80211_IFTYPE_STATION: cfg80211_mgd_wext_connect(rdev, wdev); break; +#endif + case NL80211_IFTYPE_MESH_POINT: + /* backward compat code ... 
*/ + if (wdev->mesh_id_up_len) + __cfg80211_join_mesh(rdev, dev, wdev->ssid, + wdev->mesh_id_up_len, + &default_mesh_config); + break; default: break; } wdev_unlock(wdev); -#endif rdev->opencount++; mutex_unlock(&rdev->devlist_mtx); cfg80211_unlock_rdev(rdev); diff --git a/net/wireless/core.h b/net/wireless/core.h index ee80ad8dc655..743203bb61ac 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -285,6 +285,19 @@ void __cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid); int cfg80211_ibss_wext_join(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev); +/* mesh */ +extern const struct mesh_config default_mesh_config; +int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev, + struct net_device *dev, + const u8 *mesh_id, u8 mesh_id_len, + const struct mesh_config *conf); +int cfg80211_join_mesh(struct cfg80211_registered_device *rdev, + struct net_device *dev, + const u8 *mesh_id, u8 mesh_id_len, + const struct mesh_config *conf); +int cfg80211_leave_mesh(struct cfg80211_registered_device *rdev, + struct net_device *dev); + /* MLME */ int __cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, struct net_device *dev, diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c new file mode 100644 index 000000000000..e0b9747fe50a --- /dev/null +++ b/net/wireless/mesh.c @@ -0,0 +1,140 @@ +#include <linux/ieee80211.h> +#include <net/cfg80211.h> +#include "core.h" + +/* Default values, timeouts in ms */ +#define MESH_TTL 31 +#define MESH_DEFAULT_ELEMENT_TTL 31 +#define MESH_MAX_RETR 3 +#define MESH_RET_T 100 +#define MESH_CONF_T 100 +#define MESH_HOLD_T 100 + +#define MESH_PATH_TIMEOUT 5000 + +/* + * Minimum interval between two consecutive PREQs originated by the same + * interface + */ +#define MESH_PREQ_MIN_INT 10 +#define MESH_DIAM_TRAVERSAL_TIME 50 + +/* + * A path will be refreshed if it is used PATH_REFRESH_TIME milliseconds + * before timing out. This way it will remain ACTIVE and no data frames + * will be unnecessarily held in the pending queue. 
+ */ +#define MESH_PATH_REFRESH_TIME 1000 +#define MESH_MIN_DISCOVERY_TIMEOUT (2 * MESH_DIAM_TRAVERSAL_TIME) + +/* Default maximum number of established plinks per interface */ +#define MESH_MAX_ESTAB_PLINKS 32 + +#define MESH_MAX_PREQ_RETRIES 4 + + +const struct mesh_config default_mesh_config = { + .dot11MeshRetryTimeout = MESH_RET_T, + .dot11MeshConfirmTimeout = MESH_CONF_T, + .dot11MeshHoldingTimeout = MESH_HOLD_T, + .dot11MeshMaxRetries = MESH_MAX_RETR, + .dot11MeshTTL = MESH_TTL, + .element_ttl = MESH_DEFAULT_ELEMENT_TTL, + .auto_open_plinks = true, + .dot11MeshMaxPeerLinks = MESH_MAX_ESTAB_PLINKS, + .dot11MeshHWMPactivePathTimeout = MESH_PATH_TIMEOUT, + .dot11MeshHWMPpreqMinInterval = MESH_PREQ_MIN_INT, + .dot11MeshHWMPnetDiameterTraversalTime = MESH_DIAM_TRAVERSAL_TIME, + .dot11MeshHWMPmaxPREQretries = MESH_MAX_PREQ_RETRIES, + .path_refresh_time = MESH_PATH_REFRESH_TIME, + .min_discovery_timeout = MESH_MIN_DISCOVERY_TIMEOUT, +}; + + +int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev, + struct net_device *dev, + const u8 *mesh_id, u8 mesh_id_len, + const struct mesh_config *conf) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct mesh_setup setup = { + .mesh_id = mesh_id, + .mesh_id_len = mesh_id_len, + }; + int err; + + BUILD_BUG_ON(IEEE80211_MAX_SSID_LEN != IEEE80211_MAX_MESH_ID_LEN); + + ASSERT_WDEV_LOCK(wdev); + + if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT) + return -EOPNOTSUPP; + + if (wdev->mesh_id_len) + return -EALREADY; + + if (!mesh_id_len) + return -EINVAL; + + if (!rdev->ops->join_mesh) + return -EOPNOTSUPP; + + err = rdev->ops->join_mesh(&rdev->wiphy, dev, conf, &setup); + if (!err) { + memcpy(wdev->ssid, mesh_id, mesh_id_len); + wdev->mesh_id_len = mesh_id_len; + } + + return err; +} + +int cfg80211_join_mesh(struct cfg80211_registered_device *rdev, + struct net_device *dev, + const u8 *mesh_id, u8 mesh_id_len, + const struct mesh_config *conf) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + int 
err; + + wdev_lock(wdev); + err = __cfg80211_join_mesh(rdev, dev, mesh_id, mesh_id_len, conf); + wdev_unlock(wdev); + + return err; +} + +static int __cfg80211_leave_mesh(struct cfg80211_registered_device *rdev, + struct net_device *dev) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + int err; + + ASSERT_WDEV_LOCK(wdev); + + if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT) + return -EOPNOTSUPP; + + if (!rdev->ops->leave_mesh) + return -EOPNOTSUPP; + + if (!wdev->mesh_id_len) + return -ENOTCONN; + + err = rdev->ops->leave_mesh(&rdev->wiphy, dev); + if (!err) + wdev->mesh_id_len = 0; + return err; +} + +int cfg80211_leave_mesh(struct cfg80211_registered_device *rdev, + struct net_device *dev) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + int err; + + wdev_lock(wdev); + err = __cfg80211_leave_mesh(rdev, dev); + wdev_unlock(wdev); + + return err; +} diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index c8d4d53fc450..56508d40c740 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -661,13 +661,14 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, CMD(add_beacon, NEW_BEACON); CMD(add_station, NEW_STATION); CMD(add_mpath, NEW_MPATH); - CMD(set_mesh_params, SET_MESH_PARAMS); + CMD(update_mesh_params, SET_MESH_PARAMS); CMD(change_bss, SET_BSS); CMD(auth, AUTHENTICATE); CMD(assoc, ASSOCIATE); CMD(deauth, DEAUTHENTICATE); CMD(disassoc, DISASSOCIATE); CMD(join_ibss, JOIN_IBSS); + CMD(join_mesh, JOIN_MESH); CMD(set_pmksa, SET_PMKSA); CMD(del_pmksa, DEL_PMKSA); CMD(flush_pmksa, FLUSH_PMKSA); @@ -1324,11 +1325,21 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info) } if (info->attrs[NL80211_ATTR_MESH_ID]) { + struct wireless_dev *wdev = dev->ieee80211_ptr; + if (ntype != NL80211_IFTYPE_MESH_POINT) return -EINVAL; - params.mesh_id = nla_data(info->attrs[NL80211_ATTR_MESH_ID]); - params.mesh_id_len = nla_len(info->attrs[NL80211_ATTR_MESH_ID]); - change = true; + if 
(netif_running(dev)) + return -EBUSY; + + wdev_lock(wdev); + BUILD_BUG_ON(IEEE80211_MAX_SSID_LEN != + IEEE80211_MAX_MESH_ID_LEN); + wdev->mesh_id_up_len = + nla_len(info->attrs[NL80211_ATTR_MESH_ID]); + memcpy(wdev->ssid, nla_data(info->attrs[NL80211_ATTR_MESH_ID]), + wdev->mesh_id_up_len); + wdev_unlock(wdev); } if (info->attrs[NL80211_ATTR_4ADDR]) { @@ -1388,12 +1399,6 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) !(rdev->wiphy.interface_modes & (1 << type))) return -EOPNOTSUPP; - if (type == NL80211_IFTYPE_MESH_POINT && - info->attrs[NL80211_ATTR_MESH_ID]) { - params.mesh_id = nla_data(info->attrs[NL80211_ATTR_MESH_ID]); - params.mesh_id_len = nla_len(info->attrs[NL80211_ATTR_MESH_ID]); - } - if (info->attrs[NL80211_ATTR_4ADDR]) { params.use_4addr = !!nla_get_u8(info->attrs[NL80211_ATTR_4ADDR]); err = nl80211_valid_4addr(rdev, NULL, params.use_4addr, type); @@ -1410,6 +1415,20 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) if (IS_ERR(dev)) return PTR_ERR(dev); + if (type == NL80211_IFTYPE_MESH_POINT && + info->attrs[NL80211_ATTR_MESH_ID]) { + struct wireless_dev *wdev = dev->ieee80211_ptr; + + wdev_lock(wdev); + BUILD_BUG_ON(IEEE80211_MAX_SSID_LEN != + IEEE80211_MAX_MESH_ID_LEN); + wdev->mesh_id_up_len = + nla_len(info->attrs[NL80211_ATTR_MESH_ID]); + memcpy(wdev->ssid, nla_data(info->attrs[NL80211_ATTR_MESH_ID]), + wdev->mesh_id_up_len); + wdev_unlock(wdev); + } + return 0; } @@ -2543,21 +2562,32 @@ static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info) } static int nl80211_get_mesh_params(struct sk_buff *skb, - struct genl_info *info) + struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; - struct mesh_config cur_params; - int err; struct net_device *dev = info->user_ptr[1]; + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct mesh_config cur_params; + int err = 0; void *hdr; struct nlattr *pinfoattr; struct sk_buff *msg; + if 
(wdev->iftype != NL80211_IFTYPE_MESH_POINT) + return -EOPNOTSUPP; + if (!rdev->ops->get_mesh_params) return -EOPNOTSUPP; - /* Get the mesh params */ - err = rdev->ops->get_mesh_params(&rdev->wiphy, dev, &cur_params); + wdev_lock(wdev); + /* If not connected, get default parameters */ + if (!wdev->mesh_id_len) + memcpy(&cur_params, &default_mesh_config, sizeof(cur_params)); + else + err = rdev->ops->get_mesh_params(&rdev->wiphy, dev, + &cur_params); + wdev_unlock(wdev); + if (err) return err; @@ -2705,23 +2735,37 @@ do {\ #undef FILL_IN_MESH_PARAM_IF_SET } -static int nl80211_set_mesh_params(struct sk_buff *skb, struct genl_info *info) +static int nl80211_update_mesh_params(struct sk_buff *skb, + struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; + struct wireless_dev *wdev = dev->ieee80211_ptr; struct mesh_config cfg; u32 mask; int err; - if (!rdev->ops->set_mesh_params) + if (wdev->iftype != NL80211_IFTYPE_MESH_POINT) + return -EOPNOTSUPP; + + if (!rdev->ops->update_mesh_params) return -EOPNOTSUPP; err = nl80211_parse_mesh_params(info, &cfg, &mask); if (err) return err; - /* Apply changes */ - return rdev->ops->set_mesh_params(&rdev->wiphy, dev, &cfg, mask); + wdev_lock(wdev); + if (!wdev->mesh_id_len) + err = -ENOLINK; + + if (!err) + err = rdev->ops->update_mesh_params(&rdev->wiphy, dev, + mask, &cfg); + + wdev_unlock(wdev); + + return err; } static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info) @@ -4505,6 +4549,41 @@ out: return err; } +static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct net_device *dev = info->user_ptr[1]; + struct mesh_config cfg; + int err; + + /* start with default */ + memcpy(&cfg, &default_mesh_config, sizeof(cfg)); + + if (info->attrs[NL80211_ATTR_MESH_PARAMS]) { + /* and parse parameters if given */ + err = nl80211_parse_mesh_params(info, &cfg, 
NULL); + if (err) + return err; + } + + if (!info->attrs[NL80211_ATTR_MESH_ID] || + !nla_len(info->attrs[NL80211_ATTR_MESH_ID])) + return -EINVAL; + + return cfg80211_join_mesh(rdev, dev, + nla_data(info->attrs[NL80211_ATTR_MESH_ID]), + nla_len(info->attrs[NL80211_ATTR_MESH_ID]), + &cfg); +} + +static int nl80211_leave_mesh(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct net_device *dev = info->user_ptr[1]; + + return cfg80211_leave_mesh(rdev, dev); +} + #define NL80211_FLAG_NEED_WIPHY 0x01 #define NL80211_FLAG_NEED_NETDEV 0x02 #define NL80211_FLAG_NEED_RTNL 0x04 @@ -4769,10 +4848,10 @@ static struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_SET_MESH_PARAMS, - .doit = nl80211_set_mesh_params, + .doit = nl80211_update_mesh_params, .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, - .internal_flags = NL80211_FLAG_NEED_NETDEV | + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, { @@ -4987,6 +5066,22 @@ static struct genl_ops nl80211_ops[] = { .internal_flags = NL80211_FLAG_NEED_NETDEV | NL80211_FLAG_NEED_RTNL, }, + { + .cmd = NL80211_CMD_JOIN_MESH, + .doit = nl80211_join_mesh, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | + NL80211_FLAG_NEED_RTNL, + }, + { + .cmd = NL80211_CMD_LEAVE_MESH, + .doit = nl80211_leave_mesh, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | + NL80211_FLAG_NEED_RTNL, + }, }; static struct genl_multicast_group nl80211_mlme_mcgrp = { diff --git a/net/wireless/util.c b/net/wireless/util.c index fee020b15a4e..4de624ca4c63 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -792,6 +792,7 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, if (ntype != otype) { dev->ieee80211_ptr->use_4addr = false; + dev->ieee80211_ptr->mesh_id_up_len = 0; switch (otype) { case NL80211_IFTYPE_ADHOC: -- cgit 
v1.2.3-59-g8ed1b From 541a45a142df281c974d74eac2066138fc107b23 Mon Sep 17 00:00:00 2001 From: Bruno Randolf Date: Thu, 2 Dec 2010 19:12:43 +0900 Subject: nl80211/mac80211: Report signal average Extend nl80211 to report an exponential weighted moving average (EWMA) of the signal value. Since the signal value usually fluctuates between different packets, an average can be more useful than the value of the last packet. This uses the recently added generic EWMA library function. -- v2: fix ABI breakage and change factor to be a power of 2. Signed-off-by: Bruno Randolf Signed-off-by: John W. Linville --- include/linux/nl80211.h | 2 ++ include/net/cfg80211.h | 4 ++++ net/mac80211/Kconfig | 1 + net/mac80211/cfg.c | 3 ++- net/mac80211/rx.c | 1 + net/mac80211/sta_info.c | 2 ++ net/mac80211/sta_info.h | 3 +++ net/wireless/nl80211.c | 3 +++ 8 files changed, 18 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 410a06ea551b..8e28053ea423 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -1191,6 +1191,7 @@ enum nl80211_rate_info { * station) * @NL80211_STA_INFO_TX_RETRIES: total retries (u32, to this station) * @NL80211_STA_INFO_TX_FAILED: total failed packets (u32, to this station) + * @NL80211_STA_INFO_SIGNAL_AVG: signal strength average (u8, dBm) */ enum nl80211_sta_info { __NL80211_STA_INFO_INVALID, @@ -1206,6 +1207,7 @@ enum nl80211_sta_info { NL80211_STA_INFO_TX_PACKETS, NL80211_STA_INFO_TX_RETRIES, NL80211_STA_INFO_TX_FAILED, + NL80211_STA_INFO_SIGNAL_AVG, /* keep last */ __NL80211_STA_INFO_AFTER_LAST, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 788c3989a9e8..8764c9a5bab7 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -420,6 +420,7 @@ struct station_parameters { * @STATION_INFO_TX_RETRIES: @tx_retries filled * @STATION_INFO_TX_FAILED: @tx_failed filled * @STATION_INFO_RX_DROP_MISC: @rx_dropped_misc filled + * @STATION_INFO_SIGNAL_AVG: 
@signal_avg filled */ enum station_info_flags { STATION_INFO_INACTIVE_TIME = 1<<0, @@ -435,6 +436,7 @@ enum station_info_flags { STATION_INFO_TX_RETRIES = 1<<10, STATION_INFO_TX_FAILED = 1<<11, STATION_INFO_RX_DROP_MISC = 1<<12, + STATION_INFO_SIGNAL_AVG = 1<<13, }; /** @@ -481,6 +483,7 @@ struct rate_info { * @plid: mesh peer link id * @plink_state: mesh peer link state * @signal: signal strength of last received packet in dBm + * @signal_avg: signal strength average in dBm * @txrate: current unicast bitrate to this station * @rx_packets: packets received from this station * @tx_packets: packets transmitted to this station @@ -501,6 +504,7 @@ struct station_info { u16 plid; u8 plink_state; s8 signal; + s8 signal_avg; struct rate_info txrate; u32 rx_packets; u32 tx_packets; diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig index 4d6f8653ec88..798d9b9462e2 100644 --- a/net/mac80211/Kconfig +++ b/net/mac80211/Kconfig @@ -6,6 +6,7 @@ config MAC80211 select CRYPTO_ARC4 select CRYPTO_AES select CRC32 + select AVERAGE ---help--- This option enables the hardware independent IEEE 802.11 networking stack. 
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 68329d713c02..af9620406321 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -342,8 +342,9 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) if ((sta->local->hw.flags & IEEE80211_HW_SIGNAL_DBM) || (sta->local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC)) { - sinfo->filled |= STATION_INFO_SIGNAL; + sinfo->filled |= STATION_INFO_SIGNAL | STATION_INFO_SIGNAL_AVG; sinfo->signal = (s8)sta->last_signal; + sinfo->signal_avg = (s8) -ewma_read(&sta->avg_signal); } sinfo->txrate.flags = 0; diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 6289525c0998..2fe8f5f86499 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1163,6 +1163,7 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) sta->rx_fragments++; sta->rx_bytes += rx->skb->len; sta->last_signal = status->signal; + ewma_add(&sta->avg_signal, -status->signal); /* * Change STA power saving mode only at the end of a frame diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index eff58571fd7e..c426504ed1cf 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -244,6 +244,8 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, sta->local = local; sta->sdata = sdata; + ewma_init(&sta->avg_signal, 1024, 8); + if (sta_prepare_rate_control(local, sta, gfp)) { kfree(sta); return NULL; diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 05f11302443b..fdca52cf88de 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -13,6 +13,7 @@ #include #include #include +#include #include "key.h" /** @@ -223,6 +224,7 @@ enum plink_state { * @rx_fragments: number of received MPDUs * @rx_dropped: number of dropped MPDUs from this STA * @last_signal: signal of last received frame from this STA + * @avg_signal: moving average of signal of received frames from this STA * @last_seq_ctrl: last received seq/frag number from this STA (per RX queue) * 
@tx_filtered_count: number of frames the hardware filtered for this STA * @tx_retry_failed: number of frames that failed retry @@ -291,6 +293,7 @@ struct sta_info { unsigned long rx_fragments; unsigned long rx_dropped; int last_signal; + struct ewma avg_signal; __le16 last_seq_ctrl[NUM_RX_DATA_QUEUES]; /* Updated from TX status path only, no locking requirements */ diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 56508d40c740..2cf03331d4a2 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1896,6 +1896,9 @@ static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq, if (sinfo->filled & STATION_INFO_SIGNAL) NLA_PUT_U8(msg, NL80211_STA_INFO_SIGNAL, sinfo->signal); + if (sinfo->filled & STATION_INFO_SIGNAL_AVG) + NLA_PUT_U8(msg, NL80211_STA_INFO_SIGNAL_AVG, + sinfo->signal_avg); if (sinfo->filled & STATION_INFO_TX_BITRATE) { txrate = nla_nest_start(msg, NL80211_STA_INFO_TX_BITRATE); if (!txrate) -- cgit v1.2.3-59-g8ed1b From c02178d22b3ef2d18c38c96151600ee1c7ed94f0 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 8 Dec 2010 00:21:05 +0200 Subject: Bluetooth: Add Bluetooth Management interface definitions Add initial definitions for the new Bluetooth Management interface to the bluetooth headers. Signed-off-by: Johan Hedberg Acked-by: Marcel Holtmann Signed-off-by: Gustavo F. 
Padovan --- include/net/bluetooth/hci.h | 4 ++++ include/net/bluetooth/hci_core.h | 1 + include/net/bluetooth/mgmt.h | 46 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 51 insertions(+) create mode 100644 include/net/bluetooth/mgmt.h (limited to 'include/net') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index f3c5ed6d7bda..29a7a8ca0438 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -934,9 +934,13 @@ static inline struct hci_sco_hdr *hci_sco_hdr(const struct sk_buff *skb) struct sockaddr_hci { sa_family_t hci_family; unsigned short hci_dev; + unsigned short hci_channel; }; #define HCI_DEV_NONE 0xffff +#define HCI_CHANNEL_RAW 0 +#define HCI_CHANNEL_CONTROL 1 + struct hci_filter { unsigned long type_mask; unsigned long event_mask[2]; diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 9c08625617a1..3e3435945980 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -668,6 +668,7 @@ struct hci_pinfo { struct hci_dev *hdev; struct hci_filter filter; __u32 cmsg_mask; + unsigned short channel; }; /* HCI security filter */ diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h new file mode 100644 index 000000000000..95974daa725e --- /dev/null +++ b/include/net/bluetooth/mgmt.h @@ -0,0 +1,46 @@ +/* + BlueZ - Bluetooth protocol stack for Linux + + Copyright (C) 2010 Nokia Corporation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License version 2 as + published by the Free Software Foundation; + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. 
+ IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY + CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, + COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS + SOFTWARE IS DISCLAIMED. +*/ + +struct mgmt_hdr { + __le16 opcode; + __le16 len; +} __packed; +#define MGMT_HDR_SIZE 4 + +#define MGMT_EV_CMD_COMPLETE 0x0001 +struct mgmt_ev_cmd_complete { + __le16 opcode; + __u8 data[0]; +} __packed; + +#define MGMT_EV_CMD_STATUS 0x0002 +struct mgmt_ev_cmd_status { + __u8 status; + __le16 opcode; +} __packed; + +#define MGMT_EV_CONTROLLER_ERROR 0x0003 +struct mgmt_ev_controller_error { + __le16 index; + __u8 error_code; +} __packed; -- cgit v1.2.3-59-g8ed1b From 0381101fd6a73c7d6b545044dc1472d019fc64e3 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 8 Dec 2010 00:21:06 +0200 Subject: Bluetooth: Add initial Bluetooth Management interface callbacks Add initial code for handling Bluetooth Management interface messages. Signed-off-by: Johan Hedberg Acked-by: Marcel Holtmann Acked-by: Andrei Emeltchenko Signed-off-by: Gustavo F. 
Padovan --- include/net/bluetooth/hci_core.h | 3 ++ net/bluetooth/Makefile | 2 +- net/bluetooth/hci_sock.c | 39 +++++++++++++--- net/bluetooth/mgmt.c | 99 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 136 insertions(+), 7 deletions(-) create mode 100644 net/bluetooth/mgmt.c (limited to 'include/net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 3e3435945980..1992fac7e921 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -660,6 +660,9 @@ void hci_si_event(struct hci_dev *hdev, int type, int dlen, void *data); /* ----- HCI Sockets ----- */ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb); +/* Management interface */ +int mgmt_control(struct sock *sk, struct msghdr *msg, size_t len); + /* HCI info for socket */ #define hci_pi(sk) ((struct hci_pinfo *) sk) diff --git a/net/bluetooth/Makefile b/net/bluetooth/Makefile index 7ca1f46a471a..250f954f0213 100644 --- a/net/bluetooth/Makefile +++ b/net/bluetooth/Makefile @@ -10,4 +10,4 @@ obj-$(CONFIG_BT_BNEP) += bnep/ obj-$(CONFIG_BT_CMTP) += cmtp/ obj-$(CONFIG_BT_HIDP) += hidp/ -bluetooth-y := af_bluetooth.o hci_core.o hci_conn.o hci_event.o hci_sock.o hci_sysfs.o lib.o +bluetooth-y := af_bluetooth.o hci_core.o hci_conn.o hci_event.o mgmt.o hci_sock.o hci_sysfs.o lib.o diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index b3753bad2a55..207be7abda9f 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -49,6 +49,8 @@ #include #include +static int enable_mgmt; + /* ----- HCI socket interface ----- */ static inline int hci_test_bit(int nr, void *addr) @@ -353,25 +355,35 @@ static int hci_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long a static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_len) { - struct sockaddr_hci *haddr = (struct sockaddr_hci *) addr; + struct sockaddr_hci haddr; struct sock *sk = sock->sk; struct hci_dev *hdev = NULL; - int 
err = 0; + int len, err = 0; BT_DBG("sock %p sk %p", sock, sk); - if (!haddr || haddr->hci_family != AF_BLUETOOTH) + if (!addr) + return -EINVAL; + + memset(&haddr, 0, sizeof(haddr)); + len = min_t(unsigned int, sizeof(haddr), addr_len); + memcpy(&haddr, addr, len); + + if (haddr.hci_family != AF_BLUETOOTH) + return -EINVAL; + + if (haddr.hci_channel != HCI_CHANNEL_RAW && !enable_mgmt) return -EINVAL; lock_sock(sk); - if (hci_pi(sk)->hdev) { + if (sk->sk_state == BT_BOUND || hci_pi(sk)->hdev) { err = -EALREADY; goto done; } - if (haddr->hci_dev != HCI_DEV_NONE) { - hdev = hci_dev_get(haddr->hci_dev); + if (haddr.hci_dev != HCI_DEV_NONE) { + hdev = hci_dev_get(haddr.hci_dev); if (!hdev) { err = -ENODEV; goto done; @@ -380,6 +392,7 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_le atomic_inc(&hdev->promisc); } + hci_pi(sk)->channel = haddr.hci_channel; hci_pi(sk)->hdev = hdev; sk->sk_state = BT_BOUND; @@ -502,6 +515,17 @@ static int hci_sock_sendmsg(struct kiocb *iocb, struct socket *sock, lock_sock(sk); + switch (hci_pi(sk)->channel) { + case HCI_CHANNEL_RAW: + break; + case HCI_CHANNEL_CONTROL: + err = mgmt_control(sk, msg, len); + goto done; + default: + err = -EINVAL; + goto done; + } + hdev = hci_pi(sk)->hdev; if (!hdev) { err = -EBADFD; @@ -831,3 +855,6 @@ void __exit hci_sock_cleanup(void) proto_unregister(&hci_sk_proto); } + +module_param(enable_mgmt, bool, 0644); +MODULE_PARM_DESC(enable_mgmt, "Enable Management interface"); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c new file mode 100644 index 000000000000..d15bf676c350 --- /dev/null +++ b/net/bluetooth/mgmt.c @@ -0,0 +1,99 @@ +/* + BlueZ - Bluetooth protocol stack for Linux + Copyright (C) 2010 Nokia Corporation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License version 2 as + published by the Free Software Foundation; + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY 
KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. + IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY + CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, + COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS + SOFTWARE IS DISCLAIMED. +*/ + +/* Bluetooth HCI Management interface */ + +#include +#include + +#include +#include +#include + +static void cmd_status(struct sock *sk, u16 cmd, u8 status) +{ + struct sk_buff *skb; + struct mgmt_hdr *hdr; + struct mgmt_ev_cmd_status *ev; + + BT_DBG("sock %p", sk); + + skb = alloc_skb(sizeof(*hdr) + sizeof(*ev), GFP_ATOMIC); + if (!skb) + return; + + hdr = (void *) skb_put(skb, sizeof(*hdr)); + + hdr->opcode = cpu_to_le16(MGMT_EV_CMD_STATUS); + hdr->len = cpu_to_le16(sizeof(*ev)); + + ev = (void *) skb_put(skb, sizeof(*ev)); + ev->status = status; + put_unaligned_le16(cmd, &ev->opcode); + + if (sock_queue_rcv_skb(sk, skb) < 0) + kfree_skb(skb); +} + +int mgmt_control(struct sock *sk, struct msghdr *msg, size_t msglen) +{ + unsigned char *buf; + struct mgmt_hdr *hdr; + u16 opcode, len; + int err; + + BT_DBG("got %zu bytes", msglen); + + if (msglen < sizeof(*hdr)) + return -EINVAL; + + buf = kmalloc(msglen, GFP_ATOMIC); + if (!buf) + return -ENOMEM; + + if (memcpy_fromiovec(buf, msg->msg_iov, msglen)) { + err = -EFAULT; + goto done; + } + + hdr = (struct mgmt_hdr *) buf; + opcode = get_unaligned_le16(&hdr->opcode); + len = get_unaligned_le16(&hdr->len); + + if (len != msglen - sizeof(*hdr)) { + err = -EINVAL; + goto done; + } + + switch (opcode) { + default: + BT_DBG("Unknown 
op %u", opcode); + cmd_status(sk, opcode, 0x01); + break; + } + + err = msglen; + +done: + kfree(buf); + return err; +} -- cgit v1.2.3-59-g8ed1b From a40c406cbdd28dcca3483065bc2ba794cf5aaab7 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 8 Dec 2010 00:21:07 +0200 Subject: Bluetooth: Make hci_send_to_sock usable for management control sockets In order to send data to management control sockets the function should: - skip checks intended for raw HCI data and stack internal events - make sure RAW HCI data or stack internal events don't go to management control sockets In order to accomplish this the patch adds a new member to the bluetooth skb private data to flag skb's that are destined for management control sockets. Signed-off-by: Johan Hedberg Acked-by: Marcel Holtmann Signed-off-by: Gustavo F. Padovan --- include/net/bluetooth/bluetooth.h | 1 + net/bluetooth/hci_sock.c | 10 +++++++++- 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index d81ea7997701..0c5e72503b77 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -144,6 +144,7 @@ struct bt_skb_cb { __u8 tx_seq; __u8 retries; __u8 sar; + unsigned short channel; }; #define bt_cb(skb) ((struct bt_skb_cb *)((skb)->cb)) diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 207be7abda9f..f6c18abab797 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -104,6 +104,12 @@ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb) if (skb->sk == sk) continue; + if (bt_cb(skb)->channel != hci_pi(sk)->channel) + continue; + + if (bt_cb(skb)->channel == HCI_CHANNEL_CONTROL) + goto clone; + /* Apply filter */ flt = &hci_pi(sk)->filter; @@ -127,12 +133,14 @@ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb) continue; } +clone: nskb = skb_clone(skb, GFP_ATOMIC); if (!nskb) continue; /* Put type byte before the data 
*/ - memcpy(skb_push(nskb, 1), &bt_cb(nskb)->pkt_type, 1); + if (bt_cb(skb)->channel == HCI_CHANNEL_RAW) + memcpy(skb_push(nskb, 1), &bt_cb(nskb)->pkt_type, 1); if (sock_queue_rcv_skb(sk, nskb)) kfree_skb(nskb); -- cgit v1.2.3-59-g8ed1b From 50b12f597be354a5a224f05c65c54c0667e57aec Mon Sep 17 00:00:00 2001 From: Helmut Schaa Date: Fri, 19 Nov 2010 12:40:25 +0100 Subject: cfg80211: Add new BSS attribute ht_opmode Add a new BSS attribute to allow hostapd to set the current HT opmode. Otherwise drivers won't be able to set up protection for HT rates in AP mode. Cc: Johannes Berg Signed-off-by: Helmut Schaa Signed-off-by: John W. Linville --- include/linux/nl80211.h | 4 ++++ include/net/cfg80211.h | 3 +++ net/wireless/nl80211.c | 5 +++++ 3 files changed, 12 insertions(+) (limited to 'include/net') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 8e28053ea423..380421253d16 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -849,6 +849,8 @@ enum nl80211_commands { * flag isn't set, the frame will be rejected. This is also used as an * nl80211 capability flag. 
* + * @NL80211_ATTR_BSS_HT_OPMODE: HT operation mode (u16) + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -1025,6 +1027,8 @@ enum nl80211_attrs { NL80211_ATTR_OFFCHANNEL_TX_OK, + NL80211_ATTR_BSS_HT_OPMODE, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 8764c9a5bab7..0d5979924be3 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -605,6 +605,8 @@ struct mpath_info { * (or NULL for no change) * @basic_rates_len: number of basic rates * @ap_isolate: do not forward packets between connected stations + * @ht_opmode: HT Operation mode + * (u16 = opmode, -1 = do not change) */ struct bss_parameters { int use_cts_prot; @@ -613,6 +615,7 @@ struct bss_parameters { u8 *basic_rates; u8 basic_rates_len; int ap_isolate; + int ht_opmode; }; /* diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 2cf03331d4a2..c3f80e565365 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -121,6 +121,7 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_BSS_SHORT_SLOT_TIME] = { .type = NLA_U8 }, [NL80211_ATTR_BSS_BASIC_RATES] = { .type = NLA_BINARY, .len = NL80211_MAX_SUPP_RATES }, + [NL80211_ATTR_BSS_HT_OPMODE] = { .type = NLA_U16 }, [NL80211_ATTR_MESH_PARAMS] = { .type = NLA_NESTED }, @@ -2462,6 +2463,7 @@ static int nl80211_set_bss(struct sk_buff *skb, struct genl_info *info) params.use_short_preamble = -1; params.use_short_slot_time = -1; params.ap_isolate = -1; + params.ht_opmode = -1; if (info->attrs[NL80211_ATTR_BSS_CTS_PROT]) params.use_cts_prot = @@ -2480,6 +2482,9 @@ static int nl80211_set_bss(struct sk_buff *skb, struct genl_info *info) } if (info->attrs[NL80211_ATTR_AP_ISOLATE]) params.ap_isolate = !!nla_get_u8(info->attrs[NL80211_ATTR_AP_ISOLATE]); + if (info->attrs[NL80211_ATTR_BSS_HT_OPMODE]) + params.ht_opmode = + 
nla_get_u16(info->attrs[NL80211_ATTR_BSS_HT_OPMODE]); if (!rdev->ops->change_bss) return -EOPNOTSUPP; -- cgit v1.2.3-59-g8ed1b From defb3519a64141608725e2dac5a5aa9a3c644bae Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 8 Dec 2010 21:16:57 -0800 Subject: net: Abstract away all dst_entry metrics accesses. Use helper functions to hide all direct accesses, especially writes, to dst_entry metrics values. This will allow us to: 1) More easily change how the metrics are stored. 2) Implement COW for metrics. In particular this will help us put metrics into the inetpeer cache if that is what we end up doing. We can make the _metrics member a pointer instead of an array, initially have it point at the read-only metrics in the FIB, and then on the first set grab an inetpeer entry and point the _metrics member there. Signed-off-by: David S. Miller Acked-by: Eric Dumazet --- include/net/dst.h | 26 ++++++++++++++++--- net/bridge/br_device.c | 2 +- net/bridge/br_netfilter.c | 2 +- net/decnet/dn_route.c | 13 +++++----- net/ipv4/ip_gre.c | 2 +- net/ipv4/route.c | 55 +++++++++++++++++++++------------------ net/ipv4/tcp_input.c | 22 +++++++++------- net/ipv6/ndisc.c | 5 ++-- net/ipv6/route.c | 66 ++++++++++++++++++++++++++--------------------- net/xfrm/xfrm_policy.c | 6 ++--- 10 files changed, 118 insertions(+), 81 deletions(-) (limited to 'include/net') diff --git a/include/net/dst.h b/include/net/dst.h index a5bd72646d65..85dee3a57b9b 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -70,7 +70,7 @@ struct dst_entry { struct dst_ops *ops; - u32 metrics[RTAX_MAX]; + u32 _metrics[RTAX_MAX]; #ifdef CONFIG_NET_CLS_ROUTE __u32 tclassid; @@ -106,7 +106,27 @@ struct dst_entry { static inline u32 dst_metric(const struct dst_entry *dst, int metric) { - return dst->metrics[metric-1]; + return dst->_metrics[metric-1]; +} + +static inline void dst_metric_set(struct dst_entry *dst, int metric, u32 val) +{ + dst->_metrics[metric-1] = val; +} + +static inline void 
dst_import_metrics(struct dst_entry *dst, const u32 *src_metrics) +{ + memcpy(dst->_metrics, src_metrics, RTAX_MAX * sizeof(u32)); +} + +static inline void dst_copy_metrics(struct dst_entry *dest, const struct dst_entry *src) +{ + dst_import_metrics(dest, src->_metrics); +} + +static inline u32 *dst_metrics_ptr(struct dst_entry *dst) +{ + return dst->_metrics; } static inline u32 @@ -134,7 +154,7 @@ static inline unsigned long dst_metric_rtt(const struct dst_entry *dst, int metr static inline void set_dst_metric_rtt(struct dst_entry *dst, int metric, unsigned long rtt) { - dst->metrics[metric-1] = jiffies_to_msecs(rtt); + dst_metric_set(dst, metric, jiffies_to_msecs(rtt)); } static inline u32 diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 17cb0b633576..556443566e9c 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -141,7 +141,7 @@ static int br_change_mtu(struct net_device *dev, int new_mtu) #ifdef CONFIG_BRIDGE_NETFILTER /* remember the MTU in the rtable for PMTU */ - br->fake_rtable.dst.metrics[RTAX_MTU - 1] = new_mtu; + dst_metric_set(&br->fake_rtable.dst, RTAX_MTU, new_mtu); #endif return 0; diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 6e1392093911..16f5c333596a 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -124,7 +124,7 @@ void br_netfilter_rtable_init(struct net_bridge *br) atomic_set(&rt->dst.__refcnt, 1); rt->dst.dev = br->dev; rt->dst.path = &rt->dst; - rt->dst.metrics[RTAX_MTU - 1] = 1500; + dst_metric_set(&rt->dst, RTAX_MTU, 1500); rt->dst.flags = DST_NOXFRM; rt->dst.ops = &fake_dst_ops; } diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 8280e43c8861..e2e926841fe6 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -240,13 +240,13 @@ static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu) if (dst_metric(dst, RTAX_MTU) > mtu && mtu >= min_mtu) { if (!(dst_metric_locked(dst, RTAX_MTU))) { - dst->metrics[RTAX_MTU-1] = mtu; + 
dst_metric_set(dst, RTAX_MTU, mtu); dst_set_expires(dst, dn_rt_mtu_expires); } if (!(dst_metric_locked(dst, RTAX_ADVMSS))) { u32 mss = mtu - DN_MAX_NSP_DATA_HEADER; if (dst_metric(dst, RTAX_ADVMSS) > mss) - dst->metrics[RTAX_ADVMSS-1] = mss; + dst_metric_set(dst, RTAX_ADVMSS, mss); } } } @@ -806,8 +806,7 @@ static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res) if (DN_FIB_RES_GW(*res) && DN_FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) rt->rt_gateway = DN_FIB_RES_GW(*res); - memcpy(rt->dst.metrics, fi->fib_metrics, - sizeof(rt->dst.metrics)); + dst_import_metrics(&rt->dst, fi->fib_metrics); } rt->rt_type = res->type; @@ -820,11 +819,11 @@ static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res) if (dst_metric(&rt->dst, RTAX_MTU) == 0 || dst_metric(&rt->dst, RTAX_MTU) > rt->dst.dev->mtu) - rt->dst.metrics[RTAX_MTU-1] = rt->dst.dev->mtu; + dst_metric_set(&rt->dst, RTAX_MTU, rt->dst.dev->mtu); mss = dn_mss_from_pmtu(dev, dst_mtu(&rt->dst)); if (dst_metric(&rt->dst, RTAX_ADVMSS) == 0 || dst_metric(&rt->dst, RTAX_ADVMSS) > mss) - rt->dst.metrics[RTAX_ADVMSS-1] = mss; + dst_metric_set(&rt->dst, RTAX_ADVMSS, mss); return 0; } @@ -1502,7 +1501,7 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, RTA_PUT(skb, RTA_PREFSRC, 2, &rt->rt_local_src); if (rt->rt_daddr != rt->rt_gateway) RTA_PUT(skb, RTA_GATEWAY, 2, &rt->rt_gateway); - if (rtnetlink_put_metrics(skb, rt->dst.metrics) < 0) + if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0) goto rtattr_failure; expires = rt->dst.expires ? 
rt->dst.expires - jiffies : 0; if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0, expires, diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 258c98d5fa79..ff4e7a4e33ed 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -818,7 +818,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev !ipv4_is_multicast(tunnel->parms.iph.daddr)) || rt6->rt6i_dst.plen == 128) { rt6->rt6i_flags |= RTF_MODIFIED; - skb_dst(skb)->metrics[RTAX_MTU-1] = mtu; + dst_metric_set(skb_dst(skb), RTAX_MTU, mtu); } } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 3843c2dfde82..26ac396eaa5e 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1686,11 +1686,14 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, if (mtu < dst_mtu(&rth->dst)) { dst_confirm(&rth->dst); if (mtu < ip_rt_min_pmtu) { + u32 lock = dst_metric(&rth->dst, + RTAX_LOCK); mtu = ip_rt_min_pmtu; - rth->dst.metrics[RTAX_LOCK-1] |= - (1 << RTAX_MTU); + lock |= (1 << RTAX_MTU); + dst_metric_set(&rth->dst, RTAX_LOCK, + lock); } - rth->dst.metrics[RTAX_MTU-1] = mtu; + dst_metric_set(&rth->dst, RTAX_MTU, mtu); dst_set_expires(&rth->dst, ip_rt_mtu_expires); } @@ -1708,10 +1711,11 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu) if (dst_mtu(dst) > mtu && mtu >= 68 && !(dst_metric_locked(dst, RTAX_MTU))) { if (mtu < ip_rt_min_pmtu) { + u32 lock = dst_metric(dst, RTAX_LOCK); mtu = ip_rt_min_pmtu; - dst->metrics[RTAX_LOCK-1] |= (1 << RTAX_MTU); + dst_metric_set(dst, RTAX_LOCK, lock | (1 << RTAX_MTU)); } - dst->metrics[RTAX_MTU-1] = mtu; + dst_metric_set(dst, RTAX_MTU, mtu); dst_set_expires(dst, ip_rt_mtu_expires); call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst); } @@ -1796,36 +1800,37 @@ static void set_class_tag(struct rtable *rt, u32 tag) static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag) { + struct dst_entry *dst = &rt->dst; struct fib_info *fi = res->fi; if (fi) { if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == 
RT_SCOPE_LINK) rt->rt_gateway = FIB_RES_GW(*res); - memcpy(rt->dst.metrics, fi->fib_metrics, - sizeof(rt->dst.metrics)); + dst_import_metrics(dst, fi->fib_metrics); if (fi->fib_mtu == 0) { - rt->dst.metrics[RTAX_MTU-1] = rt->dst.dev->mtu; - if (dst_metric_locked(&rt->dst, RTAX_MTU) && + dst_metric_set(dst, RTAX_MTU, dst->dev->mtu); + if (dst_metric_locked(dst, RTAX_MTU) && rt->rt_gateway != rt->rt_dst && - rt->dst.dev->mtu > 576) - rt->dst.metrics[RTAX_MTU-1] = 576; + dst->dev->mtu > 576) + dst_metric_set(dst, RTAX_MTU, 576); } #ifdef CONFIG_NET_CLS_ROUTE - rt->dst.tclassid = FIB_RES_NH(*res).nh_tclassid; + dst->tclassid = FIB_RES_NH(*res).nh_tclassid; #endif } else - rt->dst.metrics[RTAX_MTU-1]= rt->dst.dev->mtu; - - if (dst_metric(&rt->dst, RTAX_HOPLIMIT) == 0) - rt->dst.metrics[RTAX_HOPLIMIT-1] = sysctl_ip_default_ttl; - if (dst_mtu(&rt->dst) > IP_MAX_MTU) - rt->dst.metrics[RTAX_MTU-1] = IP_MAX_MTU; - if (dst_metric(&rt->dst, RTAX_ADVMSS) == 0) - rt->dst.metrics[RTAX_ADVMSS-1] = max_t(unsigned int, rt->dst.dev->mtu - 40, - ip_rt_min_advmss); - if (dst_metric(&rt->dst, RTAX_ADVMSS) > 65535 - 40) - rt->dst.metrics[RTAX_ADVMSS-1] = 65535 - 40; + dst_metric_set(dst, RTAX_MTU, dst->dev->mtu); + + if (dst_metric(dst, RTAX_HOPLIMIT) == 0) + dst_metric_set(dst, RTAX_HOPLIMIT, sysctl_ip_default_ttl); + if (dst_mtu(dst) > IP_MAX_MTU) + dst_metric_set(dst, RTAX_MTU, IP_MAX_MTU); + if (dst_metric(dst, RTAX_ADVMSS) == 0) + dst_metric_set(dst, RTAX_ADVMSS, + max_t(unsigned int, dst->dev->mtu - 40, + ip_rt_min_advmss)); + if (dst_metric(dst, RTAX_ADVMSS) > 65535 - 40) + dst_metric_set(dst, RTAX_ADVMSS, 65535 - 40); #ifdef CONFIG_NET_CLS_ROUTE #ifdef CONFIG_IP_MULTIPLE_TABLES @@ -2720,7 +2725,7 @@ static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi new->__use = 1; new->input = dst_discard; new->output = dst_discard; - memcpy(new->metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32)); + dst_copy_metrics(new, &ort->dst); new->dev = ort->dst.dev; if 
(new->dev) @@ -2827,7 +2832,7 @@ static int rt_fill_info(struct net *net, if (rt->rt_dst != rt->rt_gateway) NLA_PUT_BE32(skb, RTA_GATEWAY, rt->rt_gateway); - if (rtnetlink_put_metrics(skb, rt->dst.metrics) < 0) + if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0) goto nla_put_failure; if (rt->fl.mark) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 6d8ab1c4efc3..824e8c8a17ad 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -734,7 +734,7 @@ void tcp_update_metrics(struct sock *sk) * Reset our results. */ if (!(dst_metric_locked(dst, RTAX_RTT))) - dst->metrics[RTAX_RTT - 1] = 0; + dst_metric_set(dst, RTAX_RTT, 0); return; } @@ -776,34 +776,38 @@ void tcp_update_metrics(struct sock *sk) if (dst_metric(dst, RTAX_SSTHRESH) && !dst_metric_locked(dst, RTAX_SSTHRESH) && (tp->snd_cwnd >> 1) > dst_metric(dst, RTAX_SSTHRESH)) - dst->metrics[RTAX_SSTHRESH-1] = tp->snd_cwnd >> 1; + dst_metric_set(dst, RTAX_SSTHRESH, tp->snd_cwnd >> 1); if (!dst_metric_locked(dst, RTAX_CWND) && tp->snd_cwnd > dst_metric(dst, RTAX_CWND)) - dst->metrics[RTAX_CWND - 1] = tp->snd_cwnd; + dst_metric_set(dst, RTAX_CWND, tp->snd_cwnd); } else if (tp->snd_cwnd > tp->snd_ssthresh && icsk->icsk_ca_state == TCP_CA_Open) { /* Cong. avoidance phase, cwnd is reliable. */ if (!dst_metric_locked(dst, RTAX_SSTHRESH)) - dst->metrics[RTAX_SSTHRESH-1] = - max(tp->snd_cwnd >> 1, tp->snd_ssthresh); + dst_metric_set(dst, RTAX_SSTHRESH, + max(tp->snd_cwnd >> 1, tp->snd_ssthresh)); if (!dst_metric_locked(dst, RTAX_CWND)) - dst->metrics[RTAX_CWND-1] = (dst_metric(dst, RTAX_CWND) + tp->snd_cwnd) >> 1; + dst_metric_set(dst, RTAX_CWND, + (dst_metric(dst, RTAX_CWND) + + tp->snd_cwnd) >> 1); } else { /* Else slow start did not finish, cwnd is non-sense, ssthresh may be also invalid. 
*/ if (!dst_metric_locked(dst, RTAX_CWND)) - dst->metrics[RTAX_CWND-1] = (dst_metric(dst, RTAX_CWND) + tp->snd_ssthresh) >> 1; + dst_metric_set(dst, RTAX_CWND, + (dst_metric(dst, RTAX_CWND) + + tp->snd_ssthresh) >> 1); if (dst_metric(dst, RTAX_SSTHRESH) && !dst_metric_locked(dst, RTAX_SSTHRESH) && tp->snd_ssthresh > dst_metric(dst, RTAX_SSTHRESH)) - dst->metrics[RTAX_SSTHRESH-1] = tp->snd_ssthresh; + dst_metric_set(dst, RTAX_SSTHRESH, tp->snd_ssthresh); } if (!dst_metric_locked(dst, RTAX_REORDERING)) { if (dst_metric(dst, RTAX_REORDERING) < tp->reordering && tp->reordering != sysctl_tcp_reordering) - dst->metrics[RTAX_REORDERING-1] = tp->reordering; + dst_metric_set(dst, RTAX_REORDERING, tp->reordering); } } } diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index e18f84130203..2342545a5ee9 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1259,7 +1259,8 @@ static void ndisc_router_discovery(struct sk_buff *skb) if (ra_msg->icmph.icmp6_hop_limit) { in6_dev->cnf.hop_limit = ra_msg->icmph.icmp6_hop_limit; if (rt) - rt->dst.metrics[RTAX_HOPLIMIT-1] = ra_msg->icmph.icmp6_hop_limit; + dst_metric_set(&rt->dst, RTAX_HOPLIMIT, + ra_msg->icmph.icmp6_hop_limit); } skip_defrtr: @@ -1377,7 +1378,7 @@ skip_linkparms: in6_dev->cnf.mtu6 = mtu; if (rt) - rt->dst.metrics[RTAX_MTU-1] = mtu; + dst_metric_set(&rt->dst, RTAX_MTU, mtu); rt6_mtu_change(skb->dev, mtu); } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 026caef0326c..4aed0812b512 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -129,7 +129,6 @@ static struct rt6_info ip6_null_entry_template = { .__use = 1, .obsolete = -1, .error = -ENETUNREACH, - .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, .input = ip6_pkt_discard, .output = ip6_pkt_discard_out, }, @@ -150,7 +149,6 @@ static struct rt6_info ip6_prohibit_entry_template = { .__use = 1, .obsolete = -1, .error = -EACCES, - .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, .input = ip6_pkt_prohibit, .output = ip6_pkt_prohibit_out, }, @@ -166,7 +164,6 @@ static 
struct rt6_info ip6_blk_hole_entry_template = { .__use = 1, .obsolete = -1, .error = -EINVAL, - .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, .input = dst_discard, .output = dst_discard, }, @@ -844,7 +841,7 @@ int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl new->input = dst_discard; new->output = dst_discard; - memcpy(new->metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32)); + dst_copy_metrics(new, &ort->dst); new->dev = ort->dst.dev; if (new->dev) dev_hold(new->dev); @@ -928,10 +925,12 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu) if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) { rt6->rt6i_flags |= RTF_MODIFIED; if (mtu < IPV6_MIN_MTU) { + u32 features = dst_metric(dst, RTAX_FEATURES); mtu = IPV6_MIN_MTU; - dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG; + features |= RTAX_FEATURE_ALLFRAG; + dst_metric_set(dst, RTAX_FEATURES, features); } - dst->metrics[RTAX_MTU-1] = mtu; + dst_metric_set(dst, RTAX_MTU, mtu); call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst); } } @@ -989,9 +988,9 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, rt->rt6i_idev = idev; rt->rt6i_nexthop = neigh; atomic_set(&rt->dst.__refcnt, 1); - rt->dst.metrics[RTAX_HOPLIMIT-1] = 255; - rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev); - rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst)); + dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255); + dst_metric_set(&rt->dst, RTAX_MTU, ipv6_get_mtu(rt->rt6i_dev)); + dst_metric_set(&rt->dst, RTAX_ADVMSS, ipv6_advmss(net, dst_mtu(&rt->dst))); rt->dst.output = ip6_output; #if 0 /* there's no chance to use these for ndisc */ @@ -1305,17 +1304,17 @@ install_route: goto out; } - rt->dst.metrics[type - 1] = nla_get_u32(nla); + dst_metric_set(&rt->dst, type, nla_get_u32(nla)); } } } if (dst_metric(&rt->dst, RTAX_HOPLIMIT) == 0) - rt->dst.metrics[RTAX_HOPLIMIT-1] = -1; + dst_metric_set(&rt->dst, RTAX_HOPLIMIT, -1); if (!dst_mtu(&rt->dst)) - rt->dst.metrics[RTAX_MTU-1] = 
ipv6_get_mtu(dev); + dst_metric_set(&rt->dst, RTAX_MTU, ipv6_get_mtu(dev)); if (!dst_metric(&rt->dst, RTAX_ADVMSS)) - rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst)); + dst_metric_set(&rt->dst, RTAX_ADVMSS, ipv6_advmss(net, dst_mtu(&rt->dst))); rt->dst.dev = dev; rt->rt6i_idev = idev; rt->rt6i_table = table; @@ -1541,9 +1540,9 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *src, ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key); nrt->rt6i_nexthop = neigh_clone(neigh); /* Reset pmtu, it may be better */ - nrt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev); - nrt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dev_net(neigh->dev), - dst_mtu(&nrt->dst)); + dst_metric_set(&nrt->dst, RTAX_MTU, ipv6_get_mtu(neigh->dev)); + dst_metric_set(&nrt->dst, RTAX_ADVMSS, ipv6_advmss(dev_net(neigh->dev), + dst_mtu(&nrt->dst))); if (ip6_ins_rt(nrt)) goto out; @@ -1602,9 +1601,12 @@ static void rt6_do_pmtu_disc(struct in6_addr *daddr, struct in6_addr *saddr, would return automatically. 
*/ if (rt->rt6i_flags & RTF_CACHE) { - rt->dst.metrics[RTAX_MTU-1] = pmtu; - if (allfrag) - rt->dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG; + dst_metric_set(&rt->dst, RTAX_MTU, pmtu); + if (allfrag) { + u32 features = dst_metric(&rt->dst, RTAX_FEATURES); + features |= RTAX_FEATURE_ALLFRAG; + dst_metric_set(&rt->dst, RTAX_FEATURES, features); + } dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires); rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES; goto out; @@ -1621,9 +1623,12 @@ static void rt6_do_pmtu_disc(struct in6_addr *daddr, struct in6_addr *saddr, nrt = rt6_alloc_clone(rt, daddr); if (nrt) { - nrt->dst.metrics[RTAX_MTU-1] = pmtu; - if (allfrag) - nrt->dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG; + dst_metric_set(&nrt->dst, RTAX_MTU, pmtu); + if (allfrag) { + u32 features = dst_metric(&nrt->dst, RTAX_FEATURES); + features |= RTAX_FEATURE_ALLFRAG; + dst_metric_set(&nrt->dst, RTAX_FEATURES, features); + } /* According to RFC 1981, detecting PMTU increase shouldn't be * happened within 5 mins, the recommended timer is 10 mins. 
@@ -1674,7 +1679,7 @@ static struct rt6_info * ip6_rt_copy(struct rt6_info *ort) rt->dst.input = ort->dst.input; rt->dst.output = ort->dst.output; - memcpy(rt->dst.metrics, ort->dst.metrics, RTAX_MAX*sizeof(u32)); + dst_copy_metrics(&rt->dst, &ort->dst); rt->dst.error = ort->dst.error; rt->dst.dev = ort->dst.dev; if (rt->dst.dev) @@ -1966,9 +1971,9 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, rt->dst.output = ip6_output; rt->rt6i_dev = net->loopback_dev; rt->rt6i_idev = idev; - rt->dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev); - rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->dst)); - rt->dst.metrics[RTAX_HOPLIMIT-1] = -1; + dst_metric_set(&rt->dst, RTAX_MTU, ipv6_get_mtu(rt->rt6i_dev)); + dst_metric_set(&rt->dst, RTAX_ADVMSS, ipv6_advmss(net, dst_mtu(&rt->dst))); + dst_metric_set(&rt->dst, RTAX_HOPLIMIT, -1); rt->dst.obsolete = -1; rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP; @@ -2068,8 +2073,8 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg) (dst_mtu(&rt->dst) >= arg->mtu || (dst_mtu(&rt->dst) < arg->mtu && dst_mtu(&rt->dst) == idev->cnf.mtu6))) { - rt->dst.metrics[RTAX_MTU-1] = arg->mtu; - rt->dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu); + dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu); + dst_metric_set(&rt->dst, RTAX_ADVMSS, ipv6_advmss(net, arg->mtu)); } return 0; } @@ -2295,7 +2300,7 @@ static int rt6_fill_node(struct net *net, NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf); } - if (rtnetlink_put_metrics(skb, rt->dst.metrics) < 0) + if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0) goto nla_put_failure; if (rt->dst.neighbour) @@ -2686,6 +2691,7 @@ static int __net_init ip6_route_net_init(struct net *net) net->ipv6.ip6_null_entry->dst.path = (struct dst_entry *)net->ipv6.ip6_null_entry; net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops; + dst_metric_set(&net->ipv6.ip6_null_entry->dst, RTAX_HOPLIMIT, 255); #ifdef CONFIG_IPV6_MULTIPLE_TABLES net->ipv6.ip6_prohibit_entry = 
kmemdup(&ip6_prohibit_entry_template, @@ -2696,6 +2702,7 @@ static int __net_init ip6_route_net_init(struct net *net) net->ipv6.ip6_prohibit_entry->dst.path = (struct dst_entry *)net->ipv6.ip6_prohibit_entry; net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops; + dst_metric_set(&net->ipv6.ip6_prohibit_entry->dst, RTAX_HOPLIMIT, 255); net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template, sizeof(*net->ipv6.ip6_blk_hole_entry), @@ -2705,6 +2712,7 @@ static int __net_init ip6_route_net_init(struct net *net) net->ipv6.ip6_blk_hole_entry->dst.path = (struct dst_entry *)net->ipv6.ip6_blk_hole_entry; net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops; + dst_metric_set(&net->ipv6.ip6_blk_hole_entry->dst, RTAX_HOPLIMIT, 255); #endif net->ipv6.sysctl.flush_delay = 0; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 044e77898512..6e50ccd8c532 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1433,7 +1433,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, } xdst->route = dst; - memcpy(&dst1->metrics, &dst->metrics, sizeof(dst->metrics)); + dst_copy_metrics(dst1, dst); if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) { family = xfrm[i]->props.family; @@ -2271,7 +2271,7 @@ static void xfrm_init_pmtu(struct dst_entry *dst) if (pmtu > route_mtu_cached) pmtu = route_mtu_cached; - dst->metrics[RTAX_MTU-1] = pmtu; + dst_metric_set(dst, RTAX_MTU, pmtu); } while ((dst = dst->next)); } @@ -2349,7 +2349,7 @@ static int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first, mtu = xfrm_state_mtu(dst->xfrm, mtu); if (mtu > last->route_mtu_cached) mtu = last->route_mtu_cached; - dst->metrics[RTAX_MTU-1] = mtu; + dst_metric_set(dst, RTAX_MTU, mtu); if (last == first) break; -- cgit v1.2.3-59-g8ed1b From 68835aba4d9b74e2f94106d13b6a4bddc447c4c8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 30 Nov 2010 19:04:07 +0000 Subject: net: optimize INET input path further Followup of 
commit b178bb3dfc30 (net: reorder struct sock fields) Optimize INET input path a bit further, by : 1) moving sk_refcnt close to sk_lock. This reduces number of dirtied cache lines by one on 64bit arches (and 64 bytes cache line size). 2) moving inet_daddr & inet_rcv_saddr at the beginning of sk (same cache line than hash / family / bound_dev_if / nulls_node) This reduces number of accessed cache lines in lookups by one, and dont increase size of inet and timewait socks. inet and tw sockets now share same place-holder for these fields. Before patch : offsetof(struct sock, sk_refcnt) = 0x10 offsetof(struct sock, sk_lock) = 0x40 offsetof(struct sock, sk_receive_queue) = 0x60 offsetof(struct inet_sock, inet_daddr) = 0x270 offsetof(struct inet_sock, inet_rcv_saddr) = 0x274 After patch : offsetof(struct sock, sk_refcnt) = 0x44 offsetof(struct sock, sk_lock) = 0x48 offsetof(struct sock, sk_receive_queue) = 0x68 offsetof(struct inet_sock, inet_daddr) = 0x0 offsetof(struct inet_sock, inet_rcv_saddr) = 0x4 compute_score() (udp or tcp) now use a single cache line per ignored item, instead of two. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inet_sock.h | 5 +++-- include/net/inet_timewait_sock.h | 20 +++++++------------- include/net/sock.h | 37 ++++++++++++++++++++++++------------- net/core/sock.c | 11 ++++++----- net/ipv4/inet_connection_sock.c | 7 +++---- net/ipv6/udp.c | 4 ++-- 6 files changed, 45 insertions(+), 39 deletions(-) (limited to 'include/net') diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 8945f9fb192a..8181498fa96c 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -116,8 +116,9 @@ struct inet_sock { struct ipv6_pinfo *pinet6; #endif /* Socket demultiplex comparisons on incoming packets. 
*/ - __be32 inet_daddr; - __be32 inet_rcv_saddr; +#define inet_daddr sk.__sk_common.skc_daddr +#define inet_rcv_saddr sk.__sk_common.skc_rcv_saddr + __be16 inet_dport; __u16 inet_num; __be32 inet_saddr; diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index a066fdd50da6..17404b5388a7 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -88,12 +88,6 @@ extern void inet_twdr_hangman(unsigned long data); extern void inet_twdr_twkill_work(struct work_struct *work); extern void inet_twdr_twcal_tick(unsigned long data); -#if (BITS_PER_LONG == 64) -#define INET_TIMEWAIT_ADDRCMP_ALIGN_BYTES 8 -#else -#define INET_TIMEWAIT_ADDRCMP_ALIGN_BYTES 4 -#endif - struct inet_bind_bucket; /* @@ -117,15 +111,15 @@ struct inet_timewait_sock { #define tw_hash __tw_common.skc_hash #define tw_prot __tw_common.skc_prot #define tw_net __tw_common.skc_net +#define tw_daddr __tw_common.skc_daddr +#define tw_rcv_saddr __tw_common.skc_rcv_saddr int tw_timeout; volatile unsigned char tw_substate; - /* 3 bits hole, try to pack */ unsigned char tw_rcv_wscale; + /* Socket demultiplex comparisons on incoming packets. */ - /* these five are in inet_sock */ + /* these three are in inet_sock */ __be16 tw_sport; - __be32 tw_daddr __attribute__((aligned(INET_TIMEWAIT_ADDRCMP_ALIGN_BYTES))); - __be32 tw_rcv_saddr; __be16 tw_dport; __u16 tw_num; kmemcheck_bitfield_begin(flags); @@ -191,10 +185,10 @@ static inline struct inet_timewait_sock *inet_twsk(const struct sock *sk) return (struct inet_timewait_sock *)sk; } -static inline __be32 inet_rcv_saddr(const struct sock *sk) +static inline __be32 sk_rcv_saddr(const struct sock *sk) { - return likely(sk->sk_state != TCP_TIME_WAIT) ? 
- inet_sk(sk)->inet_rcv_saddr : inet_twsk(sk)->tw_rcv_saddr; +/* both inet_sk() and inet_twsk() store rcv_saddr in skc_rcv_saddr */ + return sk->__sk_common.skc_rcv_saddr; } extern void inet_twsk_put(struct inet_timewait_sock *tw); diff --git a/include/net/sock.h b/include/net/sock.h index 3482004e5c29..82e86034702f 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -105,10 +105,8 @@ struct net; /** * struct sock_common - minimal network layer representation of sockets - * @skc_node: main hash linkage for various protocol lookup tables - * @skc_nulls_node: main hash linkage for TCP/UDP/UDP-Lite protocol - * @skc_refcnt: reference count - * @skc_tx_queue_mapping: tx queue number for this connection + * @skc_daddr: Foreign IPv4 addr + * @skc_rcv_saddr: Bound local IPv4 addr * @skc_hash: hash value used with various protocol lookup tables * @skc_u16hashes: two u16 hash values used by UDP lookup tables * @skc_family: network address family @@ -119,20 +117,20 @@ struct net; * @skc_portaddr_node: second hash linkage for UDP/UDP-Lite protocol * @skc_prot: protocol handlers inside a network family * @skc_net: reference to the network namespace of this socket + * @skc_node: main hash linkage for various protocol lookup tables + * @skc_nulls_node: main hash linkage for TCP/UDP/UDP-Lite protocol + * @skc_tx_queue_mapping: tx queue number for this connection + * @skc_refcnt: reference count * * This is the minimal network layer representation of sockets, the header * for struct sock and struct inet_timewait_sock. 
*/ struct sock_common { - /* - * first fields are not copied in sock_copy() + /* skc_daddr and skc_rcv_saddr must be grouped : + * cf INET_MATCH() and INET_TW_MATCH() */ - union { - struct hlist_node skc_node; - struct hlist_nulls_node skc_nulls_node; - }; - atomic_t skc_refcnt; - int skc_tx_queue_mapping; + __be32 skc_daddr; + __be32 skc_rcv_saddr; union { unsigned int skc_hash; @@ -150,6 +148,18 @@ struct sock_common { #ifdef CONFIG_NET_NS struct net *skc_net; #endif + /* + * fields between dontcopy_begin/dontcopy_end + * are not copied in sock_copy() + */ + int skc_dontcopy_begin[0]; + union { + struct hlist_node skc_node; + struct hlist_nulls_node skc_nulls_node; + }; + int skc_tx_queue_mapping; + atomic_t skc_refcnt; + int skc_dontcopy_end[0]; }; /** @@ -232,7 +242,8 @@ struct sock { #define sk_refcnt __sk_common.skc_refcnt #define sk_tx_queue_mapping __sk_common.skc_tx_queue_mapping -#define sk_copy_start __sk_common.skc_hash +#define sk_dontcopy_begin __sk_common.skc_dontcopy_begin +#define sk_dontcopy_end __sk_common.skc_dontcopy_end #define sk_hash __sk_common.skc_hash #define sk_family __sk_common.skc_family #define sk_state __sk_common.skc_state diff --git a/net/core/sock.c b/net/core/sock.c index fb6080111461..bcdb6ff6e621 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -992,17 +992,18 @@ static inline void sock_lock_init(struct sock *sk) /* * Copy all fields from osk to nsk but nsk->sk_refcnt must not change yet, * even temporarly, because of RCU lookups. sk_node should also be left as is. 
+ * We must not copy fields between sk_dontcopy_begin and sk_dontcopy_end */ static void sock_copy(struct sock *nsk, const struct sock *osk) { #ifdef CONFIG_SECURITY_NETWORK void *sptr = nsk->sk_security; #endif - BUILD_BUG_ON(offsetof(struct sock, sk_copy_start) != - sizeof(osk->sk_node) + sizeof(osk->sk_refcnt) + - sizeof(osk->sk_tx_queue_mapping)); - memcpy(&nsk->sk_copy_start, &osk->sk_copy_start, - osk->sk_prot->obj_size - offsetof(struct sock, sk_copy_start)); + memcpy(nsk, osk, offsetof(struct sock, sk_dontcopy_begin)); + + memcpy(&nsk->sk_dontcopy_end, &osk->sk_dontcopy_end, + osk->sk_prot->obj_size - offsetof(struct sock, sk_dontcopy_end)); + #ifdef CONFIG_SECURITY_NETWORK nsk->sk_security = sptr; security_sk_clone(osk, nsk); diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 06f5f8f482f0..25e318153f14 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -55,7 +55,6 @@ EXPORT_SYMBOL(inet_get_local_port_range); int inet_csk_bind_conflict(const struct sock *sk, const struct inet_bind_bucket *tb) { - const __be32 sk_rcv_saddr = inet_rcv_saddr(sk); struct sock *sk2; struct hlist_node *node; int reuse = sk->sk_reuse; @@ -75,9 +74,9 @@ int inet_csk_bind_conflict(const struct sock *sk, sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { if (!reuse || !sk2->sk_reuse || sk2->sk_state == TCP_LISTEN) { - const __be32 sk2_rcv_saddr = inet_rcv_saddr(sk2); - if (!sk2_rcv_saddr || !sk_rcv_saddr || - sk2_rcv_saddr == sk_rcv_saddr) + const __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2); + if (!sk2_rcv_saddr || !sk_rcv_saddr(sk) || + sk2_rcv_saddr == sk_rcv_saddr(sk)) break; } } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index b541a4e009fb..7aad12770867 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -54,8 +54,8 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) { const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr; const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2); 
- __be32 sk1_rcv_saddr = inet_sk(sk)->inet_rcv_saddr; - __be32 sk2_rcv_saddr = inet_rcv_saddr(sk2); + __be32 sk1_rcv_saddr = sk_rcv_saddr(sk); + __be32 sk2_rcv_saddr = sk_rcv_saddr(sk2); int sk_ipv6only = ipv6_only_sock(sk); int sk2_ipv6only = inet_v6_ipv6only(sk2); int addr_type = ipv6_addr_type(sk_rcv_saddr6); -- cgit v1.2.3-59-g8ed1b From 35d2856b4693e8de5d616307b56cef296b839157 Mon Sep 17 00:00:00 2001 From: Martin Willi Date: Wed, 8 Dec 2010 04:37:49 +0000 Subject: xfrm: Add Traffic Flow Confidentiality padding XFRM attribute The XFRMA_TFCPAD attribute for XFRM state installation configures Traffic Flow Confidentiality by padding ESP packets to a specified length. Signed-off-by: Martin Willi Acked-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/xfrm.h | 1 + include/net/xfrm.h | 1 + net/xfrm/xfrm_user.c | 19 +++++++++++++++++-- 3 files changed, 19 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index b971e3848493..930fdd2de79c 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h @@ -283,6 +283,7 @@ enum xfrm_attr_type_t { XFRMA_KMADDRESS, /* struct xfrm_user_kmaddress */ XFRMA_ALG_AUTH_TRUNC, /* struct xfrm_algo_auth */ XFRMA_MARK, /* struct xfrm_mark */ + XFRMA_TFCPAD, /* __u32 */ __XFRMA_MAX #define XFRMA_MAX (__XFRMA_MAX - 1) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 7fa5b005893e..b9f385da758e 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -143,6 +143,7 @@ struct xfrm_state { struct xfrm_id id; struct xfrm_selector sel; struct xfrm_mark mark; + u32 tfcpad; u32 genid; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 8bae6b22c846..8eb889510916 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -148,7 +148,8 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, !attrs[XFRMA_ALG_AUTH_TRUNC]) || attrs[XFRMA_ALG_AEAD] || attrs[XFRMA_ALG_CRYPT] || - attrs[XFRMA_ALG_COMP]) + attrs[XFRMA_ALG_COMP] || + 
attrs[XFRMA_TFCPAD]) goto out; break; @@ -165,6 +166,9 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, attrs[XFRMA_ALG_CRYPT]) && attrs[XFRMA_ALG_AEAD]) goto out; + if (attrs[XFRMA_TFCPAD] && + p->mode != XFRM_MODE_TUNNEL) + goto out; break; case IPPROTO_COMP: @@ -172,7 +176,8 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, attrs[XFRMA_ALG_AEAD] || attrs[XFRMA_ALG_AUTH] || attrs[XFRMA_ALG_AUTH_TRUNC] || - attrs[XFRMA_ALG_CRYPT]) + attrs[XFRMA_ALG_CRYPT] || + attrs[XFRMA_TFCPAD]) goto out; break; @@ -186,6 +191,7 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, attrs[XFRMA_ALG_CRYPT] || attrs[XFRMA_ENCAP] || attrs[XFRMA_SEC_CTX] || + attrs[XFRMA_TFCPAD] || !attrs[XFRMA_COADDR]) goto out; break; @@ -439,6 +445,9 @@ static struct xfrm_state *xfrm_state_construct(struct net *net, goto error; } + if (attrs[XFRMA_TFCPAD]) + x->tfcpad = nla_get_u32(attrs[XFRMA_TFCPAD]); + if (attrs[XFRMA_COADDR]) { x->coaddr = kmemdup(nla_data(attrs[XFRMA_COADDR]), sizeof(*x->coaddr), GFP_KERNEL); @@ -688,6 +697,9 @@ static int copy_to_user_state_extra(struct xfrm_state *x, if (x->encap) NLA_PUT(skb, XFRMA_ENCAP, sizeof(*x->encap), x->encap); + if (x->tfcpad) + NLA_PUT_U32(skb, XFRMA_TFCPAD, x->tfcpad); + if (xfrm_mark_put(skb, &x->mark)) goto nla_put_failure; @@ -2122,6 +2134,7 @@ static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = { [XFRMA_MIGRATE] = { .len = sizeof(struct xfrm_user_migrate) }, [XFRMA_KMADDRESS] = { .len = sizeof(struct xfrm_user_kmaddress) }, [XFRMA_MARK] = { .len = sizeof(struct xfrm_mark) }, + [XFRMA_TFCPAD] = { .type = NLA_U32 }, }; static struct xfrm_link { @@ -2301,6 +2314,8 @@ static inline size_t xfrm_sa_len(struct xfrm_state *x) l += nla_total_size(sizeof(*x->calg)); if (x->encap) l += nla_total_size(sizeof(*x->encap)); + if (x->tfcpad) + l += nla_total_size(sizeof(x->tfcpad)); if (x->security) l += nla_total_size(sizeof(struct xfrm_user_sec_ctx) + x->security->ctx_len); -- cgit v1.2.3-59-g8ed1b From 
5170ae824ddf1988a63fb12cbedcff817634c444 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 12 Dec 2010 21:35:57 -0800 Subject: net: Abstract RTAX_HOPLIMIT metric accesses behind helper. Signed-off-by: David S. Miller --- drivers/net/pptp.c | 2 +- include/net/dst.h | 15 ++++++++++++++- net/ipv4/ip_gre.c | 2 +- net/ipv4/ip_output.c | 2 +- net/ipv4/netfilter/ipt_REJECT.c | 2 +- net/ipv4/route.c | 2 +- net/ipv4/xfrm4_mode_tunnel.c | 2 +- net/ipv6/route.c | 4 ++-- 8 files changed, 22 insertions(+), 9 deletions(-) (limited to 'include/net') diff --git a/drivers/net/pptp.c b/drivers/net/pptp.c index 7556a9224f72..c83e168eef21 100644 --- a/drivers/net/pptp.c +++ b/drivers/net/pptp.c @@ -277,7 +277,7 @@ static int pptp_xmit(struct ppp_channel *chan, struct sk_buff *skb) iph->tos = 0; iph->daddr = rt->rt_dst; iph->saddr = rt->rt_src; - iph->ttl = dst_metric(&rt->dst, RTAX_HOPLIMIT); + iph->ttl = dst_metric_hoplimit(&rt->dst); iph->tot_len = htons(skb->len); skb_dst_drop(skb); diff --git a/include/net/dst.h b/include/net/dst.h index 85dee3a57b9b..9208b500aaaf 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -104,11 +104,24 @@ struct dst_entry { #ifdef __KERNEL__ static inline u32 -dst_metric(const struct dst_entry *dst, int metric) +dst_metric_raw(const struct dst_entry *dst, const int metric) { return dst->_metrics[metric-1]; } +static inline u32 +dst_metric(const struct dst_entry *dst, const int metric) +{ + WARN_ON_ONCE(metric == RTAX_HOPLIMIT); + return dst_metric_raw(dst, metric); +} + +static inline u32 +dst_metric_hoplimit(const struct dst_entry *dst) +{ + return dst_metric_raw(dst, RTAX_HOPLIMIT); +} + static inline void dst_metric_set(struct dst_entry *dst, int metric, u32 val) { dst->_metrics[metric-1] = val; diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index ff4e7a4e33ed..46eb3dc37ec6 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -890,7 +890,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev 
iph->ttl = ((struct ipv6hdr *)old_iph)->hop_limit; #endif else - iph->ttl = dst_metric(&rt->dst, RTAX_HOPLIMIT); + iph->ttl = dst_metric_hoplimit(&rt->dst); } ((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 5090c7ff525e..ea28fa5f1992 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -130,7 +130,7 @@ static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst) int ttl = inet->uc_ttl; if (ttl < 0) - ttl = dst_metric(dst, RTAX_HOPLIMIT); + ttl = dst_metric_hoplimit(dst); return ttl; } diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index 43eec80c0e7c..f1309072c541 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -116,7 +116,7 @@ static void send_reset(struct sk_buff *oldskb, int hook) if (ip_route_me_harder(nskb, addr_type)) goto free_nskb; - niph->ttl = dst_metric(skb_dst(nskb), RTAX_HOPLIMIT); + niph->ttl = dst_metric_hoplimit(skb_dst(nskb)); /* "Never happens" */ if (nskb->len > dst_mtu(skb_dst(nskb))) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 26ac396eaa5e..90b5a37555ab 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1821,7 +1821,7 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag) } else dst_metric_set(dst, RTAX_MTU, dst->dev->mtu); - if (dst_metric(dst, RTAX_HOPLIMIT) == 0) + if (dst_metric_raw(dst, RTAX_HOPLIMIT) == 0) dst_metric_set(dst, RTAX_HOPLIMIT, sysctl_ip_default_ttl); if (dst_mtu(dst) > IP_MAX_MTU) dst_metric_set(dst, RTAX_MTU, IP_MAX_MTU); diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c index 6f368413eb0e..63b854e74d99 100644 --- a/net/ipv4/xfrm4_mode_tunnel.c +++ b/net/ipv4/xfrm4_mode_tunnel.c @@ -56,7 +56,7 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) 0 : (XFRM_MODE_SKB_CB(skb)->frag_off & htons(IP_DF)); ip_select_ident(top_iph, dst->child, NULL); - top_iph->ttl = 
dst_metric(dst->child, RTAX_HOPLIMIT); + top_iph->ttl = dst_metric_hoplimit(dst->child); top_iph->saddr = x->props.saddr.a4; top_iph->daddr = x->id.daddr.a4; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 9b2d7bc7beda..d9405d1863b8 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1104,7 +1104,7 @@ static int ipv6_get_mtu(struct net_device *dev) int ip6_dst_hoplimit(struct dst_entry *dst) { - int hoplimit = dst_metric(dst, RTAX_HOPLIMIT); + int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT); if (hoplimit < 0) { struct net_device *dev = dst->dev; struct inet6_dev *idev; @@ -1310,7 +1310,7 @@ install_route: } } - if (dst_metric(&rt->dst, RTAX_HOPLIMIT) == 0) + if (dst_metric_raw(&rt->dst, RTAX_HOPLIMIT) == 0) dst_metric_set(&rt->dst, RTAX_HOPLIMIT, -1); if (!dst_mtu(&rt->dst)) dst_metric_set(&rt->dst, RTAX_MTU, ipv6_get_mtu(dev)); -- cgit v1.2.3-59-g8ed1b From 323e126f0c5995f779d7df7fd035f6e8fed8764d Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 12 Dec 2010 21:55:08 -0800 Subject: ipv4: Don't pre-seed hoplimit metric. Always go through a new ip4_dst_hoplimit() helper, just like ipv6. This allowed several simplifications: 1) The interim dst_metric_hoplimit() can go as it's no longer userd. 2) The sysctl_ip_default_ttl entry no longer needs to use ipv4_doint_and_flush, since the sysctl is not cached in routing cache metrics any longer. 3) ipv4_doint_and_flush no longer needs to be exported and therefore can be marked static. When ipv4_doint_and_flush_strategy was removed some time ago, the external declaration in ip.h was mistakenly left around so kill that off too. We have to move the sysctl_ip_default_ttl declaration into ipv4's route cache definition header net/route.h, because currently net/ip.h (where the declaration lives now) has a back dependency on net/route.h Signed-off-by: David S. 
Miller --- drivers/net/pptp.c | 2 +- include/net/dst.h | 6 ------ include/net/ip.h | 10 ---------- include/net/route.h | 11 +++++++++++ net/ipv4/devinet.c | 6 +++--- net/ipv4/ip_gre.c | 2 +- net/ipv4/ip_output.c | 3 ++- net/ipv4/netfilter/ipt_REJECT.c | 2 +- net/ipv4/route.c | 2 -- net/ipv4/sysctl_net_ipv4.c | 2 +- net/ipv4/xfrm4_mode_tunnel.c | 2 +- 11 files changed, 21 insertions(+), 27 deletions(-) (limited to 'include/net') diff --git a/drivers/net/pptp.c b/drivers/net/pptp.c index c83e168eef21..164cfad6ce79 100644 --- a/drivers/net/pptp.c +++ b/drivers/net/pptp.c @@ -277,7 +277,7 @@ static int pptp_xmit(struct ppp_channel *chan, struct sk_buff *skb) iph->tos = 0; iph->daddr = rt->rt_dst; iph->saddr = rt->rt_src; - iph->ttl = dst_metric_hoplimit(&rt->dst); + iph->ttl = ip4_dst_hoplimit(&rt->dst); iph->tot_len = htons(skb->len); skb_dst_drop(skb); diff --git a/include/net/dst.h b/include/net/dst.h index 9208b500aaaf..755ac6c1aa03 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -116,12 +116,6 @@ dst_metric(const struct dst_entry *dst, const int metric) return dst_metric_raw(dst, metric); } -static inline u32 -dst_metric_hoplimit(const struct dst_entry *dst) -{ - return dst_metric_raw(dst, RTAX_HOPLIMIT); -} - static inline void dst_metric_set(struct dst_entry *dst, int metric, u32 val) { dst->_metrics[metric-1] = val; diff --git a/include/net/ip.h b/include/net/ip.h index 86e2b182a0c0..67fac78a186b 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -201,7 +201,6 @@ static inline int inet_is_reserved_local_port(int port) return test_bit(port, sysctl_local_reserved_ports); } -extern int sysctl_ip_default_ttl; extern int sysctl_ip_nonlocal_bind; extern struct ctl_path net_core_path[]; @@ -428,15 +427,6 @@ extern void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err, extern void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 dport, u32 info); -/* sysctl helpers - any sysctl which holds a value that ends up being - * fed into 
the routing cache should use these handlers. - */ -int ipv4_doint_and_flush(ctl_table *ctl, int write, - void __user *buffer, - size_t *lenp, loff_t *ppos); -int ipv4_doint_and_flush_strategy(ctl_table *table, - void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen); #ifdef CONFIG_PROC_FS extern int ip_misc_proc_init(void); #endif diff --git a/include/net/route.h b/include/net/route.h index b8c1f7703fc6..27002362944a 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -231,4 +231,15 @@ static inline int inet_iif(const struct sk_buff *skb) return skb_rtable(skb)->rt_iif; } +extern int sysctl_ip_default_ttl; + +static inline int ip4_dst_hoplimit(const struct dst_entry *dst) +{ + int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT); + + if (hoplimit == 0) + hoplimit = sysctl_ip_default_ttl; + return hoplimit; +} + #endif /* _ROUTE_H */ diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 3b067704ab38..748cb5b337bd 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1430,9 +1430,9 @@ static int devinet_sysctl_forward(ctl_table *ctl, int write, return ret; } -int ipv4_doint_and_flush(ctl_table *ctl, int write, - void __user *buffer, - size_t *lenp, loff_t *ppos) +static int ipv4_doint_and_flush(ctl_table *ctl, int write, + void __user *buffer, + size_t *lenp, loff_t *ppos) { int *valp = ctl->data; int val = *valp; diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 46eb3dc37ec6..eb68a0e34e49 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -890,7 +890,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev iph->ttl = ((struct ipv6hdr *)old_iph)->hop_limit; #endif else - iph->ttl = dst_metric_hoplimit(&rt->dst); + iph->ttl = ip4_dst_hoplimit(&rt->dst); } ((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index ea28fa5f1992..04c7b3ba6b39 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -82,6 +82,7 @@ #include 
int sysctl_ip_default_ttl __read_mostly = IPDEFTTL; +EXPORT_SYMBOL(sysctl_ip_default_ttl); /* Generate a checksum for an outgoing IP datagram. */ __inline__ void ip_send_check(struct iphdr *iph) @@ -130,7 +131,7 @@ static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst) int ttl = inet->uc_ttl; if (ttl < 0) - ttl = dst_metric_hoplimit(dst); + ttl = ip4_dst_hoplimit(dst); return ttl; } diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index f1309072c541..1ff79e557f96 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -116,7 +116,7 @@ static void send_reset(struct sk_buff *oldskb, int hook) if (ip_route_me_harder(nskb, addr_type)) goto free_nskb; - niph->ttl = dst_metric_hoplimit(skb_dst(nskb)); + niph->ttl = ip4_dst_hoplimit(skb_dst(nskb)); /* "Never happens" */ if (nskb->len > dst_mtu(skb_dst(nskb))) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 90b5a37555ab..770f70427f0b 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1821,8 +1821,6 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag) } else dst_metric_set(dst, RTAX_MTU, dst->dev->mtu); - if (dst_metric_raw(dst, RTAX_HOPLIMIT) == 0) - dst_metric_set(dst, RTAX_HOPLIMIT, sysctl_ip_default_ttl); if (dst_mtu(dst) > IP_MAX_MTU) dst_metric_set(dst, RTAX_MTU, IP_MAX_MTU); if (dst_metric(dst, RTAX_ADVMSS) == 0) diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 1b4ec21497a4..e85ff5930607 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -155,7 +155,7 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_ip_default_ttl, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = ipv4_doint_and_flush, + .proc_handler = proc_dointvec, .extra2 = &init_net, }, { diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c index 63b854e74d99..534972e114ac 100644 --- a/net/ipv4/xfrm4_mode_tunnel.c +++ b/net/ipv4/xfrm4_mode_tunnel.c @@ 
-56,7 +56,7 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) 0 : (XFRM_MODE_SKB_CB(skb)->frag_off & htons(IP_DF)); ip_select_ident(top_iph, dst->child, NULL); - top_iph->ttl = dst_metric_hoplimit(dst->child); + top_iph->ttl = ip4_dst_hoplimit(dst->child); top_iph->saddr = x->props.saddr.a4; top_iph->daddr = x->id.daddr.a4; -- cgit v1.2.3-59-g8ed1b From a7ffac9591a2a0ee74c431396ae475a8d0caa51e Mon Sep 17 00:00:00 2001 From: Bruno Randolf Date: Wed, 8 Dec 2010 13:59:24 +0900 Subject: cfg80211: Add antenna availability information Add a field to wiphy for the hardware to report the availble antennas for configuration. Only if this is set to something bigger than zero, will the anntenna configuration ops be executed. Allthough this could be a simple number of antennas, I defined it as a bitmap of antennas which are available for configuration, since it's more consistent with the rest of the antenna API and there could be cases where the hardware allows only configuration of certain antennas. As it does not make much of a difference in size or normal usage, I think it's better to be able to support this, in case the need arises. The antenna configuration is now also checked against the availabe antennas and rejected if it does not match. Signed-off-by: Bruno Randolf -- v3: always apply available antenna mask (for "all" antennas case). v2: reject antenna configurations which don't match the available antennas Signed-off-by: John W. 
Linville --- include/net/cfg80211.h | 5 +++++ net/wireless/nl80211.c | 15 +++++++++++++-- 2 files changed, 18 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 0d5979924be3..4d5acb013636 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1468,6 +1468,9 @@ struct ieee80211_txrx_stypes { * @mgmt_stypes: bitmasks of frame subtypes that can be subscribed to or * transmitted through nl80211, points to an array indexed by interface * type + * + * @available_antennas: bitmap of antennas which are available to configure. + * antenna configuration commands will be rejected unless this is set. */ struct wiphy { /* assign these fields before you register the wiphy */ @@ -1507,6 +1510,8 @@ struct wiphy { u8 max_num_pmkids; + u32 available_antennas; + /* If multiple wiphys are registered and you're handed e.g. * a regular netdev with assigned ieee80211_ptr, you won't * know whether it points to a wiphy your driver has registered diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index c3f80e565365..73a7f6d354c9 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -548,7 +548,7 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, if (dev->wiphy.flags & WIPHY_FLAG_CONTROL_PORT_PROTOCOL) NLA_PUT_FLAG(msg, NL80211_ATTR_CONTROL_PORT_ETHERTYPE); - if (dev->ops->get_antenna) { + if (dev->wiphy.available_antennas && dev->ops->get_antenna) { u32 tx_ant = 0, rx_ant = 0; int res; res = dev->ops->get_antenna(&dev->wiphy, &tx_ant, &rx_ant); @@ -1046,7 +1046,7 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_WIPHY_ANTENNA_TX] && info->attrs[NL80211_ATTR_WIPHY_ANTENNA_RX]) { u32 tx_ant, rx_ant; - if (!rdev->ops->set_antenna) { + if (!rdev->wiphy.available_antennas || !rdev->ops->set_antenna) { result = -EOPNOTSUPP; goto bad_res; } @@ -1054,6 +1054,17 @@ static int nl80211_set_wiphy(struct 
sk_buff *skb, struct genl_info *info) tx_ant = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_ANTENNA_TX]); rx_ant = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_ANTENNA_RX]); + /* reject antenna configurations which don't match the + * available antenna mask, except for the "all" mask */ + if ((~tx_ant && (tx_ant & ~rdev->wiphy.available_antennas)) || + (~rx_ant && (rx_ant & ~rdev->wiphy.available_antennas))) { + result = -EINVAL; + goto bad_res; + } + + tx_ant = tx_ant & rdev->wiphy.available_antennas; + rx_ant = rx_ant & rdev->wiphy.available_antennas; + result = rdev->ops->set_antenna(&rdev->wiphy, tx_ant, rx_ant); if (result) goto bad_res; -- cgit v1.2.3-59-g8ed1b From dbd2fd656f2060abfd3a16257f8b51ec60f6d2ed Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 9 Dec 2010 19:58:59 +0100 Subject: cfg80211/nl80211: separate unicast/multicast default TX keys Allow userspace to specify that a given key is default only for unicast and/or multicast transmissions. Only WEP keys are for both, WPA/RSN keys set here are GTKs for multicast only. For more future flexibility, allow to specify all combiations. Wireless extensions can only set both so use nl80211; WEP keys (connect keys) must be set as default for both (but 802.1X WEP is still possible). Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- drivers/net/wireless/iwmc3200wifi/cfg80211.c | 3 +- drivers/net/wireless/libertas/cfg.c | 3 +- drivers/net/wireless/rndis_wlan.c | 4 +- include/linux/nl80211.h | 27 ++++++ include/net/cfg80211.h | 5 +- net/mac80211/cfg.c | 3 +- net/wireless/nl80211.c | 125 +++++++++++++++++++++++---- net/wireless/util.c | 3 +- net/wireless/wext-compat.c | 8 +- 9 files changed, 152 insertions(+), 29 deletions(-) (limited to 'include/net') diff --git a/drivers/net/wireless/iwmc3200wifi/cfg80211.c b/drivers/net/wireless/iwmc3200wifi/cfg80211.c index c6c0eff9b5ed..5a4982271e96 100644 --- a/drivers/net/wireless/iwmc3200wifi/cfg80211.c +++ b/drivers/net/wireless/iwmc3200wifi/cfg80211.c @@ -225,7 +225,8 @@ static int iwm_cfg80211_del_key(struct wiphy *wiphy, struct net_device *ndev, static int iwm_cfg80211_set_default_key(struct wiphy *wiphy, struct net_device *ndev, - u8 key_index) + u8 key_index, bool unicast, + bool multicast) { struct iwm_priv *iwm = ndev_to_iwm(ndev); diff --git a/drivers/net/wireless/libertas/cfg.c b/drivers/net/wireless/libertas/cfg.c index dee32d3681a5..300be1931826 100644 --- a/drivers/net/wireless/libertas/cfg.c +++ b/drivers/net/wireless/libertas/cfg.c @@ -1422,7 +1422,8 @@ static int lbs_cfg_disconnect(struct wiphy *wiphy, struct net_device *dev, static int lbs_cfg_set_default_key(struct wiphy *wiphy, struct net_device *netdev, - u8 key_index) + u8 key_index, bool unicast, + bool multicast) { struct lbs_private *priv = wiphy_priv(wiphy); diff --git a/drivers/net/wireless/rndis_wlan.c b/drivers/net/wireless/rndis_wlan.c index 19f3d568f700..4a4f00591447 100644 --- a/drivers/net/wireless/rndis_wlan.c +++ b/drivers/net/wireless/rndis_wlan.c @@ -554,7 +554,7 @@ static int rndis_del_key(struct wiphy *wiphy, struct net_device *netdev, u8 key_index, bool pairwise, const u8 *mac_addr); static int rndis_set_default_key(struct wiphy *wiphy, struct net_device *netdev, - u8 key_index); + u8 key_index, bool unicast, bool multicast); static int 
rndis_get_station(struct wiphy *wiphy, struct net_device *dev, u8 *mac, struct station_info *sinfo); @@ -2381,7 +2381,7 @@ static int rndis_del_key(struct wiphy *wiphy, struct net_device *netdev, } static int rndis_set_default_key(struct wiphy *wiphy, struct net_device *netdev, - u8 key_index) + u8 key_index, bool unicast, bool multicast) { struct rndis_wlan_private *priv = wiphy_priv(wiphy); struct usbnet *usbdev = priv->usbdev; diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 380421253d16..b8fa25d741ba 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -851,6 +851,10 @@ enum nl80211_commands { * * @NL80211_ATTR_BSS_HTOPMODE: HT operation mode (u16) * + * @NL80211_ATTR_KEY_DEFAULT_TYPES: A nested attribute containing flags + * attributes, specifying what a key should be set as default as. + * See &enum nl80211_key_default_types. + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -1029,6 +1033,8 @@ enum nl80211_attrs { NL80211_ATTR_BSS_HT_OPMODE, + NL80211_ATTR_KEY_DEFAULT_TYPES, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -1774,6 +1780,23 @@ enum nl80211_wpa_versions { NL80211_WPA_VERSION_2 = 1 << 1, }; +/** + * enum nl80211_key_default_types - key default types + * @__NL80211_KEY_DEFAULT_TYPE_INVALID: invalid + * @NL80211_KEY_DEFAULT_TYPE_UNICAST: key should be used as default + * unicast key + * @NL80211_KEY_DEFAULT_TYPE_MULTICAST: key should be used as default + * multicast key + * @NUM_NL80211_KEY_DEFAULT_TYPES: number of default types + */ +enum nl80211_key_default_types { + __NL80211_KEY_DEFAULT_TYPE_INVALID, + NL80211_KEY_DEFAULT_TYPE_UNICAST, + NL80211_KEY_DEFAULT_TYPE_MULTICAST, + + NUM_NL80211_KEY_DEFAULT_TYPES +}; + /** * enum nl80211_key_attributes - key attributes * @__NL80211_KEY_INVALID: invalid @@ -1790,6 +1813,9 @@ enum nl80211_wpa_versions { * @NL80211_KEY_TYPE: the key type from enum 
nl80211_key_type, if not * specified the default depends on whether a MAC address was * given with the command using the key or not (u32) + * @NL80211_KEY_DEFAULT_TYPES: A nested attribute containing flags + * attributes, specifying what a key should be set as default as. + * See &enum nl80211_key_default_types. * @__NL80211_KEY_AFTER_LAST: internal * @NL80211_KEY_MAX: highest key attribute */ @@ -1802,6 +1828,7 @@ enum nl80211_key_attributes { NL80211_KEY_DEFAULT, NL80211_KEY_DEFAULT_MGMT, NL80211_KEY_TYPE, + NL80211_KEY_DEFAULT_TYPES, /* keep last */ __NL80211_KEY_AFTER_LAST, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 4d5acb013636..22be7c625b70 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1211,7 +1211,7 @@ struct cfg80211_ops { u8 key_index, bool pairwise, const u8 *mac_addr); int (*set_default_key)(struct wiphy *wiphy, struct net_device *netdev, - u8 key_index); + u8 key_index, bool unicast, bool multicast); int (*set_default_mgmt_key)(struct wiphy *wiphy, struct net_device *netdev, u8 key_index); @@ -1393,6 +1393,8 @@ struct cfg80211_ops { * control port protocol ethertype. The device also honours the * control_port_no_encrypt flag. * @WIPHY_FLAG_IBSS_RSN: The device supports IBSS RSN. + * @WIPHY_FLAG_SUPPORTS_SEPARATE_DEFAULT_KEYS: The device supports separate + * unicast and multicast TX keys. 
*/ enum wiphy_flags { WIPHY_FLAG_CUSTOM_REGULATORY = BIT(0), @@ -1404,6 +1406,7 @@ enum wiphy_flags { WIPHY_FLAG_4ADDR_STATION = BIT(6), WIPHY_FLAG_CONTROL_PORT_PROTOCOL = BIT(7), WIPHY_FLAG_IBSS_RSN = BIT(8), + WIPHY_FLAG_SUPPORTS_SEPARATE_DEFAULT_KEYS= BIT(9), }; struct mac_address { diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index c30b8b72eedb..12f7dc048d34 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -295,7 +295,8 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, static int ieee80211_config_default_key(struct wiphy *wiphy, struct net_device *dev, - u8 key_idx) + u8 key_idx, bool uni, + bool multi) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 73a7f6d354c9..53f044370cde 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -171,6 +171,7 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_WIPHY_ANTENNA_RX] = { .type = NLA_U32 }, [NL80211_ATTR_MCAST_RATE] = { .type = NLA_U32 }, [NL80211_ATTR_OFFCHANNEL_TX_OK] = { .type = NLA_FLAG }, + [NL80211_ATTR_KEY_DEFAULT_TYPES] = { .type = NLA_NESTED }, }; /* policy for the key attributes */ @@ -182,6 +183,14 @@ static const struct nla_policy nl80211_key_policy[NL80211_KEY_MAX + 1] = { [NL80211_KEY_DEFAULT] = { .type = NLA_FLAG }, [NL80211_KEY_DEFAULT_MGMT] = { .type = NLA_FLAG }, [NL80211_KEY_TYPE] = { .type = NLA_U32 }, + [NL80211_KEY_DEFAULT_TYPES] = { .type = NLA_NESTED }, +}; + +/* policy for the key default flags */ +static const struct nla_policy +nl80211_key_default_policy[NUM_NL80211_KEY_DEFAULT_TYPES] = { + [NL80211_KEY_DEFAULT_TYPE_UNICAST] = { .type = NLA_FLAG }, + [NL80211_KEY_DEFAULT_TYPE_MULTICAST] = { .type = NLA_FLAG }, }; /* ifidx get helper */ @@ -314,6 +323,7 @@ struct key_parse { int idx; int type; bool def, defmgmt; + bool def_uni, def_multi; }; static int nl80211_parse_key_new(struct nlattr *key, struct key_parse 
*k) @@ -327,6 +337,13 @@ static int nl80211_parse_key_new(struct nlattr *key, struct key_parse *k) k->def = !!tb[NL80211_KEY_DEFAULT]; k->defmgmt = !!tb[NL80211_KEY_DEFAULT_MGMT]; + if (k->def) { + k->def_uni = true; + k->def_multi = true; + } + if (k->defmgmt) + k->def_multi = true; + if (tb[NL80211_KEY_IDX]) k->idx = nla_get_u8(tb[NL80211_KEY_IDX]); @@ -349,6 +366,19 @@ static int nl80211_parse_key_new(struct nlattr *key, struct key_parse *k) return -EINVAL; } + if (tb[NL80211_KEY_DEFAULT_TYPES]) { + struct nlattr *kdt[NUM_NL80211_KEY_DEFAULT_TYPES]; + int err = nla_parse_nested(kdt, + NUM_NL80211_KEY_DEFAULT_TYPES - 1, + tb[NL80211_KEY_DEFAULT_TYPES], + nl80211_key_default_policy); + if (err) + return err; + + k->def_uni = kdt[NL80211_KEY_DEFAULT_TYPE_UNICAST]; + k->def_multi = kdt[NL80211_KEY_DEFAULT_TYPE_MULTICAST]; + } + return 0; } @@ -373,12 +403,32 @@ static int nl80211_parse_key_old(struct genl_info *info, struct key_parse *k) k->def = !!info->attrs[NL80211_ATTR_KEY_DEFAULT]; k->defmgmt = !!info->attrs[NL80211_ATTR_KEY_DEFAULT_MGMT]; + if (k->def) { + k->def_uni = true; + k->def_multi = true; + } + if (k->defmgmt) + k->def_multi = true; + if (info->attrs[NL80211_ATTR_KEY_TYPE]) { k->type = nla_get_u32(info->attrs[NL80211_ATTR_KEY_TYPE]); if (k->type < 0 || k->type >= NUM_NL80211_KEYTYPES) return -EINVAL; } + if (info->attrs[NL80211_ATTR_KEY_DEFAULT_TYPES]) { + struct nlattr *kdt[NUM_NL80211_KEY_DEFAULT_TYPES]; + int err = nla_parse_nested( + kdt, NUM_NL80211_KEY_DEFAULT_TYPES - 1, + info->attrs[NL80211_ATTR_KEY_DEFAULT_TYPES], + nl80211_key_default_policy); + if (err) + return err; + + k->def_uni = kdt[NL80211_KEY_DEFAULT_TYPE_UNICAST]; + k->def_multi = kdt[NL80211_KEY_DEFAULT_TYPE_MULTICAST]; + } + return 0; } @@ -401,6 +451,11 @@ static int nl80211_parse_key(struct genl_info *info, struct key_parse *k) if (k->def && k->defmgmt) return -EINVAL; + if (k->defmgmt) { + if (k->def_uni || !k->def_multi) + return -EINVAL; + } + if (k->idx != -1) { if 
(k->defmgmt) { if (k->idx < 4 || k->idx > 5) @@ -450,6 +505,8 @@ nl80211_parse_connkeys(struct cfg80211_registered_device *rdev, goto error; def = 1; result->def = parse.idx; + if (!parse.def_uni || !parse.def_multi) + goto error; } else if (parse.defmgmt) goto error; err = cfg80211_validate_key_settings(rdev, &parse.p, @@ -1586,8 +1643,6 @@ static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info) struct key_parse key; int err; struct net_device *dev = info->user_ptr[1]; - int (*func)(struct wiphy *wiphy, struct net_device *netdev, - u8 key_index); err = nl80211_parse_key(info, &key); if (err) @@ -1600,27 +1655,61 @@ static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info) if (!key.def && !key.defmgmt) return -EINVAL; - if (key.def) - func = rdev->ops->set_default_key; - else - func = rdev->ops->set_default_mgmt_key; + wdev_lock(dev->ieee80211_ptr); - if (!func) - return -EOPNOTSUPP; + if (key.def) { + if (!rdev->ops->set_default_key) { + err = -EOPNOTSUPP; + goto out; + } - wdev_lock(dev->ieee80211_ptr); - err = nl80211_key_allowed(dev->ieee80211_ptr); - if (!err) - err = func(&rdev->wiphy, dev, key.idx); + err = nl80211_key_allowed(dev->ieee80211_ptr); + if (err) + goto out; + + if (!(rdev->wiphy.flags & + WIPHY_FLAG_SUPPORTS_SEPARATE_DEFAULT_KEYS)) { + if (!key.def_uni || !key.def_multi) { + err = -EOPNOTSUPP; + goto out; + } + } + + err = rdev->ops->set_default_key(&rdev->wiphy, dev, key.idx, + key.def_uni, key.def_multi); + + if (err) + goto out; #ifdef CONFIG_CFG80211_WEXT - if (!err) { - if (func == rdev->ops->set_default_key) - dev->ieee80211_ptr->wext.default_key = key.idx; - else - dev->ieee80211_ptr->wext.default_mgmt_key = key.idx; - } + dev->ieee80211_ptr->wext.default_key = key.idx; +#endif + } else { + if (key.def_uni || !key.def_multi) { + err = -EINVAL; + goto out; + } + + if (!rdev->ops->set_default_mgmt_key) { + err = -EOPNOTSUPP; + goto out; + } + + err = nl80211_key_allowed(dev->ieee80211_ptr); + if (err) + goto 
out; + + err = rdev->ops->set_default_mgmt_key(&rdev->wiphy, + dev, key.idx); + if (err) + goto out; + +#ifdef CONFIG_CFG80211_WEXT + dev->ieee80211_ptr->wext.default_mgmt_key = key.idx; #endif + } + + out: wdev_unlock(dev->ieee80211_ptr); return err; diff --git a/net/wireless/util.c b/net/wireless/util.c index 4de624ca4c63..7620ae2fcf18 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -689,7 +689,8 @@ void cfg80211_upload_connect_keys(struct wireless_dev *wdev) continue; } if (wdev->connect_keys->def == i) - if (rdev->ops->set_default_key(wdev->wiphy, dev, i)) { + if (rdev->ops->set_default_key(wdev->wiphy, dev, + i, true, true)) { netdev_err(dev, "failed to set defkey %d\n", i); continue; } diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index 12222ee6ebf2..3e5dbd4e4cd5 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -548,8 +548,8 @@ static int __cfg80211_set_encryption(struct cfg80211_registered_device *rdev, __cfg80211_leave_ibss(rdev, wdev->netdev, true); rejoin = true; } - err = rdev->ops->set_default_key(&rdev->wiphy, - dev, idx); + err = rdev->ops->set_default_key(&rdev->wiphy, dev, + idx, true, true); } if (!err) { wdev->wext.default_key = idx; @@ -627,8 +627,8 @@ int cfg80211_wext_siwencode(struct net_device *dev, err = 0; wdev_lock(wdev); if (wdev->current_bss) - err = rdev->ops->set_default_key(&rdev->wiphy, - dev, idx); + err = rdev->ops->set_default_key(&rdev->wiphy, dev, + idx, true, true); if (!err) wdev->wext.default_key = idx; wdev_unlock(wdev); -- cgit v1.2.3-59-g8ed1b From 0dbaee3b37e118a96bb7b8eb0d9bbaeeb46264be Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 13 Dec 2010 12:52:14 -0800 Subject: net: Abstract default ADVMSS behind an accessor. Make all RTAX_ADVMSS metric accesses go through a new helper function, dst_metric_advmss(). 
Leave the actual default metric as "zero" in the real metric slot, and compute the actual default value dynamically via a new dst_ops AF specific callback. For stacked IPSEC routes, we use the advmss of the path which preserves existing behavior. Unlike ipv4/ipv6, DecNET ties the advmss to the mtu and thus updates advmss on pmtu updates. This inconsistency in advmss handling results in more raw metric accesses than I wish we ended up with. Signed-off-by: David S. Miller --- drivers/scsi/cxgbi/libcxgbi.c | 2 +- include/net/dst.h | 14 +++++++++++++- include/net/dst_ops.h | 1 + net/decnet/af_decnet.c | 4 ++-- net/decnet/dn_route.c | 22 ++++++++++++++++------ net/ipv4/route.c | 24 +++++++++++++++++------- net/ipv4/tcp_ipv4.c | 2 +- net/ipv4/tcp_output.c | 14 +++++++++----- net/ipv6/route.c | 16 +++++++--------- net/ipv6/tcp_ipv6.c | 2 +- net/xfrm/xfrm_policy.c | 7 +++++++ 11 files changed, 75 insertions(+), 33 deletions(-) (limited to 'include/net') diff --git a/drivers/scsi/cxgbi/libcxgbi.c b/drivers/scsi/cxgbi/libcxgbi.c index be5661707dfa..d2ad3d676724 100644 --- a/drivers/scsi/cxgbi/libcxgbi.c +++ b/drivers/scsi/cxgbi/libcxgbi.c @@ -825,7 +825,7 @@ unsigned int cxgbi_sock_select_mss(struct cxgbi_sock *csk, unsigned int pmtu) unsigned int idx; struct dst_entry *dst = csk->dst; - csk->advmss = dst_metric(dst, RTAX_ADVMSS); + csk->advmss = dst_metric_advmss(dst); if (csk->advmss > pmtu - 40) csk->advmss = pmtu - 40; diff --git a/include/net/dst.h b/include/net/dst.h index 755ac6c1aa03..03a1c3d52d80 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -112,10 +112,22 @@ dst_metric_raw(const struct dst_entry *dst, const int metric) static inline u32 dst_metric(const struct dst_entry *dst, const int metric) { - WARN_ON_ONCE(metric == RTAX_HOPLIMIT); + WARN_ON_ONCE(metric == RTAX_HOPLIMIT || + metric == RTAX_ADVMSS); return dst_metric_raw(dst, metric); } +static inline u32 +dst_metric_advmss(const struct dst_entry *dst) +{ + u32 advmss = dst_metric_raw(dst, 
RTAX_ADVMSS); + + if (!advmss) + advmss = dst->ops->default_advmss(dst); + + return advmss; +} + static inline void dst_metric_set(struct dst_entry *dst, int metric, u32 val) { dst->_metrics[metric-1] = val; diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h index 51665b3461b8..15fb7af08c42 100644 --- a/include/net/dst_ops.h +++ b/include/net/dst_ops.h @@ -16,6 +16,7 @@ struct dst_ops { int (*gc)(struct dst_ops *ops); struct dst_entry * (*check)(struct dst_entry *, __u32 cookie); + unsigned int (*default_advmss)(const struct dst_entry *); void (*destroy)(struct dst_entry *); void (*ifdown)(struct dst_entry *, struct net_device *dev, int how); diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 0065e7e14af4..2af15b15d1fa 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -829,7 +829,7 @@ static int dn_confirm_accept(struct sock *sk, long *timeo, gfp_t allocation) return -EINVAL; scp->state = DN_CC; - scp->segsize_loc = dst_metric(__sk_dst_get(sk), RTAX_ADVMSS); + scp->segsize_loc = dst_metric_advmss(__sk_dst_get(sk)); dn_send_conn_conf(sk, allocation); prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); @@ -958,7 +958,7 @@ static int __dn_connect(struct sock *sk, struct sockaddr_dn *addr, int addrlen, sk->sk_route_caps = sk->sk_dst_cache->dev->features; sock->state = SS_CONNECTING; scp->state = DN_CI; - scp->segsize_loc = dst_metric(sk->sk_dst_cache, RTAX_ADVMSS); + scp->segsize_loc = dst_metric_advmss(sk->sk_dst_cache); dn_nsp_send_conninit(sk, NSP_CI); err = -EINPROGRESS; diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index e2e926841fe6..b8a5c0515be8 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -110,6 +110,7 @@ static unsigned long dn_rt_deadline; static int dn_dst_gc(struct dst_ops *ops); static struct dst_entry *dn_dst_check(struct dst_entry *, __u32); +static unsigned int dn_dst_default_advmss(const struct dst_entry *dst); static struct dst_entry *dn_dst_negative_advice(struct 
dst_entry *); static void dn_dst_link_failure(struct sk_buff *); static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu); @@ -129,6 +130,7 @@ static struct dst_ops dn_dst_ops = { .gc_thresh = 128, .gc = dn_dst_gc, .check = dn_dst_check, + .default_advmss = dn_dst_default_advmss, .negative_advice = dn_dst_negative_advice, .link_failure = dn_dst_link_failure, .update_pmtu = dn_dst_update_pmtu, @@ -245,7 +247,8 @@ static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu) } if (!(dst_metric_locked(dst, RTAX_ADVMSS))) { u32 mss = mtu - DN_MAX_NSP_DATA_HEADER; - if (dst_metric(dst, RTAX_ADVMSS) > mss) + u32 existing_mss = dst_metric_raw(dst, RTAX_ADVMSS); + if (!existing_mss || existing_mss > mss) dst_metric_set(dst, RTAX_ADVMSS, mss); } } @@ -795,12 +798,17 @@ static int dn_rt_bug(struct sk_buff *skb) return NET_RX_DROP; } +static unsigned int dn_dst_default_advmss(const struct dst_entry *dst) +{ + return dn_mss_from_pmtu(dst->dev, dst_mtu(dst)); +} + static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res) { struct dn_fib_info *fi = res->fi; struct net_device *dev = rt->dst.dev; struct neighbour *n; - unsigned mss; + unsigned int metric; if (fi) { if (DN_FIB_RES_GW(*res) && @@ -820,10 +828,12 @@ static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res) if (dst_metric(&rt->dst, RTAX_MTU) == 0 || dst_metric(&rt->dst, RTAX_MTU) > rt->dst.dev->mtu) dst_metric_set(&rt->dst, RTAX_MTU, rt->dst.dev->mtu); - mss = dn_mss_from_pmtu(dev, dst_mtu(&rt->dst)); - if (dst_metric(&rt->dst, RTAX_ADVMSS) == 0 || - dst_metric(&rt->dst, RTAX_ADVMSS) > mss) - dst_metric_set(&rt->dst, RTAX_ADVMSS, mss); + metric = dst_metric_raw(&rt->dst, RTAX_ADVMSS); + if (metric) { + unsigned int mss = dn_mss_from_pmtu(dev, dst_mtu(&rt->dst)); + if (metric > mss) + dst_metric_set(&rt->dst, RTAX_ADVMSS, mss); + } return 0; } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 770f70427f0b..80997333db0c 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ 
-139,6 +139,7 @@ static unsigned long expires_ljiffies; */ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie); +static unsigned int ipv4_default_advmss(const struct dst_entry *dst); static void ipv4_dst_destroy(struct dst_entry *dst); static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst); static void ipv4_link_failure(struct sk_buff *skb); @@ -155,6 +156,7 @@ static struct dst_ops ipv4_dst_ops = { .protocol = cpu_to_be16(ETH_P_IP), .gc = rt_garbage_collect, .check = ipv4_dst_check, + .default_advmss = ipv4_default_advmss, .destroy = ipv4_dst_destroy, .ifdown = ipv4_dst_ifdown, .negative_advice = ipv4_negative_advice, @@ -383,8 +385,7 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v) (__force u32)r->rt_gateway, r->rt_flags, atomic_read(&r->dst.__refcnt), r->dst.__use, 0, (__force u32)r->rt_src, - (dst_metric(&r->dst, RTAX_ADVMSS) ? - (int)dst_metric(&r->dst, RTAX_ADVMSS) + 40 : 0), + dst_metric_advmss(&r->dst) + 40, dst_metric(&r->dst, RTAX_WINDOW), (int)((dst_metric(&r->dst, RTAX_RTT) >> 3) + dst_metric(&r->dst, RTAX_RTTVAR)), @@ -1798,6 +1799,19 @@ static void set_class_tag(struct rtable *rt, u32 tag) } #endif +static unsigned int ipv4_default_advmss(const struct dst_entry *dst) +{ + unsigned int advmss = dst_metric_raw(dst, RTAX_ADVMSS); + + if (advmss == 0) { + advmss = max_t(unsigned int, dst->dev->mtu - 40, + ip_rt_min_advmss); + if (advmss > 65535 - 40) + advmss = 65535 - 40; + } + return advmss; +} + static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag) { struct dst_entry *dst = &rt->dst; @@ -1823,11 +1837,7 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag) if (dst_mtu(dst) > IP_MAX_MTU) dst_metric_set(dst, RTAX_MTU, IP_MAX_MTU); - if (dst_metric(dst, RTAX_ADVMSS) == 0) - dst_metric_set(dst, RTAX_ADVMSS, - max_t(unsigned int, dst->dev->mtu - 40, - ip_rt_min_advmss)); - if (dst_metric(dst, RTAX_ADVMSS) > 65535 - 40) + if (dst_metric_raw(dst, 
RTAX_ADVMSS) > 65535 - 40) dst_metric_set(dst, RTAX_ADVMSS, 65535 - 40); #ifdef CONFIG_NET_CLS_ROUTE diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 4fc3387aa994..f4011027543d 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1436,7 +1436,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, tcp_mtup_init(newsk); tcp_sync_mss(newsk, dst_mtu(dst)); - newtp->advmss = dst_metric(dst, RTAX_ADVMSS); + newtp->advmss = dst_metric_advmss(dst); if (tcp_sk(sk)->rx_opt.user_mss && tcp_sk(sk)->rx_opt.user_mss < newtp->advmss) newtp->advmss = tcp_sk(sk)->rx_opt.user_mss; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 97041f24cd27..2d390669d406 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -119,9 +119,13 @@ static __u16 tcp_advertise_mss(struct sock *sk) struct dst_entry *dst = __sk_dst_get(sk); int mss = tp->advmss; - if (dst && dst_metric(dst, RTAX_ADVMSS) < mss) { - mss = dst_metric(dst, RTAX_ADVMSS); - tp->advmss = mss; + if (dst) { + unsigned int metric = dst_metric_advmss(dst); + + if (metric < mss) { + mss = metric; + tp->advmss = mss; + } } return (__u16)mss; @@ -2422,7 +2426,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, skb_dst_set(skb, dst_clone(dst)); - mss = dst_metric(dst, RTAX_ADVMSS); + mss = dst_metric_advmss(dst); if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss) mss = tp->rx_opt.user_mss; @@ -2556,7 +2560,7 @@ static void tcp_connect_init(struct sock *sk) if (!tp->window_clamp) tp->window_clamp = dst_metric(dst, RTAX_WINDOW); - tp->advmss = dst_metric(dst, RTAX_ADVMSS); + tp->advmss = dst_metric_advmss(dst); if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < tp->advmss) tp->advmss = tp->rx_opt.user_mss; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 98796b0dc2b7..d9cb832be529 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -76,6 +76,7 @@ static struct rt6_info * ip6_rt_copy(struct rt6_info *ort); static struct dst_entry 
*ip6_dst_check(struct dst_entry *dst, u32 cookie); +static unsigned int ip6_default_advmss(const struct dst_entry *dst); static struct dst_entry *ip6_negative_advice(struct dst_entry *); static void ip6_dst_destroy(struct dst_entry *); static void ip6_dst_ifdown(struct dst_entry *, @@ -103,6 +104,7 @@ static struct dst_ops ip6_dst_ops_template = { .gc = ip6_dst_gc, .gc_thresh = 1024, .check = ip6_dst_check, + .default_advmss = ip6_default_advmss, .destroy = ip6_dst_destroy, .ifdown = ip6_dst_ifdown, .negative_advice = ip6_negative_advice, @@ -937,8 +939,12 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu) static int ipv6_get_mtu(struct net_device *dev); -static inline unsigned int ipv6_advmss(struct net *net, unsigned int mtu) +static unsigned int ip6_default_advmss(const struct dst_entry *dst) { + struct net_device *dev = dst->dev; + unsigned int mtu = dst_mtu(dst); + struct net *net = dev_net(dev); + mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr); if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss) @@ -990,7 +996,6 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, atomic_set(&rt->dst.__refcnt, 1); dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255); dst_metric_set(&rt->dst, RTAX_MTU, ipv6_get_mtu(rt->rt6i_dev)); - dst_metric_set(&rt->dst, RTAX_ADVMSS, ipv6_advmss(net, dst_mtu(&rt->dst))); rt->dst.output = ip6_output; #if 0 /* there's no chance to use these for ndisc */ @@ -1312,8 +1317,6 @@ install_route: if (!dst_mtu(&rt->dst)) dst_metric_set(&rt->dst, RTAX_MTU, ipv6_get_mtu(dev)); - if (!dst_metric(&rt->dst, RTAX_ADVMSS)) - dst_metric_set(&rt->dst, RTAX_ADVMSS, ipv6_advmss(net, dst_mtu(&rt->dst))); rt->dst.dev = dev; rt->rt6i_idev = idev; rt->rt6i_table = table; @@ -1540,8 +1543,6 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *src, nrt->rt6i_nexthop = neigh_clone(neigh); /* Reset pmtu, it may be better */ dst_metric_set(&nrt->dst, RTAX_MTU, ipv6_get_mtu(neigh->dev)); - dst_metric_set(&nrt->dst, RTAX_ADVMSS, 
ipv6_advmss(dev_net(neigh->dev), - dst_mtu(&nrt->dst))); if (ip6_ins_rt(nrt)) goto out; @@ -1971,7 +1972,6 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, rt->rt6i_dev = net->loopback_dev; rt->rt6i_idev = idev; dst_metric_set(&rt->dst, RTAX_MTU, ipv6_get_mtu(rt->rt6i_dev)); - dst_metric_set(&rt->dst, RTAX_ADVMSS, ipv6_advmss(net, dst_mtu(&rt->dst))); dst_metric_set(&rt->dst, RTAX_HOPLIMIT, -1); rt->dst.obsolete = -1; @@ -2041,7 +2041,6 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg) { struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg; struct inet6_dev *idev; - struct net *net = dev_net(arg->dev); /* In IPv6 pmtu discovery is not optional, so that RTAX_MTU lock cannot disable it. @@ -2073,7 +2072,6 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg) (dst_mtu(&rt->dst) < arg->mtu && dst_mtu(&rt->dst) == idev->cnf.mtu6))) { dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu); - dst_metric_set(&rt->dst, RTAX_ADVMSS, ipv6_advmss(net, arg->mtu)); } return 0; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index fee076891646..20aa95e37359 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1521,7 +1521,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, tcp_mtup_init(newsk); tcp_sync_mss(newsk, dst_mtu(dst)); - newtp->advmss = dst_metric(dst, RTAX_ADVMSS); + newtp->advmss = dst_metric_advmss(dst); tcp_initialize_rcv_mss(newsk); newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 6e50ccd8c532..36936c8ae961 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2361,6 +2361,11 @@ static int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first, return 1; } +static unsigned int xfrm_default_advmss(const struct dst_entry *dst) +{ + return dst_metric_advmss(dst->path); +} + int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) { struct net *net; @@ 
-2378,6 +2383,8 @@ int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) dst_ops->kmem_cachep = xfrm_dst_cache; if (likely(dst_ops->check == NULL)) dst_ops->check = xfrm_dst_check; + if (likely(dst_ops->default_advmss == NULL)) + dst_ops->default_advmss = xfrm_default_advmss; if (likely(dst_ops->negative_advice == NULL)) dst_ops->negative_advice = xfrm_negative_advice; if (likely(dst_ops->link_failure == NULL)) -- cgit v1.2.3-59-g8ed1b From d33e455337ea2c71d09d7f4367d6ad6dd32b6965 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 14 Dec 2010 13:01:14 -0800 Subject: net: Abstract default MTU metric calculation behind an accessor. Like RTAX_ADVMSS, make the default calculation go through a dst_ops method rather than caching the computation in the routing cache entries. Now dst metrics are pretty much left as-is when new entries are created, thus optimizing metric sharing becomes a real possibility. Signed-off-by: David S. Miller --- include/net/dst.h | 15 ++++++++------- include/net/dst_ops.h | 1 + net/decnet/dn_route.c | 10 ++++++++-- net/ipv4/route.c | 29 ++++++++++++++++++++--------- net/ipv6/route.c | 37 ++++++++++++++++--------------------- net/xfrm/xfrm_policy.c | 7 +++++++ 6 files changed, 60 insertions(+), 39 deletions(-) (limited to 'include/net') diff --git a/include/net/dst.h b/include/net/dst.h index 03a1c3d52d80..93b0310317be 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -113,7 +113,8 @@ static inline u32 dst_metric(const struct dst_entry *dst, const int metric) { WARN_ON_ONCE(metric == RTAX_HOPLIMIT || - metric == RTAX_ADVMSS); + metric == RTAX_ADVMSS || + metric == RTAX_MTU); return dst_metric_raw(dst, metric); } @@ -156,11 +157,11 @@ dst_feature(const struct dst_entry *dst, u32 feature) static inline u32 dst_mtu(const struct dst_entry *dst) { - u32 mtu = dst_metric(dst, RTAX_MTU); - /* - * Alexey put it here, so ask him about it :) - */ - barrier(); + u32 mtu = dst_metric_raw(dst, RTAX_MTU); + + if (!mtu) + mtu = 
dst->ops->default_mtu(dst); + return mtu; } @@ -186,7 +187,7 @@ dst_allfrag(const struct dst_entry *dst) } static inline int -dst_metric_locked(struct dst_entry *dst, int metric) +dst_metric_locked(const struct dst_entry *dst, int metric) { return dst_metric(dst, RTAX_LOCK) & (1<dev, dst_mtu(dst)); } +static unsigned int dn_dst_default_mtu(const struct dst_entry *dst) +{ + return dst->dev->mtu; +} + static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res) { struct dn_fib_info *fi = res->fi; @@ -825,8 +832,7 @@ static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res) rt->dst.neighbour = n; } - if (dst_metric(&rt->dst, RTAX_MTU) == 0 || - dst_metric(&rt->dst, RTAX_MTU) > rt->dst.dev->mtu) + if (dst_metric(&rt->dst, RTAX_MTU) > rt->dst.dev->mtu) dst_metric_set(&rt->dst, RTAX_MTU, rt->dst.dev->mtu); metric = dst_metric_raw(&rt->dst, RTAX_ADVMSS); if (metric) { diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 80997333db0c..ae520963540f 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -140,6 +140,7 @@ static unsigned long expires_ljiffies; static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie); static unsigned int ipv4_default_advmss(const struct dst_entry *dst); +static unsigned int ipv4_default_mtu(const struct dst_entry *dst); static void ipv4_dst_destroy(struct dst_entry *dst); static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst); static void ipv4_link_failure(struct sk_buff *skb); @@ -157,6 +158,7 @@ static struct dst_ops ipv4_dst_ops = { .gc = rt_garbage_collect, .check = ipv4_dst_check, .default_advmss = ipv4_default_advmss, + .default_mtu = ipv4_default_mtu, .destroy = ipv4_dst_destroy, .ifdown = ipv4_dst_ifdown, .negative_advice = ipv4_negative_advice, @@ -1812,6 +1814,23 @@ static unsigned int ipv4_default_advmss(const struct dst_entry *dst) return advmss; } +static unsigned int ipv4_default_mtu(const struct dst_entry *dst) +{ + unsigned int mtu = dst->dev->mtu; + + if 
(unlikely(dst_metric_locked(dst, RTAX_MTU))) { + const struct rtable *rt = (const struct rtable *) dst; + + if (rt->rt_gateway != rt->rt_dst && mtu > 576) + mtu = 576; + } + + if (mtu > IP_MAX_MTU) + mtu = IP_MAX_MTU; + + return mtu; +} + static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag) { struct dst_entry *dst = &rt->dst; @@ -1822,18 +1841,10 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag) FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) rt->rt_gateway = FIB_RES_GW(*res); dst_import_metrics(dst, fi->fib_metrics); - if (fi->fib_mtu == 0) { - dst_metric_set(dst, RTAX_MTU, dst->dev->mtu); - if (dst_metric_locked(dst, RTAX_MTU) && - rt->rt_gateway != rt->rt_dst && - dst->dev->mtu > 576) - dst_metric_set(dst, RTAX_MTU, 576); - } #ifdef CONFIG_NET_CLS_ROUTE dst->tclassid = FIB_RES_NH(*res).nh_tclassid; #endif - } else - dst_metric_set(dst, RTAX_MTU, dst->dev->mtu); + } if (dst_mtu(dst) > IP_MAX_MTU) dst_metric_set(dst, RTAX_MTU, IP_MAX_MTU); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index d9cb832be529..e7efb269a6e9 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -77,6 +77,7 @@ static struct rt6_info * ip6_rt_copy(struct rt6_info *ort); static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie); static unsigned int ip6_default_advmss(const struct dst_entry *dst); +static unsigned int ip6_default_mtu(const struct dst_entry *dst); static struct dst_entry *ip6_negative_advice(struct dst_entry *); static void ip6_dst_destroy(struct dst_entry *); static void ip6_dst_ifdown(struct dst_entry *, @@ -105,6 +106,7 @@ static struct dst_ops ip6_dst_ops_template = { .gc_thresh = 1024, .check = ip6_dst_check, .default_advmss = ip6_default_advmss, + .default_mtu = ip6_default_mtu, .destroy = ip6_dst_destroy, .ifdown = ip6_dst_ifdown, .negative_advice = ip6_negative_advice, @@ -937,8 +939,6 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu) } } -static int ipv6_get_mtu(struct 
net_device *dev); - static unsigned int ip6_default_advmss(const struct dst_entry *dst) { struct net_device *dev = dst->dev; @@ -961,6 +961,20 @@ static unsigned int ip6_default_advmss(const struct dst_entry *dst) return mtu; } +static unsigned int ip6_default_mtu(const struct dst_entry *dst) +{ + unsigned int mtu = IPV6_MIN_MTU; + struct inet6_dev *idev; + + rcu_read_lock(); + idev = __in6_dev_get(dst->dev); + if (idev) + mtu = idev->cnf.mtu6; + rcu_read_unlock(); + + return mtu; +} + static struct dst_entry *icmp6_dst_gc_list; static DEFINE_SPINLOCK(icmp6_dst_lock); @@ -995,7 +1009,6 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, rt->rt6i_nexthop = neigh; atomic_set(&rt->dst.__refcnt, 1); dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255); - dst_metric_set(&rt->dst, RTAX_MTU, ipv6_get_mtu(rt->rt6i_dev)); rt->dst.output = ip6_output; #if 0 /* there's no chance to use these for ndisc */ @@ -1094,19 +1107,6 @@ out: Remove it only when all the things will work! */ -static int ipv6_get_mtu(struct net_device *dev) -{ - int mtu = IPV6_MIN_MTU; - struct inet6_dev *idev; - - rcu_read_lock(); - idev = __in6_dev_get(dev); - if (idev) - mtu = idev->cnf.mtu6; - rcu_read_unlock(); - return mtu; -} - int ip6_dst_hoplimit(struct dst_entry *dst) { int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT); @@ -1315,8 +1315,6 @@ install_route: } } - if (!dst_mtu(&rt->dst)) - dst_metric_set(&rt->dst, RTAX_MTU, ipv6_get_mtu(dev)); rt->dst.dev = dev; rt->rt6i_idev = idev; rt->rt6i_table = table; @@ -1541,8 +1539,6 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *src, ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key); nrt->rt6i_nexthop = neigh_clone(neigh); - /* Reset pmtu, it may be better */ - dst_metric_set(&nrt->dst, RTAX_MTU, ipv6_get_mtu(neigh->dev)); if (ip6_ins_rt(nrt)) goto out; @@ -1971,7 +1967,6 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, rt->dst.output = ip6_output; rt->rt6i_dev = net->loopback_dev; rt->rt6i_idev = idev; 
- dst_metric_set(&rt->dst, RTAX_MTU, ipv6_get_mtu(rt->rt6i_dev)); dst_metric_set(&rt->dst, RTAX_HOPLIMIT, -1); rt->dst.obsolete = -1; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 36936c8ae961..8b3ef404c794 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2366,6 +2366,11 @@ static unsigned int xfrm_default_advmss(const struct dst_entry *dst) return dst_metric_advmss(dst->path); } +static unsigned int xfrm_default_mtu(const struct dst_entry *dst) +{ + return dst_mtu(dst->path); +} + int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) { struct net *net; @@ -2385,6 +2390,8 @@ int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) dst_ops->check = xfrm_dst_check; if (likely(dst_ops->default_advmss == NULL)) dst_ops->default_advmss = xfrm_default_advmss; + if (likely(dst_ops->default_mtu == NULL)) + dst_ops->default_mtu = xfrm_default_mtu; if (likely(dst_ops->negative_advice == NULL)) dst_ops->negative_advice = xfrm_negative_advice; if (likely(dst_ops->link_failure == NULL)) -- cgit v1.2.3-59-g8ed1b From a293911d4fd5e8593dbf478399a77f990d466269 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 14 Dec 2010 17:54:28 +0100 Subject: nl80211: advertise maximum remain-on-channel duration With the upcoming hardware offload implementation, some devices will have a different maximum duration for the remain-on-channel command. Advertise the maximum duration in mac80211, and make mac80211 set it. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 5 +++++ include/net/cfg80211.h | 5 +++++ net/mac80211/main.c | 2 ++ net/wireless/nl80211.c | 7 ++++++- 4 files changed, 18 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index b8fa25d741ba..1cee56b3a79a 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -773,6 +773,9 @@ enum nl80211_commands { * cache, a wiphy attribute. 
* * @NL80211_ATTR_DURATION: Duration of an operation in milliseconds, u32. + * @NL80211_ATTR_MAX_REMAIN_ON_CHANNEL_DURATION: Device attribute that + * specifies the maximum duration that can be requested with the + * remain-on-channel operation, in milliseconds, u32. * * @NL80211_ATTR_COOKIE: Generic 64-bit cookie to identify objects. * @@ -1035,6 +1038,8 @@ enum nl80211_attrs { NL80211_ATTR_KEY_DEFAULT_TYPES, + NL80211_ATTR_MAX_REMAIN_ON_CHANNEL_DURATION, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 22be7c625b70..f45e15f12446 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1474,6 +1474,9 @@ struct ieee80211_txrx_stypes { * * @available_antennas: bitmap of antennas which are available to configure. * antenna configuration commands will be rejected unless this is set. + * + * @max_remain_on_channel_duration: Maximum time a remain-on-channel operation + * may request, if implemented. 
*/ struct wiphy { /* assign these fields before you register the wiphy */ @@ -1511,6 +1514,8 @@ struct wiphy { char fw_version[ETHTOOL_BUSINFO_LEN]; u32 hw_version; + u16 max_remain_on_channel_duration; + u8 max_num_pmkids; u32 available_antennas; diff --git a/net/mac80211/main.c b/net/mac80211/main.c index f7bdb7c78879..d87eb005690f 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -745,6 +745,8 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) } } + local->hw.wiphy->max_remain_on_channel_duration = 5000; + result = wiphy_register(local->hw.wiphy); if (result < 0) goto fail_wiphy_register; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 53f044370cde..594a6ac8b9d2 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -755,6 +755,10 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, nla_nest_end(msg, nl_cmds); + if (dev->ops->remain_on_channel) + NLA_PUT_U32(msg, NL80211_ATTR_MAX_REMAIN_ON_CHANNEL_DURATION, + dev->wiphy.max_remain_on_channel_duration); + /* for now at least assume all drivers have it */ if (dev->ops->mgmt_tx) NLA_PUT_FLAG(msg, NL80211_ATTR_OFFCHANNEL_TX_OK); @@ -4228,7 +4232,8 @@ static int nl80211_remain_on_channel(struct sk_buff *skb, * We should be on that channel for at least one jiffie, * and more than 5 seconds seems excessive. */ - if (!duration || !msecs_to_jiffies(duration) || duration > 5000) + if (!duration || !msecs_to_jiffies(duration) || + duration > rdev->wiphy.max_remain_on_channel_duration) return -EINVAL; if (!rdev->ops->remain_on_channel) -- cgit v1.2.3-59-g8ed1b From bd2ce6e43f65127bc723e7fcc044758cf8113260 Mon Sep 17 00:00:00 2001 From: Sujith Manoharan Date: Wed, 15 Dec 2010 07:47:10 +0530 Subject: mac80211: Add timeout to BA session start API Allow drivers or rate control algorithms to specify BlockAck session timeout when initiating an ADDBA transaction. This is useful in cases where maintaining persistent BA sessions does not incur any overhead. 
The current timeout value of 5000 TUs is retained for all non ath9k/ath9k_htc drivers. Signed-off-by: Sujith Manoharan Reviewed-by: Johannes Berg Signed-off-by: John W. Linville --- drivers/net/wireless/ath/ath9k/htc_drv_txrx.c | 2 +- drivers/net/wireless/ath/ath9k/rc.c | 2 +- drivers/net/wireless/iwlwifi/iwl-agn-rs.c | 2 +- drivers/net/wireless/rtlwifi/rc.c | 2 +- include/net/mac80211.h | 4 +++- net/mac80211/agg-tx.c | 7 +++++-- net/mac80211/debugfs_sta.c | 2 +- net/mac80211/rc80211_minstrel_ht.c | 2 +- net/mac80211/sta_info.h | 2 ++ 9 files changed, 16 insertions(+), 9 deletions(-) (limited to 'include/net') diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c index 31fad82239b3..33f36029fa4f 100644 --- a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c +++ b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c @@ -251,7 +251,7 @@ void ath9k_tx_tasklet(unsigned long data) ista = (struct ath9k_htc_sta *)sta->drv_priv; if (ath9k_htc_check_tx_aggr(priv, ista, tid)) { - ieee80211_start_tx_ba_session(sta, tid); + ieee80211_start_tx_ba_session(sta, tid, 0); spin_lock_bh(&priv->tx_lock); ista->tid_state[tid] = AGGR_PROGRESS; spin_unlock_bh(&priv->tx_lock); diff --git a/drivers/net/wireless/ath/ath9k/rc.c b/drivers/net/wireless/ath/ath9k/rc.c index 2061a755a026..896d12986b1e 100644 --- a/drivers/net/wireless/ath/ath9k/rc.c +++ b/drivers/net/wireless/ath/ath9k/rc.c @@ -1373,7 +1373,7 @@ static void ath_tx_status(void *priv, struct ieee80211_supported_band *sband, an = (struct ath_node *)sta->drv_priv; if(ath_tx_aggr_check(sc, an, tid)) - ieee80211_start_tx_ba_session(sta, tid); + ieee80211_start_tx_ba_session(sta, tid, 0); } } diff --git a/drivers/net/wireless/iwlwifi/iwl-agn-rs.c b/drivers/net/wireless/iwlwifi/iwl-agn-rs.c index ee123482e1d5..5083dba122ca 100644 --- a/drivers/net/wireless/iwlwifi/iwl-agn-rs.c +++ b/drivers/net/wireless/iwlwifi/iwl-agn-rs.c @@ -387,7 +387,7 @@ static int rs_tl_turn_on_agg_for_tid(struct iwl_priv 
*priv, if (load > IWL_AGG_LOAD_THRESHOLD) { IWL_DEBUG_HT(priv, "Starting Tx agg: STA: %pM tid: %d\n", sta->addr, tid); - ret = ieee80211_start_tx_ba_session(sta, tid); + ret = ieee80211_start_tx_ba_session(sta, tid, 5000); if (ret == -EAGAIN) { /* * driver and mac80211 is out of sync diff --git a/drivers/net/wireless/rtlwifi/rc.c b/drivers/net/wireless/rtlwifi/rc.c index 904b8fd01f6d..91634107434a 100644 --- a/drivers/net/wireless/rtlwifi/rc.c +++ b/drivers/net/wireless/rtlwifi/rc.c @@ -169,7 +169,7 @@ static void rtl_tx_status(void *ppriv, tid = qc[0] & 0xf; if (_rtl_tx_aggr_check(rtlpriv, tid)) - ieee80211_start_tx_ba_session(sta, tid); + ieee80211_start_tx_ba_session(sta, tid, 5000); } } } diff --git a/include/net/mac80211.h b/include/net/mac80211.h index e411cf87fb41..69ded1ee49ce 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2435,6 +2435,7 @@ void ieee80211_queue_delayed_work(struct ieee80211_hw *hw, * ieee80211_start_tx_ba_session - Start a tx Block Ack session. * @sta: the station for which to start a BA session * @tid: the TID to BA on. + * @timeout: session timeout value (in TUs) * * Return: success if addBA request was sent, failure otherwise * @@ -2442,7 +2443,8 @@ void ieee80211_queue_delayed_work(struct ieee80211_hw *hw, * the need to start aggregation on a certain RA/TID, the session level * will be managed by the mac80211. */ -int ieee80211_start_tx_ba_session(struct ieee80211_sta *sta, u16 tid); +int ieee80211_start_tx_ba_session(struct ieee80211_sta *sta, u16 tid, + u16 timeout); /** * ieee80211_start_tx_ba_cb_irqsafe - low level driver ready to aggregate. 
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index d4679b265ba8..9cc472c6a6a5 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -342,10 +342,11 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) /* send AddBA request */ ieee80211_send_addba_request(sdata, sta->sta.addr, tid, tid_tx->dialog_token, start_seq_num, - 0x40, 5000); + 0x40, tid_tx->timeout); } -int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid) +int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, + u16 timeout) { struct sta_info *sta = container_of(pubsta, struct sta_info, sta); struct ieee80211_sub_if_data *sdata = sta->sdata; @@ -420,6 +421,8 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid) skb_queue_head_init(&tid_tx->pending); __set_bit(HT_AGG_STATE_WANT_START, &tid_tx->state); + tid_tx->timeout = timeout; + /* Tx timer */ tid_tx->addba_resp_timer.function = sta_addba_resp_timer_expired; tid_tx->addba_resp_timer.data = (unsigned long)&sta->timer_to_tid[tid]; diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index 8bb5af85f469..c04a1396cf8d 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -189,7 +189,7 @@ static ssize_t sta_agg_status_write(struct file *file, const char __user *userbu if (tx) { if (start) - ret = ieee80211_start_tx_ba_session(&sta->sta, tid); + ret = ieee80211_start_tx_ba_session(&sta->sta, tid, 5000); else ret = ieee80211_stop_tx_ba_session(&sta->sta, tid); } else { diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index 4ad7a362fcc1..165a4518bb48 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -374,7 +374,7 @@ minstrel_aggr_check(struct minstrel_priv *mp, struct ieee80211_sta *pubsta, stru if (skb_get_queue_mapping(skb) == IEEE80211_AC_VO) return; - ieee80211_start_tx_ba_session(pubsta, tid); + ieee80211_start_tx_ba_session(pubsta, tid, 
5000); } static void diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index fdca52cf88de..bbdd2a86a94b 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -78,6 +78,7 @@ enum ieee80211_sta_info_flags { * @addba_resp_timer: timer for peer's response to addba request * @pending: pending frames queue -- use sta's spinlock to protect * @dialog_token: dialog token for aggregation session + * @timeout: session timeout value to be filled in ADDBA requests * @state: session state (see above) * @stop_initiator: initiator of a session stop * @tx_stop: TX DelBA frame when stopping @@ -96,6 +97,7 @@ struct tid_ampdu_tx { struct timer_list addba_resp_timer; struct sk_buff_head pending; unsigned long state; + u16 timeout; u8 dialog_token; u8 stop_initiator; bool tx_stop; -- cgit v1.2.3-59-g8ed1b From cf4e594ea7e55555e81647b74a3a8e8b2826a529 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Thu, 16 Dec 2010 00:52:40 +0200 Subject: nl80211: Add notification for dropped Deauth/Disassoc Add a new notification to indicate that a received, unprotected Deauthentication or Disassociation frame was dropped due to management frame protection being in use. This notification is needed to allow user space (e.g., wpa_supplicant) to implement SA Query procedure to recover from association state mismatch between an AP and STA. This is needed to avoid getting stuck in non-working state when MFP (IEEE 802.11w) is used and a protected Deauthentication or Disassociation frame is dropped for any reason. After that, the station would silently discard any unprotected Deauthentication or Disassociation frame that could be indicating that the AP does not have association for the STA (when the Reason Code would be 6 or 7). IEEE Std 802.11w-2009, 11.13 describes this recovery mechanism. Signed-off-by: Jouni Malinen Signed-off-by: John W. 
Linville --- include/linux/nl80211.h | 10 ++++++++++ include/net/cfg80211.h | 26 ++++++++++++++++++++++++++ net/mac80211/rx.c | 22 ++++++++++++++++++++-- net/wireless/mlme.c | 22 ++++++++++++++++++++++ net/wireless/nl80211.c | 16 ++++++++++++++++ net/wireless/nl80211.h | 6 ++++++ 6 files changed, 100 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 1cee56b3a79a..7483a89cee8f 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -399,6 +399,13 @@ * @NL80211_CMD_LEAVE_MESH: Leave the mesh network -- no special arguments, the * network is determined by the network interface. * + * @NL80211_CMD_UNPROT_DEAUTHENTICATE: Unprotected deauthentication frame + * notification. This event is used to indicate that an unprotected + * deauthentication frame was dropped when MFP is in use. + * @NL80211_CMD_UNPROT_DISASSOCIATE: Unprotected disassociation frame + * notification. This event is used to indicate that an unprotected + * disassociation frame was dropped when MFP is in use. 
+ * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -508,6 +515,9 @@ enum nl80211_commands { NL80211_CMD_JOIN_MESH, NL80211_CMD_LEAVE_MESH, + NL80211_CMD_UNPROT_DEAUTHENTICATE, + NL80211_CMD_UNPROT_DISASSOCIATE, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index f45e15f12446..3d1c09b777e8 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2359,6 +2359,32 @@ void cfg80211_send_disassoc(struct net_device *dev, const u8 *buf, size_t len); void __cfg80211_send_disassoc(struct net_device *dev, const u8 *buf, size_t len); +/** + * cfg80211_send_unprot_deauth - notification of unprotected deauthentication + * @dev: network device + * @buf: deauthentication frame (header + body) + * @len: length of the frame data + * + * This function is called whenever a received Deauthentication frame has been + * dropped in station mode because of MFP being used but the Deauthentication + * frame was not protected. This function may sleep. + */ +void cfg80211_send_unprot_deauth(struct net_device *dev, const u8 *buf, + size_t len); + +/** + * cfg80211_send_unprot_disassoc - notification of unprotected disassociation + * @dev: network device + * @buf: disassociation frame (header + body) + * @len: length of the frame data + * + * This function is called whenever a received Disassociation frame has been + * dropped in station mode because of MFP being used but the Disassociation + * frame was not protected. This function may sleep. 
+ */ +void cfg80211_send_unprot_disassoc(struct net_device *dev, const u8 *buf, + size_t len); + /** * cfg80211_michael_mic_failure - notification of Michael MIC failure (TKIP) * @dev: network device diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 052789ef4745..4573ce1e1d15 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1540,12 +1540,30 @@ ieee80211_drop_unencrypted_mgmt(struct ieee80211_rx_data *rx) if (rx->sta && test_sta_flags(rx->sta, WLAN_STA_MFP)) { if (unlikely(!ieee80211_has_protected(fc) && ieee80211_is_unicast_robust_mgmt_frame(rx->skb) && - rx->key)) + rx->key)) { + if (ieee80211_is_deauth(fc)) + cfg80211_send_unprot_deauth(rx->sdata->dev, + rx->skb->data, + rx->skb->len); + else if (ieee80211_is_disassoc(fc)) + cfg80211_send_unprot_disassoc(rx->sdata->dev, + rx->skb->data, + rx->skb->len); return -EACCES; + } /* BIP does not use Protected field, so need to check MMIE */ if (unlikely(ieee80211_is_multicast_robust_mgmt_frame(rx->skb) && - ieee80211_get_mmie_keyidx(rx->skb) < 0)) + ieee80211_get_mmie_keyidx(rx->skb) < 0)) { + if (ieee80211_is_deauth(fc)) + cfg80211_send_unprot_deauth(rx->sdata->dev, + rx->skb->data, + rx->skb->len); + else if (ieee80211_is_disassoc(fc)) + cfg80211_send_unprot_disassoc(rx->sdata->dev, + rx->skb->data, + rx->skb->len); return -EACCES; + } /* * When using MFP, Action frames are not allowed prior to * having configured keys. 
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index d7680f2a4c5b..aa5df8865ff7 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -263,6 +263,28 @@ void cfg80211_send_disassoc(struct net_device *dev, const u8 *buf, size_t len) } EXPORT_SYMBOL(cfg80211_send_disassoc); +void cfg80211_send_unprot_deauth(struct net_device *dev, const u8 *buf, + size_t len) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct wiphy *wiphy = wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + + nl80211_send_unprot_deauth(rdev, dev, buf, len, GFP_ATOMIC); +} +EXPORT_SYMBOL(cfg80211_send_unprot_deauth); + +void cfg80211_send_unprot_disassoc(struct net_device *dev, const u8 *buf, + size_t len) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct wiphy *wiphy = wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + + nl80211_send_unprot_disassoc(rdev, dev, buf, len, GFP_ATOMIC); +} +EXPORT_SYMBOL(cfg80211_send_unprot_disassoc); + static void __cfg80211_auth_remove(struct wireless_dev *wdev, const u8 *addr) { int i; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 594a6ac8b9d2..aefce54d47e2 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -5473,6 +5473,22 @@ void nl80211_send_disassoc(struct cfg80211_registered_device *rdev, NL80211_CMD_DISASSOCIATE, gfp); } +void nl80211_send_unprot_deauth(struct cfg80211_registered_device *rdev, + struct net_device *netdev, const u8 *buf, + size_t len, gfp_t gfp) +{ + nl80211_send_mlme_event(rdev, netdev, buf, len, + NL80211_CMD_UNPROT_DEAUTHENTICATE, gfp); +} + +void nl80211_send_unprot_disassoc(struct cfg80211_registered_device *rdev, + struct net_device *netdev, const u8 *buf, + size_t len, gfp_t gfp) +{ + nl80211_send_mlme_event(rdev, netdev, buf, len, + NL80211_CMD_UNPROT_DISASSOCIATE, gfp); +} + static void nl80211_send_mlme_timeout(struct cfg80211_registered_device *rdev, struct net_device *netdev, int cmd, const u8 *addr, 
gfp_t gfp) diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index 16c2f7190768..e3f7fa886966 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -25,6 +25,12 @@ void nl80211_send_deauth(struct cfg80211_registered_device *rdev, void nl80211_send_disassoc(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *buf, size_t len, gfp_t gfp); +void nl80211_send_unprot_deauth(struct cfg80211_registered_device *rdev, + struct net_device *netdev, + const u8 *buf, size_t len, gfp_t gfp); +void nl80211_send_unprot_disassoc(struct cfg80211_registered_device *rdev, + struct net_device *netdev, + const u8 *buf, size_t len, gfp_t gfp); void nl80211_send_auth_timeout(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *addr, gfp_t gfp); -- cgit v1.2.3-59-g8ed1b From 2784fe915cd25adf23ea28534019308d8a144721 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Wed, 15 Dec 2010 19:24:11 -0800 Subject: cfg80211: fix null pointer dereference with a custom regulatory request Once we moved the core regulatory request to the queue and let the scheduler process it, last_request will have been left NULL until the scheduler decides to process the first request. When this happens and we are loading a driver with a custom regulatory request, like all Atheros drivers, we end up with a NULL pointer dereference. We fix this by checking if the request was a custom one.
BUG: unable to handle kernel NULL pointer dereference at 0000000000000004 IP: [] freq_reg_info_regd.clone.2+0x27/0x130 [cfg80211] PGD 71f91067 PUD 712b2067 PMD 0 Oops: 0000 [#1] PREEMPT SMP last sysfs file: /sys/devices/pci0000:00/0000:00:1d.7/usb2/2-1/firmware/2-1/loading CPU 0 Modules linked in: ath9k_htc(+) ath9k_common ath9k_hw ath Pid: 3094, comm: insmod Tainted: G W 2.6.37-rc5-wl #16 INVALID/28427ZQ RIP: 0010:[] [] freq_reg_info_regd.clone.2+0x27/0x130 [cfg80211] RSP: 0018:ffff88007045db78 EFLAGS: 00010282 RAX: 0000000000000000 RBX: ffffffffa047d9a0 RCX: ffff88007045dbd0 RDX: 0000000000004e20 RSI: 000000000024cde0 RDI: ffff8800700483e0 RBP: ffff88007045db98 R08: ffffffffa02f5b40 R09: 0000000000000001 R10: 000000000000000e R11: 0000000000000001 R12: 0000000000000000 R13: ffff88007004e3b0 R14: 0000000000000000 R15: ffff880070048340 FS: 00007f635a707700(0000) GS:ffff880077400000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 0000000000000004 CR3: 00000000708a9000 CR4: 00000000000006f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process insmod (pid: 3094, threadinfo ffff88007045c000, task ffff8800713e3ec0) Stack: ffffffffa047d9a0 0000000000000000 ffff88007004e3b0 0000000000000000 ffff88007045dc08 ffffffffa016e147 000000007045dc08 0000000000000002 ffff8800700483e0 ffffffffa02f5b40 ffff88007045dbd8 0000000000000000 Call Trace: [] wiphy_apply_custom_regulatory+0x137/0x1d0 [cfg80211] [] ? ath9k_reg_notifier+0x0/0x50 [ath9k_htc] [] ath_regd_init+0x347/0x430 [ath] [] ath9k_htc_probe_device+0x6c5/0x960 [ath9k_htc] [] ath9k_htc_hw_init+0xc/0x30 [ath9k_htc] [] ath9k_hif_usb_probe+0x216/0x3b0 [ath9k_htc] [] usb_probe_interface+0x10c/0x210 [usbcore] [] driver_probe_device+0x96/0x1c0 [] __driver_attach+0xa3/0xb0 [] ? 
__driver_attach+0x0/0xb0 [] bus_for_each_dev+0x5e/0x90 [] driver_attach+0x19/0x20 [] bus_add_driver+0x168/0x320 [] driver_register+0x71/0x140 [] ? __raw_spin_lock_init+0x38/0x70 [] usb_register_driver+0xdc/0x190 [usbcore] [] ? ath9k_htc_init+0x0/0x4f [ath9k_htc] [] ath9k_hif_usb_init+0x1e/0x20 [ath9k_htc] [] ath9k_htc_init+0x2b/0x4f [ath9k_htc] [] do_one_initcall+0x3f/0x180 [] sys_init_module+0xbb/0x200 [] system_call_fastpath+0x16/0x1b Code: RIP [] freq_reg_info_regd.clone.2+0x27/0x130 [cfg80211] RSP CR2: 0000000000000004 ---[ end trace 79e4193601c8b713 ]--- Reported-by: Sujith Manoharan Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- include/net/cfg80211.h | 4 +++- net/wireless/reg.c | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 3d1c09b777e8..6dc665a727c2 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1419,7 +1419,9 @@ struct ieee80211_txrx_stypes { /** * struct wiphy - wireless hardware description - * @reg_notifier: the driver's regulatory notification callback + * @reg_notifier: the driver's regulatory notification callback, + * note that if your driver uses wiphy_apply_custom_regulatory() + * the reg_notifier's request can be passed as NULL * @regd: the driver's regulatory domain, if one was requested via * the regulatory_hint() API. 
This can be used by the driver * on the reg_notifier() if it chooses to ignore future diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 5ed615f94e0c..99d41831d76e 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -661,7 +661,8 @@ static int freq_reg_info_regd(struct wiphy *wiphy, * Follow the driver's regulatory domain, if present, unless a country * IE has been processed or a user wants to help complaince further */ - if (last_request->initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE && + if (!custom_regd && + last_request->initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE && last_request->initiator != NL80211_REGDOM_SET_BY_USER && wiphy->regd) regd = wiphy->regd; -- cgit v1.2.3-59-g8ed1b From 443457242beb6716b43db4d62fe148eab5515505 Mon Sep 17 00:00:00 2001 From: Octavian Purdila Date: Mon, 13 Dec 2010 12:44:07 +0000 Subject: net: factorize sync-rcu call in unregister_netdevice_many Add dev_close_many and dev_deactivate_many to factorize another sync-rcu operation on the netdevice unregister path. $ modprobe dummy numdummies=10000 $ ip link set dev dummy* up $ time rmmod dummy Without the patch With the patch real 0m 24.63s real 0m 5.15s user 0m 0.00s user 0m 0.00s sys 0m 6.05s sys 0m 5.14s Signed-off-by: Octavian Purdila Signed-off-by: David S. 
Miller --- include/net/sch_generic.h | 1 + net/core/dev.c | 118 +++++++++++++++++++++++++++++----------------- net/sched/sch_generic.c | 29 +++++++++--- 3 files changed, 99 insertions(+), 49 deletions(-) (limited to 'include/net') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index ea1f8a83160d..786cc396cb4a 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -321,6 +321,7 @@ extern void dev_init_scheduler(struct net_device *dev); extern void dev_shutdown(struct net_device *dev); extern void dev_activate(struct net_device *dev); extern void dev_deactivate(struct net_device *dev); +extern void dev_deactivate_many(struct list_head *head); extern struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue, struct Qdisc *qdisc); extern void qdisc_reset(struct Qdisc *qdisc); diff --git a/net/core/dev.c b/net/core/dev.c index 7ac26d2b9722..794b20de5d44 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1222,52 +1222,90 @@ int dev_open(struct net_device *dev) } EXPORT_SYMBOL(dev_open); -static int __dev_close(struct net_device *dev) +static int __dev_close_many(struct list_head *head) { - const struct net_device_ops *ops = dev->netdev_ops; + struct net_device *dev; ASSERT_RTNL(); might_sleep(); - /* - * Tell people we are going down, so that they can - * prepare to death, when device is still operating. - */ - call_netdevice_notifiers(NETDEV_GOING_DOWN, dev); + list_for_each_entry(dev, head, unreg_list) { + /* + * Tell people we are going down, so that they can + * prepare to death, when device is still operating. + */ + call_netdevice_notifiers(NETDEV_GOING_DOWN, dev); - clear_bit(__LINK_STATE_START, &dev->state); + clear_bit(__LINK_STATE_START, &dev->state); - /* Synchronize to scheduled poll. We cannot touch poll list, - * it can be even on different cpu. So just clear netif_running(). - * - * dev->stop() will invoke napi_disable() on all of it's - * napi_struct instances on this device. 
- */ - smp_mb__after_clear_bit(); /* Commit netif_running(). */ + /* Synchronize to scheduled poll. We cannot touch poll list, it + * can be even on different cpu. So just clear netif_running(). + * + * dev->stop() will invoke napi_disable() on all of it's + * napi_struct instances on this device. + */ + smp_mb__after_clear_bit(); /* Commit netif_running(). */ + } - dev_deactivate(dev); + dev_deactivate_many(head); - /* - * Call the device specific close. This cannot fail. - * Only if device is UP - * - * We allow it to be called even after a DETACH hot-plug - * event. - */ - if (ops->ndo_stop) - ops->ndo_stop(dev); + list_for_each_entry(dev, head, unreg_list) { + const struct net_device_ops *ops = dev->netdev_ops; - /* - * Device is now down. - */ + /* + * Call the device specific close. This cannot fail. + * Only if device is UP + * + * We allow it to be called even after a DETACH hot-plug + * event. + */ + if (ops->ndo_stop) + ops->ndo_stop(dev); + + /* + * Device is now down. + */ + + dev->flags &= ~IFF_UP; + + /* + * Shutdown NET_DMA + */ + net_dmaengine_put(); + } - dev->flags &= ~IFF_UP; + return 0; +} + +static int __dev_close(struct net_device *dev) +{ + LIST_HEAD(single); + + list_add(&dev->unreg_list, &single); + return __dev_close_many(&single); +} + +int dev_close_many(struct list_head *head) +{ + struct net_device *dev, *tmp; + LIST_HEAD(tmp_list); + + list_for_each_entry_safe(dev, tmp, head, unreg_list) + if (!(dev->flags & IFF_UP)) + list_move(&dev->unreg_list, &tmp_list); + + __dev_close_many(head); /* - * Shutdown NET_DMA + * Tell people we are down */ - net_dmaengine_put(); + list_for_each_entry(dev, head, unreg_list) { + rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING); + call_netdevice_notifiers(NETDEV_DOWN, dev); + } + /* rollback_registered_many needs the complete original list */ + list_splice(&tmp_list, head); return 0; } @@ -1282,16 +1320,10 @@ static int __dev_close(struct net_device *dev) */ int dev_close(struct net_device *dev) { - if 
(!(dev->flags & IFF_UP)) - return 0; - - __dev_close(dev); + LIST_HEAD(single); - /* - * Tell people we are down - */ - rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING); - call_netdevice_notifiers(NETDEV_DOWN, dev); + list_add(&dev->unreg_list, &single); + dev_close_many(&single); return 0; } @@ -4963,10 +4995,12 @@ static void rollback_registered_many(struct list_head *head) } BUG_ON(dev->reg_state != NETREG_REGISTERED); + } - /* If device is running, close it first. */ - dev_close(dev); + /* If device is running, close it first. */ + dev_close_many(head); + list_for_each_entry(dev, head, unreg_list) { /* And unlink it from device chain. */ unlist_netdevice(dev); diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 0918834ee4a1..34dc598440a2 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -810,20 +810,35 @@ static bool some_qdisc_is_busy(struct net_device *dev) return false; } -void dev_deactivate(struct net_device *dev) +void dev_deactivate_many(struct list_head *head) { - netdev_for_each_tx_queue(dev, dev_deactivate_queue, &noop_qdisc); - if (dev_ingress_queue(dev)) - dev_deactivate_queue(dev, dev_ingress_queue(dev), &noop_qdisc); + struct net_device *dev; - dev_watchdog_down(dev); + list_for_each_entry(dev, head, unreg_list) { + netdev_for_each_tx_queue(dev, dev_deactivate_queue, + &noop_qdisc); + if (dev_ingress_queue(dev)) + dev_deactivate_queue(dev, dev_ingress_queue(dev), + &noop_qdisc); + + dev_watchdog_down(dev); + } /* Wait for outstanding qdisc-less dev_queue_xmit calls. */ synchronize_rcu(); /* Wait for outstanding qdisc_run calls. 
*/ - while (some_qdisc_is_busy(dev)) - yield(); + list_for_each_entry(dev, head, unreg_list) + while (some_qdisc_is_busy(dev)) + yield(); +} + +void dev_deactivate(struct net_device *dev) +{ + LIST_HEAD(single); + + list_add(&dev->unreg_list, &single); + dev_deactivate_many(&single); } static void dev_init_scheduler_queue(struct net_device *dev, -- cgit v1.2.3-59-g8ed1b From bc2ce894e113ed95b92541134b002fdc641e8080 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 16 Dec 2010 14:08:34 -0800 Subject: tcp: relax tcp_paws_check() Some windows versions have wrong RFC1323 implementations, with SYN and SYNACKS messages containing zero tcp timestamps. We relaxed in commit fc1ad92dfc4e363 the passive connection case (Windows connects to a linux machine), but the reverse case (linux connects to a Windows machine) has an analogous problem when tsvals from windows machine are 'negative' (high order bit set) : PAWS triggers and we drop incoming messages. Fix this by making zero ts_recent value special, allowing frame to be processed. Based on a report and initial patch from Dmitriy Balakin Bugzilla reference : https://bugzilla.kernel.org/show_bug.cgi?id=24842 Reported-by: dmitriy.balakin@nicneiron.ru Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index 3f227baee4be..2ab6c9c1c53a 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1038,7 +1038,13 @@ static inline int tcp_paws_check(const struct tcp_options_received *rx_opt, return 1; if (unlikely(get_seconds() >= rx_opt->ts_recent_stamp + TCP_PAWS_24DAYS)) return 1; - + /* + * Some OSes send SYN and SYNACK messages with tsval=0 tsecr=0, + * then following tcp messages have valid values. Ignore 0 value, + * or else 'negative' tsval might forbid us to accept their packets.
+ */ + if (!rx_opt->ts_recent) + return 1; return 0; } -- cgit v1.2.3-59-g8ed1b From fcbdf09d9652c8919dcf47072e3ae7dcb4eb98ac Mon Sep 17 00:00:00 2001 From: Octavian Purdila Date: Thu, 16 Dec 2010 14:26:56 -0800 Subject: net: fix nulls list corruptions in sk_prot_alloc Special care is taken inside sk_prot_alloc to avoid overwriting skc_node/skc_nulls_node. We should also avoid overwriting skc_bind_node/skc_portaddr_node. The patch fixes the following crash: BUG: unable to handle kernel paging request at fffffffffffffff0 IP: [] udp4_lib_lookup2+0xad/0x370 [] __udp4_lib_lookup+0x282/0x360 [] __udp4_lib_rcv+0x31e/0x700 [] ? ip_local_deliver_finish+0x65/0x190 [] ? ip_local_deliver+0x88/0xa0 [] udp_rcv+0x15/0x20 [] ip_local_deliver_finish+0x65/0x190 [] ip_local_deliver+0x88/0xa0 [] ip_rcv_finish+0x32d/0x6f0 [] ? netif_receive_skb+0x99c/0x11c0 [] ip_rcv+0x2bb/0x350 [] netif_receive_skb+0x99c/0x11c0 Signed-off-by: Leonard Crestez Signed-off-by: Octavian Purdila Acked-by: Eric Dumazet Signed-off-by: David S.
Miller --- include/net/sock.h | 3 +++ net/core/sock.c | 47 +++++++++++++++++++++++++++++++++++------------ net/ipv4/udp.c | 1 + net/ipv4/udplite.c | 1 + net/ipv6/udp.c | 1 + net/ipv6/udplite.c | 1 + 6 files changed, 42 insertions(+), 12 deletions(-) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index 659d968d95c5..7d3f7ce239b5 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -754,6 +754,7 @@ struct proto { void (*unhash)(struct sock *sk); void (*rehash)(struct sock *sk); int (*get_port)(struct sock *sk, unsigned short snum); + void (*clear_sk)(struct sock *sk, int size); /* Keeping track of sockets in use */ #ifdef CONFIG_PROC_FS @@ -852,6 +853,8 @@ static inline void __sk_prot_rehash(struct sock *sk) sk->sk_prot->hash(sk); } +void sk_prot_clear_portaddr_nulls(struct sock *sk, int size); + /* About 10 seconds */ #define SOCK_DESTROY_TIME (10*HZ) diff --git a/net/core/sock.c b/net/core/sock.c index fb6080111461..e5af8d5d5b50 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1009,6 +1009,36 @@ static void sock_copy(struct sock *nsk, const struct sock *osk) #endif } +/* + * caches using SLAB_DESTROY_BY_RCU should let .next pointer from nulls nodes + * un-modified. Special care is taken when initializing object to zero. 
+ */ +static inline void sk_prot_clear_nulls(struct sock *sk, int size) +{ + if (offsetof(struct sock, sk_node.next) != 0) + memset(sk, 0, offsetof(struct sock, sk_node.next)); + memset(&sk->sk_node.pprev, 0, + size - offsetof(struct sock, sk_node.pprev)); +} + +void sk_prot_clear_portaddr_nulls(struct sock *sk, int size) +{ + unsigned long nulls1, nulls2; + + nulls1 = offsetof(struct sock, __sk_common.skc_node.next); + nulls2 = offsetof(struct sock, __sk_common.skc_portaddr_node.next); + if (nulls1 > nulls2) + swap(nulls1, nulls2); + + if (nulls1 != 0) + memset((char *)sk, 0, nulls1); + memset((char *)sk + nulls1 + sizeof(void *), 0, + nulls2 - nulls1 - sizeof(void *)); + memset((char *)sk + nulls2 + sizeof(void *), 0, + size - nulls2 - sizeof(void *)); +} +EXPORT_SYMBOL(sk_prot_clear_portaddr_nulls); + static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority, int family) { @@ -1021,19 +1051,12 @@ static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority, if (!sk) return sk; if (priority & __GFP_ZERO) { - /* - * caches using SLAB_DESTROY_BY_RCU should let - * sk_node.next un-modified. Special care is taken - * when initializing object to zero. 
- */ - if (offsetof(struct sock, sk_node.next) != 0) - memset(sk, 0, offsetof(struct sock, sk_node.next)); - memset(&sk->sk_node.pprev, 0, - prot->obj_size - offsetof(struct sock, - sk_node.pprev)); + if (prot->clear_sk) + prot->clear_sk(sk, prot->obj_size); + else + sk_prot_clear_nulls(sk, prot->obj_size); } - } - else + } else sk = kmalloc(prot->obj_size, priority); if (sk != NULL) { diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 5e0a3a582a59..2d3ded4d0786 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1899,6 +1899,7 @@ struct proto udp_prot = { .compat_setsockopt = compat_udp_setsockopt, .compat_getsockopt = compat_udp_getsockopt, #endif + .clear_sk = sk_prot_clear_portaddr_nulls, }; EXPORT_SYMBOL(udp_prot); diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index ab76aa928fa9..aee9963f7f5a 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c @@ -57,6 +57,7 @@ struct proto udplite_prot = { .compat_setsockopt = compat_udp_setsockopt, .compat_getsockopt = compat_udp_getsockopt, #endif + .clear_sk = sk_prot_clear_portaddr_nulls, }; EXPORT_SYMBOL(udplite_prot); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 91def93bec85..cd6cb7c3e563 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1477,6 +1477,7 @@ struct proto udpv6_prot = { .compat_setsockopt = compat_udpv6_setsockopt, .compat_getsockopt = compat_udpv6_getsockopt, #endif + .clear_sk = sk_prot_clear_portaddr_nulls, }; static struct inet_protosw udpv6_protosw = { diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index 5f48fadc27f7..986c4de5292e 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -55,6 +55,7 @@ struct proto udplitev6_prot = { .compat_setsockopt = compat_udpv6_setsockopt, .compat_getsockopt = compat_udpv6_getsockopt, #endif + .clear_sk = sk_prot_clear_portaddr_nulls, }; static struct inet_protosw udplite6_protosw = { -- cgit v1.2.3-59-g8ed1b From ad0081e43af6de3fecf308b0d098f9611835766b Mon Sep 17 00:00:00 2001 From: David Stevens Date: Fri, 17 Dec 2010 11:42:42 
+0000 Subject: ipv6: Fragment locally generated tunnel-mode IPSec6 packets as needed. This patch modifies IPsec6 to fragment IPv6 packets that are locally generated as needed. This version of the patch only fragments in tunnel mode, so that fragment headers will not be obscured by ESP in transport mode. Signed-off-by: David L Stevens Acked-by: Herbert Xu Signed-off-by: David S. Miller --- include/net/ip6_route.h | 10 ++++++++++ net/ipv6/ip6_output.c | 12 ++---------- net/ipv6/xfrm6_output.c | 16 +++++++++++++++- 3 files changed, 27 insertions(+), 11 deletions(-) (limited to 'include/net') diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 278312c95f96..2ab926860cd8 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -164,5 +164,15 @@ static inline int ipv6_unicast_destination(struct sk_buff *skb) return rt->rt6i_flags & RTF_LOCAL; } +int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)); + +static inline int ip6_skb_dst_mtu(struct sk_buff *skb) +{ + struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL; + + return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ? + skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb)); +} + #endif #endif diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 99157b4cd56e..94b5bf132b2e 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -56,7 +56,7 @@ #include #include -static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)); +int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)); int __ip6_local_out(struct sk_buff *skb) { @@ -145,14 +145,6 @@ static int ip6_finish_output2(struct sk_buff *skb) return -EINVAL; } -static inline int ip6_skb_dst_mtu(struct sk_buff *skb) -{ - struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL; - - return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ? 
- skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb)); -} - static int ip6_finish_output(struct sk_buff *skb) { if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) || @@ -601,7 +593,7 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) return offset; } -static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) +int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) { struct sk_buff *frag; struct rt6_info *rt = (struct rt6_info*)skb_dst(skb); diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 6434bd5ce088..8e688b3de9ab 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -17,6 +17,7 @@ #include #include #include +#include #include int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb, @@ -88,8 +89,21 @@ static int xfrm6_output_finish(struct sk_buff *skb) return xfrm_output(skb); } +static int __xfrm6_output(struct sk_buff *skb) +{ + struct dst_entry *dst = skb_dst(skb); + struct xfrm_state *x = dst->xfrm; + + if ((x && x->props.mode == XFRM_MODE_TUNNEL) && + ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) || + dst_allfrag(skb_dst(skb)))) { + return ip6_fragment(skb, xfrm6_output_finish); + } + return xfrm6_output_finish(skb); +} + int xfrm6_output(struct sk_buff *skb) { return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, - skb_dst(skb)->dev, xfrm6_output_finish); + skb_dst(skb)->dev, __xfrm6_output); } -- cgit v1.2.3-59-g8ed1b From 4c306a9291a077879fc3e933326caac3bc319caa Mon Sep 17 00:00:00 2001 From: Shan Wei Date: Sun, 19 Dec 2010 21:59:35 -0800 Subject: net: kill unused macros These macros never be used, so remove them. Signed-off-by: Shan Wei Signed-off-by: David S. 
Miller --- include/net/inet_connection_sock.h | 1 - include/net/tcp.h | 2 -- net/atm/mpc.c | 2 -- net/core/neighbour.c | 1 - net/core/netpoll.c | 1 - net/netlabel/netlabel_cipso_v4.h | 1 - net/netlabel/netlabel_mgmt.h | 1 - net/netlabel/netlabel_unlabeled.h | 1 - 8 files changed, 10 deletions(-) (limited to 'include/net') diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 6c93a56cc958..6ac4e3b5007f 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -132,7 +132,6 @@ struct inet_connection_sock { #define ICSK_TIME_RETRANS 1 /* Retransmit timer */ #define ICSK_TIME_DACK 2 /* Delayed ack timer */ #define ICSK_TIME_PROBE0 3 /* Zero window probe timer */ -#define ICSK_TIME_KEEPOPEN 4 /* Keepalive timer */ static inline struct inet_connection_sock *inet_csk(const struct sock *sk) { diff --git a/include/net/tcp.h b/include/net/tcp.h index 2ab6c9c1c53a..b4480300cadf 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1158,8 +1158,6 @@ struct tcp_md5sig_pool { union tcp_md5sum_block md5_blk; }; -#define TCP_MD5SIG_MAXKEYS (~(u32)0) /* really?! */ - /* - functions */ extern int tcp_v4_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key, struct sock *sk, struct request_sock *req, diff --git a/net/atm/mpc.c b/net/atm/mpc.c index 74bcc662c3dd..644cdf071642 100644 --- a/net/atm/mpc.c +++ b/net/atm/mpc.c @@ -64,8 +64,6 @@ do { if (0) printk(KERN_CONT format, ##args); } while (0) #endif -#define MPOA_TAG_LEN 4 - /* mpc_daemon -> kernel */ static void MPOA_trigger_rcvd(struct k_message *msg, struct mpoa_client *mpc); static void MPOA_res_reply_rcvd(struct k_message *msg, struct mpoa_client *mpc); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 8cc8f9a79db9..60a902913429 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -41,7 +41,6 @@ #define NEIGH_PRINTK(x...) printk(x) #define NEIGH_NOPRINTK(x...) 
do { ; } while(0) -#define NEIGH_PRINTK0 NEIGH_PRINTK #define NEIGH_PRINTK1 NEIGH_NOPRINTK #define NEIGH_PRINTK2 NEIGH_NOPRINTK diff --git a/net/core/netpoll.c b/net/core/netpoll.c index ee38acb6d463..72d9b50109fc 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -35,7 +35,6 @@ #define MAX_UDP_CHUNK 1460 #define MAX_SKBS 32 -#define MAX_QUEUE_DEPTH (MAX_SKBS / 2) static struct sk_buff_head skb_pool; diff --git a/net/netlabel/netlabel_cipso_v4.h b/net/netlabel/netlabel_cipso_v4.h index c8a4079261f0..af7f3355103e 100644 --- a/net/netlabel/netlabel_cipso_v4.h +++ b/net/netlabel/netlabel_cipso_v4.h @@ -107,7 +107,6 @@ enum { NLBL_CIPSOV4_C_LISTALL, __NLBL_CIPSOV4_C_MAX, }; -#define NLBL_CIPSOV4_C_MAX (__NLBL_CIPSOV4_C_MAX - 1) /* NetLabel CIPSOv4 attributes */ enum { diff --git a/net/netlabel/netlabel_mgmt.h b/net/netlabel/netlabel_mgmt.h index 05d96431f819..0a25838bcf45 100644 --- a/net/netlabel/netlabel_mgmt.h +++ b/net/netlabel/netlabel_mgmt.h @@ -173,7 +173,6 @@ enum { NLBL_MGMT_C_VERSION, __NLBL_MGMT_C_MAX, }; -#define NLBL_MGMT_C_MAX (__NLBL_MGMT_C_MAX - 1) /* NetLabel Management attributes */ enum { diff --git a/net/netlabel/netlabel_unlabeled.h b/net/netlabel/netlabel_unlabeled.h index 7aba63595137..0bc8dc3f9e3c 100644 --- a/net/netlabel/netlabel_unlabeled.h +++ b/net/netlabel/netlabel_unlabeled.h @@ -180,7 +180,6 @@ enum { NLBL_UNLABEL_C_STATICLISTDEF, __NLBL_UNLABEL_C_MAX, }; -#define NLBL_UNLABEL_C_MAX (__NLBL_UNLABEL_C_MAX - 1) /* NetLabel Unlabeled attributes */ enum { -- cgit v1.2.3-59-g8ed1b From 173021072e86a0a5b3d2271347493a3e0d5f68e8 Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Mon, 20 Dec 2010 04:35:30 +0000 Subject: net_sched: always clone skbs Pawel reported a panic related to handling shared skbs in ixgbe incorrectly. So we need to revert my previous patch to work around this bug. Instead of reverting the patch completely, I just revert the essential lines, so we can add the previous optimization back more easily in future. 
commit 3511c9132f8b1e1b5634e41a3331c44b0c13be70 Author: Changli Gao Date: Sat Oct 16 13:04:08 2010 +0000 net_sched: remove the unused parameter of qdisc_create_dflt() Reported-by: Pawel Staszewski Signed-off-by: Changli Gao Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sch_generic.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index ea1f8a83160d..79f34e2b752f 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -610,11 +610,7 @@ static inline struct sk_buff *skb_act_clone(struct sk_buff *skb, gfp_t gfp_mask, { struct sk_buff *n; - if ((action == TC_ACT_STOLEN || action == TC_ACT_QUEUED) && - !skb_shared(skb)) - n = skb_get(skb); - else - n = skb_clone(skb, gfp_mask); + n = skb_clone(skb, gfp_mask); if (n) { n->tc_verd = SET_TC_VERD(n->tc_verd, 0); -- cgit v1.2.3-59-g8ed1b From 6561a3b12d62ed5317e6ac32182d87a03f62c8dc Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 19 Dec 2010 21:11:20 -0800 Subject: ipv4: Flush per-ns routing cache more sanely. Flush the routing cache only of entries that match the network namespace in which the purge event occurred. Signed-off-by: David S. 
Miller Acked-by: Eric Dumazet --- include/net/route.h | 2 +- net/ipv4/fib_frontend.c | 6 ++++- net/ipv4/route.c | 64 +++++++++++++++++++------------------------------ 3 files changed, 30 insertions(+), 42 deletions(-) (limited to 'include/net') diff --git a/include/net/route.h b/include/net/route.h index 27002362944a..93e10c453f6b 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -114,7 +114,7 @@ extern int ip_rt_init(void); extern void ip_rt_redirect(__be32 old_gw, __be32 dst, __be32 new_gw, __be32 src, struct net_device *dev); extern void rt_cache_flush(struct net *net, int how); -extern void rt_cache_flush_batch(void); +extern void rt_cache_flush_batch(struct net *net); extern int __ip_route_output_key(struct net *, struct rtable **, const struct flowi *flp); extern int ip_route_output_key(struct net *, struct rtable **, struct flowi *flp); extern int ip_route_output_flow(struct net *, struct rtable **rp, struct flowi *flp, struct sock *sk, int flags); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index d3a1112b9d9c..9f8bb68911e4 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -987,7 +987,11 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo rt_cache_flush(dev_net(dev), 0); break; case NETDEV_UNREGISTER_BATCH: - rt_cache_flush_batch(); + /* The batch unregister is only called on the first + * device in the list of devices being unregistered. + * Therefore we should not pass dev_net(dev) in here. + */ + rt_cache_flush_batch(NULL); break; } return NOTIFY_DONE; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index ae520963540f..d8b4f4d0d66e 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -717,13 +717,15 @@ static inline int rt_is_expired(struct rtable *rth) * Can be called by a softirq or a process. 
* In the later case, we want to be reschedule if necessary */ -static void rt_do_flush(int process_context) +static void rt_do_flush(struct net *net, int process_context) { unsigned int i; struct rtable *rth, *next; - struct rtable * tail; for (i = 0; i <= rt_hash_mask; i++) { + struct rtable __rcu **pprev; + struct rtable *list; + if (process_context && need_resched()) cond_resched(); rth = rcu_dereference_raw(rt_hash_table[i].chain); @@ -731,50 +733,32 @@ static void rt_do_flush(int process_context) continue; spin_lock_bh(rt_hash_lock_addr(i)); -#ifdef CONFIG_NET_NS - { - struct rtable __rcu **prev; - struct rtable *p; - rth = rcu_dereference_protected(rt_hash_table[i].chain, + list = NULL; + pprev = &rt_hash_table[i].chain; + rth = rcu_dereference_protected(*pprev, lockdep_is_held(rt_hash_lock_addr(i))); - /* defer releasing the head of the list after spin_unlock */ - for (tail = rth; tail; - tail = rcu_dereference_protected(tail->dst.rt_next, - lockdep_is_held(rt_hash_lock_addr(i)))) - if (!rt_is_expired(tail)) - break; - if (rth != tail) - rt_hash_table[i].chain = tail; - - /* call rt_free on entries after the tail requiring flush */ - prev = &rt_hash_table[i].chain; - for (p = rcu_dereference_protected(*prev, + while (rth) { + next = rcu_dereference_protected(rth->dst.rt_next, lockdep_is_held(rt_hash_lock_addr(i))); - p != NULL; - p = next) { - next = rcu_dereference_protected(p->dst.rt_next, - lockdep_is_held(rt_hash_lock_addr(i))); - if (!rt_is_expired(p)) { - prev = &p->dst.rt_next; + + if (!net || + net_eq(dev_net(rth->dst.dev), net)) { + rcu_assign_pointer(*pprev, next); + rcu_assign_pointer(rth->dst.rt_next, list); + list = rth; } else { - *prev = next; - rt_free(p); + pprev = &rth->dst.rt_next; } + rth = next; } - } -#else - rth = rcu_dereference_protected(rt_hash_table[i].chain, - lockdep_is_held(rt_hash_lock_addr(i))); - rcu_assign_pointer(rt_hash_table[i].chain, NULL); - tail = NULL; -#endif + spin_unlock_bh(rt_hash_lock_addr(i)); - for (; rth != 
tail; rth = next) { - next = rcu_dereference_protected(rth->dst.rt_next, 1); - rt_free(rth); + for (; list; list = next) { + next = rcu_dereference_protected(list->dst.rt_next, 1); + rt_free(list); } } } @@ -922,13 +906,13 @@ void rt_cache_flush(struct net *net, int delay) { rt_cache_invalidate(net); if (delay >= 0) - rt_do_flush(!in_softirq()); + rt_do_flush(net, !in_softirq()); } /* Flush previous cache invalidated entries from the cache */ -void rt_cache_flush_batch(void) +void rt_cache_flush_batch(struct net *net) { - rt_do_flush(!in_softirq()); + rt_do_flush(net, !in_softirq()); } static void rt_emergency_hash_rebuild(struct net *net) -- cgit v1.2.3-59-g8ed1b From 9f333281a7da4c3a59bccc0cb53f7590eb850d93 Mon Sep 17 00:00:00 2001 From: Johannes Stezenbach Date: Tue, 30 Nov 2010 16:49:23 +0100 Subject: mac80211/rt2x00: add ieee80211_tx_status_ni() All rt2x00 drivers except rt2800pci call ieee80211_tx_status() from a workqueue, which causes "NOHZ: local_softirq_pending 08" messages. To fix it, add ieee80211_tx_status_ni() similar to ieee80211_rx_ni() which can be called from process context, and call it from rt2x00lib_txdone(). For the rt2800pci special case a driver flag is introduced. https://bugzilla.kernel.org/show_bug.cgi?id=24892 Signed-off-by: Johannes Stezenbach Signed-off-by: John W. 
Linville --- drivers/net/wireless/rt2x00/rt2800pci.c | 1 + drivers/net/wireless/rt2x00/rt2x00.h | 1 + drivers/net/wireless/rt2x00/rt2x00dev.c | 9 ++++++--- include/net/mac80211.h | 28 ++++++++++++++++++++++++---- 4 files changed, 32 insertions(+), 7 deletions(-) (limited to 'include/net') diff --git a/drivers/net/wireless/rt2x00/rt2800pci.c b/drivers/net/wireless/rt2x00/rt2800pci.c index b26739535986..09a67905c230 100644 --- a/drivers/net/wireless/rt2x00/rt2800pci.c +++ b/drivers/net/wireless/rt2x00/rt2800pci.c @@ -912,6 +912,7 @@ static int rt2800pci_probe_hw(struct rt2x00_dev *rt2x00dev) __set_bit(DRIVER_REQUIRE_DMA, &rt2x00dev->flags); __set_bit(DRIVER_REQUIRE_L2PAD, &rt2x00dev->flags); __set_bit(DRIVER_REQUIRE_TXSTATUS_FIFO, &rt2x00dev->flags); + __set_bit(DRIVER_REQUIRE_TASKLET_CONTEXT, &rt2x00dev->flags); if (!modparam_nohwcrypt) __set_bit(CONFIG_SUPPORT_HW_CRYPTO, &rt2x00dev->flags); __set_bit(DRIVER_SUPPORT_LINK_TUNING, &rt2x00dev->flags); diff --git a/drivers/net/wireless/rt2x00/rt2x00.h b/drivers/net/wireless/rt2x00/rt2x00.h index 94fe589acfaa..ab43e7ca2a23 100644 --- a/drivers/net/wireless/rt2x00/rt2x00.h +++ b/drivers/net/wireless/rt2x00/rt2x00.h @@ -664,6 +664,7 @@ enum rt2x00_flags { DRIVER_REQUIRE_COPY_IV, DRIVER_REQUIRE_L2PAD, DRIVER_REQUIRE_TXSTATUS_FIFO, + DRIVER_REQUIRE_TASKLET_CONTEXT, /* * Driver features diff --git a/drivers/net/wireless/rt2x00/rt2x00dev.c b/drivers/net/wireless/rt2x00/rt2x00dev.c index 5ba79b935f09..d019830ca840 100644 --- a/drivers/net/wireless/rt2x00/rt2x00dev.c +++ b/drivers/net/wireless/rt2x00/rt2x00dev.c @@ -390,9 +390,12 @@ void rt2x00lib_txdone(struct queue_entry *entry, * through a mac80211 library call (RTS/CTS) then we should not * send the status report back. 
*/ - if (!(skbdesc_flags & SKBDESC_NOT_MAC80211)) - ieee80211_tx_status(rt2x00dev->hw, entry->skb); - else + if (!(skbdesc_flags & SKBDESC_NOT_MAC80211)) { + if (test_bit(DRIVER_REQUIRE_TASKLET_CONTEXT, &rt2x00dev->flags)) + ieee80211_tx_status(rt2x00dev->hw, entry->skb); + else + ieee80211_tx_status_ni(rt2x00dev->hw, entry->skb); + } else dev_kfree_skb_any(entry->skb); /* diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 9fdf982d1286..365359b24177 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2024,8 +2024,8 @@ static inline void ieee80211_rx_ni(struct ieee80211_hw *hw, * * This function may not be called in IRQ context. Calls to this function * for a single hardware must be synchronized against each other. Calls - * to this function and ieee80211_tx_status_irqsafe() may not be mixed - * for a single hardware. + * to this function, ieee80211_tx_status_ni() and ieee80211_tx_status_irqsafe() + * may not be mixed for a single hardware. * * @hw: the hardware the frame was transmitted by * @skb: the frame that was transmitted, owned by mac80211 after this call @@ -2033,14 +2033,34 @@ static inline void ieee80211_rx_ni(struct ieee80211_hw *hw, void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb); +/** + * ieee80211_tx_status_ni - transmit status callback (in process context) + * + * Like ieee80211_tx_status() but can be called in process context. + * + * Calls to this function, ieee80211_tx_status() and + * ieee80211_tx_status_irqsafe() may not be mixed + * for a single hardware. 
+ * + * @hw: the hardware the frame was transmitted by + * @skb: the frame that was transmitted, owned by mac80211 after this call + */ +static inline void ieee80211_tx_status_ni(struct ieee80211_hw *hw, + struct sk_buff *skb) +{ + local_bh_disable(); + ieee80211_tx_status(hw, skb); + local_bh_enable(); +} + /** * ieee80211_tx_status_irqsafe - IRQ-safe transmit status callback * * Like ieee80211_tx_status() but can be called in IRQ context * (internally defers to a tasklet.) * - * Calls to this function and ieee80211_tx_status() may not be mixed for a - * single hardware. + * Calls to this function, ieee80211_tx_status() and + * ieee80211_tx_status_ni() may not be mixed for a single hardware. * * @hw: the hardware the frame was transmitted by * @skb: the frame that was transmitted, owned by mac80211 after this call -- cgit v1.2.3-59-g8ed1b From 24bdd9f4c9af75b33b438d60381a67626de0128d Mon Sep 17 00:00:00 2001 From: Javier Cardona Date: Thu, 16 Dec 2010 17:37:48 -0800 Subject: mac80211: Rename mesh_params to mesh_config to prepare for mesh_setup Mesh parameters can be to setup a mesh or to configure it. This patch renames the ambiguous name mesh_params to mesh_config in preparation for mesh_setup. Signed-off-by: Javier Cardona Signed-off-by: John W. Linville --- include/linux/nl80211.h | 15 ++++++++++----- include/net/cfg80211.h | 8 ++++---- net/mac80211/cfg.c | 8 ++++---- net/wireless/nl80211.c | 40 ++++++++++++++++++++-------------------- 4 files changed, 38 insertions(+), 33 deletions(-) (limited to 'include/net') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 7483a89cee8f..11a1de67b618 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -172,10 +172,10 @@ * to the specified ISO/IEC 3166-1 alpha2 country code. The core will * store this as a valid request and then query userspace for it. 
* - * @NL80211_CMD_GET_MESH_PARAMS: Get mesh networking properties for the + * @NL80211_CMD_GET_MESH_CONFIG: Get mesh networking properties for the * interface identified by %NL80211_ATTR_IFINDEX * - * @NL80211_CMD_SET_MESH_PARAMS: Set mesh networking properties for the + * @NL80211_CMD_SET_MESH_CONFIG: Set mesh networking properties for the * interface identified by %NL80211_ATTR_IFINDEX * * @NL80211_CMD_SET_MGMT_EXTRA_IE: Set extra IEs for management frames. The @@ -448,8 +448,8 @@ enum nl80211_commands { NL80211_CMD_SET_REG, NL80211_CMD_REQ_SET_REG, - NL80211_CMD_GET_MESH_PARAMS, - NL80211_CMD_SET_MESH_PARAMS, + NL80211_CMD_GET_MESH_CONFIG, + NL80211_CMD_SET_MESH_CONFIG, NL80211_CMD_SET_MGMT_EXTRA_IE /* reserved; not used */, @@ -538,6 +538,10 @@ enum nl80211_commands { #define NL80211_CMD_DISASSOCIATE NL80211_CMD_DISASSOCIATE #define NL80211_CMD_REG_BEACON_HINT NL80211_CMD_REG_BEACON_HINT +/* source-level API compatibility */ +#define NL80211_CMD_GET_MESH_PARAMS NL80211_CMD_GET_MESH_CONFIG +#define NL80211_CMD_SET_MESH_PARAMS NL80211_CMD_SET_MESH_CONFIG + /** * enum nl80211_attrs - nl80211 netlink attributes * @@ -922,7 +926,7 @@ enum nl80211_attrs { NL80211_ATTR_REG_ALPHA2, NL80211_ATTR_REG_RULES, - NL80211_ATTR_MESH_PARAMS, + NL80211_ATTR_MESH_CONFIG, NL80211_ATTR_BSS_BASIC_RATES, @@ -1058,6 +1062,7 @@ enum nl80211_attrs { /* source-level API compatibility */ #define NL80211_ATTR_SCAN_GENERATION NL80211_ATTR_GENERATION +#define NL80211_ATTR_MESH_PARAMS NL80211_ATTR_MESH_CONFIG /* * Allow user space programs to use #ifdef on new attributes by defining them diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 6dc665a727c2..7283496c2d05 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1096,9 +1096,9 @@ struct cfg80211_pmksa { * @get_mpath: get a mesh path for the given parameters * @dump_mpath: dump mesh path callback -- resume dump at index @idx * - * @get_mesh_params: Put the current mesh parameters into *params + * 
@get_mesh_config: Get the current mesh configuration * - * @update_mesh_params: Update mesh parameters on a running mesh. + * @update_mesh_config: Update mesh parameters on a running mesh. * The mask is a bitfield which tells us which parameters to * set, and which to leave alone. * @@ -1246,10 +1246,10 @@ struct cfg80211_ops { int (*dump_mpath)(struct wiphy *wiphy, struct net_device *dev, int idx, u8 *dst, u8 *next_hop, struct mpath_info *pinfo); - int (*get_mesh_params)(struct wiphy *wiphy, + int (*get_mesh_config)(struct wiphy *wiphy, struct net_device *dev, struct mesh_config *conf); - int (*update_mesh_params)(struct wiphy *wiphy, + int (*update_mesh_config)(struct wiphy *wiphy, struct net_device *dev, u32 mask, const struct mesh_config *nconf); int (*join_mesh)(struct wiphy *wiphy, struct net_device *dev, diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index ea06f92801e9..1c94a2ae22ee 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -984,7 +984,7 @@ static int ieee80211_dump_mpath(struct wiphy *wiphy, struct net_device *dev, return 0; } -static int ieee80211_get_mesh_params(struct wiphy *wiphy, +static int ieee80211_get_mesh_config(struct wiphy *wiphy, struct net_device *dev, struct mesh_config *conf) { @@ -1000,7 +1000,7 @@ static inline bool _chg_mesh_attr(enum nl80211_meshconf_params parm, u32 mask) return (mask >> (parm-1)) & 0x1; } -static int ieee80211_update_mesh_params(struct wiphy *wiphy, +static int ieee80211_update_mesh_config(struct wiphy *wiphy, struct net_device *dev, u32 mask, const struct mesh_config *nconf) { @@ -1787,8 +1787,8 @@ struct cfg80211_ops mac80211_config_ops = { .change_mpath = ieee80211_change_mpath, .get_mpath = ieee80211_get_mpath, .dump_mpath = ieee80211_dump_mpath, - .update_mesh_params = ieee80211_update_mesh_params, - .get_mesh_params = ieee80211_get_mesh_params, + .update_mesh_config = ieee80211_update_mesh_config, + .get_mesh_config = ieee80211_get_mesh_config, .join_mesh = ieee80211_join_mesh, .leave_mesh 
= ieee80211_leave_mesh, #endif diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index aefce54d47e2..10be9350752e 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -123,7 +123,7 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { .len = NL80211_MAX_SUPP_RATES }, [NL80211_ATTR_BSS_HT_OPMODE] = { .type = NLA_U16 }, - [NL80211_ATTR_MESH_PARAMS] = { .type = NLA_NESTED }, + [NL80211_ATTR_MESH_CONFIG] = { .type = NLA_NESTED }, [NL80211_ATTR_HT_CAPABILITY] = { .type = NLA_BINARY, .len = NL80211_HT_CAPABILITY_LEN }, @@ -719,7 +719,7 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, CMD(add_beacon, NEW_BEACON); CMD(add_station, NEW_STATION); CMD(add_mpath, NEW_MPATH); - CMD(update_mesh_params, SET_MESH_PARAMS); + CMD(update_mesh_config, SET_MESH_CONFIG); CMD(change_bss, SET_BSS); CMD(auth, AUTHENTICATE); CMD(assoc, ASSOCIATE); @@ -2673,7 +2673,7 @@ static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info) return r; } -static int nl80211_get_mesh_params(struct sk_buff *skb, +static int nl80211_get_mesh_config(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; @@ -2688,7 +2688,7 @@ static int nl80211_get_mesh_params(struct sk_buff *skb, if (wdev->iftype != NL80211_IFTYPE_MESH_POINT) return -EOPNOTSUPP; - if (!rdev->ops->get_mesh_params) + if (!rdev->ops->get_mesh_config) return -EOPNOTSUPP; wdev_lock(wdev); @@ -2696,7 +2696,7 @@ static int nl80211_get_mesh_params(struct sk_buff *skb, if (!wdev->mesh_id_len) memcpy(&cur_params, &default_mesh_config, sizeof(cur_params)); else - err = rdev->ops->get_mesh_params(&rdev->wiphy, dev, + err = rdev->ops->get_mesh_config(&rdev->wiphy, dev, &cur_params); wdev_unlock(wdev); @@ -2708,10 +2708,10 @@ static int nl80211_get_mesh_params(struct sk_buff *skb, if (!msg) return -ENOMEM; hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0, - NL80211_CMD_GET_MESH_PARAMS); + 
NL80211_CMD_GET_MESH_CONFIG); if (!hdr) goto nla_put_failure; - pinfoattr = nla_nest_start(msg, NL80211_ATTR_MESH_PARAMS); + pinfoattr = nla_nest_start(msg, NL80211_ATTR_MESH_CONFIG); if (!pinfoattr) goto nla_put_failure; NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, dev->ifindex); @@ -2773,7 +2773,7 @@ static const struct nla_policy nl80211_meshconf_params_policy[NL80211_MESHCONF_A [NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME] = { .type = NLA_U16 }, }; -static int nl80211_parse_mesh_params(struct genl_info *info, +static int nl80211_parse_mesh_config(struct genl_info *info, struct mesh_config *cfg, u32 *mask_out) { @@ -2789,10 +2789,10 @@ do {\ } while (0);\ - if (!info->attrs[NL80211_ATTR_MESH_PARAMS]) + if (!info->attrs[NL80211_ATTR_MESH_CONFIG]) return -EINVAL; if (nla_parse_nested(tb, NL80211_MESHCONF_ATTR_MAX, - info->attrs[NL80211_ATTR_MESH_PARAMS], + info->attrs[NL80211_ATTR_MESH_CONFIG], nl80211_meshconf_params_policy)) return -EINVAL; @@ -2847,7 +2847,7 @@ do {\ #undef FILL_IN_MESH_PARAM_IF_SET } -static int nl80211_update_mesh_params(struct sk_buff *skb, +static int nl80211_update_mesh_config(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; @@ -2860,10 +2860,10 @@ static int nl80211_update_mesh_params(struct sk_buff *skb, if (wdev->iftype != NL80211_IFTYPE_MESH_POINT) return -EOPNOTSUPP; - if (!rdev->ops->update_mesh_params) + if (!rdev->ops->update_mesh_config) return -EOPNOTSUPP; - err = nl80211_parse_mesh_params(info, &cfg, &mask); + err = nl80211_parse_mesh_config(info, &cfg, &mask); if (err) return err; @@ -2872,7 +2872,7 @@ static int nl80211_update_mesh_params(struct sk_buff *skb, err = -ENOLINK; if (!err) - err = rdev->ops->update_mesh_params(&rdev->wiphy, dev, + err = rdev->ops->update_mesh_config(&rdev->wiphy, dev, mask, &cfg); wdev_unlock(wdev); @@ -4672,9 +4672,9 @@ static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info) /* start with default */ memcpy(&cfg, 
&default_mesh_config, sizeof(cfg)); - if (info->attrs[NL80211_ATTR_MESH_PARAMS]) { + if (info->attrs[NL80211_ATTR_MESH_CONFIG]) { /* and parse parameters if given */ - err = nl80211_parse_mesh_params(info, &cfg, NULL); + err = nl80211_parse_mesh_config(info, &cfg, NULL); if (err) return err; } @@ -4952,16 +4952,16 @@ static struct genl_ops nl80211_ops[] = { .flags = GENL_ADMIN_PERM, }, { - .cmd = NL80211_CMD_GET_MESH_PARAMS, - .doit = nl80211_get_mesh_params, + .cmd = NL80211_CMD_GET_MESH_CONFIG, + .doit = nl80211_get_mesh_config, .policy = nl80211_policy, /* can be retrieved by unprivileged users */ .internal_flags = NL80211_FLAG_NEED_NETDEV | NL80211_FLAG_NEED_RTNL, }, { - .cmd = NL80211_CMD_SET_MESH_PARAMS, - .doit = nl80211_update_mesh_params, + .cmd = NL80211_CMD_SET_MESH_CONFIG, + .doit = nl80211_update_mesh_config, .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | -- cgit v1.2.3-59-g8ed1b From c80d545da3f7c0e534ccd4a780f322f80a92cff1 Mon Sep 17 00:00:00 2001 From: Javier Cardona Date: Thu, 16 Dec 2010 17:37:49 -0800 Subject: mac80211: Let userspace enable and configure vendor specific path selection. Userspace will now be allowed to toggle between the default path selection algorithm (HWMP, implemented in the kernel), and a vendor specific alternative. Also in the same patch, allow userspace to add information elements to mesh beacons. This is in accordance with the Extensible Path Selection Framework specified in version 7.0 of the 802.11s draft. Signed-off-by: Javier Cardona Signed-off-by: John W.
Linville --- include/linux/ieee80211.h | 25 ++++++++++++++++++ include/linux/nl80211.h | 47 +++++++++++++++++++++++++++++++--- include/net/cfg80211.h | 8 ++++++ net/mac80211/cfg.c | 39 +++++++++++++++++++++++++--- net/mac80211/ieee80211_i.h | 4 +-- net/mac80211/mesh.c | 7 ++++++ net/mac80211/mesh_plink.c | 3 ++- net/mac80211/tx.c | 3 ++- net/wireless/core.c | 22 +++++++++++----- net/wireless/core.h | 5 ++-- net/wireless/mesh.c | 24 ++++++++++-------- net/wireless/nl80211.c | 63 ++++++++++++++++++++++++++++++++++++++++++---- 12 files changed, 214 insertions(+), 36 deletions(-) (limited to 'include/net') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 7f2354534242..cd681681d211 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1291,6 +1291,31 @@ enum ieee80211_key_len { WLAN_KEY_LEN_AES_CMAC = 16, }; +/** + * enum - mesh path selection protocol identifier + * + * @IEEE80211_PATH_PROTOCOL_HWMP: the default path selection protocol + * @IEEE80211_PATH_PROTOCOL_VENDOR: a vendor specific protocol that will + * be specified in a vendor specific information element + */ +enum { + IEEE80211_PATH_PROTOCOL_HWMP = 0, + IEEE80211_PATH_PROTOCOL_VENDOR = 255, +}; + +/** + * enum - mesh path selection metric identifier + * + * @IEEE80211_PATH_METRIC_AIRTIME: the default path selection metric + * @IEEE80211_PATH_METRIC_VENDOR: a vendor specific metric that will be + * specified in a vendor specific information element + */ +enum { + IEEE80211_PATH_METRIC_AIRTIME = 0, + IEEE80211_PATH_METRIC_VENDOR = 255, +}; + + /* * IEEE 802.11-2007 7.3.2.9 Country information element * diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 11a1de67b618..69eaccac78c4 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -872,6 +872,9 @@ enum nl80211_commands { * attributes, specifying what a key should be set as default as. * See &enum nl80211_key_default_types. 
* + * @NL80211_ATTR_MESH_SETUP: Optional mesh setup parameters. These cannot be + * changed once the mesh is active. + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -1054,6 +1057,8 @@ enum nl80211_attrs { NL80211_ATTR_MAX_REMAIN_ON_CHANNEL_DURATION, + NL80211_ATTR_MESH_SETUP, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -1564,7 +1569,8 @@ enum nl80211_mntr_flags { /** * enum nl80211_meshconf_params - mesh configuration parameters * - * Mesh configuration parameters + * Mesh configuration parameters. These can be changed while the mesh is + * active. * * @__NL80211_MESHCONF_INVALID: internal use * @@ -1587,9 +1593,6 @@ enum nl80211_mntr_flags { * @NL80211_MESHCONF_TTL: specifies the value of TTL field set at a source mesh * point. * - * @NL80211_MESHCONF_ELEMENT_TTL: specifies the value of TTL field set at a - * source mesh point for path selection elements. - * * @NL80211_MESHCONF_AUTO_OPEN_PLINKS: whether we should automatically * open peer links when we detect compatible mesh peers. * @@ -1616,6 +1619,9 @@ enum nl80211_mntr_flags { * * @NL80211_MESHCONF_ROOTMODE: whether root mode is enabled or not * + * @NL80211_MESHCONF_ELEMENT_TTL: specifies the value of TTL field set at a + * source mesh point for path selection elements. + * * @NL80211_MESHCONF_ATTR_MAX: highest possible mesh configuration attribute * * @__NL80211_MESHCONF_ATTR_AFTER_LAST: internal use @@ -1643,6 +1649,39 @@ enum nl80211_meshconf_params { NL80211_MESHCONF_ATTR_MAX = __NL80211_MESHCONF_ATTR_AFTER_LAST - 1 }; +/** + * enum nl80211_mesh_setup_params - mesh setup parameters + * + * Mesh setup parameters. These are used to start/join a mesh and cannot be + * changed while the mesh is active. 
+ * + * @__NL80211_MESH_SETUP_INVALID: Internal use + * + * @NL80211_MESH_SETUP_ENABLE_VENDOR_PATH_SEL: Enable this option to use a + * vendor specific path selection algorithm or disable it to use the default + * HWMP. + * + * @NL80211_MESH_SETUP_ENABLE_VENDOR_METRIC: Enable this option to use a + * vendor specific path metric or disable it to use the default Airtime + * metric. + * + * @NL80211_MESH_SETUP_VENDOR_PATH_SEL_IE: A vendor specific information + * element that vendors will use to identify the path selection methods and + * metrics in use. + * + * @__NL80211_MESH_SETUP_ATTR_AFTER_LAST: Internal use + */ +enum nl80211_mesh_setup_params { + __NL80211_MESH_SETUP_INVALID, + NL80211_MESH_SETUP_ENABLE_VENDOR_PATH_SEL, + NL80211_MESH_SETUP_ENABLE_VENDOR_METRIC, + NL80211_MESH_SETUP_VENDOR_PATH_SEL_IE, + + /* keep last */ + __NL80211_MESH_SETUP_ATTR_AFTER_LAST, + NL80211_MESH_SETUP_ATTR_MAX = __NL80211_MESH_SETUP_ATTR_AFTER_LAST - 1 +}; + /** * enum nl80211_txq_attr - TX queue parameter attributes * @__NL80211_TXQ_ATTR_INVALID: Attribute number 0 is reserved diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 7283496c2d05..924d60366233 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -649,12 +649,20 @@ struct mesh_config { * struct mesh_setup - 802.11s mesh setup configuration * @mesh_id: the mesh ID * @mesh_id_len: length of the mesh ID, at least 1 and at most 32 bytes + * @path_sel_proto: which path selection protocol to use + * @path_metric: which metric to use + * @vendor_ie: vendor information elements (optional) + * @vendor_ie_len: length of vendor information elements * * These parameters are fixed when the mesh is created. 
*/ struct mesh_setup { const u8 *mesh_id; u8 mesh_id_len; + u8 path_sel_proto; + u8 path_metric; + const u8 *vendor_ie; + u8 vendor_ie_len; }; /** diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 1c94a2ae22ee..ae2c7127a8aa 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1000,6 +1000,36 @@ static inline bool _chg_mesh_attr(enum nl80211_meshconf_params parm, u32 mask) return (mask >> (parm-1)) & 0x1; } +static int copy_mesh_setup(struct ieee80211_if_mesh *ifmsh, + const struct mesh_setup *setup) +{ + u8 *new_ie; + const u8 *old_ie; + + /* first allocate the new vendor information element */ + new_ie = NULL; + old_ie = ifmsh->vendor_ie; + + ifmsh->vendor_ie_len = setup->vendor_ie_len; + if (setup->vendor_ie_len) { + new_ie = kmemdup(setup->vendor_ie, setup->vendor_ie_len, + GFP_KERNEL); + if (!new_ie) + return -ENOMEM; + } + + /* now copy the rest of the setup parameters */ + ifmsh->mesh_id_len = setup->mesh_id_len; + memcpy(ifmsh->mesh_id, setup->mesh_id, ifmsh->mesh_id_len); + ifmsh->mesh_pp_id = setup->path_sel_proto; + ifmsh->mesh_pm_id = setup->path_metric; + ifmsh->vendor_ie = new_ie; + + kfree(old_ie); + + return 0; +} + static int ieee80211_update_mesh_config(struct wiphy *wiphy, struct net_device *dev, u32 mask, const struct mesh_config *nconf) @@ -1059,11 +1089,12 @@ static int ieee80211_join_mesh(struct wiphy *wiphy, struct net_device *dev, { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; + int err; - memcpy(&sdata->u.mesh.mshcfg, conf, sizeof(struct mesh_config)); - ifmsh->mesh_id_len = setup->mesh_id_len; - memcpy(ifmsh->mesh_id, setup->mesh_id, ifmsh->mesh_id_len); - + memcpy(&ifmsh->mshcfg, conf, sizeof(struct mesh_config)); + err = copy_mesh_setup(ifmsh, setup); + if (err) + return err; ieee80211_start_mesh(sdata); return 0; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index ce58b2a676e2..eadaa243a3da 100644 --- 
a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -484,6 +484,8 @@ struct ieee80211_if_mesh { struct mesh_config mshcfg; u32 mesh_seqnum; bool accepting_plinks; + const u8 *vendor_ie; + u8 vendor_ie_len; }; #ifdef CONFIG_MAC80211_MESH @@ -585,9 +587,7 @@ struct ieee80211_sub_if_data { struct ieee80211_if_vlan vlan; struct ieee80211_if_managed mgd; struct ieee80211_if_ibss ibss; -#ifdef CONFIG_MAC80211_MESH struct ieee80211_if_mesh mesh; -#endif u32 mntr_flags; } u; diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 63e1188d5062..c326e009389d 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -287,6 +287,13 @@ void mesh_mgmt_ies_add(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata) *pos++ |= sdata->u.mesh.accepting_plinks ? MESHCONF_CAPAB_ACCEPT_PLINKS : 0x00; *pos++ = 0x00; + + if (sdata->u.mesh.vendor_ie) { + int len = sdata->u.mesh.vendor_ie_len; + const u8 *data = sdata->u.mesh.vendor_ie; + if (skb_tailroom(skb) > len) + memcpy(skb_put(skb, len), data, len); + } } u32 mesh_table_hash(u8 *addr, struct ieee80211_sub_if_data *sdata, struct mesh_table *tbl) diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index 1c91f0f3c307..44b53931ba5e 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -160,7 +160,8 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, enum plink_frame_type action, u8 *da, __le16 llid, __le16 plid, __le16 reason) { struct ieee80211_local *local = sdata->local; - struct sk_buff *skb = dev_alloc_skb(local->hw.extra_tx_headroom + 400); + struct sk_buff *skb = dev_alloc_skb(local->hw.extra_tx_headroom + 400 + + sdata->u.mesh.vendor_ie_len); struct ieee80211_mgmt *mgmt; bool include_plid = false; static const u8 meshpeeringproto[] = { 0x00, 0x0F, 0xAC, 0x2A }; diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 157bde993ef5..f4b1b624ea9f 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -2290,7 +2290,8 @@ struct sk_buff 
*ieee80211_beacon_get_tim(struct ieee80211_hw *hw, u8 *pos; /* headroom, head length, tail length and maximum TIM length */ - skb = dev_alloc_skb(local->tx_headroom + 400); + skb = dev_alloc_skb(local->tx_headroom + 400 + + sdata->u.mesh.vendor_ie_len); if (!skb) goto out; diff --git a/net/wireless/core.c b/net/wireless/core.c index 79772fcc37bc..e9a5f8ca4c27 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -789,13 +789,23 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, cfg80211_mgd_wext_connect(rdev, wdev); break; #endif +#ifdef CONFIG_MAC80211_MESH case NL80211_IFTYPE_MESH_POINT: - /* backward compat code ... */ - if (wdev->mesh_id_up_len) - __cfg80211_join_mesh(rdev, dev, wdev->ssid, - wdev->mesh_id_up_len, - &default_mesh_config); - break; + { + /* backward compat code... */ + struct mesh_setup setup; + memcpy(&setup, &default_mesh_setup, + sizeof(setup)); + /* back compat only needed for mesh_id */ + setup.mesh_id = wdev->ssid; + setup.mesh_id_len = wdev->mesh_id_up_len; + if (wdev->mesh_id_up_len) + __cfg80211_join_mesh(rdev, dev, + &setup, + &default_mesh_config); + break; + } +#endif default: break; } diff --git a/net/wireless/core.h b/net/wireless/core.h index 743203bb61ac..26a0a084e16b 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -287,13 +287,14 @@ int cfg80211_ibss_wext_join(struct cfg80211_registered_device *rdev, /* mesh */ extern const struct mesh_config default_mesh_config; +extern const struct mesh_setup default_mesh_setup; int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev, struct net_device *dev, - const u8 *mesh_id, u8 mesh_id_len, + const struct mesh_setup *setup, const struct mesh_config *conf); int cfg80211_join_mesh(struct cfg80211_registered_device *rdev, struct net_device *dev, - const u8 *mesh_id, u8 mesh_id_len, + const struct mesh_setup *setup, const struct mesh_config *conf); int cfg80211_leave_mesh(struct cfg80211_registered_device *rdev, struct net_device *dev); diff 
--git a/net/wireless/mesh.c b/net/wireless/mesh.c index e0b9747fe50a..73e39c171ffb 100644 --- a/net/wireless/mesh.c +++ b/net/wireless/mesh.c @@ -50,17 +50,19 @@ const struct mesh_config default_mesh_config = { .min_discovery_timeout = MESH_MIN_DISCOVERY_TIMEOUT, }; +const struct mesh_setup default_mesh_setup = { + .path_sel_proto = IEEE80211_PATH_PROTOCOL_HWMP, + .path_metric = IEEE80211_PATH_METRIC_AIRTIME, + .vendor_ie = NULL, + .vendor_ie_len = 0, +}; int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev, struct net_device *dev, - const u8 *mesh_id, u8 mesh_id_len, + const struct mesh_setup *setup, const struct mesh_config *conf) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct mesh_setup setup = { - .mesh_id = mesh_id, - .mesh_id_len = mesh_id_len, - }; int err; BUILD_BUG_ON(IEEE80211_MAX_SSID_LEN != IEEE80211_MAX_MESH_ID_LEN); @@ -73,16 +75,16 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev, if (wdev->mesh_id_len) return -EALREADY; - if (!mesh_id_len) + if (!setup->mesh_id_len) return -EINVAL; if (!rdev->ops->join_mesh) return -EOPNOTSUPP; - err = rdev->ops->join_mesh(&rdev->wiphy, dev, conf, &setup); + err = rdev->ops->join_mesh(&rdev->wiphy, dev, conf, setup); if (!err) { - memcpy(wdev->ssid, mesh_id, mesh_id_len); - wdev->mesh_id_len = mesh_id_len; + memcpy(wdev->ssid, setup->mesh_id, setup->mesh_id_len); + wdev->mesh_id_len = setup->mesh_id_len; } return err; @@ -90,14 +92,14 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev, int cfg80211_join_mesh(struct cfg80211_registered_device *rdev, struct net_device *dev, - const u8 *mesh_id, u8 mesh_id_len, + const struct mesh_setup *setup, const struct mesh_config *conf) { struct wireless_dev *wdev = dev->ieee80211_ptr; int err; wdev_lock(wdev); - err = __cfg80211_join_mesh(rdev, dev, mesh_id, mesh_id_len, conf); + err = __cfg80211_join_mesh(rdev, dev, setup, conf); wdev_unlock(wdev); return err; diff --git a/net/wireless/nl80211.c 
b/net/wireless/nl80211.c index 10be9350752e..eef89d0b558b 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -2773,6 +2773,14 @@ static const struct nla_policy nl80211_meshconf_params_policy[NL80211_MESHCONF_A [NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME] = { .type = NLA_U16 }, }; +static const struct nla_policy + nl80211_mesh_setup_params_policy[NL80211_MESH_SETUP_ATTR_MAX+1] = { + [NL80211_MESH_SETUP_ENABLE_VENDOR_PATH_SEL] = { .type = NLA_U8 }, + [NL80211_MESH_SETUP_ENABLE_VENDOR_METRIC] = { .type = NLA_U8 }, + [NL80211_MESH_SETUP_VENDOR_PATH_SEL_IE] = { .type = NLA_BINARY, + .len = IEEE80211_MAX_DATA_LEN }, +}; + static int nl80211_parse_mesh_config(struct genl_info *info, struct mesh_config *cfg, u32 *mask_out) @@ -2839,14 +2847,50 @@ do {\ dot11MeshHWMPRootMode, mask, NL80211_MESHCONF_HWMP_ROOTMODE, nla_get_u8); - if (mask_out) *mask_out = mask; + return 0; #undef FILL_IN_MESH_PARAM_IF_SET } +static int nl80211_parse_mesh_setup(struct genl_info *info, + struct mesh_setup *setup) +{ + struct nlattr *tb[NL80211_MESH_SETUP_ATTR_MAX + 1]; + + if (!info->attrs[NL80211_ATTR_MESH_SETUP]) + return -EINVAL; + if (nla_parse_nested(tb, NL80211_MESH_SETUP_ATTR_MAX, + info->attrs[NL80211_ATTR_MESH_SETUP], + nl80211_mesh_setup_params_policy)) + return -EINVAL; + + if (tb[NL80211_MESH_SETUP_ENABLE_VENDOR_PATH_SEL]) + setup->path_sel_proto = + (nla_get_u8(tb[NL80211_MESH_SETUP_ENABLE_VENDOR_PATH_SEL])) ? + IEEE80211_PATH_PROTOCOL_VENDOR : + IEEE80211_PATH_PROTOCOL_HWMP; + + if (tb[NL80211_MESH_SETUP_ENABLE_VENDOR_METRIC]) + setup->path_metric = + (nla_get_u8(tb[NL80211_MESH_SETUP_ENABLE_VENDOR_METRIC])) ? 
+ IEEE80211_PATH_METRIC_VENDOR : + IEEE80211_PATH_METRIC_AIRTIME; + + if (tb[NL80211_MESH_SETUP_VENDOR_PATH_SEL_IE]) { + struct nlattr *ieattr = + tb[NL80211_MESH_SETUP_VENDOR_PATH_SEL_IE]; + if (!is_valid_ie_attr(ieattr)) + return -EINVAL; + setup->vendor_ie = nla_data(ieattr); + setup->vendor_ie_len = nla_len(ieattr); + } + + return 0; +} + static int nl80211_update_mesh_config(struct sk_buff *skb, struct genl_info *info) { @@ -4667,10 +4711,12 @@ static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info) struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; struct mesh_config cfg; + struct mesh_setup setup; int err; /* start with default */ memcpy(&cfg, &default_mesh_config, sizeof(cfg)); + memcpy(&setup, &default_mesh_setup, sizeof(setup)); if (info->attrs[NL80211_ATTR_MESH_CONFIG]) { /* and parse parameters if given */ @@ -4683,10 +4729,17 @@ static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info) !nla_len(info->attrs[NL80211_ATTR_MESH_ID])) return -EINVAL; - return cfg80211_join_mesh(rdev, dev, - nla_data(info->attrs[NL80211_ATTR_MESH_ID]), - nla_len(info->attrs[NL80211_ATTR_MESH_ID]), - &cfg); + setup.mesh_id = nla_data(info->attrs[NL80211_ATTR_MESH_ID]); + setup.mesh_id_len = nla_len(info->attrs[NL80211_ATTR_MESH_ID]); + + if (info->attrs[NL80211_ATTR_MESH_SETUP]) { + /* parse additional setup parameters if given */ + err = nl80211_parse_mesh_setup(info, &setup); + if (err) + return err; + } + + return cfg80211_join_mesh(rdev, dev, &setup, &cfg); } static int nl80211_leave_mesh(struct sk_buff *skb, struct genl_info *info) -- cgit v1.2.3-59-g8ed1b From 7f531e03abf0162df3966c4fa5fa6fdd9302cb6b Mon Sep 17 00:00:00 2001 From: Bruno Randolf Date: Thu, 16 Dec 2010 11:30:22 +0900 Subject: cfg80211: Separate available antennas for RX and TX As has been pointed out by Daniel Halperin some devices (e.g. 
Intel IWL5100) can only TX from a subset of RX antennas, so use separate availability masks for RX and TX. Signed-off-by: Bruno Randolf Signed-off-by: John W. Linville --- include/net/cfg80211.h | 12 +++++++++--- net/wireless/nl80211.c | 17 ++++++++++------- 2 files changed, 19 insertions(+), 10 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 924d60366233..bcc9f448ec4e 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1482,8 +1482,13 @@ struct ieee80211_txrx_stypes { * transmitted through nl80211, points to an array indexed by interface * type * - * @available_antennas: bitmap of antennas which are available to configure. - * antenna configuration commands will be rejected unless this is set. + * @available_antennas_tx: bitmap of antennas which are available to be + * configured as TX antennas. Antenna configuration commands will be + * rejected unless this or @available_antennas_rx is set. + * + * @available_antennas_rx: bitmap of antennas which are available to be + * configured as RX antennas. Antenna configuration commands will be + * rejected unless this or @available_antennas_tx is set. * * @max_remain_on_channel_duration: Maximum time a remain-on-channel operation * may request, if implemented. @@ -1528,7 +1533,8 @@ struct wiphy { u8 max_num_pmkids; - u32 available_antennas; + u32 available_antennas_tx; + u32 available_antennas_rx; /* If multiple wiphys are registered and you're handed e.g. 
* a regular netdev with assigned ieee80211_ptr, you won't diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 6a5d6fa11e46..8d2f5f8d8080 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -605,7 +605,8 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, if (dev->wiphy.flags & WIPHY_FLAG_CONTROL_PORT_PROTOCOL) NLA_PUT_FLAG(msg, NL80211_ATTR_CONTROL_PORT_ETHERTYPE); - if (dev->wiphy.available_antennas && dev->ops->get_antenna) { + if ((dev->wiphy.available_antennas_tx || + dev->wiphy.available_antennas_rx) && dev->ops->get_antenna) { u32 tx_ant = 0, rx_ant = 0; int res; res = dev->ops->get_antenna(&dev->wiphy, &tx_ant, &rx_ant); @@ -1107,7 +1108,9 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_WIPHY_ANTENNA_TX] && info->attrs[NL80211_ATTR_WIPHY_ANTENNA_RX]) { u32 tx_ant, rx_ant; - if (!rdev->wiphy.available_antennas || !rdev->ops->set_antenna) { + if ((!rdev->wiphy.available_antennas_tx && + !rdev->wiphy.available_antennas_rx) || + !rdev->ops->set_antenna) { result = -EOPNOTSUPP; goto bad_res; } @@ -1116,15 +1119,15 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) rx_ant = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_ANTENNA_RX]); /* reject antenna configurations which don't match the - * available antenna mask, except for the "all" mask */ - if ((~tx_ant && (tx_ant & ~rdev->wiphy.available_antennas)) || - (~rx_ant && (rx_ant & ~rdev->wiphy.available_antennas))) { + * available antenna masks, except for the "all" mask */ + if ((~tx_ant && (tx_ant & ~rdev->wiphy.available_antennas_tx)) || + (~rx_ant && (rx_ant & ~rdev->wiphy.available_antennas_rx))) { result = -EINVAL; goto bad_res; } - tx_ant = tx_ant & rdev->wiphy.available_antennas; - rx_ant = rx_ant & rdev->wiphy.available_antennas; + tx_ant = tx_ant & rdev->wiphy.available_antennas_tx; + rx_ant = rx_ant & rdev->wiphy.available_antennas_rx; result = 
rdev->ops->set_antenna(&rdev->wiphy, tx_ant, rx_ant); if (result) -- cgit v1.2.3-59-g8ed1b From 356f039822b8d802138f7121c80d2a9286976dbd Mon Sep 17 00:00:00 2001 From: Nandita Dukkipati Date: Mon, 20 Dec 2010 14:15:56 +0000 Subject: TCP: increase default initial receive window. This patch changes the default initial receive window to 10 mss (defined constant). The default window is limited to the maximum of 10*1460 and 2*mss (when mss > 1460). draft-ietf-tcpm-initcwnd-00 is a proposal to the IETF that recommends increasing TCP's initial congestion window to 10 mss or about 15KB. Leading up to this proposal were several large-scale live Internet experiments with an initial congestion window of 10 mss (IW10), where we showed that the average latency of HTTP responses improved by approximately 10%. This was accompanied by a slight increase in retransmission rate (0.5%), most of which is coming from applications opening multiple simultaneous connections. To understand the extreme worst case scenarios, and fairness issues (IW10 versus IW3), we further conducted controlled testbed experiments. We came away finding minimal negative impact even under low link bandwidths (dial-ups) and small buffers. These results are extremely encouraging to adopting IW10. However, an initial congestion window of 10 mss is useless unless a TCP receiver advertises an initial receive window of at least 10 mss. Fortunately, in the large-scale Internet experiments we found that most widely used operating systems advertised large initial receive windows of 64KB, allowing us to experiment with a wide range of initial congestion windows. Linux systems were among the few exceptions that advertised a small receive window of 6KB. The purpose of this patch is to fix this shortcoming. References: 1. A comprehensive list of all IW10 references to date. http://code.google.com/speed/protocols/tcpm-IW10.html 2. Paper describing results from large-scale Internet experiments with IW10. 
http://ccr.sigcomm.org/drupal/?q=node/621 3. Controlled testbed experiments under worst case scenarios and a fairness study. http://www.ietf.org/proceedings/79/slides/tcpm-0.pdf 4. Raw test data from testbed experiments (Linux senders/receivers) with initial congestion and receive windows of both 10 mss. http://research.csc.ncsu.edu/netsrv/?q=content/iw10 5. Internet-Draft. Increasing TCP's Initial Window. https://datatracker.ietf.org/doc/draft-ietf-tcpm-initcwnd/ Signed-off-by: Nandita Dukkipati Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 3 +++ net/ipv4/tcp_output.c | 11 ++++++++--- 2 files changed, 11 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index b4480300cadf..38509f047382 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -60,6 +60,9 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo); */ #define MAX_TCP_WINDOW 32767U +/* Offer an initial receive window of 10 mss. */ +#define TCP_DEFAULT_INIT_RCVWND 10 + /* Minimal accepted MSS. It is (60+60+8) - (20+20). */ #define TCP_MIN_MSS 88U diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 2d390669d406..dc7c096ddfef 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -228,10 +228,15 @@ void tcp_select_initial_window(int __space, __u32 mss, } } - /* Set initial window to value enough for senders, following RFC5681. */ + /* Set initial window to a value enough for senders starting with + * initial congestion window of TCP_DEFAULT_INIT_RCVWND. Place + * a limit on the initial window when mss is larger than 1460. 
+ */ if (mss > (1 << *rcv_wscale)) { - int init_cwnd = rfc3390_bytes_to_packets(mss); - + int init_cwnd = TCP_DEFAULT_INIT_RCVWND; + if (mss > 1460) + init_cwnd = + max_t(u32, (1460 * TCP_DEFAULT_INIT_RCVWND) / mss, 2); /* when initializing use the value from init_rcv_wnd * rather than the default from above */ -- cgit v1.2.3-59-g8ed1b From da521b2c4f046383bc8941604174bc0e8bffb430 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 21 Dec 2010 12:43:16 -0800 Subject: net: Fix range checks in tcf_valid_offset(). This function has three bugs: 1) The offset should be valid most of the time, this is just a sanity check, therefore we should use "likely" not "unlikely" 2) This is the only place where we can check for arithmetic overflow of the pointer plus the length. 3) The existing range checks are off by one, the valid range is skb->head to skb_tail_pointer(), inclusive. Based almost entirely upon a patch by Ralph Loader. Reported-by: Ralph Loader Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index dd3031aed9d5..9fcc680ab6b9 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -323,7 +323,9 @@ static inline unsigned char * tcf_get_base_ptr(struct sk_buff *skb, int layer) static inline int tcf_valid_offset(const struct sk_buff *skb, const unsigned char *ptr, const int len) { - return unlikely((ptr + len) < skb_tail_pointer(skb) && ptr > skb->head); + return likely((ptr + len) <= skb_tail_pointer(skb) && + ptr >= skb->head && + (ptr <= (ptr + len))); } #ifdef CONFIG_NET_CLS_IND -- cgit v1.2.3-59-g8ed1b From e1e5406854378dfada3f33c7192b012083a5b8e0 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 30 Nov 2010 08:58:45 +0100 Subject: mac80211: add throughput based LED blink trigger iwlwifi and other drivers like to blink their LED based on throughput. 
Implement this generically in mac80211, based on a throughput table the driver specifies. That way, drivers can set the blink frequencies depending on their desired behaviour and max throughput. All the drivers need to do is provide an LED class device, best with blink hardware offload. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 38 ++++++++++++++ net/mac80211/ieee80211_i.h | 13 +++++ net/mac80211/iface.c | 1 + net/mac80211/led.c | 121 +++++++++++++++++++++++++++++++++++++++++++++ net/mac80211/led.h | 44 +++++++++++++---- net/mac80211/rx.c | 3 ++ net/mac80211/tx.c | 3 ++ net/mac80211/util.c | 2 + 8 files changed, 216 insertions(+), 9 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 69ded1ee49ce..40a93d582c79 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1852,11 +1852,26 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, */ int ieee80211_register_hw(struct ieee80211_hw *hw); +/** + * struct ieee80211_tpt_blink - throughput blink description + * @throughput: throughput in Kbit/sec + * @blink_time: blink time in milliseconds + * (full cycle, ie. 
one off + one on period) + */ +struct ieee80211_tpt_blink { + int throughput; + int blink_time; +}; + #ifdef CONFIG_MAC80211_LEDS extern char *__ieee80211_get_tx_led_name(struct ieee80211_hw *hw); extern char *__ieee80211_get_rx_led_name(struct ieee80211_hw *hw); extern char *__ieee80211_get_assoc_led_name(struct ieee80211_hw *hw); extern char *__ieee80211_get_radio_led_name(struct ieee80211_hw *hw); +extern char *__ieee80211_create_tpt_led_trigger( + struct ieee80211_hw *hw, + const struct ieee80211_tpt_blink *blink_table, + unsigned int blink_table_len); #endif /** * ieee80211_get_tx_led_name - get name of TX LED @@ -1934,6 +1949,29 @@ static inline char *ieee80211_get_radio_led_name(struct ieee80211_hw *hw) #endif } +/** + * ieee80211_create_tpt_led_trigger - create throughput LED trigger + * @hw: the hardware to create the trigger for + * @blink_table: the blink table -- needs to be ordered by throughput + * @blink_table_len: size of the blink table + * + * This function returns %NULL (in case of error, or if no LED + * triggers are configured) or the name of the new trigger. + * This function must be called before ieee80211_register_hw(). 
+ */ +static inline char * +ieee80211_create_tpt_led_trigger(struct ieee80211_hw *hw, + const struct ieee80211_tpt_blink *blink_table, + unsigned int blink_table_len) +{ +#ifdef CONFIG_MAC80211_LEDS + return __ieee80211_create_tpt_led_trigger(hw, blink_table, + blink_table_len); +#else + return NULL; +#endif +} + /** * ieee80211_unregister_hw - Unregister a hardware device * diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index eadaa243a3da..523b90be8dc5 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -630,6 +631,17 @@ enum queue_stop_reason { IEEE80211_QUEUE_STOP_REASON_SKB_ADD, }; +struct tpt_led_trigger { + struct led_trigger trig; + char name[32]; + const struct ieee80211_tpt_blink *blink_table; + unsigned int blink_table_len; + struct timer_list timer; + bool running; + unsigned long prev_traffic; + unsigned long tx_bytes, rx_bytes; +}; + /** * mac80211 scan flags - currently active scan mode * @@ -838,6 +850,7 @@ struct ieee80211_local { #ifdef CONFIG_MAC80211_LEDS int tx_led_counter, rx_led_counter; struct led_trigger *tx_led, *rx_led, *assoc_led, *radio_led; + struct tpt_led_trigger *tpt_led_trigger; char tx_led_name[32], rx_led_name[32], assoc_led_name[32], radio_led_name[32]; #endif diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index f0f11bb794af..989df7065c21 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -220,6 +220,7 @@ static int ieee80211_do_open(struct net_device *dev, bool coming_up) /* we're brought up, everything changes */ hw_reconf_flags = ~0; ieee80211_led_radio(local, true); + ieee80211_start_tpt_led_trig(local); } /* diff --git a/net/mac80211/led.c b/net/mac80211/led.c index 740a1d4e0a9c..79b13090aed7 100644 --- a/net/mac80211/led.c +++ b/net/mac80211/led.c @@ -103,6 +103,13 @@ void ieee80211_led_init(struct ieee80211_local *local) local->radio_led = NULL; } } + + if 
(local->tpt_led_trigger) { + if (led_trigger_register(&local->tpt_led_trigger->trig)) { + kfree(local->tpt_led_trigger); + local->tpt_led_trigger = NULL; + } + } } void ieee80211_led_exit(struct ieee80211_local *local) @@ -123,6 +130,11 @@ void ieee80211_led_exit(struct ieee80211_local *local) led_trigger_unregister(local->rx_led); kfree(local->rx_led); } + + if (local->tpt_led_trigger) { + led_trigger_unregister(&local->tpt_led_trigger->trig); + kfree(local->tpt_led_trigger); + } } char *__ieee80211_get_radio_led_name(struct ieee80211_hw *hw) @@ -156,3 +168,112 @@ char *__ieee80211_get_rx_led_name(struct ieee80211_hw *hw) return local->rx_led_name; } EXPORT_SYMBOL(__ieee80211_get_rx_led_name); + +static unsigned long tpt_trig_traffic(struct ieee80211_local *local, + struct tpt_led_trigger *tpt_trig) +{ + unsigned long traffic, delta; + + traffic = tpt_trig->tx_bytes + tpt_trig->rx_bytes; + + delta = traffic - tpt_trig->prev_traffic; + tpt_trig->prev_traffic = traffic; + return DIV_ROUND_UP(delta, 1024 / 8); +} + +static void tpt_trig_timer(unsigned long data) +{ + struct ieee80211_local *local = (void *)data; + struct tpt_led_trigger *tpt_trig = local->tpt_led_trigger; + struct led_classdev *led_cdev; + unsigned long on, off, tpt; + int i; + + if (!tpt_trig->running) + return; + + mod_timer(&tpt_trig->timer, round_jiffies(jiffies + HZ)); + + tpt = tpt_trig_traffic(local, tpt_trig); + + /* default to just solid on */ + on = 1; + off = 0; + + for (i = tpt_trig->blink_table_len - 1; i >= 0; i--) { + if (tpt_trig->blink_table[i].throughput < 0 || + tpt > tpt_trig->blink_table[i].throughput) { + off = tpt_trig->blink_table[i].blink_time / 2; + on = tpt_trig->blink_table[i].blink_time - off; + break; + } + } + + read_lock(&tpt_trig->trig.leddev_list_lock); + list_for_each_entry(led_cdev, &tpt_trig->trig.led_cdevs, trig_list) + led_blink_set(led_cdev, &on, &off); + read_unlock(&tpt_trig->trig.leddev_list_lock); +} + +extern char *__ieee80211_create_tpt_led_trigger( + 
struct ieee80211_hw *hw, + const struct ieee80211_tpt_blink *blink_table, + unsigned int blink_table_len) +{ + struct ieee80211_local *local = hw_to_local(hw); + struct tpt_led_trigger *tpt_trig; + + if (WARN_ON(local->tpt_led_trigger)) + return NULL; + + tpt_trig = kzalloc(sizeof(struct tpt_led_trigger), GFP_KERNEL); + if (!tpt_trig) + return NULL; + + snprintf(tpt_trig->name, sizeof(tpt_trig->name), + "%stpt", wiphy_name(local->hw.wiphy)); + + tpt_trig->trig.name = tpt_trig->name; + + tpt_trig->blink_table = blink_table; + tpt_trig->blink_table_len = blink_table_len; + + setup_timer(&tpt_trig->timer, tpt_trig_timer, (unsigned long)local); + + local->tpt_led_trigger = tpt_trig; + + return tpt_trig->name; +} +EXPORT_SYMBOL(__ieee80211_create_tpt_led_trigger); + +void ieee80211_start_tpt_led_trig(struct ieee80211_local *local) +{ + struct tpt_led_trigger *tpt_trig = local->tpt_led_trigger; + + if (!tpt_trig) + return; + + /* reset traffic */ + tpt_trig_traffic(local, tpt_trig); + tpt_trig->running = true; + + tpt_trig_timer((unsigned long)local); + mod_timer(&tpt_trig->timer, round_jiffies(jiffies + HZ)); +} + +void ieee80211_stop_tpt_led_trig(struct ieee80211_local *local) +{ + struct tpt_led_trigger *tpt_trig = local->tpt_led_trigger; + struct led_classdev *led_cdev; + + if (!tpt_trig) + return; + + tpt_trig->running = false; + del_timer_sync(&tpt_trig->timer); + + read_lock(&tpt_trig->trig.leddev_list_lock); + list_for_each_entry(led_cdev, &tpt_trig->trig.led_cdevs, trig_list) + led_brightness_set(led_cdev, LED_OFF); + read_unlock(&tpt_trig->trig.leddev_list_lock); +} diff --git a/net/mac80211/led.h b/net/mac80211/led.h index 8320cbac61c6..6c215dc0fc96 100644 --- a/net/mac80211/led.h +++ b/net/mac80211/led.h @@ -12,15 +12,17 @@ #include "ieee80211_i.h" #ifdef CONFIG_MAC80211_LEDS -extern void ieee80211_led_rx(struct ieee80211_local *local); -extern void ieee80211_led_tx(struct ieee80211_local *local, int q); -extern void ieee80211_led_assoc(struct ieee80211_local 
*local, - bool associated); -extern void ieee80211_led_radio(struct ieee80211_local *local, - bool enabled); -extern void ieee80211_led_names(struct ieee80211_local *local); -extern void ieee80211_led_init(struct ieee80211_local *local); -extern void ieee80211_led_exit(struct ieee80211_local *local); +void ieee80211_led_rx(struct ieee80211_local *local); +void ieee80211_led_tx(struct ieee80211_local *local, int q); +void ieee80211_led_assoc(struct ieee80211_local *local, + bool associated); +void ieee80211_led_radio(struct ieee80211_local *local, + bool enabled); +void ieee80211_led_names(struct ieee80211_local *local); +void ieee80211_led_init(struct ieee80211_local *local); +void ieee80211_led_exit(struct ieee80211_local *local); +void ieee80211_start_tpt_led_trig(struct ieee80211_local *local); +void ieee80211_stop_tpt_led_trig(struct ieee80211_local *local); #else static inline void ieee80211_led_rx(struct ieee80211_local *local) { @@ -45,4 +47,28 @@ static inline void ieee80211_led_init(struct ieee80211_local *local) static inline void ieee80211_led_exit(struct ieee80211_local *local) { } +static inline void ieee80211_start_tpt_led_trig(struct ieee80211_local *local) +{ +} +static inline void ieee80211_stop_tpt_led_trig(struct ieee80211_local *local) +{ +} +#endif + +static inline void +ieee80211_tpt_led_trig_tx(struct ieee80211_local *local, __le16 fc, int bytes) +{ +#ifdef CONFIG_MAC80211_LEDS + if (local->tpt_led_trigger && ieee80211_is_data(fc)) + local->tpt_led_trigger->tx_bytes += bytes; #endif +} + +static inline void +ieee80211_tpt_led_trig_rx(struct ieee80211_local *local, __le16 fc, int bytes) +{ +#ifdef CONFIG_MAC80211_LEDS + if (local->tpt_led_trigger && ieee80211_is_data(fc)) + local->tpt_led_trigger->rx_bytes += bytes; +#endif +} diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 7c5d1b2ec453..01a3f2630eaf 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2928,6 +2928,9 @@ void ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff 
*skb) return; } + ieee80211_tpt_led_trig_rx(local, + ((struct ieee80211_hdr *)skb->data)->frame_control, + skb->len); __ieee80211_rx_handle_packet(hw, skb); rcu_read_unlock(); diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index d2b4b67a7b53..68c2fbd16ebb 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1297,6 +1297,7 @@ static int __ieee80211_tx(struct ieee80211_local *local, while (skb) { int q = skb_get_queue_mapping(skb); + __le16 fc; spin_lock_irqsave(&local->queue_stop_reason_lock, flags); ret = IEEE80211_TX_OK; @@ -1339,6 +1340,7 @@ static int __ieee80211_tx(struct ieee80211_local *local, else info->control.sta = NULL; + fc = ((struct ieee80211_hdr *)skb->data)->frame_control; ret = drv_tx(local, skb); if (WARN_ON(ret != NETDEV_TX_OK && skb->len != len)) { dev_kfree_skb(skb); @@ -1349,6 +1351,7 @@ static int __ieee80211_tx(struct ieee80211_local *local, return IEEE80211_TX_AGAIN; } + ieee80211_tpt_led_trig_tx(local, fc, len); *skbp = skb = next; ieee80211_led_tx(local, 1); fragm = true; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index e497476174ce..48306415a1cb 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1116,6 +1116,7 @@ u32 ieee80211_sta_get_rates(struct ieee80211_local *local, void ieee80211_stop_device(struct ieee80211_local *local) { ieee80211_led_radio(local, false); + ieee80211_stop_tpt_led_trig(local); cancel_work_sync(&local->reconfig_filter); @@ -1150,6 +1151,7 @@ int ieee80211_reconfig(struct ieee80211_local *local) } ieee80211_led_radio(local, true); + ieee80211_start_tpt_led_trig(local); } /* add interfaces */ -- cgit v1.2.3-59-g8ed1b From 67408c8c7b9daf28b50e33be3541334c07d15789 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 30 Nov 2010 08:59:23 +0100 Subject: mac80211: selective throughput LED trigger active The throughput LED trigger was always active when the radio was enabled. 
In most cases that's likely the desired behaviour, but iwlwifi requires it to be only active when one of the virtual interfaces is actually "connected" in some way. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 20 +++++++++++++++++--- net/mac80211/ieee80211_i.h | 3 ++- net/mac80211/iface.c | 16 +++++++++++++++- net/mac80211/led.c | 39 ++++++++++++++++++++++++++++++++++----- net/mac80211/led.h | 11 +++++------ net/mac80211/util.c | 5 +++-- 6 files changed, 76 insertions(+), 18 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 40a93d582c79..479c35e160e3 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1863,13 +1863,26 @@ struct ieee80211_tpt_blink { int blink_time; }; +/** + * enum ieee80211_tpt_led_trigger_flags - throughput trigger flags + * @IEEE80211_TPT_LEDTRIG_FL_RADIO: enable blinking with radio + * @IEEE80211_TPT_LEDTRIG_FL_WORK: enable blinking when working + * @IEEE80211_TPT_LEDTRIG_FL_CONNECTED: enable blinking when at least one + * interface is connected in some way, including being an AP + */ +enum ieee80211_tpt_led_trigger_flags { + IEEE80211_TPT_LEDTRIG_FL_RADIO = BIT(0), + IEEE80211_TPT_LEDTRIG_FL_WORK = BIT(1), + IEEE80211_TPT_LEDTRIG_FL_CONNECTED = BIT(2), +}; + #ifdef CONFIG_MAC80211_LEDS extern char *__ieee80211_get_tx_led_name(struct ieee80211_hw *hw); extern char *__ieee80211_get_rx_led_name(struct ieee80211_hw *hw); extern char *__ieee80211_get_assoc_led_name(struct ieee80211_hw *hw); extern char *__ieee80211_get_radio_led_name(struct ieee80211_hw *hw); extern char *__ieee80211_create_tpt_led_trigger( - struct ieee80211_hw *hw, + struct ieee80211_hw *hw, unsigned int flags, const struct ieee80211_tpt_blink *blink_table, unsigned int blink_table_len); #endif @@ -1952,6 +1965,7 @@ static inline char *ieee80211_get_radio_led_name(struct ieee80211_hw *hw) /** * ieee80211_create_tpt_led_trigger - create throughput LED trigger * 
@hw: the hardware to create the trigger for + * @flags: trigger flags, see &enum ieee80211_tpt_led_trigger_flags * @blink_table: the blink table -- needs to be ordered by throughput * @blink_table_len: size of the blink table * @@ -1960,12 +1974,12 @@ static inline char *ieee80211_get_radio_led_name(struct ieee80211_hw *hw) * This function must be called before ieee80211_register_hw(). */ static inline char * -ieee80211_create_tpt_led_trigger(struct ieee80211_hw *hw, +ieee80211_create_tpt_led_trigger(struct ieee80211_hw *hw, unsigned int flags, const struct ieee80211_tpt_blink *blink_table, unsigned int blink_table_len) { #ifdef CONFIG_MAC80211_LEDS - return __ieee80211_create_tpt_led_trigger(hw, blink_table, + return __ieee80211_create_tpt_led_trigger(hw, flags, blink_table, blink_table_len); #else return NULL; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 523b90be8dc5..3810c72ac062 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -637,9 +637,10 @@ struct tpt_led_trigger { const struct ieee80211_tpt_blink *blink_table; unsigned int blink_table_len; struct timer_list timer; - bool running; unsigned long prev_traffic; unsigned long tx_bytes, rx_bytes; + unsigned int active, want; + bool running; }; /** diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 989df7065c21..b6db237672ff 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -220,7 +220,8 @@ static int ieee80211_do_open(struct net_device *dev, bool coming_up) /* we're brought up, everything changes */ hw_reconf_flags = ~0; ieee80211_led_radio(local, true); - ieee80211_start_tpt_led_trig(local); + ieee80211_mod_tpt_led_trig(local, + IEEE80211_TPT_LEDTRIG_FL_RADIO, 0); } /* @@ -1265,6 +1266,7 @@ u32 __ieee80211_recalc_idle(struct ieee80211_local *local) int count = 0; bool working = false, scanning = false; struct ieee80211_work *wk; + unsigned int led_trig_start = 0, led_trig_stop = 0; #ifdef CONFIG_PROVE_LOCKING WARN_ON(debug_locks 
&& !lockdep_rtnl_is_held() && @@ -1314,6 +1316,18 @@ u32 __ieee80211_recalc_idle(struct ieee80211_local *local) ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_IDLE); } + if (working || scanning) + led_trig_start |= IEEE80211_TPT_LEDTRIG_FL_WORK; + else + led_trig_stop |= IEEE80211_TPT_LEDTRIG_FL_WORK; + + if (count) + led_trig_start |= IEEE80211_TPT_LEDTRIG_FL_CONNECTED; + else + led_trig_stop |= IEEE80211_TPT_LEDTRIG_FL_CONNECTED; + + ieee80211_mod_tpt_led_trig(local, led_trig_start, led_trig_stop); + if (working) return ieee80211_idle_off(local, "working"); if (scanning) diff --git a/net/mac80211/led.c b/net/mac80211/led.c index 79b13090aed7..4905eb8af572 100644 --- a/net/mac80211/led.c +++ b/net/mac80211/led.c @@ -216,7 +216,7 @@ static void tpt_trig_timer(unsigned long data) } extern char *__ieee80211_create_tpt_led_trigger( - struct ieee80211_hw *hw, + struct ieee80211_hw *hw, unsigned int flags, const struct ieee80211_tpt_blink *blink_table, unsigned int blink_table_len) { @@ -237,6 +237,7 @@ extern char *__ieee80211_create_tpt_led_trigger( tpt_trig->blink_table = blink_table; tpt_trig->blink_table_len = blink_table_len; + tpt_trig->want = flags; setup_timer(&tpt_trig->timer, tpt_trig_timer, (unsigned long)local); @@ -246,11 +247,11 @@ extern char *__ieee80211_create_tpt_led_trigger( } EXPORT_SYMBOL(__ieee80211_create_tpt_led_trigger); -void ieee80211_start_tpt_led_trig(struct ieee80211_local *local) +static void ieee80211_start_tpt_led_trig(struct ieee80211_local *local) { struct tpt_led_trigger *tpt_trig = local->tpt_led_trigger; - if (!tpt_trig) + if (tpt_trig->running) return; /* reset traffic */ @@ -261,12 +262,12 @@ void ieee80211_start_tpt_led_trig(struct ieee80211_local *local) mod_timer(&tpt_trig->timer, round_jiffies(jiffies + HZ)); } -void ieee80211_stop_tpt_led_trig(struct ieee80211_local *local) +static void ieee80211_stop_tpt_led_trig(struct ieee80211_local *local) { struct tpt_led_trigger *tpt_trig = local->tpt_led_trigger; struct 
led_classdev *led_cdev; - if (!tpt_trig) + if (!tpt_trig->running) return; tpt_trig->running = false; @@ -277,3 +278,31 @@ void ieee80211_stop_tpt_led_trig(struct ieee80211_local *local) led_brightness_set(led_cdev, LED_OFF); read_unlock(&tpt_trig->trig.leddev_list_lock); } + +void ieee80211_mod_tpt_led_trig(struct ieee80211_local *local, + unsigned int types_on, unsigned int types_off) +{ + struct tpt_led_trigger *tpt_trig = local->tpt_led_trigger; + bool allowed; + + WARN_ON(types_on & types_off); + + if (!tpt_trig) + return; + + tpt_trig->active &= ~types_off; + tpt_trig->active |= types_on; + + /* + * Regardless of wanted state, we shouldn't blink when + * the radio is disabled -- this can happen due to some + * code ordering issues with __ieee80211_recalc_idle() + * being called before the radio is started. + */ + allowed = tpt_trig->active & IEEE80211_TPT_LEDTRIG_FL_RADIO; + + if (!allowed || !(tpt_trig->active & tpt_trig->want)) + ieee80211_stop_tpt_led_trig(local); + else + ieee80211_start_tpt_led_trig(local); +} diff --git a/net/mac80211/led.h b/net/mac80211/led.h index 6c215dc0fc96..e0275d9befa8 100644 --- a/net/mac80211/led.h +++ b/net/mac80211/led.h @@ -21,8 +21,8 @@ void ieee80211_led_radio(struct ieee80211_local *local, void ieee80211_led_names(struct ieee80211_local *local); void ieee80211_led_init(struct ieee80211_local *local); void ieee80211_led_exit(struct ieee80211_local *local); -void ieee80211_start_tpt_led_trig(struct ieee80211_local *local); -void ieee80211_stop_tpt_led_trig(struct ieee80211_local *local); +void ieee80211_mod_tpt_led_trig(struct ieee80211_local *local, + unsigned int types_on, unsigned int types_off); #else static inline void ieee80211_led_rx(struct ieee80211_local *local) { @@ -47,10 +47,9 @@ static inline void ieee80211_led_init(struct ieee80211_local *local) static inline void ieee80211_led_exit(struct ieee80211_local *local) { } -static inline void ieee80211_start_tpt_led_trig(struct ieee80211_local *local) -{ -} -static 
inline void ieee80211_stop_tpt_led_trig(struct ieee80211_local *local) +static inline void ieee80211_mod_tpt_led_trig(struct ieee80211_local *local, + unsigned int types_on, + unsigned int types_off) { } #endif diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 48306415a1cb..cf68700abffa 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1116,7 +1116,7 @@ u32 ieee80211_sta_get_rates(struct ieee80211_local *local, void ieee80211_stop_device(struct ieee80211_local *local) { ieee80211_led_radio(local, false); - ieee80211_stop_tpt_led_trig(local); + ieee80211_mod_tpt_led_trig(local, 0, IEEE80211_TPT_LEDTRIG_FL_RADIO); cancel_work_sync(&local->reconfig_filter); @@ -1151,7 +1151,8 @@ int ieee80211_reconfig(struct ieee80211_local *local) } ieee80211_led_radio(local, true); - ieee80211_start_tpt_led_trig(local); + ieee80211_mod_tpt_led_trig(local, + IEEE80211_TPT_LEDTRIG_FL_RADIO, 0); } /* add interfaces */ -- cgit v1.2.3-59-g8ed1b From 02d981292ad3149e8e5f37cffbccedab1a8576d8 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Mon, 13 Dec 2010 21:07:04 +0200 Subject: Bluetooth: Add read_version management command This patch implements the initial read_version command that userspace will use before any other management interface operations. Signed-off-by: Johan Hedberg Acked-by: Marcel Holtmann Signed-off-by: Gustavo F. 
Padovan --- include/net/bluetooth/mgmt.h | 6 ++++++ net/bluetooth/mgmt.c | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+) (limited to 'include/net') diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h index 95974daa725e..d353d64bfffb 100644 --- a/include/net/bluetooth/mgmt.h +++ b/include/net/bluetooth/mgmt.h @@ -27,6 +27,12 @@ struct mgmt_hdr { } __packed; #define MGMT_HDR_SIZE 4 +#define MGMT_OP_READ_VERSION 0x0001 +struct mgmt_rp_read_version { + __u8 version; + __le16 revision; +} __packed; + #define MGMT_EV_CMD_COMPLETE 0x0001 struct mgmt_ev_cmd_complete { __le16 opcode; diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 7ea5489e7977..3e24c0bf18e7 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -29,6 +29,39 @@ #include #include +#define MGMT_VERSION 0 +#define MGMT_REVISION 1 + +static int read_version(struct sock *sk) +{ + struct sk_buff *skb; + struct mgmt_hdr *hdr; + struct mgmt_ev_cmd_complete *ev; + struct mgmt_rp_read_version *rp; + + BT_DBG("sock %p", sk); + + skb = alloc_skb(sizeof(*hdr) + sizeof(*ev) + sizeof(*rp), GFP_ATOMIC); + if (!skb) + return -ENOMEM; + + hdr = (void *) skb_put(skb, sizeof(*hdr)); + hdr->opcode = cpu_to_le16(MGMT_EV_CMD_COMPLETE); + hdr->len = cpu_to_le16(sizeof(*ev) + sizeof(*rp)); + + ev = (void *) skb_put(skb, sizeof(*ev)); + put_unaligned_le16(MGMT_OP_READ_VERSION, &ev->opcode); + + rp = (void *) skb_put(skb, sizeof(*rp)); + rp->version = MGMT_VERSION; + put_unaligned_le16(MGMT_REVISION, &rp->revision); + + if (sock_queue_rcv_skb(sk, skb) < 0) + kfree_skb(skb); + + return 0; +} + static int cmd_status(struct sock *sk, u16 cmd, u8 status) { struct sk_buff *skb; @@ -87,6 +120,9 @@ int mgmt_control(struct sock *sk, struct msghdr *msg, size_t msglen) } switch (opcode) { + case MGMT_OP_READ_VERSION: + err = read_version(sk); + break; default: BT_DBG("Unknown op %u", opcode); err = cmd_status(sk, opcode, 0x01); -- cgit v1.2.3-59-g8ed1b From 
faba42eb2a8cf905ed26d540c3c93d429e327224 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Mon, 13 Dec 2010 21:07:05 +0200 Subject: Bluetooth: Add read_index_list management command This patch implements the read_index_list command through which userspace can get a list of current adapter indices. Signed-off-by: Johan Hedberg Acked-by: Marcel Holtmann Signed-off-by: Gustavo F. Padovan --- include/net/bluetooth/mgmt.h | 6 +++++ net/bluetooth/mgmt.c | 53 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+) (limited to 'include/net') diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h index d353d64bfffb..c2b4c83ab175 100644 --- a/include/net/bluetooth/mgmt.h +++ b/include/net/bluetooth/mgmt.h @@ -33,6 +33,12 @@ struct mgmt_rp_read_version { __le16 revision; } __packed; +#define MGMT_OP_READ_INDEX_LIST 0x0003 +struct mgmt_rp_read_index_list { + __le16 num_controllers; + __le16 index[0]; +} __packed; + #define MGMT_EV_CMD_COMPLETE 0x0001 struct mgmt_ev_cmd_complete { __le16 opcode; diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 3e24c0bf18e7..7a8e321875c9 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -62,6 +62,56 @@ static int read_version(struct sock *sk) return 0; } +static int read_index_list(struct sock *sk) +{ + struct sk_buff *skb; + struct mgmt_hdr *hdr; + struct mgmt_ev_cmd_complete *ev; + struct mgmt_rp_read_index_list *rp; + struct list_head *p; + size_t body_len; + u16 count; + int i; + + BT_DBG("sock %p", sk); + + read_lock(&hci_dev_list_lock); + + count = 0; + list_for_each(p, &hci_dev_list) { + count++; + } + + body_len = sizeof(*ev) + sizeof(*rp) + (2 * count); + skb = alloc_skb(sizeof(*hdr) + body_len, GFP_ATOMIC); + if (!skb) + return -ENOMEM; + + hdr = (void *) skb_put(skb, sizeof(*hdr)); + hdr->opcode = cpu_to_le16(MGMT_EV_CMD_COMPLETE); + hdr->len = cpu_to_le16(body_len); + + ev = (void *) skb_put(skb, sizeof(*ev)); + put_unaligned_le16(MGMT_OP_READ_INDEX_LIST, 
&ev->opcode); + + rp = (void *) skb_put(skb, sizeof(*rp) + (2 * count)); + put_unaligned_le16(count, &rp->num_controllers); + + i = 0; + list_for_each(p, &hci_dev_list) { + struct hci_dev *d = list_entry(p, struct hci_dev, list); + put_unaligned_le16(d->id, &rp->index[i++]); + BT_DBG("Added hci%u", d->id); + } + + read_unlock(&hci_dev_list_lock); + + if (sock_queue_rcv_skb(sk, skb) < 0) + kfree_skb(skb); + + return 0; +} + static int cmd_status(struct sock *sk, u16 cmd, u8 status) { struct sk_buff *skb; @@ -123,6 +173,9 @@ int mgmt_control(struct sock *sk, struct msghdr *msg, size_t msglen) case MGMT_OP_READ_VERSION: err = read_version(sk); break; + case MGMT_OP_READ_INDEX_LIST: + err = read_index_list(sk); + break; default: BT_DBG("Unknown op %u", opcode); err = cmd_status(sk, opcode, 0x01); -- cgit v1.2.3-59-g8ed1b From f7b64e69c7c75c8e9f2d5e23edec8de1ce883bcc Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Mon, 13 Dec 2010 21:07:06 +0200 Subject: Bluetooth: Add read_info management command This patch implements the read_info command which is used to fetch basic info about an adapter. Signed-off-by: Johan Hedberg Acked-by: Marcel Holtmann Signed-off-by: Gustavo F. 
Padovan --- include/net/bluetooth/mgmt.h | 19 ++++++++++ net/bluetooth/mgmt.c | 90 ++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 101 insertions(+), 8 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h index c2b4c83ab175..70985aacc14b 100644 --- a/include/net/bluetooth/mgmt.h +++ b/include/net/bluetooth/mgmt.h @@ -39,6 +39,25 @@ struct mgmt_rp_read_index_list { __le16 index[0]; } __packed; +#define MGMT_OP_READ_INFO 0x0004 +struct mgmt_cp_read_info { + __le16 index; +} __packed; +struct mgmt_rp_read_info { + __le16 index; + __u8 type; + __u8 powered; + __u8 discoverable; + __u8 pairable; + __u8 sec_mode; + bdaddr_t bdaddr; + __u8 dev_class[3]; + __u8 features[8]; + __u16 manufacturer; + __u8 hci_ver; + __u16 hci_rev; +} __packed; + #define MGMT_EV_CMD_COMPLETE 0x0001 struct mgmt_ev_cmd_complete { __le16 opcode; diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 7a8e321875c9..d6c5a32de0b6 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -32,6 +32,33 @@ #define MGMT_VERSION 0 #define MGMT_REVISION 1 +static int cmd_status(struct sock *sk, u16 cmd, u8 status) +{ + struct sk_buff *skb; + struct mgmt_hdr *hdr; + struct mgmt_ev_cmd_status *ev; + + BT_DBG("sock %p", sk); + + skb = alloc_skb(sizeof(*hdr) + sizeof(*ev), GFP_ATOMIC); + if (!skb) + return -ENOMEM; + + hdr = (void *) skb_put(skb, sizeof(*hdr)); + + hdr->opcode = cpu_to_le16(MGMT_EV_CMD_STATUS); + hdr->len = cpu_to_le16(sizeof(*ev)); + + ev = (void *) skb_put(skb, sizeof(*ev)); + ev->status = status; + put_unaligned_le16(cmd, &ev->opcode); + + if (sock_queue_rcv_skb(sk, skb) < 0) + kfree_skb(skb); + + return 0; +} + static int read_version(struct sock *sk) { struct sk_buff *skb; @@ -112,26 +139,70 @@ static int read_index_list(struct sock *sk) return 0; } -static int cmd_status(struct sock *sk, u16 cmd, u8 status) +static int read_controller_info(struct sock *sk, unsigned char *data, u16 len) { struct sk_buff 
*skb; struct mgmt_hdr *hdr; - struct mgmt_ev_cmd_status *ev; + struct mgmt_ev_cmd_complete *ev; + struct mgmt_rp_read_info *rp; + struct mgmt_cp_read_info *cp; + struct hci_dev *hdev; + u16 dev_id; BT_DBG("sock %p", sk); - skb = alloc_skb(sizeof(*hdr) + sizeof(*ev), GFP_ATOMIC); + if (len != 2) + return cmd_status(sk, MGMT_OP_READ_INFO, EINVAL); + + skb = alloc_skb(sizeof(*hdr) + sizeof(*ev) + sizeof(*rp), GFP_ATOMIC); if (!skb) return -ENOMEM; hdr = (void *) skb_put(skb, sizeof(*hdr)); - - hdr->opcode = cpu_to_le16(MGMT_EV_CMD_STATUS); - hdr->len = cpu_to_le16(sizeof(*ev)); + hdr->opcode = cpu_to_le16(MGMT_EV_CMD_COMPLETE); + hdr->len = cpu_to_le16(sizeof(*ev) + sizeof(*rp)); ev = (void *) skb_put(skb, sizeof(*ev)); - ev->status = status; - put_unaligned_le16(cmd, &ev->opcode); + put_unaligned_le16(MGMT_OP_READ_INFO, &ev->opcode); + + rp = (void *) skb_put(skb, sizeof(*rp)); + + cp = (void *) data; + dev_id = get_unaligned_le16(&cp->index); + + BT_DBG("request for hci%u", dev_id); + + hdev = hci_dev_get(dev_id); + if (!hdev) { + kfree_skb(skb); + return cmd_status(sk, MGMT_OP_READ_INFO, ENODEV); + } + + hci_dev_lock_bh(hdev); + + put_unaligned_le16(hdev->id, &rp->index); + rp->type = hdev->dev_type; + + rp->powered = test_bit(HCI_UP, &hdev->flags); + rp->discoverable = test_bit(HCI_ISCAN, &hdev->flags); + rp->pairable = test_bit(HCI_PSCAN, &hdev->flags); + + if (test_bit(HCI_AUTH, &hdev->flags)) + rp->sec_mode = 3; + else if (hdev->ssp_mode > 0) + rp->sec_mode = 4; + else + rp->sec_mode = 2; + + bacpy(&rp->bdaddr, &hdev->bdaddr); + memcpy(rp->features, hdev->features, 8); + memcpy(rp->dev_class, hdev->dev_class, 3); + put_unaligned_le16(hdev->manufacturer, &rp->manufacturer); + rp->hci_ver = hdev->hci_ver; + put_unaligned_le16(hdev->hci_rev, &rp->hci_rev); + + hci_dev_unlock_bh(hdev); + hci_dev_put(hdev); if (sock_queue_rcv_skb(sk, skb) < 0) kfree_skb(skb); @@ -176,6 +247,9 @@ int mgmt_control(struct sock *sk, struct msghdr *msg, size_t msglen) case 
MGMT_OP_READ_INDEX_LIST: err = read_index_list(sk); break; + case MGMT_OP_READ_INFO: + err = read_controller_info(sk, buf + sizeof(*hdr), len); + break; default: BT_DBG("Unknown op %u", opcode); err = cmd_status(sk, opcode, 0x01); -- cgit v1.2.3-59-g8ed1b From c71e97bfaadfa727669fcfcf12301744fd169091 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Mon, 13 Dec 2010 21:07:07 +0200 Subject: Bluetooth: Add management events for controller addition & removal This patch adds Bluetooth Management interface events for controller addition and removal. The events correspond to the existing HCI_DEV_REG and HCI_DEV_UNREG stack internal events. Signed-off-by: Johan Hedberg Acked-by: Marcel Holtmann Signed-off-by: Gustavo F. Padovan --- include/net/bluetooth/hci_core.h | 2 ++ include/net/bluetooth/mgmt.h | 10 ++++++++++ net/bluetooth/hci_core.c | 2 ++ net/bluetooth/mgmt.c | 41 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 55 insertions(+) (limited to 'include/net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 1992fac7e921..3786ee83604e 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -662,6 +662,8 @@ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb); /* Management interface */ int mgmt_control(struct sock *sk, struct msghdr *msg, size_t len); +int mgmt_index_added(u16 index); +int mgmt_index_removed(u16 index); /* HCI info for socket */ #define hci_pi(sk) ((struct hci_pinfo *) sk) diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h index 70985aacc14b..ca29c1367ffd 100644 --- a/include/net/bluetooth/mgmt.h +++ b/include/net/bluetooth/mgmt.h @@ -75,3 +75,13 @@ struct mgmt_ev_controller_error { __le16 index; __u8 error_code; } __packed; + +#define MGMT_EV_INDEX_ADDED 0x0004 +struct mgmt_ev_index_added { + __le16 index; +} __packed; + +#define MGMT_EV_INDEX_REMOVED 0x0005 +struct mgmt_ev_index_removed { + __le16 index; +} __packed; diff --git 
a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 51c61f75a797..1a4ec97d5ac4 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -960,6 +960,7 @@ int hci_register_dev(struct hci_dev *hdev) } } + mgmt_index_added(hdev->id); hci_notify(hdev, HCI_DEV_REG); return id; @@ -989,6 +990,7 @@ int hci_unregister_dev(struct hci_dev *hdev) for (i = 0; i < NUM_REASSEMBLY; i++) kfree_skb(hdev->reassembly[i]); + mgmt_index_removed(hdev->id); hci_notify(hdev, HCI_DEV_UNREG); if (hdev->rfkill) { diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index d6c5a32de0b6..f827fd908380 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -265,3 +265,44 @@ done: kfree(buf); return err; } + +static int mgmt_event(u16 event, void *data, u16 data_len) +{ + struct sk_buff *skb; + struct mgmt_hdr *hdr; + + skb = alloc_skb(sizeof(*hdr) + data_len, GFP_ATOMIC); + if (!skb) + return -ENOMEM; + + bt_cb(skb)->channel = HCI_CHANNEL_CONTROL; + + hdr = (void *) skb_put(skb, sizeof(*hdr)); + hdr->opcode = cpu_to_le16(event); + hdr->len = cpu_to_le16(data_len); + + memcpy(skb_put(skb, data_len), data, data_len); + + hci_send_to_sock(NULL, skb); + kfree_skb(skb); + + return 0; +} + +int mgmt_index_added(u16 index) +{ + struct mgmt_ev_index_added ev; + + put_unaligned_le16(index, &ev.index); + + return mgmt_event(MGMT_EV_INDEX_ADDED, &ev, sizeof(ev)); +} + +int mgmt_index_removed(u16 index) +{ + struct mgmt_ev_index_added ev; + + put_unaligned_le16(index, &ev.index); + + return mgmt_event(MGMT_EV_INDEX_REMOVED, &ev, sizeof(ev)); +} -- cgit v1.2.3-59-g8ed1b From 23bb57633df97ede067ea26f3cdc8a7ba2cd8109 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Tue, 21 Dec 2010 23:01:27 +0200 Subject: Bluetooth: Fix __hci_request synchronization for hci_open_dev The initialization function used by hci_open_dev (hci_init_req) sends many different HCI commands. The __hci_request function should only return when all of these commands have completed (or a timeout occurs). 
Several of these commands cause hci_req_complete to be called which causes __hci_request to return prematurely. This patch fixes the issue by adding a new hdev->req_last_cmd variable which is set during the initialization procedure. The hci_req_complete function will no longer mark the request as complete until the command matching hdev->req_last_cmd completes. Signed-off-by: Johan Hedberg Acked-by: Marcel Holtmann Signed-off-by: Gustavo F. Padovan --- include/net/bluetooth/hci_core.h | 3 ++- net/bluetooth/hci_core.c | 15 ++++++++++++--- net/bluetooth/hci_event.c | 33 +++++++++++++++++++++++---------- 3 files changed, 37 insertions(+), 14 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 3786ee83604e..a29feb01854e 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -129,6 +129,7 @@ struct hci_dev { wait_queue_head_t req_wait_q; __u32 req_status; __u32 req_result; + __u16 req_last_cmd; struct inquiry_cache inq_cache; struct hci_conn_hash conn_hash; @@ -693,6 +694,6 @@ struct hci_sec_filter { #define hci_req_lock(d) mutex_lock(&d->req_lock) #define hci_req_unlock(d) mutex_unlock(&d->req_lock) -void hci_req_complete(struct hci_dev *hdev, int result); +void hci_req_complete(struct hci_dev *hdev, __u16 cmd, int result); #endif /* __HCI_CORE_H */ diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 1a4ec97d5ac4..8b602d881fd7 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -91,9 +91,16 @@ static void hci_notify(struct hci_dev *hdev, int event) /* ---- HCI requests ---- */ -void hci_req_complete(struct hci_dev *hdev, int result) +void hci_req_complete(struct hci_dev *hdev, __u16 cmd, int result) { - BT_DBG("%s result 0x%2.2x", hdev->name, result); + BT_DBG("%s command 0x%04x result 0x%2.2x", hdev->name, cmd, result); + + /* If the request has set req_last_cmd (typical for multi-HCI + * command requests) check if the 
completed command matches + * this, and if not just return. Single HCI command requests + * typically leave req_last_cmd as 0 */ + if (hdev->req_last_cmd && cmd != hdev->req_last_cmd) + return; if (hdev->req_status == HCI_REQ_PEND) { hdev->req_result = result; @@ -149,7 +156,7 @@ static int __hci_request(struct hci_dev *hdev, void (*req)(struct hci_dev *hdev, break; } - hdev->req_status = hdev->req_result = 0; + hdev->req_last_cmd = hdev->req_status = hdev->req_result = 0; BT_DBG("%s end: err %d", hdev->name, err); @@ -252,6 +259,8 @@ static void hci_init_req(struct hci_dev *hdev, unsigned long opt) /* Connection accept timeout ~20 secs */ param = cpu_to_le16(0x7d00); hci_send_cmd(hdev, HCI_OP_WRITE_CA_TIMEOUT, 2, &param); + + hdev->req_last_cmd = HCI_OP_WRITE_CA_TIMEOUT; } static void hci_scan_req(struct hci_dev *hdev, unsigned long opt) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 8923b36a67a2..38100170d380 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -58,7 +58,7 @@ static void hci_cc_inquiry_cancel(struct hci_dev *hdev, struct sk_buff *skb) clear_bit(HCI_INQUIRY, &hdev->flags); - hci_req_complete(hdev, status); + hci_req_complete(hdev, HCI_OP_INQUIRY_CANCEL, status); hci_conn_check_pending(hdev); } @@ -174,7 +174,7 @@ static void hci_cc_write_def_link_policy(struct hci_dev *hdev, struct sk_buff *s if (!status) hdev->link_policy = get_unaligned_le16(sent); - hci_req_complete(hdev, status); + hci_req_complete(hdev, HCI_OP_WRITE_DEF_LINK_POLICY, status); } static void hci_cc_reset(struct hci_dev *hdev, struct sk_buff *skb) @@ -183,7 +183,7 @@ static void hci_cc_reset(struct hci_dev *hdev, struct sk_buff *skb) BT_DBG("%s status 0x%x", hdev->name, status); - hci_req_complete(hdev, status); + hci_req_complete(hdev, HCI_OP_RESET, status); } static void hci_cc_write_local_name(struct hci_dev *hdev, struct sk_buff *skb) @@ -235,7 +235,7 @@ static void hci_cc_write_auth_enable(struct hci_dev *hdev, struct sk_buff *skb)
clear_bit(HCI_AUTH, &hdev->flags); } - hci_req_complete(hdev, status); + hci_req_complete(hdev, HCI_OP_WRITE_AUTH_ENABLE, status); } static void hci_cc_write_encrypt_mode(struct hci_dev *hdev, struct sk_buff *skb) @@ -258,7 +258,7 @@ static void hci_cc_write_encrypt_mode(struct hci_dev *hdev, struct sk_buff *skb) clear_bit(HCI_ENCRYPT, &hdev->flags); } - hci_req_complete(hdev, status); + hci_req_complete(hdev, HCI_OP_WRITE_ENCRYPT_MODE, status); } static void hci_cc_write_scan_enable(struct hci_dev *hdev, struct sk_buff *skb) @@ -285,7 +285,7 @@ static void hci_cc_write_scan_enable(struct hci_dev *hdev, struct sk_buff *skb) set_bit(HCI_PSCAN, &hdev->flags); } - hci_req_complete(hdev, status); + hci_req_complete(hdev, HCI_OP_WRITE_SCAN_ENABLE, status); } static void hci_cc_read_class_of_dev(struct hci_dev *hdev, struct sk_buff *skb) @@ -383,7 +383,7 @@ static void hci_cc_host_buffer_size(struct hci_dev *hdev, struct sk_buff *skb) BT_DBG("%s status 0x%x", hdev->name, status); - hci_req_complete(hdev, status); + hci_req_complete(hdev, HCI_OP_HOST_BUFFER_SIZE, status); } static void hci_cc_read_ssp_mode(struct hci_dev *hdev, struct sk_buff *skb) @@ -536,7 +536,16 @@ static void hci_cc_read_bd_addr(struct hci_dev *hdev, struct sk_buff *skb) if (!rp->status) bacpy(&hdev->bdaddr, &rp->bdaddr); - hci_req_complete(hdev, rp->status); + hci_req_complete(hdev, HCI_OP_READ_BD_ADDR, rp->status); +} + +static void hci_cc_write_ca_timeout(struct hci_dev *hdev, struct sk_buff *skb) +{ + __u8 status = *((__u8 *) skb->data); + + BT_DBG("%s status 0x%x", hdev->name, status); + + hci_req_complete(hdev, HCI_OP_WRITE_CA_TIMEOUT, status); } static inline void hci_cs_inquiry(struct hci_dev *hdev, __u8 status) @@ -544,7 +553,7 @@ static inline void hci_cs_inquiry(struct hci_dev *hdev, __u8 status) BT_DBG("%s status 0x%x", hdev->name, status); if (status) { - hci_req_complete(hdev, status); + hci_req_complete(hdev, HCI_OP_INQUIRY, status); hci_conn_check_pending(hdev); } else @@ -871,7 
+880,7 @@ static inline void hci_inquiry_complete_evt(struct hci_dev *hdev, struct sk_buff clear_bit(HCI_INQUIRY, &hdev->flags); - hci_req_complete(hdev, status); + hci_req_complete(hdev, HCI_OP_INQUIRY, status); hci_conn_check_pending(hdev); } @@ -1379,6 +1388,10 @@ static inline void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *sk hci_cc_read_bd_addr(hdev, skb); break; + case HCI_OP_WRITE_CA_TIMEOUT: + hci_cc_write_ca_timeout(hdev, skb); + break; + default: BT_DBG("%s opcode 0x%x", hdev->name, opcode); break; -- cgit v1.2.3-59-g8ed1b From e058464990c2ef1f3ecd6b83a154913c3c06f02a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 23 Dec 2010 12:03:57 -0800 Subject: Revert "ipv4: Allow configuring subnets as local addresses" This reverts commit 4465b469008bc03b98a1b8df4e9ae501b6c69d4b. Conflicts: net/ipv4/fib_frontend.c As reported by Ben Greear, this causes regressions: > Change 4465b469008bc03b98a1b8df4e9ae501b6c69d4b caused rules > to stop matching the input device properly because the > FLOWI_FLAG_MATCH_ANY_IIF is always defined in ip_dev_find(). > > This breaks rules such as: > > ip rule add pref 512 lookup local > ip rule del pref 0 lookup local > ip link set eth2 up > ip -4 addr add 172.16.0.102/24 broadcast 172.16.0.255 dev eth2 > ip rule add to 172.16.0.102 iif eth2 lookup local pref 10 > ip rule add iif eth2 lookup 10001 pref 20 > ip route add 172.16.0.0/24 dev eth2 table 10001 > ip route add unreachable 0/0 table 10001 > > If you had a second interface 'eth0' that was on a different > subnet, pinging a system on that interface would fail: > > [root@ct503-60 ~]# ping 192.168.100.1 > connect: Invalid argument Reported-by: Ben Greear Signed-off-by: David S. 
Miller --- include/net/flow.h | 1 - net/core/fib_rules.c | 3 +-- net/ipv4/fib_frontend.c | 10 ++++++++-- 3 files changed, 9 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/flow.h b/include/net/flow.h index 0ac3fb5e0973..bb08692a20b0 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -49,7 +49,6 @@ struct flowi { __u8 proto; __u8 flags; #define FLOWI_FLAG_ANYSRC 0x01 -#define FLOWI_FLAG_MATCH_ANY_IIF 0x02 union { struct { __be16 sport; diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 82a4369ae150..a20e5d3bbfa0 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -181,8 +181,7 @@ static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops, { int ret = 0; - if (rule->iifindex && (rule->iifindex != fl->iif) && - !(fl->flags & FLOWI_FLAG_MATCH_ANY_IIF)) + if (rule->iifindex && (rule->iifindex != fl->iif)) goto out; if (rule->oifindex && (rule->oifindex != fl->oif)) diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index eb6f69a8f27a..c19c1f739fba 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -163,13 +163,19 @@ struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref) .daddr = addr } }, - .flags = FLOWI_FLAG_MATCH_ANY_IIF }; struct fib_result res = { 0 }; struct net_device *dev = NULL; + struct fib_table *local_table; + +#ifdef CONFIG_IP_MULTIPLE_TABLES + res.r = NULL; +#endif rcu_read_lock(); - if (fib_lookup(net, &fl, &res)) { + local_table = fib_get_table(net, RT_TABLE_LOCAL); + if (!local_table || + fib_table_lookup(local_table, &fl, &res, FIB_LOOKUP_NOREF)) { rcu_read_unlock(); return NULL; } -- cgit v1.2.3-59-g8ed1b From 3e29027af43728c2a91fe3f735ab2822edaf54a8 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Thu, 30 Dec 2010 09:25:46 +0000 Subject: dcbnl: add support for ieee8021Qaz attributes The IEEE8021Qaz is the IEEE standard version of CEE. 
The standard has had enough significant changes from the CEE version that many of the CEE attributes have no meaning in the new spec or do not easily map to IEEE standards. Rather than attempt to create a complicated mapping between CEE and IEEE standards this patch adds a nested IEEE attribute to the list of DCB attributes. The policy is, [DCB_ATTR_IFNAME] [DCB_ATTR_STATE] ... [DCB_ATTR_IEEE] [DCB_ATTR_IEEE_ETS] [DCB_ATTR_IEEE_PFC] [DCB_ATTR_IEEE_APP_TABLE] [DCB_ATTR_IEEE_APP] ... The following dcbnl_rtnl_ops routines were added to handle the IEEE standard, int (*ieee_getets) (struct net_device *, struct ieee_ets *); int (*ieee_setets) (struct net_device *, struct ieee_ets *); int (*ieee_getpfc) (struct net_device *, struct ieee_pfc *); int (*ieee_setpfc) (struct net_device *, struct ieee_pfc *); int (*ieee_getapp) (struct net_device *, struct dcb_app *); int (*ieee_setapp) (struct net_device *, struct dcb_app *); Signed-off-by: John Fastabend Signed-off-by: David S. Miller --- include/linux/dcbnl.h | 106 ++++++++++++++++++++++++++++++++++++++++ include/net/dcbnl.h | 11 +++++ net/dcb/dcbnl.c | 131 ++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 248 insertions(+) (limited to 'include/net') diff --git a/include/linux/dcbnl.h b/include/linux/dcbnl.h index 8723491f7dfd..287b5618e296 100644 --- a/include/linux/dcbnl.h +++ b/include/linux/dcbnl.h @@ -22,6 +22,87 @@ #include +/* IEEE 802.1Qaz std supported values */ +#define IEEE_8021QAZ_MAX_TCS 8 + +/* This structure contains the IEEE 802.1Qaz ETS managed object + * + * @willing: willing bit in ETS configuration TLV + * @ets_cap: indicates supported capacity of ets feature + * @cbs: credit based shaper ets algorithm supported + * @tc_tx_bw: tc tx bandwidth indexed by traffic class + * @tc_rx_bw: tc rx bandwidth indexed by traffic class + * @tc_tsa: TSA Assignment table, indexed by traffic class + * @prio_tc: priority assignment table mapping 8021Qp to traffic class + * @tc_reco_bw: recommended tc
bandwidth indexed by traffic class for TLV + * @tc_reco_tsa: recommended tc bandwidth indexed by traffic class for TLV + * @reco_prio_tc: recommended tc tx bandwidth indexed by traffic class for TLV + * + * Recommended values are used to set fields in the ETS recommendation TLV + * with hardware offloaded LLDP. + * + * ---- + * TSA Assignment 8 bit identifiers + * 0 strict priority + * 1 credit-based shaper + * 2 enhanced transmission selection + * 3-254 reserved + * 255 vendor specific + */ +struct ieee_ets { + __u8 willing; + __u8 ets_cap; + __u8 cbs; + __u8 tc_tx_bw[IEEE_8021QAZ_MAX_TCS]; + __u8 tc_rx_bw[IEEE_8021QAZ_MAX_TCS]; + __u8 tc_tsa[IEEE_8021QAZ_MAX_TCS]; + __u8 prio_tc[IEEE_8021QAZ_MAX_TCS]; + __u8 tc_reco_bw[IEEE_8021QAZ_MAX_TCS]; + __u8 tc_reco_tsa[IEEE_8021QAZ_MAX_TCS]; + __u8 reco_prio_tc[IEEE_8021QAZ_MAX_TCS]; +}; + +/* This structure contains the IEEE 802.1Qaz PFC managed object + * + * @pfc_cap: Indicates the number of traffic classes on the local device + * that may simultaneously have PFC enabled. + * @pfc_en: bitmap indicating pfc enabled traffic classes + * @mbc: enable macsec bypass capability + * @delay: the allowance made for a round-trip propagation delay of the + * link in bits. 
+ * @requests: count of the sent pfc frames + * @indications: count of the received pfc frames + */ +struct ieee_pfc { + __u8 pfc_cap; + __u8 pfc_en; + __u8 mbc; + __u16 delay; + __u64 requests[IEEE_8021QAZ_MAX_TCS]; + __u64 indications[IEEE_8021QAZ_MAX_TCS]; +}; + +/* This structure contains the IEEE 802.1Qaz APP managed object + * + * @selector: protocol identifier type + * @protocol: protocol of type indicated + * @priority: 3-bit unsigned integer indicating priority + * + * ---- + * Selector field values + * 0 Reserved + * 1 Ethertype + * 2 Well known port number over TCP or SCTP + * 3 Well known port number over UDP or DCCP + * 4 Well known port number over TCP, SCTP, UDP, or DCCP + * 5-7 Reserved + */ +struct dcb_app { + __u8 selector; + __u32 protocol; + __u8 priority; +}; + struct dcbmsg { __u8 dcb_family; __u8 cmd; @@ -50,6 +131,8 @@ struct dcbmsg { * @DCB_CMD_SBCN: get backward congestion notification configration. * @DCB_CMD_GAPP: get application protocol configuration * @DCB_CMD_SAPP: set application protocol configuration + * @DCB_CMD_IEEE_SET: set IEEE 802.1Qaz configuration + * @DCB_CMD_IEEE_GET: get IEEE 802.1Qaz configuration */ enum dcbnl_commands { DCB_CMD_UNDEFINED, @@ -83,6 +166,9 @@ enum dcbnl_commands { DCB_CMD_GAPP, DCB_CMD_SAPP, + DCB_CMD_IEEE_SET, + DCB_CMD_IEEE_GET, + __DCB_CMD_ENUM_MAX, DCB_CMD_MAX = __DCB_CMD_ENUM_MAX - 1, }; @@ -102,6 +188,7 @@ enum dcbnl_commands { * @DCB_ATTR_CAP: DCB capabilities of the device (NLA_NESTED) * @DCB_ATTR_NUMTCS: number of traffic classes supported (NLA_NESTED) * @DCB_ATTR_BCN: backward congestion notification configuration (NLA_NESTED) + * @DCB_ATTR_IEEE: IEEE 802.1Qaz supported attributes (NLA_NESTED) */ enum dcbnl_attrs { DCB_ATTR_UNDEFINED, @@ -119,10 +206,29 @@ enum dcbnl_attrs { DCB_ATTR_BCN, DCB_ATTR_APP, + /* IEEE std attributes */ + DCB_ATTR_IEEE, + __DCB_ATTR_ENUM_MAX, DCB_ATTR_MAX = __DCB_ATTR_ENUM_MAX - 1, }; +enum ieee_attrs { + DCB_ATTR_IEEE_UNSPEC, + DCB_ATTR_IEEE_ETS, + 
DCB_ATTR_IEEE_PFC, + DCB_ATTR_IEEE_APP_TABLE, + __DCB_ATTR_IEEE_MAX +}; +#define DCB_ATTR_IEEE_MAX (__DCB_ATTR_IEEE_MAX - 1) + +enum ieee_attrs_app { + DCB_ATTR_IEEE_APP_UNSPEC, + DCB_ATTR_IEEE_APP, + __DCB_ATTR_IEEE_APP_MAX +}; +#define DCB_ATTR_IEEE_APP_MAX (__DCB_ATTR_IEEE_APP_MAX - 1) + /** * enum dcbnl_pfc_attrs - DCB Priority Flow Control user priority nested attrs * diff --git a/include/net/dcbnl.h b/include/net/dcbnl.h index b36ac7e0914d..e2d841e963b3 100644 --- a/include/net/dcbnl.h +++ b/include/net/dcbnl.h @@ -20,11 +20,22 @@ #ifndef __NET_DCBNL_H__ #define __NET_DCBNL_H__ +#include + /* * Ops struct for the netlink callbacks. Used by DCB-enabled drivers through * the netdevice struct. */ struct dcbnl_rtnl_ops { + /* IEEE 802.1Qaz std */ + int (*ieee_getets) (struct net_device *, struct ieee_ets *); + int (*ieee_setets) (struct net_device *, struct ieee_ets *); + int (*ieee_getpfc) (struct net_device *, struct ieee_pfc *); + int (*ieee_setpfc) (struct net_device *, struct ieee_pfc *); + int (*ieee_getapp) (struct net_device *, struct dcb_app *); + int (*ieee_setapp) (struct net_device *, struct dcb_app *); + + /* CEE std */ u8 (*getstate)(struct net_device *); u8 (*setstate)(struct net_device *, u8); void (*getpermhwaddr)(struct net_device *, u8 *); diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index 19ac2b985485..2ff908498924 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -66,6 +66,7 @@ static const struct nla_policy dcbnl_rtnl_policy[DCB_ATTR_MAX + 1] = { [DCB_ATTR_PFC_STATE] = {.type = NLA_U8}, [DCB_ATTR_BCN] = {.type = NLA_NESTED}, [DCB_ATTR_APP] = {.type = NLA_NESTED}, + [DCB_ATTR_IEEE] = {.type = NLA_NESTED}, }; /* DCB priority flow control to User Priority nested attributes */ @@ -167,6 +168,17 @@ static const struct nla_policy dcbnl_app_nest[DCB_APP_ATTR_MAX + 1] = { [DCB_APP_ATTR_PRIORITY] = {.type = NLA_U8}, }; +/* IEEE 802.1Qaz nested attributes. 
*/ +static const struct nla_policy dcbnl_ieee_policy[DCB_ATTR_IEEE_MAX + 1] = { + [DCB_ATTR_IEEE_ETS] = {.len = sizeof(struct ieee_ets)}, + [DCB_ATTR_IEEE_PFC] = {.len = sizeof(struct ieee_pfc)}, + [DCB_ATTR_IEEE_APP_TABLE] = {.type = NLA_NESTED}, +}; + +static const struct nla_policy dcbnl_ieee_app[DCB_ATTR_IEEE_APP_MAX + 1] = { + [DCB_ATTR_IEEE_APP] = {.len = sizeof(struct dcb_app)}, +}; + /* standard netlink reply call */ static int dcbnl_reply(u8 value, u8 event, u8 cmd, u8 attr, u32 pid, u32 seq, u16 flags) @@ -1118,6 +1130,117 @@ err: return ret; } +/* Handle IEEE 802.1Qaz SET commands. If any requested operation can not + * be completed the entire msg is aborted and error value is returned. + * No attempt is made to reconcile the case where only part of the + * cmd can be completed. + */ +static int dcbnl_ieee_set(struct net_device *netdev, struct nlattr **tb, + u32 pid, u32 seq, u16 flags) +{ + const struct dcbnl_rtnl_ops *ops = netdev->dcbnl_ops; + struct nlattr *ieee[DCB_ATTR_IEEE_MAX + 1]; + int err = -EOPNOTSUPP; + + if (!ops) + goto err; + + err = nla_parse_nested(ieee, DCB_ATTR_IEEE_MAX, + tb[DCB_ATTR_IEEE], dcbnl_ieee_policy); + if (err) + goto err; + + if (ieee[DCB_ATTR_IEEE_ETS] && ops->ieee_setets) { + struct ieee_ets *ets = nla_data(ieee[DCB_ATTR_IEEE_ETS]); + err = ops->ieee_setets(netdev, ets); + if (err) + goto err; + } + + if (ieee[DCB_ATTR_IEEE_PFC] && ops->ieee_setets) { + struct ieee_pfc *pfc = nla_data(ieee[DCB_ATTR_IEEE_PFC]); + err = ops->ieee_setpfc(netdev, pfc); + if (err) + goto err; + } + + if (ieee[DCB_ATTR_IEEE_APP_TABLE] && ops->ieee_setapp) { + struct nlattr *attr; + int rem; + + nla_for_each_nested(attr, ieee[DCB_ATTR_IEEE_APP_TABLE], rem) { + struct dcb_app *app_data; + if (nla_type(attr) != DCB_ATTR_IEEE_APP) + continue; + app_data = nla_data(attr); + err = ops->ieee_setapp(netdev, app_data); + if (err) + goto err; + } + } + +err: + dcbnl_reply(err, RTM_SETDCB, DCB_CMD_IEEE_SET, DCB_ATTR_IEEE, + pid, seq, flags); + return 
err; +} + + +/* Handle IEEE 802.1Qaz GET commands. */ +static int dcbnl_ieee_get(struct net_device *netdev, struct nlattr **tb, + u32 pid, u32 seq, u16 flags) +{ + struct sk_buff *skb; + struct nlmsghdr *nlh; + struct dcbmsg *dcb; + struct nlattr *ieee; + const struct dcbnl_rtnl_ops *ops = netdev->dcbnl_ops; + int err; + + if (!ops) + return -EOPNOTSUPP; + + skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!skb) + return -ENOBUFS; + + nlh = NLMSG_NEW(skb, pid, seq, RTM_GETDCB, sizeof(*dcb), flags); + + dcb = NLMSG_DATA(nlh); + dcb->dcb_family = AF_UNSPEC; + dcb->cmd = DCB_CMD_IEEE_GET; + + NLA_PUT_STRING(skb, DCB_ATTR_IFNAME, netdev->name); + + ieee = nla_nest_start(skb, DCB_ATTR_IEEE); + if (!ieee) + goto nla_put_failure; + + if (ops->ieee_getets) { + struct ieee_ets ets; + err = ops->ieee_getets(netdev, &ets); + if (!err) + NLA_PUT(skb, DCB_ATTR_IEEE_ETS, sizeof(ets), &ets); + } + + if (ops->ieee_getpfc) { + struct ieee_pfc pfc; + err = ops->ieee_getpfc(netdev, &pfc); + if (!err) + NLA_PUT(skb, DCB_ATTR_IEEE_PFC, sizeof(pfc), &pfc); + } + + nla_nest_end(skb, ieee); + nlmsg_end(skb, nlh); + + return rtnl_unicast(skb, &init_net, pid); +nla_put_failure: + nlmsg_cancel(skb, nlh); +nlmsg_failure: + kfree_skb(skb); + return -1; +} + static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { struct net *net = sock_net(skb->sk); @@ -1223,6 +1346,14 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) ret = dcbnl_setapp(netdev, tb, pid, nlh->nlmsg_seq, nlh->nlmsg_flags); goto out; + case DCB_CMD_IEEE_SET: + ret = dcbnl_ieee_set(netdev, tb, pid, nlh->nlmsg_seq, + nlh->nlmsg_flags); + goto out; + case DCB_CMD_IEEE_GET: + ret = dcbnl_ieee_get(netdev, tb, pid, nlh->nlmsg_seq, + nlh->nlmsg_flags); + goto out; default: goto errout; } -- cgit v1.2.3-59-g8ed1b From 9ab933ab2cc80f04690d6aa385b1110075c5e507 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Thu, 30 Dec 2010 09:26:31 +0000 Subject: dcbnl: add appliction tlv handlers 
This patch adds application tlv handlers. Networking stacks may use the application priority to set the skb priority of their stack using the negotiated dcbx priority. This patch provides the dcb_{get|set}app() routines for the stack to query these parameters. Notice lower layer drivers can use the dcbnl_ops routines if additional handling is needed. Perhaps in the firmware case for example. Signed-off-by: John Fastabend Signed-off-by: Shmulik Ravid Signed-off-by: David S. Miller --- include/linux/dcbnl.h | 4 +- include/net/dcbnl.h | 9 ++++ net/dcb/dcbnl.c | 133 ++++++++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 135 insertions(+), 11 deletions(-) (limited to 'include/net') diff --git a/include/linux/dcbnl.h b/include/linux/dcbnl.h index 287b5618e296..775bdb4465bf 100644 --- a/include/linux/dcbnl.h +++ b/include/linux/dcbnl.h @@ -82,7 +82,9 @@ struct ieee_pfc { __u64 indications[IEEE_8021QAZ_MAX_TCS]; }; -/* This structure contains the IEEE 802.1Qaz APP managed object +/* This structure contains the IEEE 802.1Qaz APP managed object. This + * object is also used for the CEE std as well. There is no difference + * between the objects. * * @selector: protocol identifier type * @protocol: protocol of type indicated diff --git a/include/net/dcbnl.h b/include/net/dcbnl.h index e2d841e963b3..ab7d623a2793 100644 --- a/include/net/dcbnl.h +++ b/include/net/dcbnl.h @@ -22,6 +22,15 @@ #include +struct dcb_app_type { + char name[IFNAMSIZ]; + struct dcb_app app; + struct list_head list; +}; + +u8 dcb_setapp(struct net_device *, struct dcb_app *); +u8 dcb_getapp(struct net_device *, struct dcb_app *); + /* * Ops struct for the netlink callbacks. Used by DCB-enabled drivers through * the netdevice struct.
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index 2ff908498924..cfd731faf6c6 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -179,6 +179,9 @@ static const struct nla_policy dcbnl_ieee_app[DCB_ATTR_IEEE_APP_MAX + 1] = { [DCB_ATTR_IEEE_APP] = {.len = sizeof(struct dcb_app)}, }; +static LIST_HEAD(dcb_app_list); +static DEFINE_SPINLOCK(dcb_lock); + /* standard netlink reply call */ static int dcbnl_reply(u8 value, u8 event, u8 cmd, u8 attr, u32 pid, u32 seq, u16 flags) @@ -634,12 +637,12 @@ out: static int dcbnl_setapp(struct net_device *netdev, struct nlattr **tb, u32 pid, u32 seq, u16 flags) { - int ret = -EINVAL; + int err, ret = -EINVAL; u16 id; u8 up, idtype; struct nlattr *app_tb[DCB_APP_ATTR_MAX + 1]; - if (!tb[DCB_ATTR_APP] || !netdev->dcbnl_ops->setapp) + if (!tb[DCB_ATTR_APP]) goto out; ret = nla_parse_nested(app_tb, DCB_APP_ATTR_MAX, tb[DCB_ATTR_APP], @@ -663,9 +666,18 @@ static int dcbnl_setapp(struct net_device *netdev, struct nlattr **tb, id = nla_get_u16(app_tb[DCB_APP_ATTR_ID]); up = nla_get_u8(app_tb[DCB_APP_ATTR_PRIORITY]); - ret = dcbnl_reply(netdev->dcbnl_ops->setapp(netdev, idtype, id, up), - RTM_SETDCB, DCB_CMD_SAPP, DCB_ATTR_APP, - pid, seq, flags); + if (netdev->dcbnl_ops->setapp) { + err = netdev->dcbnl_ops->setapp(netdev, idtype, id, up); + } else { + struct dcb_app app; + app.selector = idtype; + app.protocol = id; + app.priority = up; + err = dcb_setapp(netdev, &app); + } + + ret = dcbnl_reply(err, RTM_SETDCB, DCB_CMD_SAPP, DCB_ATTR_APP, + pid, seq, flags); out: return ret; } @@ -1164,7 +1176,7 @@ static int dcbnl_ieee_set(struct net_device *netdev, struct nlattr **tb, goto err; } - if (ieee[DCB_ATTR_IEEE_APP_TABLE] && ops->ieee_setapp) { + if (ieee[DCB_ATTR_IEEE_APP_TABLE]) { struct nlattr *attr; int rem; @@ -1173,7 +1185,10 @@ static int dcbnl_ieee_set(struct net_device *netdev, struct nlattr **tb, if (nla_type(attr) != DCB_ATTR_IEEE_APP) continue; app_data = nla_data(attr); - err = ops->ieee_setapp(netdev, app_data); + if 
(ops->ieee_setapp) + err = ops->ieee_setapp(netdev, app_data); + else + err = dcb_setapp(netdev, app_data); if (err) goto err; } @@ -1193,7 +1208,8 @@ static int dcbnl_ieee_get(struct net_device *netdev, struct nlattr **tb, struct sk_buff *skb; struct nlmsghdr *nlh; struct dcbmsg *dcb; - struct nlattr *ieee; + struct nlattr *ieee, *app; + struct dcb_app_type *itr; const struct dcbnl_rtnl_ops *ops = netdev->dcbnl_ops; int err; @@ -1230,6 +1246,19 @@ static int dcbnl_ieee_get(struct net_device *netdev, struct nlattr **tb, NLA_PUT(skb, DCB_ATTR_IEEE_PFC, sizeof(pfc), &pfc); } + app = nla_nest_start(skb, DCB_ATTR_IEEE_APP_TABLE); + if (!app) + goto nla_put_failure; + + spin_lock(&dcb_lock); + list_for_each_entry(itr, &dcb_app_list, list) { + if (strncmp(itr->name, netdev->name, IFNAMSIZ) == 0) + NLA_PUT(skb, DCB_ATTR_IEEE_APP, + sizeof(itr->app), &itr->app); + } + spin_unlock(&dcb_lock); + nla_nest_end(skb, app); + nla_nest_end(skb, ieee); nlmsg_end(skb, nlh); @@ -1364,8 +1393,93 @@ out: return ret; } +/** + * dcb_getapp - retrieve the DCBX application user priority + * + * On success returns a non-zero 802.1p user priority bitmap + * otherwise returns 0 as the invalid user priority bitmap to + * indicate an error. + */ +u8 dcb_getapp(struct net_device *dev, struct dcb_app *app) +{ + struct dcb_app_type *itr; + u8 prio = 0; + + spin_lock(&dcb_lock); + list_for_each_entry(itr, &dcb_app_list, list) { + if (itr->app.selector == app->selector && + itr->app.protocol == app->protocol && + (strncmp(itr->name, dev->name, IFNAMSIZ) == 0)) { + prio = itr->app.priority; + break; + } + } + spin_unlock(&dcb_lock); + + return prio; +} +EXPORT_SYMBOL(dcb_getapp); + +/** + * ixgbe_dcbnl_setapp - add dcb application data to app list + * + * Priority 0 is the default priority this removes applications + * from the app list if the priority is set to zero. 
+ */ +u8 dcb_setapp(struct net_device *dev, struct dcb_app *new) +{ + struct dcb_app_type *itr; + + spin_lock(&dcb_lock); + /* Search for existing match and replace */ + list_for_each_entry(itr, &dcb_app_list, list) { + if (itr->app.selector == new->selector && + itr->app.protocol == new->protocol && + (strncmp(itr->name, dev->name, IFNAMSIZ) == 0)) { + if (new->priority) + itr->app.priority = new->priority; + else { + list_del(&itr->list); + kfree(itr); + } + goto out; + } + } + /* App type does not exist add new application type */ + if (new->priority) { + struct dcb_app_type *entry; + entry = kmalloc(sizeof(struct dcb_app_type), GFP_ATOMIC); + if (!entry) { + spin_unlock(&dcb_lock); + return -ENOMEM; + } + + memcpy(&entry->app, new, sizeof(*new)); + strncpy(entry->name, dev->name, IFNAMSIZ); + list_add(&entry->list, &dcb_app_list); + } +out: + spin_unlock(&dcb_lock); + return 0; +} +EXPORT_SYMBOL(dcb_setapp); + +void dcb_flushapp(void) +{ + struct dcb_app_type *app; + + spin_lock(&dcb_lock); + list_for_each_entry(app, &dcb_app_list, list) { + list_del(&app->list); + kfree(app); + } + spin_unlock(&dcb_lock); +} + static int __init dcbnl_init(void) { + INIT_LIST_HEAD(&dcb_app_list); + rtnl_register(PF_UNSPEC, RTM_GETDCB, dcb_doit, NULL); rtnl_register(PF_UNSPEC, RTM_SETDCB, dcb_doit, NULL); @@ -1377,7 +1491,6 @@ static void __exit dcbnl_exit(void) { rtnl_unregister(PF_UNSPEC, RTM_GETDCB); rtnl_unregister(PF_UNSPEC, RTM_SETDCB); + dcb_flushapp(); } module_exit(dcbnl_exit); - - -- cgit v1.2.3-59-g8ed1b From 96b99684e365f28d49bdb1221ca022b75cb91a98 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Thu, 30 Dec 2010 09:26:37 +0000 Subject: net_dcb: add application notifiers DCBx applications priorities can be changed dynamically. If application stacks are expected to keep the skb priority consistent with the dcbx priority the stack will need to be notified when these changes occur. This patch adds application notifiers for the stack to register with. 
Signed-off-by: John Fastabend Signed-off-by: David S. Miller --- include/net/dcbevent.h | 31 +++++++++++++++++++++++++++++++ net/dcb/Makefile | 2 +- net/dcb/dcbevent.c | 40 ++++++++++++++++++++++++++++++++++++++++ net/dcb/dcbnl.c | 2 ++ 4 files changed, 74 insertions(+), 1 deletion(-) create mode 100644 include/net/dcbevent.h create mode 100644 net/dcb/dcbevent.c (limited to 'include/net') diff --git a/include/net/dcbevent.h b/include/net/dcbevent.h new file mode 100644 index 000000000000..bc1e7ef40171 --- /dev/null +++ b/include/net/dcbevent.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2010, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. 
+ * + * Author: John Fastabend + */ + +#ifndef _DCB_EVENT_H +#define _DCB_EVENT_H + +enum dcbevent_notif_type { + DCB_APP_EVENT = 1, +}; + +extern int register_dcbevent_notifier(struct notifier_block *nb); +extern int unregister_dcbevent_notifier(struct notifier_block *nb); +extern int call_dcbevent_notifiers(unsigned long val, void *v); + +#endif diff --git a/net/dcb/Makefile b/net/dcb/Makefile index 9930f4cde818..c1282c9e64fa 100644 --- a/net/dcb/Makefile +++ b/net/dcb/Makefile @@ -1 +1 @@ -obj-$(CONFIG_DCB) += dcbnl.o +obj-$(CONFIG_DCB) += dcbnl.o dcbevent.o diff --git a/net/dcb/dcbevent.c b/net/dcb/dcbevent.c new file mode 100644 index 000000000000..665a8802105a --- /dev/null +++ b/net/dcb/dcbevent.c @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2010, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. 
+ * + * Author: John Fastabend + */ + +#include +#include + +static ATOMIC_NOTIFIER_HEAD(dcbevent_notif_chain); + +int register_dcbevent_notifier(struct notifier_block *nb) +{ + return atomic_notifier_chain_register(&dcbevent_notif_chain, nb); +} +EXPORT_SYMBOL(register_dcbevent_notifier); + +int unregister_dcbevent_notifier(struct notifier_block *nb) +{ + return atomic_notifier_chain_unregister(&dcbevent_notif_chain, nb); +} +EXPORT_SYMBOL(unregister_dcbevent_notifier); + +int call_dcbevent_notifiers(unsigned long val, void *v) +{ + return atomic_notifier_call_chain(&dcbevent_notif_chain, val, v); +} diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index cfd731faf6c6..69144125fc4f 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -1460,6 +1461,7 @@ u8 dcb_setapp(struct net_device *dev, struct dcb_app *new) } out: spin_unlock(&dcb_lock); + call_dcbevent_notifiers(DCB_APP_EVENT, new); return 0; } EXPORT_SYMBOL(dcb_setapp); -- cgit v1.2.3-59-g8ed1b From 6241b6259b16aa390ff4bf50f520685b3801200b Mon Sep 17 00:00:00 2001 From: Shmulik Ravid Date: Thu, 30 Dec 2010 06:26:48 +0000 Subject: dcbnl: adding DCBX engine capability Adding an optional DCBX capability and a pair for get-set routines for setting the device DCBX mode. The DCBX capability is a bit field of supported attributes. The user is expected to set the DCBX mode with a subset of the advertised attributes. This patch is dependent on the following patches: [net-next-2.6 PATCH 1/3] dcbnl: add support for ieee8021Qaz attributes [net-next-2.6 PATCH 2/3] dcbnl: add appliction tlv handlers [net-next-2.6 PATCH 3/3] net_dcb: add application notifiers Signed-off-by: Shmulik Ravid Acked-by: John Fastabend Signed-off-by: David S. 
Miller --- include/linux/dcbnl.h | 43 +++++++++++++++++++++++++++++++++++++++++++ include/net/dcbnl.h | 5 +++++ net/dcb/dcbnl.c | 43 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 91 insertions(+) (limited to 'include/net') diff --git a/include/linux/dcbnl.h b/include/linux/dcbnl.h index 775bdb4465bf..16eea36d8934 100644 --- a/include/linux/dcbnl.h +++ b/include/linux/dcbnl.h @@ -135,6 +135,8 @@ struct dcbmsg { * @DCB_CMD_SAPP: set application protocol configuration * @DCB_CMD_IEEE_SET: set IEEE 802.1Qaz configuration * @DCB_CMD_IEEE_GET: get IEEE 802.1Qaz configuration + * @DCB_CMD_GDCBX: get DCBX engine configuration + * @DCB_CMD_SDCBX: set DCBX engine configuration */ enum dcbnl_commands { DCB_CMD_UNDEFINED, @@ -171,6 +173,9 @@ enum dcbnl_commands { DCB_CMD_IEEE_SET, DCB_CMD_IEEE_GET, + DCB_CMD_GDCBX, + DCB_CMD_SDCBX, + __DCB_CMD_ENUM_MAX, DCB_CMD_MAX = __DCB_CMD_ENUM_MAX - 1, }; @@ -191,6 +196,7 @@ enum dcbnl_commands { * @DCB_ATTR_NUMTCS: number of traffic classes supported (NLA_NESTED) * @DCB_ATTR_BCN: backward congestion notification configuration (NLA_NESTED) * @DCB_ATTR_IEEE: IEEE 802.1Qaz supported attributes (NLA_NESTED) + * @DCB_ATTR_DCBX: DCBX engine configuration in the device (NLA_U8) */ enum dcbnl_attrs { DCB_ATTR_UNDEFINED, @@ -211,6 +217,8 @@ enum dcbnl_attrs { /* IEEE std attributes */ DCB_ATTR_IEEE, + DCB_ATTR_DCBX, + __DCB_ATTR_ENUM_MAX, DCB_ATTR_MAX = __DCB_ATTR_ENUM_MAX - 1, }; @@ -370,6 +378,8 @@ enum dcbnl_tc_attrs { * @DCB_CAP_ATTR_GSP: (NLA_U8) device supports group strict priority * @DCB_CAP_ATTR_BCN: (NLA_U8) device supports Backwards Congestion * Notification + * @DCB_CAP_ATTR_DCBX: (NLA_U8) device supports DCBX engine + * */ enum dcbnl_cap_attrs { DCB_CAP_ATTR_UNDEFINED, @@ -381,11 +391,44 @@ enum dcbnl_cap_attrs { DCB_CAP_ATTR_PFC_TCS, DCB_CAP_ATTR_GSP, DCB_CAP_ATTR_BCN, + DCB_CAP_ATTR_DCBX, __DCB_CAP_ATTR_ENUM_MAX, DCB_CAP_ATTR_MAX = __DCB_CAP_ATTR_ENUM_MAX - 1, }; +/** + * DCBX capability flags + * + * 
@DCB_CAP_DCBX_HOST: DCBX negotiation is performed by the host LLDP agent. + * 'set' routines are used to configure the device with + * the negotiated parameters + * + * @DCB_CAP_DCBX_LLD_MANAGED: DCBX negotiation is not performed in the host but + * by another entity + * 'get' routines are used to retrieve the + * negotiated parameters + * 'set' routines can be used to set the initial + * negotiation configuration + * + * @DCB_CAP_DCBX_VER_CEE: for a non-host DCBX engine, indicates the engine + * supports the CEE protocol flavor + * + * @DCB_CAP_DCBX_VER_IEEE: for a non-host DCBX engine, indicates the engine + * supports the IEEE protocol flavor + * + * @DCB_CAP_DCBX_STATIC: for a non-host DCBX engine, indicates the engine + * supports static configuration (i.e no actual + * negotiation is performed negotiated parameters equal + * the initial configuration) + * + */ +#define DCB_CAP_DCBX_HOST 0x01 +#define DCB_CAP_DCBX_LLD_MANAGED 0x02 +#define DCB_CAP_DCBX_VER_CEE 0x04 +#define DCB_CAP_DCBX_VER_IEEE 0x08 +#define DCB_CAP_DCBX_STATIC 0x10 + /** * enum dcbnl_numtcs_attrs - number of traffic classes * diff --git a/include/net/dcbnl.h b/include/net/dcbnl.h index ab7d623a2793..c65347b3cbbf 100644 --- a/include/net/dcbnl.h +++ b/include/net/dcbnl.h @@ -70,6 +70,11 @@ struct dcbnl_rtnl_ops { void (*setbcnrp)(struct net_device *, int, u8); u8 (*setapp)(struct net_device *, u8, u16, u8); u8 (*getapp)(struct net_device *, u8, u16); + + /* DCBX configuration */ + u8 (*getdcbx)(struct net_device *); + u8 (*setdcbx)(struct net_device *, u8); + }; #endif /* __NET_DCBNL_H__ */ diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index 69144125fc4f..8f83ad859d9b 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -68,6 +68,7 @@ static const struct nla_policy dcbnl_rtnl_policy[DCB_ATTR_MAX + 1] = { [DCB_ATTR_BCN] = {.type = NLA_NESTED}, [DCB_ATTR_APP] = {.type = NLA_NESTED}, [DCB_ATTR_IEEE] = {.type = NLA_NESTED}, + [DCB_ATTR_DCBX] = {.type = NLA_U8}, }; /* DCB priority flow control 
to User Priority nested attributes */ @@ -124,6 +125,7 @@ static const struct nla_policy dcbnl_cap_nest[DCB_CAP_ATTR_MAX + 1] = { [DCB_CAP_ATTR_PFC_TCS] = {.type = NLA_U8}, [DCB_CAP_ATTR_GSP] = {.type = NLA_U8}, [DCB_CAP_ATTR_BCN] = {.type = NLA_U8}, + [DCB_CAP_ATTR_DCBX] = {.type = NLA_U8}, }; /* DCB capabilities nested attributes. */ @@ -1271,6 +1273,39 @@ nlmsg_failure: return -1; } +/* DCBX configuration */ +static int dcbnl_getdcbx(struct net_device *netdev, struct nlattr **tb, + u32 pid, u32 seq, u16 flags) +{ + int ret = -EINVAL; + + if (!netdev->dcbnl_ops->getdcbx) + return ret; + + ret = dcbnl_reply(netdev->dcbnl_ops->getdcbx(netdev), RTM_GETDCB, + DCB_CMD_GDCBX, DCB_ATTR_DCBX, pid, seq, flags); + + return ret; +} + +static int dcbnl_setdcbx(struct net_device *netdev, struct nlattr **tb, + u32 pid, u32 seq, u16 flags) +{ + int ret = -EINVAL; + u8 value; + + if (!tb[DCB_ATTR_DCBX] || !netdev->dcbnl_ops->setdcbx) + return ret; + + value = nla_get_u8(tb[DCB_ATTR_DCBX]); + + ret = dcbnl_reply(netdev->dcbnl_ops->setdcbx(netdev, value), + RTM_SETDCB, DCB_CMD_SDCBX, DCB_ATTR_DCBX, + pid, seq, flags); + + return ret; +} + static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { struct net *net = sock_net(skb->sk); @@ -1384,6 +1419,14 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) ret = dcbnl_ieee_get(netdev, tb, pid, nlh->nlmsg_seq, nlh->nlmsg_flags); goto out; + case DCB_CMD_GDCBX: + ret = dcbnl_getdcbx(netdev, tb, pid, nlh->nlmsg_seq, + nlh->nlmsg_flags); + goto out; + case DCB_CMD_SDCBX: + ret = dcbnl_setdcbx(netdev, tb, pid, nlh->nlmsg_seq, + nlh->nlmsg_flags); + goto out; default: goto errout; } -- cgit v1.2.3-59-g8ed1b From ea45fe4e176a42d2396878f530cfdc8265bef37b Mon Sep 17 00:00:00 2001 From: Shmulik Ravid Date: Thu, 30 Dec 2010 06:26:55 +0000 Subject: dcbnl: adding DCBX feature flags get-set Adding a pair of set-get routines to dcbnl for setting the negotiation flags of the various DCB features. 
Conforms to the CEE flavor of DCBX The user sets these flags (enable, advertise, willing) for each feature to be used by the DCBX engine. The 'get' routine returns which of the features is enabled after the negotiation. This patch is dependent on the following patches: [net-next-2.6 PATCH 1/3] dcbnl: add support for ieee8021Qaz attributes [net-next-2.6 PATCH 2/3] dcbnl: add appliction tlv handlers [net-next-2.6 PATCH 3/3] net_dcb: add application notifiers Signed-off-by: Shmulik Ravid Signed-off-by: David S. Miller --- include/linux/dcbnl.h | 33 +++++++++++++ include/net/dcbnl.h | 3 ++ net/dcb/dcbnl.c | 133 ++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 169 insertions(+) (limited to 'include/net') diff --git a/include/linux/dcbnl.h b/include/linux/dcbnl.h index 16eea36d8934..68cd248f6d3e 100644 --- a/include/linux/dcbnl.h +++ b/include/linux/dcbnl.h @@ -137,6 +137,8 @@ struct dcbmsg { * @DCB_CMD_IEEE_GET: get IEEE 802.1Qaz configuration * @DCB_CMD_GDCBX: get DCBX engine configuration * @DCB_CMD_SDCBX: set DCBX engine configuration + * @DCB_CMD_GFEATCFG: get DCBX features flags + * @DCB_CMD_SFEATCFG: set DCBX features negotiation flags */ enum dcbnl_commands { DCB_CMD_UNDEFINED, @@ -176,6 +178,9 @@ enum dcbnl_commands { DCB_CMD_GDCBX, DCB_CMD_SDCBX, + DCB_CMD_GFEATCFG, + DCB_CMD_SFEATCFG, + __DCB_CMD_ENUM_MAX, DCB_CMD_MAX = __DCB_CMD_ENUM_MAX - 1, }; @@ -197,6 +202,7 @@ enum dcbnl_commands { * @DCB_ATTR_BCN: backward congestion notification configuration (NLA_NESTED) * @DCB_ATTR_IEEE: IEEE 802.1Qaz supported attributes (NLA_NESTED) * @DCB_ATTR_DCBX: DCBX engine configuration in the device (NLA_U8) + * @DCB_ATTR_FEATCFG: DCBX features flags (NLA_NESTED) */ enum dcbnl_attrs { DCB_ATTR_UNDEFINED, @@ -218,6 +224,7 @@ enum dcbnl_attrs { DCB_ATTR_IEEE, DCB_ATTR_DCBX, + DCB_ATTR_FEATCFG, __DCB_ATTR_ENUM_MAX, DCB_ATTR_MAX = __DCB_ATTR_ENUM_MAX - 1, @@ -506,4 +513,30 @@ enum dcbnl_app_attrs { DCB_APP_ATTR_MAX = __DCB_APP_ATTR_ENUM_MAX - 1, }; +/** + * 
enum dcbnl_featcfg_attrs - features configuration flags + * + * @DCB_FEATCFG_ATTR_UNDEFINED: unspecified attribute to catch errors + * @DCB_FEATCFG_ATTR_ALL: (NLA_FLAG) all features configuration attributes + * @DCB_FEATCFG_ATTR_PG: (NLA_U8) configuration flags for priority groups + * @DCB_FEATCFG_ATTR_PFC: (NLA_U8) configuration flags for priority + * flow control + * @DCB_FEATCFG_ATTR_APP: (NLA_U8) configuration flags for application TLV + * + */ +#define DCB_FEATCFG_ERROR 0x01 /* error in feature resolution */ +#define DCB_FEATCFG_ENABLE 0x02 /* enable feature */ +#define DCB_FEATCFG_WILLING 0x04 /* feature is willing */ +#define DCB_FEATCFG_ADVERTISE 0x08 /* advertise feature */ +enum dcbnl_featcfg_attrs { + DCB_FEATCFG_ATTR_UNDEFINED, + DCB_FEATCFG_ATTR_ALL, + DCB_FEATCFG_ATTR_PG, + DCB_FEATCFG_ATTR_PFC, + DCB_FEATCFG_ATTR_APP, + + __DCB_FEATCFG_ATTR_ENUM_MAX, + DCB_FEATCFG_ATTR_MAX = __DCB_FEATCFG_ATTR_ENUM_MAX - 1, +}; + #endif /* __LINUX_DCBNL_H__ */ diff --git a/include/net/dcbnl.h b/include/net/dcbnl.h index c65347b3cbbf..a8e7852b10ab 100644 --- a/include/net/dcbnl.h +++ b/include/net/dcbnl.h @@ -70,11 +70,14 @@ struct dcbnl_rtnl_ops { void (*setbcnrp)(struct net_device *, int, u8); u8 (*setapp)(struct net_device *, u8, u16, u8); u8 (*getapp)(struct net_device *, u8, u16); + u8 (*getfeatcfg)(struct net_device *, int, u8 *); + u8 (*setfeatcfg)(struct net_device *, int, u8); /* DCBX configuration */ u8 (*getdcbx)(struct net_device *); u8 (*setdcbx)(struct net_device *, u8); + }; #endif /* __NET_DCBNL_H__ */ diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index 8f83ad859d9b..075af0a08d84 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -69,6 +69,7 @@ static const struct nla_policy dcbnl_rtnl_policy[DCB_ATTR_MAX + 1] = { [DCB_ATTR_APP] = {.type = NLA_NESTED}, [DCB_ATTR_IEEE] = {.type = NLA_NESTED}, [DCB_ATTR_DCBX] = {.type = NLA_U8}, + [DCB_ATTR_FEATCFG] = {.type = NLA_NESTED}, }; /* DCB priority flow control to User Priority nested attributes */ @@
-182,6 +183,14 @@ static const struct nla_policy dcbnl_ieee_app[DCB_ATTR_IEEE_APP_MAX + 1] = { [DCB_ATTR_IEEE_APP] = {.len = sizeof(struct dcb_app)}, }; +/* DCB number of traffic classes nested attributes. */ +static const struct nla_policy dcbnl_featcfg_nest[DCB_FEATCFG_ATTR_MAX + 1] = { + [DCB_FEATCFG_ATTR_ALL] = {.type = NLA_FLAG}, + [DCB_FEATCFG_ATTR_PG] = {.type = NLA_U8}, + [DCB_FEATCFG_ATTR_PFC] = {.type = NLA_U8}, + [DCB_FEATCFG_ATTR_APP] = {.type = NLA_U8}, +}; + static LIST_HEAD(dcb_app_list); static DEFINE_SPINLOCK(dcb_lock); @@ -1306,6 +1315,122 @@ static int dcbnl_setdcbx(struct net_device *netdev, struct nlattr **tb, return ret; } +static int dcbnl_getfeatcfg(struct net_device *netdev, struct nlattr **tb, + u32 pid, u32 seq, u16 flags) +{ + struct sk_buff *dcbnl_skb; + struct nlmsghdr *nlh; + struct dcbmsg *dcb; + struct nlattr *data[DCB_FEATCFG_ATTR_MAX + 1], *nest; + u8 value; + int ret = -EINVAL; + int i; + int getall = 0; + + if (!tb[DCB_ATTR_FEATCFG] || !netdev->dcbnl_ops->getfeatcfg) + return ret; + + ret = nla_parse_nested(data, DCB_FEATCFG_ATTR_MAX, tb[DCB_ATTR_FEATCFG], + dcbnl_featcfg_nest); + if (ret) { + ret = -EINVAL; + goto err_out; + } + + dcbnl_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!dcbnl_skb) { + ret = -EINVAL; + goto err_out; + } + + nlh = NLMSG_NEW(dcbnl_skb, pid, seq, RTM_GETDCB, sizeof(*dcb), flags); + + dcb = NLMSG_DATA(nlh); + dcb->dcb_family = AF_UNSPEC; + dcb->cmd = DCB_CMD_GFEATCFG; + + nest = nla_nest_start(dcbnl_skb, DCB_ATTR_FEATCFG); + if (!nest) { + ret = -EINVAL; + goto err; + } + + if (data[DCB_FEATCFG_ATTR_ALL]) + getall = 1; + + for (i = DCB_FEATCFG_ATTR_ALL+1; i <= DCB_FEATCFG_ATTR_MAX; i++) { + if (!getall && !data[i]) + continue; + + ret = netdev->dcbnl_ops->getfeatcfg(netdev, i, &value); + if (!ret) { + ret = nla_put_u8(dcbnl_skb, i, value); + + if (ret) { + nla_nest_cancel(dcbnl_skb, nest); + ret = -EINVAL; + goto err; + } + } else + goto err; + } + nla_nest_end(dcbnl_skb, nest); + + 
nlmsg_end(dcbnl_skb, nlh); + + ret = rtnl_unicast(dcbnl_skb, &init_net, pid); + if (ret) { + ret = -EINVAL; + goto err_out; + } + + return 0; +nlmsg_failure: +err: + kfree_skb(dcbnl_skb); +err_out: + return ret; +} + +static int dcbnl_setfeatcfg(struct net_device *netdev, struct nlattr **tb, + u32 pid, u32 seq, u16 flags) +{ + struct nlattr *data[DCB_FEATCFG_ATTR_MAX + 1]; + int ret = -EINVAL; + u8 value; + int i; + + if (!tb[DCB_ATTR_FEATCFG] || !netdev->dcbnl_ops->setfeatcfg) + return ret; + + ret = nla_parse_nested(data, DCB_FEATCFG_ATTR_MAX, tb[DCB_ATTR_FEATCFG], + dcbnl_featcfg_nest); + + if (ret) { + ret = -EINVAL; + goto err; + } + + for (i = DCB_FEATCFG_ATTR_ALL+1; i <= DCB_FEATCFG_ATTR_MAX; i++) { + if (data[i] == NULL) + continue; + + value = nla_get_u8(data[i]); + + ret = netdev->dcbnl_ops->setfeatcfg(netdev, i, value); + + if (ret) + goto operr; + } + +operr: + ret = dcbnl_reply(!!ret, RTM_SETDCB, DCB_CMD_SFEATCFG, + DCB_ATTR_FEATCFG, pid, seq, flags); + +err: + return ret; +} + static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { struct net *net = sock_net(skb->sk); @@ -1427,6 +1552,14 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) ret = dcbnl_setdcbx(netdev, tb, pid, nlh->nlmsg_seq, nlh->nlmsg_flags); goto out; + case DCB_CMD_GFEATCFG: + ret = dcbnl_getfeatcfg(netdev, tb, pid, nlh->nlmsg_seq, + nlh->nlmsg_flags); + goto out; + case DCB_CMD_SFEATCFG: + ret = dcbnl_setfeatcfg(netdev, tb, pid, nlh->nlmsg_seq, + nlh->nlmsg_flags); + goto out; default: goto errout; } -- cgit v1.2.3-59-g8ed1b From 21f83589644bb2ed98079bf1e2154c8e70ca6a6c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sat, 18 Dec 2010 17:20:47 +0100 Subject: mac80211: implement hardware offload for remain-on-channel This allows drivers to support remain-on-channel offload if they implement smarter timing or need to use a device implementation like iwlwifi. Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- include/net/mac80211.h | 19 +++++++++++ net/mac80211/cfg.c | 83 +++++++++++++++++++++++++++++++++++++++++++++ net/mac80211/driver-ops.h | 30 ++++++++++++++++ net/mac80211/driver-trace.h | 80 +++++++++++++++++++++++++++++++++++++++++++ net/mac80211/ieee80211_i.h | 8 +++++ net/mac80211/iface.c | 9 +++-- net/mac80211/main.c | 5 ++- net/mac80211/offchannel.c | 75 ++++++++++++++++++++++++++++++++++++++++ 8 files changed, 306 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 479c35e160e3..5b3fd5add7a4 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -365,6 +365,7 @@ enum mac80211_tx_control_flags { IEEE80211_TX_INTFL_NL80211_FRAME_TX = BIT(21), IEEE80211_TX_CTL_LDPC = BIT(22), IEEE80211_TX_CTL_STBC = BIT(23) | BIT(24), + IEEE80211_TX_CTL_TX_OFFCHAN = BIT(25), }; #define IEEE80211_TX_CTL_STBC_SHIFT 23 @@ -1824,6 +1825,12 @@ struct ieee80211_ops { int (*napi_poll)(struct ieee80211_hw *hw, int budget); int (*set_antenna)(struct ieee80211_hw *hw, u32 tx_ant, u32 rx_ant); int (*get_antenna)(struct ieee80211_hw *hw, u32 *tx_ant, u32 *rx_ant); + + int (*remain_on_channel)(struct ieee80211_hw *hw, + struct ieee80211_channel *chan, + enum nl80211_channel_type channel_type, + int duration); + int (*cancel_remain_on_channel)(struct ieee80211_hw *hw); }; /** @@ -2729,6 +2736,18 @@ void ieee80211_request_smps(struct ieee80211_vif *vif, */ void ieee80211_key_removed(struct ieee80211_key_conf *key_conf); +/** + * ieee80211_ready_on_channel - notification of remain-on-channel start + * @hw: pointer as obtained from ieee80211_alloc_hw() + */ +void ieee80211_ready_on_channel(struct ieee80211_hw *hw); + +/** + * ieee80211_remain_on_channel_expired - remain_on_channel duration expired + * @hw: pointer as obtained from ieee80211_alloc_hw() + */ +void ieee80211_remain_on_channel_expired(struct ieee80211_hw *hw); + /* Rate control API */ /** diff --git a/net/mac80211/cfg.c 
b/net/mac80211/cfg.c index 5892b0302454..168a6ba8fc28 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1593,6 +1593,37 @@ static int ieee80211_set_bitrate_mask(struct wiphy *wiphy, return 0; } +static int ieee80211_remain_on_channel_hw(struct ieee80211_local *local, + struct net_device *dev, + struct ieee80211_channel *chan, + enum nl80211_channel_type chantype, + unsigned int duration, u64 *cookie) +{ + int ret; + u32 random_cookie; + + lockdep_assert_held(&local->mtx); + + if (local->hw_roc_cookie) + return -EBUSY; + /* must be nonzero */ + random_cookie = random32() | 1; + + *cookie = random_cookie; + local->hw_roc_dev = dev; + local->hw_roc_cookie = random_cookie; + local->hw_roc_channel = chan; + local->hw_roc_channel_type = chantype; + local->hw_roc_duration = duration; + ret = drv_remain_on_channel(local, chan, chantype, duration); + if (ret) { + local->hw_roc_channel = NULL; + local->hw_roc_cookie = 0; + } + + return ret; +} + static int ieee80211_remain_on_channel(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_channel *chan, @@ -1601,16 +1632,62 @@ static int ieee80211_remain_on_channel(struct wiphy *wiphy, u64 *cookie) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = sdata->local; + + if (local->ops->remain_on_channel) { + int ret; + + mutex_lock(&local->mtx); + ret = ieee80211_remain_on_channel_hw(local, dev, + chan, channel_type, + duration, cookie); + mutex_unlock(&local->mtx); + + return ret; + } return ieee80211_wk_remain_on_channel(sdata, chan, channel_type, duration, cookie); } +static int ieee80211_cancel_remain_on_channel_hw(struct ieee80211_local *local, + u64 cookie) +{ + int ret; + + lockdep_assert_held(&local->mtx); + + if (local->hw_roc_cookie != cookie) + return -ENOENT; + + ret = drv_cancel_remain_on_channel(local); + if (ret) + return ret; + + local->hw_roc_cookie = 0; + local->hw_roc_channel = NULL; + + ieee80211_recalc_idle(local); + + return 0; +} + 
static int ieee80211_cancel_remain_on_channel(struct wiphy *wiphy, struct net_device *dev, u64 cookie) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = sdata->local; + + if (local->ops->cancel_remain_on_channel) { + int ret; + + mutex_lock(&local->mtx); + ret = ieee80211_cancel_remain_on_channel_hw(local, cookie); + mutex_unlock(&local->mtx); + + return ret; + } return ieee80211_wk_cancel_remain_on_channel(sdata, cookie); } @@ -1662,6 +1739,12 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct net_device *dev, channel_type != local->_oper_channel_type)) is_offchan = true; + if (chan == local->hw_roc_channel) { + /* TODO: check channel type? */ + is_offchan = false; + flags |= IEEE80211_TX_CTL_TX_OFFCHAN; + } + if (is_offchan && !offchan) return -EBUSY; diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index af0c4398cceb..98d589960a49 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -465,4 +465,34 @@ static inline int drv_get_antenna(struct ieee80211_local *local, return ret; } +static inline int drv_remain_on_channel(struct ieee80211_local *local, + struct ieee80211_channel *chan, + enum nl80211_channel_type chantype, + unsigned int duration) +{ + int ret; + + might_sleep(); + + trace_drv_remain_on_channel(local, chan, chantype, duration); + ret = local->ops->remain_on_channel(&local->hw, chan, chantype, + duration); + trace_drv_return_int(local, ret); + + return ret; +} + +static inline int drv_cancel_remain_on_channel(struct ieee80211_local *local) +{ + int ret; + + might_sleep(); + + trace_drv_cancel_remain_on_channel(local); + ret = local->ops->cancel_remain_on_channel(&local->hw); + trace_drv_return_int(local, ret); + + return ret; +} + #endif /* __MAC80211_DRIVER_OPS */ diff --git a/net/mac80211/driver-trace.h b/net/mac80211/driver-trace.h index c2772f23ac9c..49c84218b2f4 100644 --- a/net/mac80211/driver-trace.h +++ b/net/mac80211/driver-trace.h @@ -933,6 
+933,50 @@ TRACE_EVENT(drv_get_antenna, ) ); +TRACE_EVENT(drv_remain_on_channel, + TP_PROTO(struct ieee80211_local *local, struct ieee80211_channel *chan, + enum nl80211_channel_type chantype, unsigned int duration), + + TP_ARGS(local, chan, chantype, duration), + + TP_STRUCT__entry( + LOCAL_ENTRY + __field(int, center_freq) + __field(int, channel_type) + __field(unsigned int, duration) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + __entry->center_freq = chan->center_freq; + __entry->channel_type = chantype; + __entry->duration = duration; + ), + + TP_printk( + LOCAL_PR_FMT " freq:%dMHz duration:%dms", + LOCAL_PR_ARG, __entry->center_freq, __entry->duration + ) +); + +TRACE_EVENT(drv_cancel_remain_on_channel, + TP_PROTO(struct ieee80211_local *local), + + TP_ARGS(local), + + TP_STRUCT__entry( + LOCAL_ENTRY + ), + + TP_fast_assign( + LOCAL_ASSIGN; + ), + + TP_printk( + LOCAL_PR_FMT, LOCAL_PR_ARG + ) +); + /* * Tracing for API calls that drivers call. */ @@ -1170,6 +1214,42 @@ TRACE_EVENT(api_chswitch_done, ) ); +TRACE_EVENT(api_ready_on_channel, + TP_PROTO(struct ieee80211_local *local), + + TP_ARGS(local), + + TP_STRUCT__entry( + LOCAL_ENTRY + ), + + TP_fast_assign( + LOCAL_ASSIGN; + ), + + TP_printk( + LOCAL_PR_FMT, LOCAL_PR_ARG + ) +); + +TRACE_EVENT(api_remain_on_channel_expired, + TP_PROTO(struct ieee80211_local *local), + + TP_ARGS(local), + + TP_STRUCT__entry( + LOCAL_ENTRY + ), + + TP_fast_assign( + LOCAL_ASSIGN; + ), + + TP_printk( + LOCAL_PR_FMT, LOCAL_PR_ARG + ) +); + /* * Tracing for internal functions * (which may also be called in response to driver calls) diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 95cdd2a3f809..f866af8de5ac 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -951,6 +951,13 @@ struct ieee80211_local { } debugfs; #endif + struct ieee80211_channel *hw_roc_channel; + struct net_device *hw_roc_dev; + struct work_struct hw_roc_start, hw_roc_done; + enum nl80211_channel_type 
hw_roc_channel_type; + unsigned int hw_roc_duration; + u32 hw_roc_cookie; + /* dummy netdev for use w/ NAPI */ struct net_device napi_dev; @@ -1142,6 +1149,7 @@ void ieee80211_offchannel_stop_beaconing(struct ieee80211_local *local); void ieee80211_offchannel_stop_station(struct ieee80211_local *local); void ieee80211_offchannel_return(struct ieee80211_local *local, bool enable_beaconing); +void ieee80211_hw_roc_setup(struct ieee80211_local *local); /* interface handling */ int ieee80211_iface_init(void); diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index b6db237672ff..8acba456744e 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1264,7 +1264,7 @@ u32 __ieee80211_recalc_idle(struct ieee80211_local *local) { struct ieee80211_sub_if_data *sdata; int count = 0; - bool working = false, scanning = false; + bool working = false, scanning = false, hw_roc = false; struct ieee80211_work *wk; unsigned int led_trig_start = 0, led_trig_stop = 0; @@ -1308,6 +1308,9 @@ u32 __ieee80211_recalc_idle(struct ieee80211_local *local) local->scan_sdata->vif.bss_conf.idle = false; } + if (local->hw_roc_channel) + hw_roc = true; + list_for_each_entry(sdata, &local->interfaces, list) { if (sdata->old_idle == sdata->vif.bss_conf.idle) continue; @@ -1316,7 +1319,7 @@ u32 __ieee80211_recalc_idle(struct ieee80211_local *local) ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_IDLE); } - if (working || scanning) + if (working || scanning || hw_roc) led_trig_start |= IEEE80211_TPT_LEDTRIG_FL_WORK; else led_trig_stop |= IEEE80211_TPT_LEDTRIG_FL_WORK; @@ -1328,6 +1331,8 @@ u32 __ieee80211_recalc_idle(struct ieee80211_local *local) ieee80211_mod_tpt_led_trig(local, led_trig_start, led_trig_stop); + if (hw_roc) + return ieee80211_idle_off(local, "hw remain-on-channel"); if (working) return ieee80211_idle_off(local, "working"); if (scanning) diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 4b088b3c25e8..485d36bc9a46 100644 --- a/net/mac80211/main.c +++ 
b/net/mac80211/main.c @@ -609,6 +609,8 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, ieee80211_led_names(local); + ieee80211_hw_roc_setup(local); + return local_to_hw(local); } EXPORT_SYMBOL(ieee80211_alloc_hw); @@ -753,7 +755,8 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) } } - local->hw.wiphy->max_remain_on_channel_duration = 5000; + if (!local->ops->remain_on_channel) + local->hw.wiphy->max_remain_on_channel_duration = 5000; result = wiphy_register(local->hw.wiphy); if (result < 0) diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index 4b564091e51d..49b9ec22d9b6 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -14,6 +14,7 @@ */ #include #include "ieee80211_i.h" +#include "driver-trace.h" /* * inform AP that we will go to sleep so that it will buffer the frames @@ -190,3 +191,77 @@ void ieee80211_offchannel_return(struct ieee80211_local *local, } mutex_unlock(&local->iflist_mtx); } + +static void ieee80211_hw_roc_start(struct work_struct *work) +{ + struct ieee80211_local *local = + container_of(work, struct ieee80211_local, hw_roc_start); + + mutex_lock(&local->mtx); + + if (!local->hw_roc_channel) { + mutex_unlock(&local->mtx); + return; + } + + ieee80211_recalc_idle(local); + + cfg80211_ready_on_channel(local->hw_roc_dev, local->hw_roc_cookie, + local->hw_roc_channel, + local->hw_roc_channel_type, + local->hw_roc_duration, + GFP_KERNEL); + mutex_unlock(&local->mtx); +} + +void ieee80211_ready_on_channel(struct ieee80211_hw *hw) +{ + struct ieee80211_local *local = hw_to_local(hw); + + trace_api_ready_on_channel(local); + + ieee80211_queue_work(hw, &local->hw_roc_start); +} +EXPORT_SYMBOL_GPL(ieee80211_ready_on_channel); + +static void ieee80211_hw_roc_done(struct work_struct *work) +{ + struct ieee80211_local *local = + container_of(work, struct ieee80211_local, hw_roc_done); + + mutex_lock(&local->mtx); + + if (!local->hw_roc_channel) { + mutex_unlock(&local->mtx); + return; + } + + 
cfg80211_remain_on_channel_expired(local->hw_roc_dev, + local->hw_roc_cookie, + local->hw_roc_channel, + local->hw_roc_channel_type, + GFP_KERNEL); + + local->hw_roc_channel = NULL; + local->hw_roc_cookie = 0; + + ieee80211_recalc_idle(local); + + mutex_unlock(&local->mtx); +} + +void ieee80211_remain_on_channel_expired(struct ieee80211_hw *hw) +{ + struct ieee80211_local *local = hw_to_local(hw); + + trace_api_remain_on_channel_expired(local); + + ieee80211_queue_work(hw, &local->hw_roc_done); +} +EXPORT_SYMBOL_GPL(ieee80211_remain_on_channel_expired); + +void ieee80211_hw_roc_setup(struct ieee80211_local *local) +{ + INIT_WORK(&local->hw_roc_start, ieee80211_hw_roc_start); + INIT_WORK(&local->hw_roc_done, ieee80211_hw_roc_done); +} -- cgit v1.2.3-59-g8ed1b From f682cefa5ad204d3bfaa54a58046c66d2d035ac1 Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Wed, 5 Jan 2011 04:23:23 +0000 Subject: netfilter: fix the race when initializing nf_ct_expect_hash_rnd Since nf_ct_expect_dst_hash() may be called without nf_conntrack_lock locked, nf_ct_expect_hash_rnd should be initialized in the atomic way. In this patch, we use nf_conntrack_hash_rnd instead of nf_ct_expect_hash_rnd. Signed-off-by: Changli Gao Acked-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/net/netfilter/nf_conntrack.h | 2 ++ net/netfilter/nf_conntrack_core.c | 30 +++++++++++++++++------------- net/netfilter/nf_conntrack_expect.c | 10 +++------- 3 files changed, 22 insertions(+), 20 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index caf17db87dbc..d85cff10e169 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -298,6 +298,8 @@ static inline int nf_ct_is_untracked(const struct nf_conn *ct) extern int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp); extern unsigned int nf_conntrack_htable_size; extern unsigned int nf_conntrack_max; +extern unsigned int nf_conntrack_hash_rnd; +void init_nf_conntrack_hash_rnd(void); #define NF_CT_STAT_INC(net, count) \ __this_cpu_inc((net)->ct.stat->count) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 27a5ea6b6a0f..e61511929c66 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -65,7 +65,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_max); DEFINE_PER_CPU(struct nf_conn, nf_conntrack_untracked); EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked); -static unsigned int nf_conntrack_hash_rnd __read_mostly; +unsigned int nf_conntrack_hash_rnd __read_mostly; static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple, u16 zone) { @@ -596,6 +596,21 @@ static noinline int early_drop(struct net *net, unsigned int hash) return dropped; } +void init_nf_conntrack_hash_rnd(void) +{ + unsigned int rand; + + /* + * Why not initialize nf_conntrack_rnd in a "init()" function ? + * Because there isn't enough entropy when system initializing, + * and we initialize it as late as possible. 
+ */ + do { + get_random_bytes(&rand, sizeof(rand)); + } while (!rand); + cmpxchg(&nf_conntrack_hash_rnd, 0, rand); +} + static struct nf_conn * __nf_conntrack_alloc(struct net *net, u16 zone, const struct nf_conntrack_tuple *orig, @@ -605,18 +620,7 @@ __nf_conntrack_alloc(struct net *net, u16 zone, struct nf_conn *ct; if (unlikely(!nf_conntrack_hash_rnd)) { - unsigned int rand; - - /* - * Why not initialize nf_conntrack_rnd in a "init()" function ? - * Because there isn't enough entropy when system initializing, - * and we initialize it as late as possible. - */ - do { - get_random_bytes(&rand, sizeof(rand)); - } while (!rand); - cmpxchg(&nf_conntrack_hash_rnd, 0, rand); - + init_nf_conntrack_hash_rnd(); /* recompute the hash as nf_conntrack_hash_rnd is initialized */ hash = hash_conntrack_raw(orig, zone); } diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 46e8966912b1..a20fb0bd1efe 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -32,9 +32,7 @@ unsigned int nf_ct_expect_hsize __read_mostly; EXPORT_SYMBOL_GPL(nf_ct_expect_hsize); -static unsigned int nf_ct_expect_hash_rnd __read_mostly; unsigned int nf_ct_expect_max __read_mostly; -static int nf_ct_expect_hash_rnd_initted __read_mostly; static struct kmem_cache *nf_ct_expect_cachep __read_mostly; @@ -77,15 +75,13 @@ static unsigned int nf_ct_expect_dst_hash(const struct nf_conntrack_tuple *tuple { unsigned int hash; - if (unlikely(!nf_ct_expect_hash_rnd_initted)) { - get_random_bytes(&nf_ct_expect_hash_rnd, - sizeof(nf_ct_expect_hash_rnd)); - nf_ct_expect_hash_rnd_initted = 1; + if (unlikely(!nf_conntrack_hash_rnd)) { + init_nf_conntrack_hash_rnd(); } hash = jhash2(tuple->dst.u3.all, ARRAY_SIZE(tuple->dst.u3.all), (((tuple->dst.protonum ^ tuple->src.l3num) << 16) | - (__force __u16)tuple->dst.u.all) ^ nf_ct_expect_hash_rnd); + (__force __u16)tuple->dst.u.all) ^ nf_conntrack_hash_rnd); return ((u64)hash * nf_ct_expect_hsize) >> 
32; } -- cgit v1.2.3-59-g8ed1b From 61b1ab4583e275af216c8454b9256de680499b19 Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Mon, 3 Jan 2011 14:44:42 +0100 Subject: IPVS: netns, add basic init per netns. Preparation for network name-space init; at this stage some empty functions exist. In most files there is a check whether it is the root ns, i.e. init_net: if (!net_eq(net, &init_net)) return ... this will be removed by the last patch, when enabling name-space. *v3 ip_vs_conn.c merge error corrected. net_ipvs #ifdef removed as suggested by Jan Engelhardt [ horms@verge.net.au: Removed whitespace-change-only hunks ] Signed-off-by: Hans Schillstrom Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- include/net/ip_vs.h | 11 +++++++ include/net/net_namespace.h | 2 ++ include/net/netns/ip_vs.h | 25 ++++++++++++++++ net/netfilter/ipvs/ip_vs_app.c | 28 +++++++++++++++--- net/netfilter/ipvs/ip_vs_conn.c | 34 ++++++++++++++++++---- net/netfilter/ipvs/ip_vs_core.c | 63 ++++++++++++++++++++++++++++++++++++++-- net/netfilter/ipvs/ip_vs_ctl.c | 49 +++++++++++++++++++++++++------ net/netfilter/ipvs/ip_vs_est.c | 20 ++++++++++++- net/netfilter/ipvs/ip_vs_ftp.c | 34 +++++++++++++++++++--- net/netfilter/ipvs/ip_vs_lblc.c | 37 +++++++++++++++++++++-- net/netfilter/ipvs/ip_vs_lblcr.c | 38 +++++++++++++++++++++--- net/netfilter/ipvs/ip_vs_proto.c | 19 ++++++++++++ net/netfilter/ipvs/ip_vs_sync.c | 27 +++++++++++++++++ 13 files changed, 354 insertions(+), 33 deletions(-) create mode 100644 include/net/netns/ip_vs.h (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index d858264217ba..c1c2ece3ed94 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -28,6 +28,15 @@ #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) #include #endif +#include /* Netw namespace */ + +/* + * Generic access of ipvs struct + */ +static inline struct netns_ipvs *net_ipvs(struct net* net) +{ + return net->ipvs; +} /* Connections' size value needed 
by ip_vs_ctl.c */ extern int ip_vs_conn_tab_size; @@ -922,6 +931,8 @@ extern char ip_vs_backup_mcast_ifn[IP_VS_IFNAME_MAXLEN]; extern int start_sync_thread(int state, char *mcast_ifn, __u8 syncid); extern int stop_sync_thread(int state); extern void ip_vs_sync_conn(struct ip_vs_conn *cp); +extern int ip_vs_sync_init(void); +extern void ip_vs_sync_cleanup(void); /* diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 1bf812b21fb7..b3b4a34cb2cc 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -20,6 +20,7 @@ #include #endif #include +#include struct proc_dir_entry; struct net_device; @@ -94,6 +95,7 @@ struct net { #ifdef CONFIG_XFRM struct netns_xfrm xfrm; #endif + struct netns_ipvs *ipvs; }; diff --git a/include/net/netns/ip_vs.h b/include/net/netns/ip_vs.h new file mode 100644 index 000000000000..12fe84087cec --- /dev/null +++ b/include/net/netns/ip_vs.h @@ -0,0 +1,25 @@ +/* + * IP Virtual Server + * Data structure for network namspace + * + */ + +#ifndef IP_VS_H_ +#define IP_VS_H_ + +#include +#include +#include +#include +#include +#include + +struct ip_vs_stats; +struct ip_vs_sync_buff; +struct ctl_table_header; + +struct netns_ipvs { + int gen; /* Generation */ +}; + +#endif /* IP_VS_H_ */ diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c index a475edee0912..40b09ccc4896 100644 --- a/net/netfilter/ipvs/ip_vs_app.c +++ b/net/netfilter/ipvs/ip_vs_app.c @@ -569,15 +569,35 @@ static const struct file_operations ip_vs_app_fops = { }; #endif -int __init ip_vs_app_init(void) +static int __net_init __ip_vs_app_init(struct net *net) { - /* we will replace it with proc_net_ipvs_create() soon */ - proc_net_fops_create(&init_net, "ip_vs_app", 0, &ip_vs_app_fops); + if (!net_eq(net, &init_net)) /* netns not enabled yet */ + return -EPERM; + + proc_net_fops_create(net, "ip_vs_app", 0, &ip_vs_app_fops); return 0; } +static void __net_exit __ip_vs_app_cleanup(struct net *net) +{ + 
proc_net_remove(net, "ip_vs_app"); +} + +static struct pernet_operations ip_vs_app_ops = { + .init = __ip_vs_app_init, + .exit = __ip_vs_app_cleanup, +}; + +int __init ip_vs_app_init(void) +{ + int rv; + + rv = register_pernet_subsys(&ip_vs_app_ops); + return rv; +} + void ip_vs_app_cleanup(void) { - proc_net_remove(&init_net, "ip_vs_app"); + unregister_pernet_subsys(&ip_vs_app_ops); } diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 66e4662925d5..7c1b502f8d8d 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -1201,11 +1201,36 @@ static void ip_vs_conn_flush(void) goto flush_again; } } +/* + * per netns init and exit + */ +int __net_init __ip_vs_conn_init(struct net *net) +{ + if (!net_eq(net, &init_net)) /* netns not enabled yet */ + return -EPERM; + proc_net_fops_create(net, "ip_vs_conn", 0, &ip_vs_conn_fops); + proc_net_fops_create(net, "ip_vs_conn_sync", 0, &ip_vs_conn_sync_fops); + return 0; +} + +static void __net_exit __ip_vs_conn_cleanup(struct net *net) +{ + if (!net_eq(net, &init_net)) /* netns not enabled yet */ + return; + + proc_net_remove(net, "ip_vs_conn"); + proc_net_remove(net, "ip_vs_conn_sync"); +} +static struct pernet_operations ipvs_conn_ops = { + .init = __ip_vs_conn_init, + .exit = __ip_vs_conn_cleanup, +}; int __init ip_vs_conn_init(void) { int idx; + int retc; /* Compute size and mask */ ip_vs_conn_tab_size = 1 << ip_vs_conn_tab_bits; @@ -1243,24 +1268,21 @@ int __init ip_vs_conn_init(void) rwlock_init(&__ip_vs_conntbl_lock_array[idx].l); } - proc_net_fops_create(&init_net, "ip_vs_conn", 0, &ip_vs_conn_fops); - proc_net_fops_create(&init_net, "ip_vs_conn_sync", 0, &ip_vs_conn_sync_fops); + retc = register_pernet_subsys(&ipvs_conn_ops); /* calculate the random value for connection hash */ get_random_bytes(&ip_vs_conn_rnd, sizeof(ip_vs_conn_rnd)); - return 0; + return retc; } - void ip_vs_conn_cleanup(void) { + unregister_pernet_subsys(&ipvs_conn_ops); /* flush all the 
connection entries first */ ip_vs_conn_flush(); /* Release the empty cache */ kmem_cache_destroy(ip_vs_conn_cachep); - proc_net_remove(&init_net, "ip_vs_conn"); - proc_net_remove(&init_net, "ip_vs_conn_sync"); vfree(ip_vs_conn_tab); } diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 5287771d0647..206f40c548d7 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -41,6 +41,7 @@ #include /* for icmp_send */ #include #include +#include /* net_generic() */ #include #include @@ -68,6 +69,12 @@ EXPORT_SYMBOL(ip_vs_conn_put); EXPORT_SYMBOL(ip_vs_get_debug_level); #endif +int ip_vs_net_id __read_mostly; +#ifdef IP_VS_GENERIC_NETNS +EXPORT_SYMBOL(ip_vs_net_id); +#endif +/* netns cnt used for uniqueness */ +static atomic_t ipvs_netns_cnt = ATOMIC_INIT(0); /* ID used in ICMP lookups */ #define icmp_id(icmph) (((icmph)->un).echo.id) @@ -1813,6 +1820,44 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { #endif }; +/* + * Initialize IP Virtual Server netns mem. 
+ */ +static int __net_init __ip_vs_init(struct net *net) +{ + struct netns_ipvs *ipvs; + + if (!net_eq(net, &init_net)) { + pr_err("The final patch for enabling netns is missing\n"); + return -EPERM; + } + ipvs = net_generic(net, ip_vs_net_id); + if (ipvs == NULL) { + pr_err("%s(): no memory.\n", __func__); + return -ENOMEM; + } + /* Counters used for creating unique names */ + ipvs->gen = atomic_read(&ipvs_netns_cnt); + atomic_inc(&ipvs_netns_cnt); + net->ipvs = ipvs; + printk(KERN_INFO "IPVS: Creating netns size=%lu id=%d\n", + sizeof(struct netns_ipvs), ipvs->gen); + return 0; +} + +static void __net_exit __ip_vs_cleanup(struct net *net) +{ + struct netns_ipvs *ipvs = net_ipvs(net); + + IP_VS_DBG(10, "ipvs netns %d released\n", ipvs->gen); +} + +static struct pernet_operations ipvs_core_ops = { + .init = __ip_vs_init, + .exit = __ip_vs_cleanup, + .id = &ip_vs_net_id, + .size = sizeof(struct netns_ipvs), +}; /* * Initialize IP Virtual Server @@ -1821,8 +1866,11 @@ static int __init ip_vs_init(void) { int ret; - ip_vs_estimator_init(); + ret = register_pernet_subsys(&ipvs_core_ops); /* Alloc ip_vs struct */ + if (ret < 0) + return ret; + ip_vs_estimator_init(); ret = ip_vs_control_init(); if (ret < 0) { pr_err("can't setup control.\n"); @@ -1843,15 +1891,23 @@ static int __init ip_vs_init(void) goto cleanup_app; } + ret = ip_vs_sync_init(); + if (ret < 0) { + pr_err("can't setup sync data.\n"); + goto cleanup_conn; + } + ret = nf_register_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops)); if (ret < 0) { pr_err("can't register hooks.\n"); - goto cleanup_conn; + goto cleanup_sync; } pr_info("ipvs loaded.\n"); return ret; +cleanup_sync: + ip_vs_sync_cleanup(); cleanup_conn: ip_vs_conn_cleanup(); cleanup_app: @@ -1861,17 +1917,20 @@ static int __init ip_vs_init(void) ip_vs_control_cleanup(); cleanup_estimator: ip_vs_estimator_cleanup(); + unregister_pernet_subsys(&ipvs_core_ops); /* free ip_vs struct */ return ret; } static void __exit ip_vs_cleanup(void) { 
nf_unregister_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops)); + ip_vs_sync_cleanup(); ip_vs_conn_cleanup(); ip_vs_app_cleanup(); ip_vs_protocol_cleanup(); ip_vs_control_cleanup(); ip_vs_estimator_cleanup(); + unregister_pernet_subsys(&ipvs_core_ops); /* free ip_vs struct */ pr_info("ipvs unloaded.\n"); } diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index ca49e928f302..ceeef4352d34 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -3406,6 +3406,42 @@ static void ip_vs_genl_unregister(void) /* End of Generic Netlink interface definitions */ +/* + * per netns intit/exit func. + */ +int __net_init __ip_vs_control_init(struct net *net) +{ + if (!net_eq(net, &init_net)) /* netns not enabled yet */ + return -EPERM; + + proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops); + proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops); + sysctl_header = register_net_sysctl_table(net, net_vs_ctl_path, + vs_vars); + if (sysctl_header == NULL) + goto err_reg; + ip_vs_new_estimator(&ip_vs_stats); + return 0; + +err_reg: + return -ENOMEM; +} + +static void __net_exit __ip_vs_control_cleanup(struct net *net) +{ + if (!net_eq(net, &init_net)) /* netns not enabled yet */ + return; + + ip_vs_kill_estimator(&ip_vs_stats); + unregister_net_sysctl_table(sysctl_header); + proc_net_remove(net, "ip_vs_stats"); + proc_net_remove(net, "ip_vs"); +} + +static struct pernet_operations ipvs_control_ops = { + .init = __ip_vs_control_init, + .exit = __ip_vs_control_cleanup, +}; int __init ip_vs_control_init(void) { @@ -3437,12 +3473,9 @@ int __init ip_vs_control_init(void) return ret; } - proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops); - proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops); - - sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars); - - ip_vs_new_estimator(&ip_vs_stats); + ret = register_pernet_subsys(&ipvs_control_ops); + if (ret) + return ret; /* Hook the defense timer */ 
schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD); @@ -3459,9 +3492,7 @@ void ip_vs_control_cleanup(void) cancel_delayed_work_sync(&defense_work); cancel_work_sync(&defense_work.work); ip_vs_kill_estimator(&ip_vs_stats); - unregister_sysctl_table(sysctl_header); - proc_net_remove(&init_net, "ip_vs_stats"); - proc_net_remove(&init_net, "ip_vs"); + unregister_pernet_subsys(&ipvs_control_ops); ip_vs_genl_unregister(); nf_unregister_sockopt(&ip_vs_sockopts); LeaveFunction(2); diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c index ff28801962e0..7417a0c1408b 100644 --- a/net/netfilter/ipvs/ip_vs_est.c +++ b/net/netfilter/ipvs/ip_vs_est.c @@ -157,13 +157,31 @@ void ip_vs_zero_estimator(struct ip_vs_stats *stats) est->outbps = 0; } +static int __net_init __ip_vs_estimator_init(struct net *net) +{ + if (!net_eq(net, &init_net)) /* netns not enabled yet */ + return -EPERM; + + return 0; +} + +static struct pernet_operations ip_vs_app_ops = { + .init = __ip_vs_estimator_init, +}; + int __init ip_vs_estimator_init(void) { + int rv; + + rv = register_pernet_subsys(&ip_vs_app_ops); + if (rv < 0) + return rv; mod_timer(&est_timer, jiffies + 2 * HZ); - return 0; + return rv; } void ip_vs_estimator_cleanup(void) { del_timer_sync(&est_timer); + unregister_pernet_subsys(&ip_vs_app_ops); } diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index 84aef65b37d1..0e762f322aa3 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c @@ -399,15 +399,17 @@ static struct ip_vs_app ip_vs_ftp = { .pkt_in = ip_vs_ftp_in, }; - /* - * ip_vs_ftp initialization + * per netns ip_vs_ftp initialization */ -static int __init ip_vs_ftp_init(void) +static int __net_init __ip_vs_ftp_init(struct net *net) { int i, ret; struct ip_vs_app *app = &ip_vs_ftp; + if (!net_eq(net, &init_net)) /* netns not enabled yet */ + return -EPERM; + ret = register_ip_vs_app(app); if (ret) return ret; @@ -427,14 +429,38 @@ static int __init 
ip_vs_ftp_init(void) return ret; } +/* + * netns exit + */ +static void __ip_vs_ftp_exit(struct net *net) +{ + struct ip_vs_app *app = &ip_vs_ftp; + + if (!net_eq(net, &init_net)) /* netns not enabled yet */ + return; + + unregister_ip_vs_app(app); +} + +static struct pernet_operations ip_vs_ftp_ops = { + .init = __ip_vs_ftp_init, + .exit = __ip_vs_ftp_exit, +}; +int __init ip_vs_ftp_init(void) +{ + int rv; + + rv = register_pernet_subsys(&ip_vs_ftp_ops); + return rv; +} /* * ip_vs_ftp finish. */ static void __exit ip_vs_ftp_exit(void) { - unregister_ip_vs_app(&ip_vs_ftp); + unregister_pernet_subsys(&ip_vs_ftp_ops); } diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index 9323f8944199..84278fb4e055 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -543,23 +543,54 @@ static struct ip_vs_scheduler ip_vs_lblc_scheduler = .schedule = ip_vs_lblc_schedule, }; +/* + * per netns init. + */ +static int __net_init __ip_vs_lblc_init(struct net *net) +{ + if (!net_eq(net, &init_net)) /* netns not enabled yet */ + return -EPERM; + + sysctl_header = register_net_sysctl_table(net, net_vs_ctl_path, + vs_vars_table); + if (!sysctl_header) + return -ENOMEM; + + return 0; +} + +static void __net_exit __ip_vs_lblc_exit(struct net *net) +{ + if (!net_eq(net, &init_net)) /* netns not enabled yet */ + return; + + unregister_net_sysctl_table(sysctl_header); +} + +static struct pernet_operations ip_vs_lblc_ops = { + .init = __ip_vs_lblc_init, + .exit = __ip_vs_lblc_exit, +}; static int __init ip_vs_lblc_init(void) { int ret; - sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars_table); + ret = register_pernet_subsys(&ip_vs_lblc_ops); + if (ret) + return ret; + ret = register_ip_vs_scheduler(&ip_vs_lblc_scheduler); if (ret) - unregister_sysctl_table(sysctl_header); + unregister_pernet_subsys(&ip_vs_lblc_ops); return ret; } static void __exit ip_vs_lblc_cleanup(void) { - unregister_sysctl_table(sysctl_header); 
unregister_ip_vs_scheduler(&ip_vs_lblc_scheduler); + unregister_pernet_subsys(&ip_vs_lblc_ops); } diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index dbeed8ea421a..7c7396a6acbf 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -744,23 +744,53 @@ static struct ip_vs_scheduler ip_vs_lblcr_scheduler = .schedule = ip_vs_lblcr_schedule, }; +/* + * per netns init. + */ +static int __net_init __ip_vs_lblcr_init(struct net *net) +{ + if (!net_eq(net, &init_net)) /* netns not enabled yet */ + return -EPERM; + + sysctl_header = register_net_sysctl_table(net, net_vs_ctl_path, + vs_vars_table); + if (!sysctl_header) + return -ENOMEM; + + return 0; +} + +static void __net_exit __ip_vs_lblcr_exit(struct net *net) +{ + if (!net_eq(net, &init_net)) /* netns not enabled yet */ + return; + + unregister_net_sysctl_table(sysctl_header); +} + +static struct pernet_operations ip_vs_lblcr_ops = { + .init = __ip_vs_lblcr_init, + .exit = __ip_vs_lblcr_exit, +}; static int __init ip_vs_lblcr_init(void) { int ret; - sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars_table); + ret = register_pernet_subsys(&ip_vs_lblcr_ops); + if (ret) + return ret; + ret = register_ip_vs_scheduler(&ip_vs_lblcr_scheduler); if (ret) - unregister_sysctl_table(sysctl_header); + unregister_pernet_subsys(&ip_vs_lblcr_ops); return ret; } - static void __exit ip_vs_lblcr_cleanup(void) { - unregister_sysctl_table(sysctl_header); unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler); + unregister_pernet_subsys(&ip_vs_lblcr_ops); } diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c index c53998390877..45392942d0e7 100644 --- a/net/netfilter/ipvs/ip_vs_proto.c +++ b/net/netfilter/ipvs/ip_vs_proto.c @@ -236,6 +236,23 @@ ip_vs_tcpudp_debug_packet(int af, struct ip_vs_protocol *pp, ip_vs_tcpudp_debug_packet_v4(pp, skb, offset, msg); } +/* + * per network name-space init + */ +static int __net_init 
__ip_vs_protocol_init(struct net *net) +{ + return 0; +} + +static void __net_exit __ip_vs_protocol_cleanup(struct net *net) +{ + /* empty */ +} + +static struct pernet_operations ipvs_proto_ops = { + .init = __ip_vs_protocol_init, + .exit = __ip_vs_protocol_cleanup, +}; int __init ip_vs_protocol_init(void) { @@ -265,6 +282,7 @@ int __init ip_vs_protocol_init(void) REGISTER_PROTOCOL(&ip_vs_protocol_esp); #endif pr_info("Registered protocols (%s)\n", &protocols[2]); + return register_pernet_subsys(&ipvs_proto_ops); return 0; } @@ -275,6 +293,7 @@ void ip_vs_protocol_cleanup(void) struct ip_vs_protocol *pp; int i; + unregister_pernet_subsys(&ipvs_proto_ops); /* unregister all the ipvs protocols */ for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) { while ((pp = ip_vs_proto_table[i]) != NULL) diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index c1c167ab73ee..3668739a6d06 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -1639,3 +1639,30 @@ int stop_sync_thread(int state) return 0; } + +/* + * Initialize data struct for each netns + */ +static int __net_init __ip_vs_sync_init(struct net *net) +{ + return 0; +} + +static void __ip_vs_sync_cleanup(struct net *net) +{ +} +static struct pernet_operations ipvs_sync_ops = { + .init = __ip_vs_sync_init, + .exit = __ip_vs_sync_cleanup, +}; + + +int __init ip_vs_sync_init(void) +{ + return register_pernet_subsys(&ipvs_sync_ops); +} + +void __exit ip_vs_sync_cleanup(void) +{ + unregister_pernet_subsys(&ipvs_sync_ops); +} -- cgit v1.2.3-59-g8ed1b From fc723250c9cb046cc19833a2b1c4309bbf59ac36 Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Mon, 3 Jan 2011 14:44:43 +0100 Subject: IPVS: netns to services part 1 Services hash tables got netns ptr a hash arg, While Real Servers (rs) has been moved to ipvs struct. Two new inline functions added to get net ptr from skb. 
Since ip_vs is called from different contexts, there are two places to dig for the net ptr: skb->dev or skb->sk; this is handled in skb_net() and skb_sknet(). Global functions, ip_vs_service_get(), ip_vs_lookup_real_service() etc., have got struct net *net as first param. If possible get net ptr from skb etc., - if not, &init_net is used at this early stage of patching. ip_vs_ctl.c procfs not ready for netns yet. *v3 Comments by Julian - __ip_vs_service_find and __ip_vs_svc_fwm_find are fast path, net_eq(svc->net, net) so the check is at the end now. - net = skb_net(skb) in ip_vs_out moved after check for skb_dst. Signed-off-by: Hans Schillstrom Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- include/net/ip_vs.h | 64 +++++++++- include/net/netns/ip_vs.h | 8 ++ net/netfilter/ipvs/ip_vs_conn.c | 2 +- net/netfilter/ipvs/ip_vs_core.c | 4 +- net/netfilter/ipvs/ip_vs_ctl.c | 232 +++++++++++++++++++--------------- net/netfilter/ipvs/ip_vs_proto_sctp.c | 5 +- net/netfilter/ipvs/ip_vs_proto_tcp.c | 7 +- net/netfilter/ipvs/ip_vs_proto_udp.c | 5 +- net/netfilter/ipvs/ip_vs_sync.c | 2 +- 9 files changed, 214 insertions(+), 115 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index c1c2ece3ed94..d551e0d8fd9a 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -37,6 +37,59 @@ static inline struct netns_ipvs *net_ipvs(struct net* net) { return net->ipvs; } +/* + * Get net ptr from skb in traffic cases + * use skb_sknet when call is from userland (ioctl or netlink) + */ +static inline struct net *skb_net(struct sk_buff *skb) +{ +#ifdef CONFIG_NET_NS +#ifdef CONFIG_IP_VS_DEBUG + /* + * This is used for debug only. 
+ * Start with the most likely hit + * End with BUG + */ + if (likely(skb->dev && skb->dev->nd_net)) + return dev_net(skb->dev); + if (skb_dst(skb)->dev) + return dev_net(skb_dst(skb)->dev); + WARN(skb->sk, "Maybe skb_sknet should be used in %s() at line:%d\n", + __func__, __LINE__); + if (likely(skb->sk && skb->sk->sk_net)) + return sock_net(skb->sk); + pr_err("There is no net ptr to find in the skb in %s() line:%d\n", + __func__, __LINE__); + BUG(); +#else + return dev_net(skb->dev ? : skb_dst(skb)->dev); +#endif +#else + return &init_net; +#endif +} + +static inline struct net *skb_sknet(struct sk_buff *skb) +{ +#ifdef CONFIG_NET_NS +#ifdef CONFIG_IP_VS_DEBUG + /* Start with the most likely hit */ + if (likely(skb->sk && skb->sk->sk_net)) + return sock_net(skb->sk); + WARN(skb->dev, "Maybe skb_net should be used instead in %s() line:%d\n", + __func__, __LINE__); + if (likely(skb->dev && skb->dev->nd_net)) + return dev_net(skb->dev); + pr_err("There is no net ptr to find in the skb in %s() line:%d\n", + __func__, __LINE__); + BUG(); +#else + return sock_net(skb->sk); +#endif +#else + return &init_net; +#endif +} /* Connections' size value needed by ip_vs_ctl.c */ extern int ip_vs_conn_tab_size; @@ -496,6 +549,7 @@ struct ip_vs_service { unsigned flags; /* service status flags */ unsigned timeout; /* persistent timeout in ticks */ __be32 netmask; /* grouping granularity */ + struct net *net; struct list_head destinations; /* real server d-linked list */ __u32 num_dests; /* number of servers */ @@ -896,7 +950,7 @@ extern int sysctl_ip_vs_sync_ver; extern void ip_vs_sync_switch_mode(int mode); extern struct ip_vs_service * -ip_vs_service_get(int af, __u32 fwmark, __u16 protocol, +ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol, const union nf_inet_addr *vaddr, __be16 vport); static inline void ip_vs_service_put(struct ip_vs_service *svc) @@ -905,7 +959,7 @@ static inline void ip_vs_service_put(struct ip_vs_service *svc) } extern struct 
ip_vs_dest * -ip_vs_lookup_real_service(int af, __u16 protocol, +ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol, const union nf_inet_addr *daddr, __be16 dport); extern int ip_vs_use_count_inc(void); @@ -913,9 +967,9 @@ extern void ip_vs_use_count_dec(void); extern int ip_vs_control_init(void); extern void ip_vs_control_cleanup(void); extern struct ip_vs_dest * -ip_vs_find_dest(int af, const union nf_inet_addr *daddr, __be16 dport, - const union nf_inet_addr *vaddr, __be16 vport, __u16 protocol, - __u32 fwmark); +ip_vs_find_dest(struct net *net, int af, const union nf_inet_addr *daddr, + __be16 dport, const union nf_inet_addr *vaddr, __be16 vport, + __u16 protocol, __u32 fwmark); extern struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp); diff --git a/include/net/netns/ip_vs.h b/include/net/netns/ip_vs.h index 12fe84087cec..5b87d22a39fb 100644 --- a/include/net/netns/ip_vs.h +++ b/include/net/netns/ip_vs.h @@ -20,6 +20,14 @@ struct ctl_table_header; struct netns_ipvs { int gen; /* Generation */ + /* + * Hash table: for real service lookups + */ + #define IP_VS_RTAB_BITS 4 + #define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS) + #define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1) + + struct list_head rs_table[IP_VS_RTAB_SIZE]; }; #endif /* IP_VS_H_ */ diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 7c1b502f8d8d..7a0e79e3ad0f 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -611,7 +611,7 @@ struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp) struct ip_vs_dest *dest; if ((cp) && (!cp->dest)) { - dest = ip_vs_find_dest(cp->af, &cp->daddr, cp->dport, + dest = ip_vs_find_dest(&init_net, cp->af, &cp->daddr, cp->dport, &cp->vaddr, cp->vport, cp->protocol, cp->fwmark); ip_vs_bind_dest(cp, dest); diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 206f40c548d7..d0616ea1eebf 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ 
b/net/netfilter/ipvs/ip_vs_core.c @@ -1031,6 +1031,7 @@ drop: static unsigned int ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) { + struct net *net = NULL; struct ip_vs_iphdr iph; struct ip_vs_protocol *pp; struct ip_vs_conn *cp; @@ -1054,6 +1055,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) if (unlikely(!skb_dst(skb))) return NF_ACCEPT; + net = skb_net(skb); ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) { @@ -1119,7 +1121,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) sizeof(_ports), _ports); if (pptr == NULL) return NF_ACCEPT; /* Not for me */ - if (ip_vs_lookup_real_service(af, iph.protocol, + if (ip_vs_lookup_real_service(net, af, iph.protocol, &iph.saddr, pptr[0])) { /* diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index ceeef4352d34..2d7c96bd2114 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -287,15 +287,6 @@ static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE]; /* the service table hashed by fwmark */ static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE]; -/* - * Hash table: for real service lookups - */ -#define IP_VS_RTAB_BITS 4 -#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS) -#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1) - -static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE]; - /* * Trash for destinations */ @@ -311,9 +302,9 @@ static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0); /* * Returns hash value for virtual service */ -static __inline__ unsigned -ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr, - __be16 port) +static inline unsigned +ip_vs_svc_hashkey(struct net *net, int af, unsigned proto, + const union nf_inet_addr *addr, __be16 port) { register unsigned porth = ntohs(port); __be32 addr_fold = addr->ip; @@ -323,6 +314,7 @@ ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr, addr_fold = 
addr->ip6[0]^addr->ip6[1]^ addr->ip6[2]^addr->ip6[3]; #endif + addr_fold ^= ((size_t)net>>8); return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth) & IP_VS_SVC_TAB_MASK; @@ -331,13 +323,13 @@ ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr, /* * Returns hash value of fwmark for virtual service lookup */ -static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark) +static inline unsigned ip_vs_svc_fwm_hashkey(struct net *net, __u32 fwmark) { - return fwmark & IP_VS_SVC_TAB_MASK; + return (((size_t)net>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK; } /* - * Hashes a service in the ip_vs_svc_table by + * Hashes a service in the ip_vs_svc_table by * or in the ip_vs_svc_fwm_table by fwmark. * Should be called with locked tables. */ @@ -353,16 +345,16 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc) if (svc->fwmark == 0) { /* - * Hash it by in ip_vs_svc_table + * Hash it by in ip_vs_svc_table */ - hash = ip_vs_svc_hashkey(svc->af, svc->protocol, &svc->addr, - svc->port); + hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol, + &svc->addr, svc->port); list_add(&svc->s_list, &ip_vs_svc_table[hash]); } else { /* - * Hash it by fwmark in ip_vs_svc_fwm_table + * Hash it by fwmark in svc_fwm_table */ - hash = ip_vs_svc_fwm_hashkey(svc->fwmark); + hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark); list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]); } @@ -374,7 +366,7 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc) /* - * Unhashes a service from ip_vs_svc_table/ip_vs_svc_fwm_table. + * Unhashes a service from svc_table / svc_fwm_table. * Should be called with locked tables. 
*/ static int ip_vs_svc_unhash(struct ip_vs_service *svc) @@ -386,10 +378,10 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc) } if (svc->fwmark == 0) { - /* Remove it from the ip_vs_svc_table table */ + /* Remove it from the svc_table table */ list_del(&svc->s_list); } else { - /* Remove it from the ip_vs_svc_fwm_table table */ + /* Remove it from the svc_fwm_table table */ list_del(&svc->f_list); } @@ -400,23 +392,24 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc) /* - * Get service by {proto,addr,port} in the service table. + * Get service by {netns, proto,addr,port} in the service table. */ static inline struct ip_vs_service * -__ip_vs_service_find(int af, __u16 protocol, const union nf_inet_addr *vaddr, - __be16 vport) +__ip_vs_service_find(struct net *net, int af, __u16 protocol, + const union nf_inet_addr *vaddr, __be16 vport) { unsigned hash; struct ip_vs_service *svc; /* Check for "full" addressed entries */ - hash = ip_vs_svc_hashkey(af, protocol, vaddr, vport); + hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport); list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){ if ((svc->af == af) && ip_vs_addr_equal(af, &svc->addr, vaddr) && (svc->port == vport) - && (svc->protocol == protocol)) { + && (svc->protocol == protocol) + && net_eq(svc->net, net)) { /* HIT */ return svc; } @@ -430,16 +423,17 @@ __ip_vs_service_find(int af, __u16 protocol, const union nf_inet_addr *vaddr, * Get service by {fwmark} in the service table. 
*/ static inline struct ip_vs_service * -__ip_vs_svc_fwm_find(int af, __u32 fwmark) +__ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark) { unsigned hash; struct ip_vs_service *svc; /* Check for fwmark addressed entries */ - hash = ip_vs_svc_fwm_hashkey(fwmark); + hash = ip_vs_svc_fwm_hashkey(net, fwmark); list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) { - if (svc->fwmark == fwmark && svc->af == af) { + if (svc->fwmark == fwmark && svc->af == af + && net_eq(svc->net, net)) { /* HIT */ return svc; } @@ -449,7 +443,7 @@ __ip_vs_svc_fwm_find(int af, __u32 fwmark) } struct ip_vs_service * -ip_vs_service_get(int af, __u32 fwmark, __u16 protocol, +ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol, const union nf_inet_addr *vaddr, __be16 vport) { struct ip_vs_service *svc; @@ -459,14 +453,15 @@ ip_vs_service_get(int af, __u32 fwmark, __u16 protocol, /* * Check the table hashed by fwmark first */ - if (fwmark && (svc = __ip_vs_svc_fwm_find(af, fwmark))) + svc = __ip_vs_svc_fwm_find(net, af, fwmark); + if (fwmark && svc) goto out; /* * Check the table hashed by * for "full" addressed entries */ - svc = __ip_vs_service_find(af, protocol, vaddr, vport); + svc = __ip_vs_service_find(net, af, protocol, vaddr, vport); if (svc == NULL && protocol == IPPROTO_TCP @@ -476,7 +471,7 @@ ip_vs_service_get(int af, __u32 fwmark, __u16 protocol, * Check if ftp service entry exists, the packet * might belong to FTP data connections. */ - svc = __ip_vs_service_find(af, protocol, vaddr, FTPPORT); + svc = __ip_vs_service_find(net, af, protocol, vaddr, FTPPORT); } if (svc == NULL @@ -484,7 +479,7 @@ ip_vs_service_get(int af, __u32 fwmark, __u16 protocol, /* * Check if the catch-all port (port zero) exists */ - svc = __ip_vs_service_find(af, protocol, vaddr, 0); + svc = __ip_vs_service_find(net, af, protocol, vaddr, 0); } out: @@ -545,10 +540,10 @@ static inline unsigned ip_vs_rs_hashkey(int af, } /* - * Hashes ip_vs_dest in ip_vs_rtable by . 
+ * Hashes ip_vs_dest in rs_table by . * should be called with locked tables. */ -static int ip_vs_rs_hash(struct ip_vs_dest *dest) +static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest) { unsigned hash; @@ -562,19 +557,19 @@ static int ip_vs_rs_hash(struct ip_vs_dest *dest) */ hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port); - list_add(&dest->d_list, &ip_vs_rtable[hash]); + list_add(&dest->d_list, &ipvs->rs_table[hash]); return 1; } /* - * UNhashes ip_vs_dest from ip_vs_rtable. + * UNhashes ip_vs_dest from rs_table. * should be called with locked tables. */ static int ip_vs_rs_unhash(struct ip_vs_dest *dest) { /* - * Remove it from the ip_vs_rtable table. + * Remove it from the rs_table table. */ if (!list_empty(&dest->d_list)) { list_del(&dest->d_list); @@ -588,10 +583,11 @@ static int ip_vs_rs_unhash(struct ip_vs_dest *dest) * Lookup real service by in the real service table. */ struct ip_vs_dest * -ip_vs_lookup_real_service(int af, __u16 protocol, +ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol, const union nf_inet_addr *daddr, __be16 dport) { + struct netns_ipvs *ipvs = net_ipvs(net); unsigned hash; struct ip_vs_dest *dest; @@ -602,7 +598,7 @@ ip_vs_lookup_real_service(int af, __u16 protocol, hash = ip_vs_rs_hashkey(af, daddr, dport); read_lock(&__ip_vs_rs_lock); - list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) { + list_for_each_entry(dest, &ipvs->rs_table[hash], d_list) { if ((dest->af == af) && ip_vs_addr_equal(af, &dest->addr, daddr) && (dest->port == dport) @@ -652,7 +648,8 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, * ip_vs_lookup_real_service() looked promissing, but * seems not working as expected. 
*/ -struct ip_vs_dest *ip_vs_find_dest(int af, const union nf_inet_addr *daddr, +struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af, + const union nf_inet_addr *daddr, __be16 dport, const union nf_inet_addr *vaddr, __be16 vport, __u16 protocol, __u32 fwmark) @@ -660,7 +657,7 @@ struct ip_vs_dest *ip_vs_find_dest(int af, const union nf_inet_addr *daddr, struct ip_vs_dest *dest; struct ip_vs_service *svc; - svc = ip_vs_service_get(af, fwmark, protocol, vaddr, vport); + svc = ip_vs_service_get(net, af, fwmark, protocol, vaddr, vport); if (!svc) return NULL; dest = ip_vs_lookup_dest(svc, daddr, dport); @@ -768,6 +765,7 @@ static void __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, struct ip_vs_dest_user_kern *udest, int add) { + struct netns_ipvs *ipvs = net_ipvs(svc->net); int conn_flags; /* set the weight and the flags */ @@ -780,11 +778,11 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, conn_flags |= IP_VS_CONN_F_NOOUTPUT; } else { /* - * Put the real service in ip_vs_rtable if not present. + * Put the real service in rs_table if not present. * For now only for NAT! 
*/ write_lock_bh(&__ip_vs_rs_lock); - ip_vs_rs_hash(dest); + ip_vs_rs_hash(ipvs, dest); write_unlock_bh(&__ip_vs_rs_lock); } atomic_set(&dest->conn_flags, conn_flags); @@ -1117,7 +1115,7 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) * Add a service into the service hash table */ static int -ip_vs_add_service(struct ip_vs_service_user_kern *u, +ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, struct ip_vs_service **svc_p) { int ret = 0; @@ -1172,6 +1170,7 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u, svc->flags = u->flags; svc->timeout = u->timeout * HZ; svc->netmask = u->netmask; + svc->net = net; INIT_LIST_HEAD(&svc->destinations); rwlock_init(&svc->sched_lock); @@ -1428,17 +1427,19 @@ static int ip_vs_del_service(struct ip_vs_service *svc) /* * Flush all the virtual services */ -static int ip_vs_flush(void) +static int ip_vs_flush(struct net *net) { int idx; struct ip_vs_service *svc, *nxt; /* - * Flush the service table hashed by + * Flush the service table hashed by */ for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) { - ip_vs_unlink_service(svc); + list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], + s_list) { + if (net_eq(svc->net, net)) + ip_vs_unlink_service(svc); } } @@ -1448,7 +1449,8 @@ static int ip_vs_flush(void) for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { list_for_each_entry_safe(svc, nxt, &ip_vs_svc_fwm_table[idx], f_list) { - ip_vs_unlink_service(svc); + if (net_eq(svc->net, net)) + ip_vs_unlink_service(svc); } } @@ -1472,20 +1474,22 @@ static int ip_vs_zero_service(struct ip_vs_service *svc) return 0; } -static int ip_vs_zero_all(void) +static int ip_vs_zero_all(struct net *net) { int idx; struct ip_vs_service *svc; for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { - ip_vs_zero_service(svc); + if (net_eq(svc->net, net)) + ip_vs_zero_service(svc); } } 
for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { - ip_vs_zero_service(svc); + if (net_eq(svc->net, net)) + ip_vs_zero_service(svc); } } @@ -1763,6 +1767,7 @@ static struct ctl_table_header * sysctl_header; #ifdef CONFIG_PROC_FS struct ip_vs_iter { + struct seq_net_private p; /* Do not move this, netns depends upon it*/ struct list_head *table; int bucket; }; @@ -1789,6 +1794,7 @@ static inline const char *ip_vs_fwd_name(unsigned flags) /* Get the Nth entry in the two lists */ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos) { + struct net *net = seq_file_net(seq); struct ip_vs_iter *iter = seq->private; int idx; struct ip_vs_service *svc; @@ -1796,7 +1802,7 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos) /* look in hash by protocol */ for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { - if (pos-- == 0){ + if (net_eq(svc->net, net) && pos-- == 0) { iter->table = ip_vs_svc_table; iter->bucket = idx; return svc; @@ -1807,7 +1813,7 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos) /* keep looking in fwmark */ for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { - if (pos-- == 0) { + if (net_eq(svc->net, net) && pos-- == 0) { iter->table = ip_vs_svc_fwm_table; iter->bucket = idx; return svc; @@ -1961,7 +1967,7 @@ static const struct seq_operations ip_vs_info_seq_ops = { static int ip_vs_info_open(struct inode *inode, struct file *file) { - return seq_open_private(file, &ip_vs_info_seq_ops, + return seq_open_net(inode, file, &ip_vs_info_seq_ops, sizeof(struct ip_vs_iter)); } @@ -2011,7 +2017,7 @@ static int ip_vs_stats_show(struct seq_file *seq, void *v) static int ip_vs_stats_seq_open(struct inode *inode, struct file *file) { - return single_open(file, ip_vs_stats_show, NULL); + return 
single_open_net(inode, file, ip_vs_stats_show); } static const struct file_operations ip_vs_stats_fops = { @@ -2113,6 +2119,7 @@ static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest, static int do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) { + struct net *net = sock_net(sk); int ret; unsigned char arg[MAX_ARG_LEN]; struct ip_vs_service_user *usvc_compat; @@ -2147,7 +2154,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) if (cmd == IP_VS_SO_SET_FLUSH) { /* Flush the virtual service */ - ret = ip_vs_flush(); + ret = ip_vs_flush(net); goto out_unlock; } else if (cmd == IP_VS_SO_SET_TIMEOUT) { /* Set timeout values for (tcp tcpfin udp) */ @@ -2174,7 +2181,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) if (cmd == IP_VS_SO_SET_ZERO) { /* if no service address is set, zero counters in all */ if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) { - ret = ip_vs_zero_all(); + ret = ip_vs_zero_all(net); goto out_unlock; } } @@ -2191,10 +2198,10 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) /* Lookup the exact service by or fwmark */ if (usvc.fwmark == 0) - svc = __ip_vs_service_find(usvc.af, usvc.protocol, + svc = __ip_vs_service_find(net, usvc.af, usvc.protocol, &usvc.addr, usvc.port); else - svc = __ip_vs_svc_fwm_find(usvc.af, usvc.fwmark); + svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark); if (cmd != IP_VS_SO_SET_ADD && (svc == NULL || svc->protocol != usvc.protocol)) { @@ -2207,7 +2214,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) if (svc != NULL) ret = -EEXIST; else - ret = ip_vs_add_service(&usvc, &svc); + ret = ip_vs_add_service(net, &usvc, &svc); break; case IP_VS_SO_SET_EDIT: ret = ip_vs_edit_service(svc, &usvc); @@ -2267,7 +2274,8 @@ ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src) } static inline int -__ip_vs_get_service_entries(const 
struct ip_vs_get_services *get, +__ip_vs_get_service_entries(struct net *net, + const struct ip_vs_get_services *get, struct ip_vs_get_services __user *uptr) { int idx, count=0; @@ -2278,7 +2286,7 @@ __ip_vs_get_service_entries(const struct ip_vs_get_services *get, for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { /* Only expose IPv4 entries to old interface */ - if (svc->af != AF_INET) + if (svc->af != AF_INET || !net_eq(svc->net, net)) continue; if (count >= get->num_services) @@ -2297,7 +2305,7 @@ __ip_vs_get_service_entries(const struct ip_vs_get_services *get, for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { /* Only expose IPv4 entries to old interface */ - if (svc->af != AF_INET) + if (svc->af != AF_INET || !net_eq(svc->net, net)) continue; if (count >= get->num_services) @@ -2317,7 +2325,7 @@ __ip_vs_get_service_entries(const struct ip_vs_get_services *get, } static inline int -__ip_vs_get_dest_entries(const struct ip_vs_get_dests *get, +__ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get, struct ip_vs_get_dests __user *uptr) { struct ip_vs_service *svc; @@ -2325,9 +2333,9 @@ __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get, int ret = 0; if (get->fwmark) - svc = __ip_vs_svc_fwm_find(AF_INET, get->fwmark); + svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark); else - svc = __ip_vs_service_find(AF_INET, get->protocol, &addr, + svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr, get->port); if (svc) { @@ -2401,7 +2409,9 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) unsigned char arg[128]; int ret = 0; unsigned int copylen; + struct net *net = sock_net(sk); + BUG_ON(!net); if (!capable(CAP_NET_ADMIN)) return -EPERM; @@ -2463,7 +2473,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) ret = -EINVAL; goto out; } - ret = 
__ip_vs_get_service_entries(get, user); + ret = __ip_vs_get_service_entries(net, get, user); } break; @@ -2476,10 +2486,11 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) entry = (struct ip_vs_service_entry *)arg; addr.ip = entry->addr; if (entry->fwmark) - svc = __ip_vs_svc_fwm_find(AF_INET, entry->fwmark); + svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark); else - svc = __ip_vs_service_find(AF_INET, entry->protocol, - &addr, entry->port); + svc = __ip_vs_service_find(net, AF_INET, + entry->protocol, &addr, + entry->port); if (svc) { ip_vs_copy_service(entry, svc); if (copy_to_user(user, entry, sizeof(*entry)) != 0) @@ -2502,7 +2513,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) ret = -EINVAL; goto out; } - ret = __ip_vs_get_dest_entries(get, user); + ret = __ip_vs_get_dest_entries(net, get, user); } break; @@ -2722,11 +2733,12 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb, int idx = 0, i; int start = cb->args[0]; struct ip_vs_service *svc; + struct net *net = skb_sknet(skb); mutex_lock(&__ip_vs_mutex); for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) { list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) { - if (++idx <= start) + if (++idx <= start || !net_eq(svc->net, net)) continue; if (ip_vs_genl_dump_service(skb, svc, cb) < 0) { idx--; @@ -2737,7 +2749,7 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb, for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) { list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) { - if (++idx <= start) + if (++idx <= start || !net_eq(svc->net, net)) continue; if (ip_vs_genl_dump_service(skb, svc, cb) < 0) { idx--; @@ -2753,7 +2765,8 @@ nla_put_failure: return skb->len; } -static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc, +static int ip_vs_genl_parse_service(struct net *net, + struct ip_vs_service_user_kern *usvc, struct nlattr *nla, int full_entry, struct ip_vs_service **ret_svc) { @@ -2796,9 +2809,9 @@ static int 
ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc, } if (usvc->fwmark) - svc = __ip_vs_svc_fwm_find(usvc->af, usvc->fwmark); + svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark); else - svc = __ip_vs_service_find(usvc->af, usvc->protocol, + svc = __ip_vs_service_find(net, usvc->af, usvc->protocol, &usvc->addr, usvc->port); *ret_svc = svc; @@ -2835,13 +2848,14 @@ static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc, return 0; } -static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla) +static struct ip_vs_service *ip_vs_genl_find_service(struct net *net, + struct nlattr *nla) { struct ip_vs_service_user_kern usvc; struct ip_vs_service *svc; int ret; - ret = ip_vs_genl_parse_service(&usvc, nla, 0, &svc); + ret = ip_vs_genl_parse_service(net, &usvc, nla, 0, &svc); return ret ? ERR_PTR(ret) : svc; } @@ -2909,6 +2923,7 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb, struct ip_vs_service *svc; struct ip_vs_dest *dest; struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1]; + struct net *net; mutex_lock(&__ip_vs_mutex); @@ -2917,7 +2932,8 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb, IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy)) goto out_err; - svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]); + net = skb_sknet(skb); + svc = ip_vs_genl_find_service(net, attrs[IPVS_CMD_ATTR_SERVICE]); if (IS_ERR(svc) || svc == NULL) goto out_err; @@ -3102,13 +3118,15 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) struct ip_vs_dest_user_kern udest; int ret = 0, cmd; int need_full_svc = 0, need_full_dest = 0; + struct net *net; + net = skb_sknet(skb); cmd = info->genlhdr->cmd; mutex_lock(&__ip_vs_mutex); if (cmd == IPVS_CMD_FLUSH) { - ret = ip_vs_flush(); + ret = ip_vs_flush(net); goto out; } else if (cmd == IPVS_CMD_SET_CONFIG) { ret = ip_vs_genl_set_config(info->attrs); @@ -3133,7 +3151,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) goto out; } else if (cmd == 
IPVS_CMD_ZERO && !info->attrs[IPVS_CMD_ATTR_SERVICE]) { - ret = ip_vs_zero_all(); + ret = ip_vs_zero_all(net); goto out; } @@ -3143,7 +3161,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE) need_full_svc = 1; - ret = ip_vs_genl_parse_service(&usvc, + ret = ip_vs_genl_parse_service(net, &usvc, info->attrs[IPVS_CMD_ATTR_SERVICE], need_full_svc, &svc); if (ret) @@ -3173,7 +3191,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) switch (cmd) { case IPVS_CMD_NEW_SERVICE: if (svc == NULL) - ret = ip_vs_add_service(&usvc, &svc); + ret = ip_vs_add_service(net, &usvc, &svc); else ret = -EEXIST; break; @@ -3211,7 +3229,9 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info) struct sk_buff *msg; void *reply; int ret, cmd, reply_cmd; + struct net *net; + net = skb_sknet(skb); cmd = info->genlhdr->cmd; if (cmd == IPVS_CMD_GET_SERVICE) @@ -3240,7 +3260,8 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info) { struct ip_vs_service *svc; - svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]); + svc = ip_vs_genl_find_service(net, + info->attrs[IPVS_CMD_ATTR_SERVICE]); if (IS_ERR(svc)) { ret = PTR_ERR(svc); goto out_err; @@ -3411,9 +3432,15 @@ static void ip_vs_genl_unregister(void) */ int __net_init __ip_vs_control_init(struct net *net) { + int idx; + struct netns_ipvs *ipvs = net_ipvs(net); + if (!net_eq(net, &init_net)) /* netns not enabled yet */ return -EPERM; + for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++) + INIT_LIST_HEAD(&ipvs->rs_table[idx]); + proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops); proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops); sysctl_header = register_net_sysctl_table(net, net_vs_ctl_path, @@ -3445,43 +3472,48 @@ static struct pernet_operations ipvs_control_ops = { int __init ip_vs_control_init(void) { - int ret; int idx; + int ret; EnterFunction(2); - /* 
Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */ + /* Initialize svc_table, ip_vs_svc_fwm_table, rs_table */ for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { INIT_LIST_HEAD(&ip_vs_svc_table[idx]); INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]); } - for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++) { - INIT_LIST_HEAD(&ip_vs_rtable[idx]); + + ret = register_pernet_subsys(&ipvs_control_ops); + if (ret) { + pr_err("cannot register namespace.\n"); + goto err; } - smp_wmb(); + + smp_wmb(); /* Do we really need it now ? */ ret = nf_register_sockopt(&ip_vs_sockopts); if (ret) { pr_err("cannot register sockopt.\n"); - return ret; + goto err_net; } ret = ip_vs_genl_register(); if (ret) { pr_err("cannot register Generic Netlink interface.\n"); nf_unregister_sockopt(&ip_vs_sockopts); - return ret; + goto err_net; } - ret = register_pernet_subsys(&ipvs_control_ops); - if (ret) - return ret; - /* Hook the defense timer */ schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD); LeaveFunction(2); return 0; + +err_net: + unregister_pernet_subsys(&ipvs_control_ops); +err: + return ret; } diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index a315159983ad..521b827083fe 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -12,6 +12,7 @@ static int sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, int *verdict, struct ip_vs_conn **cpp) { + struct net *net; struct ip_vs_service *svc; sctp_chunkhdr_t _schunkh, *sch; sctp_sctphdr_t *sh, _sctph; @@ -27,9 +28,9 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, sizeof(_schunkh), &_schunkh); if (sch == NULL) return 0; - + net = skb_net(skb); if ((sch->type == SCTP_CID_INIT) && - (svc = ip_vs_service_get(af, skb->mark, iph.protocol, + (svc = ip_vs_service_get(net, af, skb->mark, iph.protocol, &iph.daddr, sh->dest))) { int ignored; diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c 
b/net/netfilter/ipvs/ip_vs_proto_tcp.c index 1cdab12abfef..c175d3166263 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c @@ -31,6 +31,7 @@ static int tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, int *verdict, struct ip_vs_conn **cpp) { + struct net *net; struct ip_vs_service *svc; struct tcphdr _tcph, *th; struct ip_vs_iphdr iph; @@ -42,11 +43,11 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, *verdict = NF_DROP; return 0; } - + net = skb_net(skb); /* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */ if (th->syn && - (svc = ip_vs_service_get(af, skb->mark, iph.protocol, &iph.daddr, - th->dest))) { + (svc = ip_vs_service_get(net, af, skb->mark, iph.protocol, + &iph.daddr, th->dest))) { int ignored; if (ip_vs_todrop()) { diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c index cd398de010cc..5ab54f648654 100644 --- a/net/netfilter/ipvs/ip_vs_proto_udp.c +++ b/net/netfilter/ipvs/ip_vs_proto_udp.c @@ -31,6 +31,7 @@ static int udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, int *verdict, struct ip_vs_conn **cpp) { + struct net *net; struct ip_vs_service *svc; struct udphdr _udph, *uh; struct ip_vs_iphdr iph; @@ -42,8 +43,8 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, *verdict = NF_DROP; return 0; } - - svc = ip_vs_service_get(af, skb->mark, iph.protocol, + net = skb_net(skb); + svc = ip_vs_service_get(net, af, skb->mark, iph.protocol, &iph.daddr, uh->dest); if (svc) { int ignored; diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 3668739a6d06..662aa2c22a05 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -749,7 +749,7 @@ static void ip_vs_proc_conn(struct ip_vs_conn_param *param, unsigned flags, * If it is not found the connection will remain unbound * but still handled. 
*/ - dest = ip_vs_find_dest(type, daddr, dport, param->vaddr, + dest = ip_vs_find_dest(&init_net, type, daddr, dport, param->vaddr, param->vport, protocol, fwmark); /* Set the approprite ativity flag */ -- cgit v1.2.3-59-g8ed1b From d0a1eef9c38218af20c809b2220a960b7ed81a36 Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Mon, 3 Jan 2011 14:44:44 +0100 Subject: IPVS: netns awarness to lblcr sheduler var sysctl_ip_vs_lblcr_expiration moved to ipvs struct as sysctl_lblcr_expiration procfs updated to handle this. Signed-off-by: Hans Schillstrom Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- include/net/netns/ip_vs.h | 5 ++++ net/netfilter/ipvs/ip_vs_lblcr.c | 54 ++++++++++++++++++++++++++-------------- 2 files changed, 41 insertions(+), 18 deletions(-) (limited to 'include/net') diff --git a/include/net/netns/ip_vs.h b/include/net/netns/ip_vs.h index 5b87d22a39fb..51a92ee1b167 100644 --- a/include/net/netns/ip_vs.h +++ b/include/net/netns/ip_vs.h @@ -28,6 +28,11 @@ struct netns_ipvs { #define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1) struct list_head rs_table[IP_VS_RTAB_SIZE]; + + /* ip_vs_lblcr */ + int sysctl_lblcr_expiration; + struct ctl_table_header *lblcr_ctl_header; + struct ctl_table *lblcr_ctl_table; }; #endif /* IP_VS_H_ */ diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index 7c7396a6acbf..61ae8cfcf0b4 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -70,8 +70,6 @@ * entries that haven't been touched for a day. 
*/ #define COUNT_FOR_FULL_EXPIRATION 30 -static int sysctl_ip_vs_lblcr_expiration = 24*60*60*HZ; - /* * for IPVS lblcr entry hash table @@ -296,7 +294,7 @@ struct ip_vs_lblcr_table { static ctl_table vs_vars_table[] = { { .procname = "lblcr_expiration", - .data = &sysctl_ip_vs_lblcr_expiration, + .data = NULL, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, @@ -304,8 +302,6 @@ static ctl_table vs_vars_table[] = { { } }; -static struct ctl_table_header * sysctl_header; - static inline void ip_vs_lblcr_free(struct ip_vs_lblcr_entry *en) { list_del(&en->list); @@ -425,14 +421,15 @@ static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc) unsigned long now = jiffies; int i, j; struct ip_vs_lblcr_entry *en, *nxt; + struct netns_ipvs *ipvs = net_ipvs(svc->net); for (i=0, j=tbl->rover; isched_lock); list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) { - if (time_after(en->lastuse+sysctl_ip_vs_lblcr_expiration, - now)) + if (time_after(en->lastuse + + ipvs->sysctl_lblcr_expiration, now)) continue; ip_vs_lblcr_free(en); @@ -664,6 +661,7 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) read_lock(&svc->sched_lock); en = ip_vs_lblcr_get(svc->af, tbl, &iph.daddr); if (en) { + struct netns_ipvs *ipvs = net_ipvs(svc->net); /* We only hold a read lock, but this is atomic */ en->lastuse = jiffies; @@ -675,7 +673,7 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) /* More than one destination + enough time passed by, cleanup */ if (atomic_read(&en->set.size) > 1 && time_after(jiffies, en->set.lastmod + - sysctl_ip_vs_lblcr_expiration)) { + ipvs->sysctl_lblcr_expiration)) { struct ip_vs_dest *m; write_lock(&en->set.lock); @@ -749,23 +747,43 @@ static struct ip_vs_scheduler ip_vs_lblcr_scheduler = */ static int __net_init __ip_vs_lblcr_init(struct net *net) { - if (!net_eq(net, &init_net)) /* netns not enabled yet */ - return -EPERM; - - sysctl_header = register_net_sysctl_table(net, 
net_vs_ctl_path, - vs_vars_table); - if (!sysctl_header) - return -ENOMEM; + struct netns_ipvs *ipvs = net_ipvs(net); + + if (!net_eq(net, &init_net)) { + ipvs->lblcr_ctl_table = kmemdup(vs_vars_table, + sizeof(vs_vars_table), + GFP_KERNEL); + if (ipvs->lblcr_ctl_table == NULL) + goto err_dup; + } else + ipvs->lblcr_ctl_table = vs_vars_table; + ipvs->sysctl_lblcr_expiration = 24*60*60*HZ; + ipvs->lblcr_ctl_table[0].data = &ipvs->sysctl_lblcr_expiration; + + ipvs->lblcr_ctl_header = + register_net_sysctl_table(net, net_vs_ctl_path, + ipvs->lblcr_ctl_table); + if (!ipvs->lblcr_ctl_header) + goto err_reg; return 0; + +err_reg: + if (!net_eq(net, &init_net)) + kfree(ipvs->lblcr_ctl_table); + +err_dup: + return -ENOMEM; } static void __net_exit __ip_vs_lblcr_exit(struct net *net) { - if (!net_eq(net, &init_net)) /* netns not enabled yet */ - return; + struct netns_ipvs *ipvs = net_ipvs(net); + + unregister_net_sysctl_table(ipvs->lblcr_ctl_header); - unregister_net_sysctl_table(sysctl_header); + if (!net_eq(net, &init_net)) + kfree(ipvs->lblcr_ctl_table); } static struct pernet_operations ip_vs_lblcr_ops = { -- cgit v1.2.3-59-g8ed1b From b6e885ddb903e681b7cbb4e68ad775154660e1f4 Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Mon, 3 Jan 2011 14:44:45 +0100 Subject: IPVS: netns awarness to lblc sheduler var sysctl_ip_vs_lblc_expiration moved to ipvs struct as sysctl_lblc_expiration procfs updated to handle this. 
Signed-off-by: Hans Schillstrom Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- include/net/netns/ip_vs.h | 4 ++++ net/netfilter/ipvs/ip_vs_lblc.c | 50 ++++++++++++++++++++++++++++------------- 2 files changed, 38 insertions(+), 16 deletions(-) (limited to 'include/net') diff --git a/include/net/netns/ip_vs.h b/include/net/netns/ip_vs.h index 51a92ee1b167..d14581cc4fe0 100644 --- a/include/net/netns/ip_vs.h +++ b/include/net/netns/ip_vs.h @@ -29,6 +29,10 @@ struct netns_ipvs { struct list_head rs_table[IP_VS_RTAB_SIZE]; + /* ip_vs_lblc */ + int sysctl_lblc_expiration; + struct ctl_table_header *lblc_ctl_header; + struct ctl_table *lblc_ctl_table; /* ip_vs_lblcr */ int sysctl_lblcr_expiration; struct ctl_table_header *lblcr_ctl_header; diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index 84278fb4e055..d5bec3371871 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -70,7 +70,6 @@ * entries that haven't been touched for a day. 
*/ #define COUNT_FOR_FULL_EXPIRATION 30 -static int sysctl_ip_vs_lblc_expiration = 24*60*60*HZ; /* @@ -117,7 +116,7 @@ struct ip_vs_lblc_table { static ctl_table vs_vars_table[] = { { .procname = "lblc_expiration", - .data = &sysctl_ip_vs_lblc_expiration, + .data = NULL, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, @@ -125,8 +124,6 @@ static ctl_table vs_vars_table[] = { { } }; -static struct ctl_table_header * sysctl_header; - static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en) { list_del(&en->list); @@ -248,6 +245,7 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc) struct ip_vs_lblc_entry *en, *nxt; unsigned long now = jiffies; int i, j; + struct netns_ipvs *ipvs = net_ipvs(svc->net); for (i=0, j=tbl->rover; isched_lock); list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) { if (time_before(now, - en->lastuse + sysctl_ip_vs_lblc_expiration)) + en->lastuse + + ipvs->sysctl_lblc_expiration)) continue; ip_vs_lblc_free(en); @@ -548,23 +547,43 @@ static struct ip_vs_scheduler ip_vs_lblc_scheduler = */ static int __net_init __ip_vs_lblc_init(struct net *net) { - if (!net_eq(net, &init_net)) /* netns not enabled yet */ - return -EPERM; - - sysctl_header = register_net_sysctl_table(net, net_vs_ctl_path, - vs_vars_table); - if (!sysctl_header) - return -ENOMEM; + struct netns_ipvs *ipvs = net_ipvs(net); + + if (!net_eq(net, &init_net)) { + ipvs->lblc_ctl_table = kmemdup(vs_vars_table, + sizeof(vs_vars_table), + GFP_KERNEL); + if (ipvs->lblc_ctl_table == NULL) + goto err_dup; + } else + ipvs->lblc_ctl_table = vs_vars_table; + ipvs->sysctl_lblc_expiration = 24*60*60*HZ; + ipvs->lblc_ctl_table[0].data = &ipvs->sysctl_lblc_expiration; + + ipvs->lblc_ctl_header = + register_net_sysctl_table(net, net_vs_ctl_path, + ipvs->lblc_ctl_table); + if (!ipvs->lblc_ctl_header) + goto err_reg; return 0; + +err_reg: + if (!net_eq(net, &init_net)) + kfree(ipvs->lblc_ctl_table); + +err_dup: + return -ENOMEM; } static void 
__net_exit __ip_vs_lblc_exit(struct net *net) { - if (!net_eq(net, &init_net)) /* netns not enabled yet */ - return; + struct netns_ipvs *ipvs = net_ipvs(net); + + unregister_net_sysctl_table(ipvs->lblc_ctl_header); - unregister_net_sysctl_table(sysctl_header); + if (!net_eq(net, &init_net)) + kfree(ipvs->lblc_ctl_table); } static struct pernet_operations ip_vs_lblc_ops = { @@ -586,7 +605,6 @@ static int __init ip_vs_lblc_init(void) return ret; } - static void __exit ip_vs_lblc_cleanup(void) { unregister_ip_vs_scheduler(&ip_vs_lblc_scheduler); -- cgit v1.2.3-59-g8ed1b From 252c64103237f1841088f0f29b4f084b1c774546 Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Mon, 3 Jan 2011 14:44:46 +0100 Subject: IPVS: netns, prepare protocol Add support for protocol data per name-space. in struct ip_vs_protocol, appcnt will be removed when all protos are modified for network name-space. This patch causes warnings of unused functions, they will be used when next patch will be applied. Signed-off-by: Hans Schillstrom Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- include/net/ip_vs.h | 20 +++++++++++- include/net/netns/ip_vs.h | 3 ++ net/netfilter/ipvs/ip_vs_proto.c | 66 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 88 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index d551e0d8fd9a..88d4e40b538a 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -352,6 +352,7 @@ struct iphdr; struct ip_vs_conn; struct ip_vs_app; struct sk_buff; +struct ip_vs_proto_data; struct ip_vs_protocol { struct ip_vs_protocol *next; @@ -366,6 +367,10 @@ struct ip_vs_protocol { void (*exit)(struct ip_vs_protocol *pp); + void (*init_netns)(struct net *net, struct ip_vs_proto_data *pd); + + void (*exit_netns)(struct net *net, struct ip_vs_proto_data *pd); + int (*conn_schedule)(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, int *verdict, struct ip_vs_conn **cpp); @@ -417,7 +422,20 @@ struct 
ip_vs_protocol { int (*set_state_timeout)(struct ip_vs_protocol *pp, char *sname, int to); }; -extern struct ip_vs_protocol * ip_vs_proto_get(unsigned short proto); +/* + * protocol data per netns + */ +struct ip_vs_proto_data { + struct ip_vs_proto_data *next; + struct ip_vs_protocol *pp; + int *timeout_table; /* protocol timeout table */ + atomic_t appcnt; /* counter of proto app incs. */ + struct tcp_states_t *tcp_state_table; +}; + +extern struct ip_vs_protocol *ip_vs_proto_get(unsigned short proto); +extern struct ip_vs_proto_data *ip_vs_proto_data_get(struct net *net, + unsigned short proto); struct ip_vs_conn_param { const union nf_inet_addr *caddr; diff --git a/include/net/netns/ip_vs.h b/include/net/netns/ip_vs.h index d14581cc4fe0..6f4e089b8db2 100644 --- a/include/net/netns/ip_vs.h +++ b/include/net/netns/ip_vs.h @@ -28,6 +28,9 @@ struct netns_ipvs { #define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1) struct list_head rs_table[IP_VS_RTAB_SIZE]; + /* ip_vs_proto */ + #define IP_VS_PROTO_TAB_SIZE 32 /* must be power of 2 */ + struct ip_vs_proto_data *proto_data_table[IP_VS_PROTO_TAB_SIZE]; /* ip_vs_lblc */ int sysctl_lblc_expiration; diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c index 45392942d0e7..576e29648c53 100644 --- a/net/netfilter/ipvs/ip_vs_proto.c +++ b/net/netfilter/ipvs/ip_vs_proto.c @@ -60,6 +60,31 @@ static int __used __init register_ip_vs_protocol(struct ip_vs_protocol *pp) return 0; } +/* + * register an ipvs protocols netns related data + */ +static int +register_ip_vs_proto_netns(struct net *net, struct ip_vs_protocol *pp) +{ + struct netns_ipvs *ipvs = net_ipvs(net); + unsigned hash = IP_VS_PROTO_HASH(pp->protocol); + struct ip_vs_proto_data *pd = + kzalloc(sizeof(struct ip_vs_proto_data), GFP_ATOMIC); + + if (!pd) { + pr_err("%s(): no memory.\n", __func__); + return -ENOMEM; + } + pd->pp = pp; /* For speed issues */ + pd->next = ipvs->proto_data_table[hash]; + ipvs->proto_data_table[hash] = pd; + 
atomic_set(&pd->appcnt, 0); /* Init app counter */ + + if (pp->init_netns != NULL) + pp->init_netns(net, pd); + + return 0; +} /* * unregister an ipvs protocol @@ -82,6 +107,29 @@ static int unregister_ip_vs_protocol(struct ip_vs_protocol *pp) return -ESRCH; } +/* + * unregister an ipvs protocols netns data + */ +static int +unregister_ip_vs_proto_netns(struct net *net, struct ip_vs_proto_data *pd) +{ + struct netns_ipvs *ipvs = net_ipvs(net); + struct ip_vs_proto_data **pd_p; + unsigned hash = IP_VS_PROTO_HASH(pd->pp->protocol); + + pd_p = &ipvs->proto_data_table[hash]; + for (; *pd_p; pd_p = &(*pd_p)->next) { + if (*pd_p == pd) { + *pd_p = pd->next; + if (pd->pp->exit_netns != NULL) + pd->pp->exit_netns(net, pd); + kfree(pd); + return 0; + } + } + + return -ESRCH; +} /* * get ip_vs_protocol object by its proto. @@ -100,6 +148,24 @@ struct ip_vs_protocol * ip_vs_proto_get(unsigned short proto) } EXPORT_SYMBOL(ip_vs_proto_get); +/* + * get ip_vs_protocol object data by netns and proto + */ +struct ip_vs_proto_data * +ip_vs_proto_data_get(struct net *net, unsigned short proto) +{ + struct netns_ipvs *ipvs = net_ipvs(net); + struct ip_vs_proto_data *pd; + unsigned hash = IP_VS_PROTO_HASH(proto); + + for (pd = ipvs->proto_data_table[hash]; pd; pd = pd->next) { + if (pd->pp->protocol == proto) + return pd; + } + + return NULL; +} +EXPORT_SYMBOL(ip_vs_proto_data_get); /* * Propagate event for state change to all protocols -- cgit v1.2.3-59-g8ed1b From 4a85b96c08ef84076f84e87280223a4301988ed9 Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Mon, 3 Jan 2011 14:44:47 +0100 Subject: IPVS: netns preparation for proto_tcp In this phase (one), all local vars will be moved to ipvs struct. 
Remaining work, add param struct net *net to a couple of functions that is common for all protos and use all ip_vs_proto_data *v3 Removed unused function as sugested by Simon Signed-off-by: Hans Schillstrom Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- include/net/ip_vs.h | 2 +- include/net/netns/ip_vs.h | 8 +++ net/netfilter/ipvs/ip_vs_ftp.c | 8 ++- net/netfilter/ipvs/ip_vs_proto.c | 13 ++++- net/netfilter/ipvs/ip_vs_proto_tcp.c | 97 +++++++++++++++++++----------------- 5 files changed, 79 insertions(+), 49 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 88d4e40b538a..3c45a00cdc3e 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -807,7 +807,7 @@ extern void ip_vs_conn_expire_now(struct ip_vs_conn *cp); extern const char * ip_vs_state_name(__u16 proto, int state); -extern void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp); +extern void ip_vs_tcp_conn_listen(struct net *net, struct ip_vs_conn *cp); extern int ip_vs_check_template(struct ip_vs_conn *ct); extern void ip_vs_random_dropentry(void); extern int ip_vs_conn_init(void); diff --git a/include/net/netns/ip_vs.h b/include/net/netns/ip_vs.h index 6f4e089b8db2..ac77363647ab 100644 --- a/include/net/netns/ip_vs.h +++ b/include/net/netns/ip_vs.h @@ -31,6 +31,14 @@ struct netns_ipvs { /* ip_vs_proto */ #define IP_VS_PROTO_TAB_SIZE 32 /* must be power of 2 */ struct ip_vs_proto_data *proto_data_table[IP_VS_PROTO_TAB_SIZE]; + /* ip_vs_proto_tcp */ +#ifdef CONFIG_IP_VS_PROTO_TCP + #define TCP_APP_TAB_BITS 4 + #define TCP_APP_TAB_SIZE (1 << TCP_APP_TAB_BITS) + #define TCP_APP_TAB_MASK (TCP_APP_TAB_SIZE - 1) + struct list_head tcp_apps[TCP_APP_TAB_SIZE]; + spinlock_t tcp_app_lock; +#endif /* ip_vs_lblc */ int sysctl_lblc_expiration; diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index 0e762f322aa3..b38ae941f677 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c @@ -157,6 +157,7 @@ 
static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, int ret = 0; enum ip_conntrack_info ctinfo; struct nf_conn *ct; + struct net *net; #ifdef CONFIG_IP_VS_IPV6 /* This application helper doesn't work with IPv6 yet, @@ -257,8 +258,9 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, * would be adjusted twice. */ + net = skb_net(skb); cp->app_data = NULL; - ip_vs_tcp_conn_listen(n_cp); + ip_vs_tcp_conn_listen(net, n_cp); ip_vs_conn_put(n_cp); return ret; } @@ -287,6 +289,7 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, union nf_inet_addr to; __be16 port; struct ip_vs_conn *n_cp; + struct net *net; #ifdef CONFIG_IP_VS_IPV6 /* This application helper doesn't work with IPv6 yet, @@ -378,7 +381,8 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, /* * Move tunnel to listen state */ - ip_vs_tcp_conn_listen(n_cp); + net = skb_net(skb); + ip_vs_tcp_conn_listen(net, n_cp); ip_vs_conn_put(n_cp); return 1; diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c index 576e29648c53..320c6a65f370 100644 --- a/net/netfilter/ipvs/ip_vs_proto.c +++ b/net/netfilter/ipvs/ip_vs_proto.c @@ -307,12 +307,23 @@ ip_vs_tcpudp_debug_packet(int af, struct ip_vs_protocol *pp, */ static int __net_init __ip_vs_protocol_init(struct net *net) { +#ifdef CONFIG_IP_VS_PROTO_TCP + register_ip_vs_proto_netns(net, &ip_vs_protocol_tcp); +#endif return 0; } static void __net_exit __ip_vs_protocol_cleanup(struct net *net) { - /* empty */ + struct netns_ipvs *ipvs = net_ipvs(net); + struct ip_vs_proto_data *pd; + int i; + + /* unregister all the ipvs proto data for this netns */ + for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) { + while ((pd = ipvs->proto_data_table[i]) != NULL) + unregister_ip_vs_proto_netns(net, pd); + } } static struct pernet_operations ipvs_proto_ops = { diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index c175d3166263..9d9df3d61093 
100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c @@ -9,8 +9,12 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. * - * Changes: + * Changes: Hans Schillstrom * + * Network name space (netns) aware. + * Global data moved to netns i.e struct netns_ipvs + * tcp_timeouts table has copy per netns in a hash table per + * protocol ip_vs_proto_data and is handled by netns */ #define KMSG_COMPONENT "IPVS" @@ -345,7 +349,7 @@ static const int tcp_state_off[IP_VS_DIR_LAST] = { /* * Timeout table[state] */ -static int tcp_timeouts[IP_VS_TCP_S_LAST+1] = { +static const int tcp_timeouts[IP_VS_TCP_S_LAST+1] = { [IP_VS_TCP_S_NONE] = 2*HZ, [IP_VS_TCP_S_ESTABLISHED] = 15*60*HZ, [IP_VS_TCP_S_SYN_SENT] = 2*60*HZ, @@ -460,13 +464,6 @@ static void tcp_timeout_change(struct ip_vs_protocol *pp, int flags) tcp_state_table = (on? tcp_states_dos : tcp_states); } -static int -tcp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to) -{ - return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_TCP_S_LAST, - tcp_state_name_table, sname, to); -} - static inline int tcp_state_idx(struct tcphdr *th) { if (th->rst) @@ -487,6 +484,7 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp, int state_idx; int new_state = IP_VS_TCP_S_CLOSE; int state_off = tcp_state_off[direction]; + struct ip_vs_proto_data *pd; /* Temp fix */ /* * Update state offset to INPUT_ONLY if necessary @@ -542,10 +540,13 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp, } } - cp->timeout = pp->timeout_table[cp->state = new_state]; + pd = ip_vs_proto_data_get(&init_net, pp->protocol); + if (likely(pd)) + cp->timeout = pd->timeout_table[cp->state = new_state]; + else /* What to do ? 
*/ + cp->timeout = tcp_timeouts[cp->state = new_state]; } - /* * Handle state transitions */ @@ -573,17 +574,6 @@ tcp_state_transition(struct ip_vs_conn *cp, int direction, return 1; } - -/* - * Hash table for TCP application incarnations - */ -#define TCP_APP_TAB_BITS 4 -#define TCP_APP_TAB_SIZE (1 << TCP_APP_TAB_BITS) -#define TCP_APP_TAB_MASK (TCP_APP_TAB_SIZE - 1) - -static struct list_head tcp_apps[TCP_APP_TAB_SIZE]; -static DEFINE_SPINLOCK(tcp_app_lock); - static inline __u16 tcp_app_hashkey(__be16 port) { return (((__force u16)port >> TCP_APP_TAB_BITS) ^ (__force u16)port) @@ -597,21 +587,23 @@ static int tcp_register_app(struct ip_vs_app *inc) __u16 hash; __be16 port = inc->port; int ret = 0; + struct netns_ipvs *ipvs = net_ipvs(&init_net); + struct ip_vs_proto_data *pd = ip_vs_proto_data_get(&init_net, IPPROTO_TCP); hash = tcp_app_hashkey(port); - spin_lock_bh(&tcp_app_lock); - list_for_each_entry(i, &tcp_apps[hash], p_list) { + spin_lock_bh(&ipvs->tcp_app_lock); + list_for_each_entry(i, &ipvs->tcp_apps[hash], p_list) { if (i->port == port) { ret = -EEXIST; goto out; } } - list_add(&inc->p_list, &tcp_apps[hash]); - atomic_inc(&ip_vs_protocol_tcp.appcnt); + list_add(&inc->p_list, &ipvs->tcp_apps[hash]); + atomic_inc(&pd->pp->appcnt); out: - spin_unlock_bh(&tcp_app_lock); + spin_unlock_bh(&ipvs->tcp_app_lock); return ret; } @@ -619,16 +611,20 @@ static int tcp_register_app(struct ip_vs_app *inc) static void tcp_unregister_app(struct ip_vs_app *inc) { - spin_lock_bh(&tcp_app_lock); - atomic_dec(&ip_vs_protocol_tcp.appcnt); + struct netns_ipvs *ipvs = net_ipvs(&init_net); + struct ip_vs_proto_data *pd = ip_vs_proto_data_get(&init_net, IPPROTO_TCP); + + spin_lock_bh(&ipvs->tcp_app_lock); + atomic_dec(&pd->pp->appcnt); list_del(&inc->p_list); - spin_unlock_bh(&tcp_app_lock); + spin_unlock_bh(&ipvs->tcp_app_lock); } static int tcp_app_conn_bind(struct ip_vs_conn *cp) { + struct netns_ipvs *ipvs = net_ipvs(&init_net); int hash; struct ip_vs_app *inc; int result = 
0; @@ -640,12 +636,12 @@ tcp_app_conn_bind(struct ip_vs_conn *cp) /* Lookup application incarnations and bind the right one */ hash = tcp_app_hashkey(cp->vport); - spin_lock(&tcp_app_lock); - list_for_each_entry(inc, &tcp_apps[hash], p_list) { + spin_lock(&ipvs->tcp_app_lock); + list_for_each_entry(inc, &ipvs->tcp_apps[hash], p_list) { if (inc->port == cp->vport) { if (unlikely(!ip_vs_app_inc_get(inc))) break; - spin_unlock(&tcp_app_lock); + spin_unlock(&ipvs->tcp_app_lock); IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->" "%s:%u to app %s on port %u\n", @@ -662,7 +658,7 @@ tcp_app_conn_bind(struct ip_vs_conn *cp) goto out; } } - spin_unlock(&tcp_app_lock); + spin_unlock(&ipvs->tcp_app_lock); out: return result; @@ -672,24 +668,34 @@ tcp_app_conn_bind(struct ip_vs_conn *cp) /* * Set LISTEN timeout. (ip_vs_conn_put will setup timer) */ -void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp) +void ip_vs_tcp_conn_listen(struct net *net, struct ip_vs_conn *cp) { + struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP); + spin_lock(&cp->lock); cp->state = IP_VS_TCP_S_LISTEN; - cp->timeout = ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_LISTEN]; + cp->timeout = (pd ? pd->timeout_table[IP_VS_TCP_S_LISTEN] + : tcp_timeouts[IP_VS_TCP_S_LISTEN]); spin_unlock(&cp->lock); } - -static void ip_vs_tcp_init(struct ip_vs_protocol *pp) +/* --------------------------------------------- + * timeouts is netns related now. 
+ * --------------------------------------------- + */ +static void __ip_vs_tcp_init(struct net *net, struct ip_vs_proto_data *pd) { - IP_VS_INIT_HASH_TABLE(tcp_apps); - pp->timeout_table = tcp_timeouts; -} + struct netns_ipvs *ipvs = net_ipvs(net); + ip_vs_init_hash_table(ipvs->tcp_apps, TCP_APP_TAB_SIZE); + spin_lock_init(&ipvs->tcp_app_lock); + pd->timeout_table = ip_vs_create_timeout_table((int *)tcp_timeouts, + sizeof(tcp_timeouts)); +} -static void ip_vs_tcp_exit(struct ip_vs_protocol *pp) +static void __ip_vs_tcp_exit(struct net *net, struct ip_vs_proto_data *pd) { + kfree(pd->timeout_table); } @@ -699,8 +705,10 @@ struct ip_vs_protocol ip_vs_protocol_tcp = { .num_states = IP_VS_TCP_S_LAST, .dont_defrag = 0, .appcnt = ATOMIC_INIT(0), - .init = ip_vs_tcp_init, - .exit = ip_vs_tcp_exit, + .init = NULL, + .exit = NULL, + .init_netns = __ip_vs_tcp_init, + .exit_netns = __ip_vs_tcp_exit, .register_app = tcp_register_app, .unregister_app = tcp_unregister_app, .conn_schedule = tcp_conn_schedule, @@ -714,5 +722,4 @@ struct ip_vs_protocol ip_vs_protocol_tcp = { .app_conn_bind = tcp_app_conn_bind, .debug_packet = ip_vs_tcpudp_debug_packet, .timeout_change = tcp_timeout_change, - .set_state_timeout = tcp_set_state_timeout, }; -- cgit v1.2.3-59-g8ed1b From 78b16bde104cc74bedbf462b0ebed2990f35ff6b Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Mon, 3 Jan 2011 14:44:48 +0100 Subject: IPVS: netns preparation for proto_udp In this phase (one), all local vars will be moved to ipvs struct. 
Remaining work, add param struct net *net to a couple of functions that is common for all protos and use ip_vs_proto_data *v3 Removed unused function set_state_timeout() Signed-off-by: Hans Schillstrom Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- include/net/netns/ip_vs.h | 8 ++++ net/netfilter/ipvs/ip_vs_proto.c | 3 ++ net/netfilter/ipvs/ip_vs_proto_udp.c | 86 ++++++++++++++++++------------------ 3 files changed, 54 insertions(+), 43 deletions(-) (limited to 'include/net') diff --git a/include/net/netns/ip_vs.h b/include/net/netns/ip_vs.h index ac77363647ab..62b1448d3795 100644 --- a/include/net/netns/ip_vs.h +++ b/include/net/netns/ip_vs.h @@ -39,6 +39,14 @@ struct netns_ipvs { struct list_head tcp_apps[TCP_APP_TAB_SIZE]; spinlock_t tcp_app_lock; #endif + /* ip_vs_proto_udp */ +#ifdef CONFIG_IP_VS_PROTO_UDP + #define UDP_APP_TAB_BITS 4 + #define UDP_APP_TAB_SIZE (1 << UDP_APP_TAB_BITS) + #define UDP_APP_TAB_MASK (UDP_APP_TAB_SIZE - 1) + struct list_head udp_apps[UDP_APP_TAB_SIZE]; + spinlock_t udp_app_lock; +#endif /* ip_vs_lblc */ int sysctl_lblc_expiration; diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c index 320c6a65f370..cdc414238fcb 100644 --- a/net/netfilter/ipvs/ip_vs_proto.c +++ b/net/netfilter/ipvs/ip_vs_proto.c @@ -309,6 +309,9 @@ static int __net_init __ip_vs_protocol_init(struct net *net) { #ifdef CONFIG_IP_VS_PROTO_TCP register_ip_vs_proto_netns(net, &ip_vs_protocol_tcp); +#endif +#ifdef CONFIG_IP_VS_PROTO_UDP + register_ip_vs_proto_netns(net, &ip_vs_protocol_udp); #endif return 0; } diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c index 5ab54f648654..71a4721a8f8a 100644 --- a/net/netfilter/ipvs/ip_vs_proto_udp.c +++ b/net/netfilter/ipvs/ip_vs_proto_udp.c @@ -9,7 +9,8 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. 
* - * Changes: + * Changes: Hans Schillstrom + * Network name space (netns) aware. * */ @@ -345,19 +346,6 @@ udp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp) return 1; } - -/* - * Note: the caller guarantees that only one of register_app, - * unregister_app or app_conn_bind is called each time. - */ - -#define UDP_APP_TAB_BITS 4 -#define UDP_APP_TAB_SIZE (1 << UDP_APP_TAB_BITS) -#define UDP_APP_TAB_MASK (UDP_APP_TAB_SIZE - 1) - -static struct list_head udp_apps[UDP_APP_TAB_SIZE]; -static DEFINE_SPINLOCK(udp_app_lock); - static inline __u16 udp_app_hashkey(__be16 port) { return (((__force u16)port >> UDP_APP_TAB_BITS) ^ (__force u16)port) @@ -371,22 +359,24 @@ static int udp_register_app(struct ip_vs_app *inc) __u16 hash; __be16 port = inc->port; int ret = 0; + struct netns_ipvs *ipvs = net_ipvs(&init_net); + struct ip_vs_proto_data *pd = ip_vs_proto_data_get(&init_net, IPPROTO_UDP); hash = udp_app_hashkey(port); - spin_lock_bh(&udp_app_lock); - list_for_each_entry(i, &udp_apps[hash], p_list) { + spin_lock_bh(&ipvs->udp_app_lock); + list_for_each_entry(i, &ipvs->udp_apps[hash], p_list) { if (i->port == port) { ret = -EEXIST; goto out; } } - list_add(&inc->p_list, &udp_apps[hash]); - atomic_inc(&ip_vs_protocol_udp.appcnt); + list_add(&inc->p_list, &ipvs->udp_apps[hash]); + atomic_inc(&pd->pp->appcnt); out: - spin_unlock_bh(&udp_app_lock); + spin_unlock_bh(&ipvs->udp_app_lock); return ret; } @@ -394,15 +384,19 @@ static int udp_register_app(struct ip_vs_app *inc) static void udp_unregister_app(struct ip_vs_app *inc) { - spin_lock_bh(&udp_app_lock); - atomic_dec(&ip_vs_protocol_udp.appcnt); + struct ip_vs_proto_data *pd = ip_vs_proto_data_get(&init_net, IPPROTO_UDP); + struct netns_ipvs *ipvs = net_ipvs(&init_net); + + spin_lock_bh(&ipvs->udp_app_lock); + atomic_dec(&pd->pp->appcnt); list_del(&inc->p_list); - spin_unlock_bh(&udp_app_lock); + spin_unlock_bh(&ipvs->udp_app_lock); } static int udp_app_conn_bind(struct ip_vs_conn *cp) { + struct 
netns_ipvs *ipvs = net_ipvs(&init_net); int hash; struct ip_vs_app *inc; int result = 0; @@ -414,12 +408,12 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp) /* Lookup application incarnations and bind the right one */ hash = udp_app_hashkey(cp->vport); - spin_lock(&udp_app_lock); - list_for_each_entry(inc, &udp_apps[hash], p_list) { + spin_lock(&ipvs->udp_app_lock); + list_for_each_entry(inc, &ipvs->udp_apps[hash], p_list) { if (inc->port == cp->vport) { if (unlikely(!ip_vs_app_inc_get(inc))) break; - spin_unlock(&udp_app_lock); + spin_unlock(&ipvs->udp_app_lock); IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->" "%s:%u to app %s on port %u\n", @@ -436,14 +430,14 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp) goto out; } } - spin_unlock(&udp_app_lock); + spin_unlock(&ipvs->udp_app_lock); out: return result; } -static int udp_timeouts[IP_VS_UDP_S_LAST+1] = { +static const int udp_timeouts[IP_VS_UDP_S_LAST+1] = { [IP_VS_UDP_S_NORMAL] = 5*60*HZ, [IP_VS_UDP_S_LAST] = 2*HZ, }; @@ -453,14 +447,6 @@ static const char *const udp_state_name_table[IP_VS_UDP_S_LAST+1] = { [IP_VS_UDP_S_LAST] = "BUG!", }; - -static int -udp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to) -{ - return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_UDP_S_LAST, - udp_state_name_table, sname, to); -} - static const char * udp_state_name(int state) { if (state >= IP_VS_UDP_S_LAST) @@ -473,18 +459,31 @@ udp_state_transition(struct ip_vs_conn *cp, int direction, const struct sk_buff *skb, struct ip_vs_protocol *pp) { - cp->timeout = pp->timeout_table[IP_VS_UDP_S_NORMAL]; + struct ip_vs_proto_data *pd; /* Temp fix, pp will be replaced by pd */ + + pd = ip_vs_proto_data_get(&init_net, IPPROTO_UDP); + if (unlikely(!pd)) { + pr_err("UDP no ns data\n"); + return 0; + } + + cp->timeout = pd->timeout_table[IP_VS_UDP_S_NORMAL]; return 1; } -static void udp_init(struct ip_vs_protocol *pp) +static void __udp_init(struct net *net, struct ip_vs_proto_data *pd) { - 
IP_VS_INIT_HASH_TABLE(udp_apps); - pp->timeout_table = udp_timeouts; + struct netns_ipvs *ipvs = net_ipvs(net); + + ip_vs_init_hash_table(ipvs->udp_apps, UDP_APP_TAB_SIZE); + spin_lock_init(&ipvs->udp_app_lock); + pd->timeout_table = ip_vs_create_timeout_table((int *)udp_timeouts, + sizeof(udp_timeouts)); } -static void udp_exit(struct ip_vs_protocol *pp) +static void __udp_exit(struct net *net, struct ip_vs_proto_data *pd) { + kfree(pd->timeout_table); } @@ -493,8 +492,10 @@ struct ip_vs_protocol ip_vs_protocol_udp = { .protocol = IPPROTO_UDP, .num_states = IP_VS_UDP_S_LAST, .dont_defrag = 0, - .init = udp_init, - .exit = udp_exit, + .init = NULL, + .exit = NULL, + .init_netns = __udp_init, + .exit_netns = __udp_exit, .conn_schedule = udp_conn_schedule, .conn_in_get = ip_vs_conn_in_get_proto, .conn_out_get = ip_vs_conn_out_get_proto, @@ -508,5 +509,4 @@ struct ip_vs_protocol ip_vs_protocol_udp = { .app_conn_bind = udp_app_conn_bind, .debug_packet = ip_vs_tcpudp_debug_packet, .timeout_change = NULL, - .set_state_timeout = udp_set_state_timeout, }; -- cgit v1.2.3-59-g8ed1b From 9d934878e7870fbbbd8eaed2e467552536877def Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Mon, 3 Jan 2011 14:44:49 +0100 Subject: IPVS: netns preparation for proto_sctp In this phase (one), all local vars will be moved to ipvs struct. 
Remaining work, add param struct net *net to a couple of functions that is common for all protos and use ip_vs_proto_data *v3 Removed unused function set_state_timeout() Signed-off-by: Hans Schillstrom Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- include/net/netns/ip_vs.h | 9 +++ net/netfilter/ipvs/ip_vs_proto.c | 3 + net/netfilter/ipvs/ip_vs_proto_sctp.c | 121 ++++++++++++++++------------------ 3 files changed, 70 insertions(+), 63 deletions(-) (limited to 'include/net') diff --git a/include/net/netns/ip_vs.h b/include/net/netns/ip_vs.h index 62b1448d3795..58bd3fd85a97 100644 --- a/include/net/netns/ip_vs.h +++ b/include/net/netns/ip_vs.h @@ -47,6 +47,15 @@ struct netns_ipvs { struct list_head udp_apps[UDP_APP_TAB_SIZE]; spinlock_t udp_app_lock; #endif + /* ip_vs_proto_sctp */ +#ifdef CONFIG_IP_VS_PROTO_SCTP + #define SCTP_APP_TAB_BITS 4 + #define SCTP_APP_TAB_SIZE (1 << SCTP_APP_TAB_BITS) + #define SCTP_APP_TAB_MASK (SCTP_APP_TAB_SIZE - 1) + /* Hash table for SCTP application incarnations */ + struct list_head sctp_apps[SCTP_APP_TAB_SIZE]; + spinlock_t sctp_app_lock; +#endif /* ip_vs_lblc */ int sysctl_lblc_expiration; diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c index cdc414238fcb..001b2f825043 100644 --- a/net/netfilter/ipvs/ip_vs_proto.c +++ b/net/netfilter/ipvs/ip_vs_proto.c @@ -312,6 +312,9 @@ static int __net_init __ip_vs_protocol_init(struct net *net) #endif #ifdef CONFIG_IP_VS_PROTO_UDP register_ip_vs_proto_netns(net, &ip_vs_protocol_udp); +#endif +#ifdef CONFIG_IP_VS_PROTO_SCTP + register_ip_vs_proto_netns(net, &ip_vs_protocol_sctp); #endif return 0; } diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index 521b827083fe..f826dd1e4630 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -862,7 +862,7 @@ static struct ipvs_sctp_nextstate /* * Timeout table[state] */ -static int sctp_timeouts[IP_VS_SCTP_S_LAST + 1] = {
+static const int sctp_timeouts[IP_VS_SCTP_S_LAST + 1] = { [IP_VS_SCTP_S_NONE] = 2 * HZ, [IP_VS_SCTP_S_INIT_CLI] = 1 * 60 * HZ, [IP_VS_SCTP_S_INIT_SER] = 1 * 60 * HZ, @@ -906,18 +906,6 @@ static const char *sctp_state_name(int state) return "?"; } -static void sctp_timeout_change(struct ip_vs_protocol *pp, int flags) -{ -} - -static int -sctp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to) -{ - -return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_SCTP_S_LAST, - sctp_state_name_table, sname, to); -} - static inline int set_sctp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp, int direction, const struct sk_buff *skb) @@ -926,6 +914,7 @@ set_sctp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp, unsigned char chunk_type; int event, next_state; int ihl; + struct ip_vs_proto_data *pd; #ifdef CONFIG_IP_VS_IPV6 ihl = cp->af == AF_INET ? ip_hdrlen(skb) : sizeof(struct ipv6hdr); @@ -1001,10 +990,13 @@ set_sctp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp, } } } + pd = ip_vs_proto_data_get(&init_net, pp->protocol); /* tmp fix */ + if (likely(pd)) + cp->timeout = pd->timeout_table[cp->state = next_state]; + else /* What to do ? 
*/ + cp->timeout = sctp_timeouts[cp->state = next_state]; - cp->timeout = pp->timeout_table[cp->state = next_state]; - - return 1; + return 1; } static int @@ -1020,16 +1012,6 @@ sctp_state_transition(struct ip_vs_conn *cp, int direction, return ret; } -/* - * Hash table for SCTP application incarnations - */ -#define SCTP_APP_TAB_BITS 4 -#define SCTP_APP_TAB_SIZE (1 << SCTP_APP_TAB_BITS) -#define SCTP_APP_TAB_MASK (SCTP_APP_TAB_SIZE - 1) - -static struct list_head sctp_apps[SCTP_APP_TAB_SIZE]; -static DEFINE_SPINLOCK(sctp_app_lock); - static inline __u16 sctp_app_hashkey(__be16 port) { return (((__force u16)port >> SCTP_APP_TAB_BITS) ^ (__force u16)port) @@ -1042,34 +1024,40 @@ static int sctp_register_app(struct ip_vs_app *inc) __u16 hash; __be16 port = inc->port; int ret = 0; + struct netns_ipvs *ipvs = net_ipvs(&init_net); + struct ip_vs_proto_data *pd = ip_vs_proto_data_get(&init_net, IPPROTO_SCTP); hash = sctp_app_hashkey(port); - spin_lock_bh(&sctp_app_lock); - list_for_each_entry(i, &sctp_apps[hash], p_list) { + spin_lock_bh(&ipvs->sctp_app_lock); + list_for_each_entry(i, &ipvs->sctp_apps[hash], p_list) { if (i->port == port) { ret = -EEXIST; goto out; } } - list_add(&inc->p_list, &sctp_apps[hash]); - atomic_inc(&ip_vs_protocol_sctp.appcnt); + list_add(&inc->p_list, &ipvs->sctp_apps[hash]); + atomic_inc(&pd->pp->appcnt); out: - spin_unlock_bh(&sctp_app_lock); + spin_unlock_bh(&ipvs->sctp_app_lock); return ret; } static void sctp_unregister_app(struct ip_vs_app *inc) { - spin_lock_bh(&sctp_app_lock); - atomic_dec(&ip_vs_protocol_sctp.appcnt); + struct netns_ipvs *ipvs = net_ipvs(&init_net); + struct ip_vs_proto_data *pd = ip_vs_proto_data_get(&init_net, IPPROTO_SCTP); + + spin_lock_bh(&ipvs->sctp_app_lock); + atomic_dec(&pd->pp->appcnt); list_del(&inc->p_list); - spin_unlock_bh(&sctp_app_lock); + spin_unlock_bh(&ipvs->sctp_app_lock); } static int sctp_app_conn_bind(struct ip_vs_conn *cp) { + struct netns_ipvs *ipvs = net_ipvs(&init_net); int hash; struct 
ip_vs_app *inc; int result = 0; @@ -1080,12 +1068,12 @@ static int sctp_app_conn_bind(struct ip_vs_conn *cp) /* Lookup application incarnations and bind the right one */ hash = sctp_app_hashkey(cp->vport); - spin_lock(&sctp_app_lock); - list_for_each_entry(inc, &sctp_apps[hash], p_list) { + spin_lock(&ipvs->sctp_app_lock); + list_for_each_entry(inc, &ipvs->sctp_apps[hash], p_list) { if (inc->port == cp->vport) { if (unlikely(!ip_vs_app_inc_get(inc))) break; - spin_unlock(&sctp_app_lock); + spin_unlock(&ipvs->sctp_app_lock); IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->" "%s:%u to app %s on port %u\n", @@ -1101,43 +1089,50 @@ static int sctp_app_conn_bind(struct ip_vs_conn *cp) goto out; } } - spin_unlock(&sctp_app_lock); + spin_unlock(&ipvs->sctp_app_lock); out: return result; } -static void ip_vs_sctp_init(struct ip_vs_protocol *pp) +/* --------------------------------------------- + * timeouts is netns related now. + * --------------------------------------------- + */ +static void __ip_vs_sctp_init(struct net *net, struct ip_vs_proto_data *pd) { - IP_VS_INIT_HASH_TABLE(sctp_apps); - pp->timeout_table = sctp_timeouts; -} + struct netns_ipvs *ipvs = net_ipvs(net); + ip_vs_init_hash_table(ipvs->sctp_apps, SCTP_APP_TAB_SIZE); + spin_lock_init(&ipvs->tcp_app_lock); + pd->timeout_table = ip_vs_create_timeout_table((int *)sctp_timeouts, + sizeof(sctp_timeouts)); +} -static void ip_vs_sctp_exit(struct ip_vs_protocol *pp) +static void __ip_vs_sctp_exit(struct net *net, struct ip_vs_proto_data *pd) { - + kfree(pd->timeout_table); } struct ip_vs_protocol ip_vs_protocol_sctp = { - .name = "SCTP", - .protocol = IPPROTO_SCTP, - .num_states = IP_VS_SCTP_S_LAST, - .dont_defrag = 0, - .appcnt = ATOMIC_INIT(0), - .init = ip_vs_sctp_init, - .exit = ip_vs_sctp_exit, - .register_app = sctp_register_app, + .name = "SCTP", + .protocol = IPPROTO_SCTP, + .num_states = IP_VS_SCTP_S_LAST, + .dont_defrag = 0, + .init = NULL, + .exit = NULL, + .init_netns = __ip_vs_sctp_init, + .exit_netns = 
__ip_vs_sctp_exit, + .register_app = sctp_register_app, .unregister_app = sctp_unregister_app, - .conn_schedule = sctp_conn_schedule, - .conn_in_get = ip_vs_conn_in_get_proto, - .conn_out_get = ip_vs_conn_out_get_proto, - .snat_handler = sctp_snat_handler, - .dnat_handler = sctp_dnat_handler, - .csum_check = sctp_csum_check, - .state_name = sctp_state_name, + .conn_schedule = sctp_conn_schedule, + .conn_in_get = ip_vs_conn_in_get_proto, + .conn_out_get = ip_vs_conn_out_get_proto, + .snat_handler = sctp_snat_handler, + .dnat_handler = sctp_dnat_handler, + .csum_check = sctp_csum_check, + .state_name = sctp_state_name, .state_transition = sctp_state_transition, - .app_conn_bind = sctp_app_conn_bind, - .debug_packet = ip_vs_tcpudp_debug_packet, - .timeout_change = sctp_timeout_change, - .set_state_timeout = sctp_set_state_timeout, + .app_conn_bind = sctp_app_conn_bind, + .debug_packet = ip_vs_tcpudp_debug_packet, + .timeout_change = NULL, }; -- cgit v1.2.3-59-g8ed1b From 9330419d9aa4f97df412ac9be9fc0388c67dd315 Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Mon, 3 Jan 2011 14:44:51 +0100 Subject: IPVS: netns, use ip_vs_proto_data as param. ip_vs_protocol *pp is replaced by ip_vs_proto_data *pd in function call in ip_vs_protocol struct i.e. :, - timeout_change() - state_transition() ip_vs_protocol_timeout_change() got ipvs as param, due to above and an upcoming patch - defence work Most of these changes are triggered by Julian's comment: "tcp_timeout_change should work with the new struct ip_vs_proto_data so that tcp_state_table will go to pd->state_table and set_tcp_state will get pd instead of pp" *v3 Mostly comments from Julian The pp -> pd conversion should start from functions like ip_vs_out() that use pp = ip_vs_proto_get(iph.protocol), now they should use ip_vs_proto_data_get(net, iph.protocol). conn_in_get() and conn_out_get() unused param *pp, removed. *v4 ip_vs_protocol_timeout_change() walk the proto_data path.
Signed-off-by: Hans Schillstrom Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- include/net/ip_vs.h | 18 +++----- net/netfilter/ipvs/ip_vs_conn.c | 2 - net/netfilter/ipvs/ip_vs_core.c | 77 ++++++++++++++++++++------------- net/netfilter/ipvs/ip_vs_ctl.c | 55 ++++++++++++++--------- net/netfilter/ipvs/ip_vs_proto.c | 21 ++++++--- net/netfilter/ipvs/ip_vs_proto_ah_esp.c | 10 ++--- net/netfilter/ipvs/ip_vs_proto_sctp.c | 16 +++---- net/netfilter/ipvs/ip_vs_proto_tcp.c | 27 +++++------- net/netfilter/ipvs/ip_vs_proto_udp.c | 11 ++--- net/netfilter/xt_ipvs.c | 2 +- 10 files changed, 129 insertions(+), 110 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 3c45a00cdc3e..464ea365ca07 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -372,13 +372,12 @@ struct ip_vs_protocol { void (*exit_netns)(struct net *net, struct ip_vs_proto_data *pd); int (*conn_schedule)(int af, struct sk_buff *skb, - struct ip_vs_protocol *pp, + struct ip_vs_proto_data *pd, int *verdict, struct ip_vs_conn **cpp); struct ip_vs_conn * (*conn_in_get)(int af, const struct sk_buff *skb, - struct ip_vs_protocol *pp, const struct ip_vs_iphdr *iph, unsigned int proto_off, int inverse); @@ -386,7 +385,6 @@ struct ip_vs_protocol { struct ip_vs_conn * (*conn_out_get)(int af, const struct sk_buff *skb, - struct ip_vs_protocol *pp, const struct ip_vs_iphdr *iph, unsigned int proto_off, int inverse); @@ -404,7 +402,7 @@ struct ip_vs_protocol { int (*state_transition)(struct ip_vs_conn *cp, int direction, const struct sk_buff *skb, - struct ip_vs_protocol *pp); + struct ip_vs_proto_data *pd); int (*register_app)(struct ip_vs_app *inc); @@ -417,9 +415,7 @@ struct ip_vs_protocol { int offset, const char *msg); - void (*timeout_change)(struct ip_vs_protocol *pp, int flags); - - int (*set_state_timeout)(struct ip_vs_protocol *pp, char *sname, int to); + void (*timeout_change)(struct ip_vs_proto_data *pd, int flags); }; /* @@ -778,7 +774,6 @@ 
struct ip_vs_conn *ip_vs_conn_in_get(const struct ip_vs_conn_param *p); struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p); struct ip_vs_conn * ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb, - struct ip_vs_protocol *pp, const struct ip_vs_iphdr *iph, unsigned int proto_off, int inverse); @@ -786,7 +781,6 @@ struct ip_vs_conn * ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb, struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p); struct ip_vs_conn * ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb, - struct ip_vs_protocol *pp, const struct ip_vs_iphdr *iph, unsigned int proto_off, int inverse); @@ -917,7 +911,7 @@ static inline void ip_vs_pe_put(const struct ip_vs_pe *pe) */ extern int ip_vs_protocol_init(void); extern void ip_vs_protocol_cleanup(void); -extern void ip_vs_protocol_timeout_change(int flags); +extern void ip_vs_protocol_timeout_change(struct netns_ipvs *ipvs, int flags); extern int *ip_vs_create_timeout_table(int *table, int size); extern int ip_vs_set_state_timeout(int *table, int num, const char *const *names, @@ -947,9 +941,9 @@ extern struct ip_vs_scheduler *ip_vs_scheduler_get(const char *sched_name); extern void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler); extern struct ip_vs_conn * ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, - struct ip_vs_protocol *pp, int *ignored); + struct ip_vs_proto_data *pd, int *ignored); extern int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, - struct ip_vs_protocol *pp); + struct ip_vs_proto_data *pd); /* diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 7a0e79e3ad0f..a7aba6a4697e 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -329,7 +329,6 @@ ip_vs_conn_fill_param_proto(int af, const struct sk_buff *skb, struct ip_vs_conn * ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb, - struct ip_vs_protocol *pp, const struct 
ip_vs_iphdr *iph, unsigned int proto_off, int inverse) { @@ -428,7 +427,6 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p) struct ip_vs_conn * ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb, - struct ip_vs_protocol *pp, const struct ip_vs_iphdr *iph, unsigned int proto_off, int inverse) { diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index d0616ea1eebf..9317affc5ea1 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -177,11 +177,11 @@ ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc) static inline int ip_vs_set_state(struct ip_vs_conn *cp, int direction, const struct sk_buff *skb, - struct ip_vs_protocol *pp) + struct ip_vs_proto_data *pd) { - if (unlikely(!pp->state_transition)) + if (unlikely(!pd->pp->state_transition)) return 0; - return pp->state_transition(cp, direction, skb, pp); + return pd->pp->state_transition(cp, direction, skb, pd); } static inline int @@ -378,8 +378,9 @@ ip_vs_sched_persist(struct ip_vs_service *svc, */ struct ip_vs_conn * ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, - struct ip_vs_protocol *pp, int *ignored) + struct ip_vs_proto_data *pd, int *ignored) { + struct ip_vs_protocol *pp = pd->pp; struct ip_vs_conn *cp = NULL; struct ip_vs_iphdr iph; struct ip_vs_dest *dest; @@ -408,7 +409,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, * Do not schedule replies from local real server. */ if ((!skb->dev || skb->dev->flags & IFF_LOOPBACK) && - (cp = pp->conn_in_get(svc->af, skb, pp, &iph, iph.len, 1))) { + (cp = pp->conn_in_get(svc->af, skb, &iph, iph.len, 1))) { IP_VS_DBG_PKT(12, svc->af, pp, skb, 0, "Not scheduling reply for existing connection"); __ip_vs_conn_put(cp); @@ -479,11 +480,12 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, * no destination is available for a new connection. 
*/ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, - struct ip_vs_protocol *pp) + struct ip_vs_proto_data *pd) { __be16 _ports[2], *pptr; struct ip_vs_iphdr iph; int unicast; + ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports); @@ -530,10 +532,10 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, ip_vs_in_stats(cp, skb); /* set state */ - cs = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pp); + cs = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd); /* transmit the first SYN packet */ - ret = cp->packet_xmit(skb, cp, pp); + ret = cp->packet_xmit(skb, cp, pd->pp); /* do not touch skb anymore */ atomic_inc(&cp->in_pkts); @@ -840,7 +842,7 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related, ip_vs_fill_iphdr(AF_INET, cih, &ciph); /* The embedded headers contain source and dest in reverse order */ - cp = pp->conn_out_get(AF_INET, skb, pp, &ciph, offset, 1); + cp = pp->conn_out_get(AF_INET, skb, &ciph, offset, 1); if (!cp) return NF_ACCEPT; @@ -917,7 +919,7 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related, ip_vs_fill_iphdr(AF_INET6, cih, &ciph); /* The embedded headers contain source and dest in reverse order */ - cp = pp->conn_out_get(AF_INET6, skb, pp, &ciph, offset, 1); + cp = pp->conn_out_get(AF_INET6, skb, &ciph, offset, 1); if (!cp) return NF_ACCEPT; @@ -956,9 +958,11 @@ static inline int is_tcp_reset(const struct sk_buff *skb, int nh_len) * Used for NAT and local client. 
*/ static unsigned int -handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, +handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, struct ip_vs_conn *cp, int ihl) { + struct ip_vs_protocol *pp = pd->pp; + IP_VS_DBG_PKT(11, af, pp, skb, 0, "Outgoing packet"); if (!skb_make_writable(skb, ihl)) @@ -1007,7 +1011,7 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, IP_VS_DBG_PKT(10, af, pp, skb, 0, "After SNAT"); ip_vs_out_stats(cp, skb); - ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp); + ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pd); skb->ipvs_property = 1; if (!(cp->flags & IP_VS_CONN_F_NFCT)) ip_vs_notrack(skb); @@ -1034,6 +1038,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) struct net *net = NULL; struct ip_vs_iphdr iph; struct ip_vs_protocol *pp; + struct ip_vs_proto_data *pd; struct ip_vs_conn *cp; EnterFunction(11); @@ -1079,9 +1084,10 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); } - pp = ip_vs_proto_get(iph.protocol); - if (unlikely(!pp)) + pd = ip_vs_proto_data_get(net, iph.protocol); + if (unlikely(!pd)) return NF_ACCEPT; + pp = pd->pp; /* reassemble IP fragments */ #ifdef CONFIG_IP_VS_IPV6 @@ -1107,10 +1113,10 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) /* * Check if the packet belongs to an existing entry */ - cp = pp->conn_out_get(af, skb, pp, &iph, iph.len, 0); + cp = pp->conn_out_get(af, skb, &iph, iph.len, 0); if (likely(cp)) - return handle_response(af, skb, pp, cp, iph.len); + return handle_response(af, skb, pd, cp, iph.len); if (sysctl_ip_vs_nat_icmp_send && (pp->protocol == IPPROTO_TCP || pp->protocol == IPPROTO_UDP || @@ -1236,12 +1242,14 @@ ip_vs_local_reply6(unsigned int hooknum, struct sk_buff *skb, static int ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) { + struct net *net = NULL; struct iphdr *iph; struct icmphdr _icmph, *ic; struct iphdr _ciph, 
*cih; /* The ip header contained within the ICMP */ struct ip_vs_iphdr ciph; struct ip_vs_conn *cp; struct ip_vs_protocol *pp; + struct ip_vs_proto_data *pd; unsigned int offset, ihl, verdict; union nf_inet_addr snet; @@ -1283,9 +1291,11 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) if (cih == NULL) return NF_ACCEPT; /* The packet looks wrong, ignore */ - pp = ip_vs_proto_get(cih->protocol); - if (!pp) + net = skb_net(skb); + pd = ip_vs_proto_data_get(net, cih->protocol); + if (!pd) return NF_ACCEPT; + pp = pd->pp; /* Is the embedded protocol header present? */ if (unlikely(cih->frag_off & htons(IP_OFFSET) && @@ -1299,10 +1309,10 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) ip_vs_fill_iphdr(AF_INET, cih, &ciph); /* The embedded headers contain source and dest in reverse order */ - cp = pp->conn_in_get(AF_INET, skb, pp, &ciph, offset, 1); + cp = pp->conn_in_get(AF_INET, skb, &ciph, offset, 1); if (!cp) { /* The packet could also belong to a local client */ - cp = pp->conn_out_get(AF_INET, skb, pp, &ciph, offset, 1); + cp = pp->conn_out_get(AF_INET, skb, &ciph, offset, 1); if (cp) { snet.ip = iph->saddr; return handle_response_icmp(AF_INET, skb, &snet, @@ -1346,6 +1356,7 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) static int ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum) { + struct net *net = NULL; struct ipv6hdr *iph; struct icmp6hdr _icmph, *ic; struct ipv6hdr _ciph, *cih; /* The ip header contained @@ -1353,6 +1364,7 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum) struct ip_vs_iphdr ciph; struct ip_vs_conn *cp; struct ip_vs_protocol *pp; + struct ip_vs_proto_data *pd; unsigned int offset, verdict; union nf_inet_addr snet; struct rt6_info *rt; @@ -1395,9 +1407,11 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum) if (cih == NULL) return NF_ACCEPT; /* The packet looks wrong, ignore */ - pp = 
ip_vs_proto_get(cih->nexthdr); - if (!pp) + net = skb_net(skb); + pd = ip_vs_proto_data_get(net, cih->nexthdr); + if (!pd) return NF_ACCEPT; + pp = pd->pp; /* Is the embedded protocol header present? */ /* TODO: we don't support fragmentation at the moment anyways */ @@ -1411,10 +1425,10 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum) ip_vs_fill_iphdr(AF_INET6, cih, &ciph); /* The embedded headers contain source and dest in reverse order */ - cp = pp->conn_in_get(AF_INET6, skb, pp, &ciph, offset, 1); + cp = pp->conn_in_get(AF_INET6, skb, &ciph, offset, 1); if (!cp) { /* The packet could also belong to a local client */ - cp = pp->conn_out_get(AF_INET6, skb, pp, &ciph, offset, 1); + cp = pp->conn_out_get(AF_INET6, skb, &ciph, offset, 1); if (cp) { ipv6_addr_copy(&snet.in6, &iph->saddr); return handle_response_icmp(AF_INET6, skb, &snet, @@ -1457,8 +1471,10 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum) static unsigned int ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) { + struct net *net = NULL; struct ip_vs_iphdr iph; struct ip_vs_protocol *pp; + struct ip_vs_proto_data *pd; struct ip_vs_conn *cp; int ret, restart, pkts; @@ -1514,20 +1530,21 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); } + net = skb_net(skb); /* Protocol supported? 
*/ - pp = ip_vs_proto_get(iph.protocol); - if (unlikely(!pp)) + pd = ip_vs_proto_data_get(net, iph.protocol); + if (unlikely(!pd)) return NF_ACCEPT; - + pp = pd->pp; /* * Check if the packet belongs to an existing connection entry */ - cp = pp->conn_in_get(af, skb, pp, &iph, iph.len, 0); + cp = pp->conn_in_get(af, skb, &iph, iph.len, 0); if (unlikely(!cp)) { int v; - if (!pp->conn_schedule(af, skb, pp, &v, &cp)) + if (!pp->conn_schedule(af, skb, pd, &v, &cp)) return v; } @@ -1555,7 +1572,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) } ip_vs_in_stats(cp, skb); - restart = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pp); + restart = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd); if (cp->packet_xmit) ret = cp->packet_xmit(skb, cp, pp); /* do not touch skb anymore */ diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 2d7c96bd2114..88474f1e828a 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -38,6 +38,7 @@ #include #include +#include #include #ifdef CONFIG_IP_VS_IPV6 #include @@ -125,7 +126,7 @@ static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr) * update_defense_level is called from keventd and from sysctl, * so it needs to protect itself from softirqs */ -static void update_defense_level(void) +static void update_defense_level(struct netns_ipvs *ipvs) { struct sysinfo i; static int old_secure_tcp = 0; @@ -239,7 +240,8 @@ static void update_defense_level(void) } old_secure_tcp = sysctl_ip_vs_secure_tcp; if (to_change >= 0) - ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1); + ip_vs_protocol_timeout_change(ipvs, + sysctl_ip_vs_secure_tcp > 1); spin_unlock(&ip_vs_securetcp_lock); local_bh_enable(); @@ -255,7 +257,10 @@ static DECLARE_DELAYED_WORK(defense_work, defense_work_handler); static void defense_work_handler(struct work_struct *work) { - update_defense_level(); + struct net *net = &init_net; + struct netns_ipvs *ipvs = net_ipvs(net); + + 
update_defense_level(ipvs); if (atomic_read(&ip_vs_dropentry)) ip_vs_random_dropentry(); @@ -1502,6 +1507,7 @@ static int proc_do_defense_mode(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { + struct net *net = current->nsproxy->net_ns; int *valp = table->data; int val = *valp; int rc; @@ -1512,7 +1518,7 @@ proc_do_defense_mode(ctl_table *table, int write, /* Restore the correct value */ *valp = val; } else { - update_defense_level(); + update_defense_level(net_ipvs(net)); } } return rc; @@ -2033,8 +2039,10 @@ static const struct file_operations ip_vs_stats_fops = { /* * Set timeout values for tcp tcpfin udp in the timeout_table. */ -static int ip_vs_set_timeout(struct ip_vs_timeout_user *u) +static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u) { + struct ip_vs_proto_data *pd; + IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n", u->tcp_timeout, u->tcp_fin_timeout, @@ -2042,19 +2050,22 @@ static int ip_vs_set_timeout(struct ip_vs_timeout_user *u) #ifdef CONFIG_IP_VS_PROTO_TCP if (u->tcp_timeout) { - ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] + pd = ip_vs_proto_data_get(net, IPPROTO_TCP); + pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] = u->tcp_timeout * HZ; } if (u->tcp_fin_timeout) { - ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] + pd = ip_vs_proto_data_get(net, IPPROTO_TCP); + pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] = u->tcp_fin_timeout * HZ; } #endif #ifdef CONFIG_IP_VS_PROTO_UDP if (u->udp_timeout) { - ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] + pd = ip_vs_proto_data_get(net, IPPROTO_UDP); + pd->timeout_table[IP_VS_UDP_S_NORMAL] = u->udp_timeout * HZ; } #endif @@ -2158,7 +2169,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) goto out_unlock; } else if (cmd == IP_VS_SO_SET_TIMEOUT) { /* Set timeout values for (tcp tcpfin udp) */ - ret = ip_vs_set_timeout((struct ip_vs_timeout_user *)arg); + ret = ip_vs_set_timeout(net, (struct 
ip_vs_timeout_user *)arg); goto out_unlock; } else if (cmd == IP_VS_SO_SET_STARTDAEMON) { struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg; @@ -2370,17 +2381,19 @@ __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get, } static inline void -__ip_vs_get_timeouts(struct ip_vs_timeout_user *u) +__ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u) { + struct ip_vs_proto_data *pd; + #ifdef CONFIG_IP_VS_PROTO_TCP - u->tcp_timeout = - ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ; - u->tcp_fin_timeout = - ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ; + pd = ip_vs_proto_data_get(net, IPPROTO_TCP); + u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ; + u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ; #endif #ifdef CONFIG_IP_VS_PROTO_UDP + pd = ip_vs_proto_data_get(net, IPPROTO_UDP); u->udp_timeout = - ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] / HZ; + pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ; #endif } @@ -2521,7 +2534,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) { struct ip_vs_timeout_user t; - __ip_vs_get_timeouts(&t); + __ip_vs_get_timeouts(net, &t); if (copy_to_user(user, &t, sizeof(t)) != 0) ret = -EFAULT; } @@ -3092,11 +3105,11 @@ static int ip_vs_genl_del_daemon(struct nlattr **attrs) return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE])); } -static int ip_vs_genl_set_config(struct nlattr **attrs) +static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs) { struct ip_vs_timeout_user t; - __ip_vs_get_timeouts(&t); + __ip_vs_get_timeouts(net, &t); if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]) t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]); @@ -3108,7 +3121,7 @@ static int ip_vs_genl_set_config(struct nlattr **attrs) if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]) t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]); - return ip_vs_set_timeout(&t); + return ip_vs_set_timeout(net, 
&t); } static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) @@ -3129,7 +3142,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) ret = ip_vs_flush(net); goto out; } else if (cmd == IPVS_CMD_SET_CONFIG) { - ret = ip_vs_genl_set_config(info->attrs); + ret = ip_vs_genl_set_config(net, info->attrs); goto out; } else if (cmd == IPVS_CMD_NEW_DAEMON || cmd == IPVS_CMD_DEL_DAEMON) { @@ -3281,7 +3294,7 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info) { struct ip_vs_timeout_user t; - __ip_vs_get_timeouts(&t); + __ip_vs_get_timeouts(net, &t); #ifdef CONFIG_IP_VS_PROTO_TCP NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout); NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN, diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c index 9f609d4d5d58..6ac986cdcff3 100644 --- a/net/netfilter/ipvs/ip_vs_proto.c +++ b/net/netfilter/ipvs/ip_vs_proto.c @@ -152,9 +152,8 @@ EXPORT_SYMBOL(ip_vs_proto_get); * get ip_vs_protocol object data by netns and proto */ struct ip_vs_proto_data * -ip_vs_proto_data_get(struct net *net, unsigned short proto) +__ipvs_proto_data_get(struct netns_ipvs *ipvs, unsigned short proto) { - struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_proto_data *pd; unsigned hash = IP_VS_PROTO_HASH(proto); @@ -165,20 +164,28 @@ ip_vs_proto_data_get(struct net *net, unsigned short proto) return NULL; } + +struct ip_vs_proto_data * +ip_vs_proto_data_get(struct net *net, unsigned short proto) +{ + struct netns_ipvs *ipvs = net_ipvs(net); + + return __ipvs_proto_data_get(ipvs, proto); +} EXPORT_SYMBOL(ip_vs_proto_data_get); /* * Propagate event for state change to all protocols */ -void ip_vs_protocol_timeout_change(int flags) +void ip_vs_protocol_timeout_change(struct netns_ipvs *ipvs, int flags) { - struct ip_vs_protocol *pp; + struct ip_vs_proto_data *pd; int i; for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) { - for (pp = ip_vs_proto_table[i]; pp; pp = pp->next) { 
- if (pp->timeout_change) - pp->timeout_change(pp, flags); + for (pd = ipvs->proto_data_table[i]; pd; pd = pd->next) { + if (pd->pp->timeout_change) + pd->pp->timeout_change(pd, flags); } } } diff --git a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c index b8b37fafc988..28039cbfcff4 100644 --- a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c +++ b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c @@ -55,7 +55,7 @@ ah_esp_conn_fill_param_proto(int af, const struct ip_vs_iphdr *iph, } static struct ip_vs_conn * -ah_esp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, +ah_esp_conn_in_get(int af, const struct sk_buff *skb, const struct ip_vs_iphdr *iph, unsigned int proto_off, int inverse) { @@ -72,7 +72,7 @@ ah_esp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for outin packet " "%s%s %s->%s\n", inverse ? "ICMP+" : "", - pp->name, + ip_vs_proto_get(iph->protocol)->name, IP_VS_DBG_ADDR(af, &iph->saddr), IP_VS_DBG_ADDR(af, &iph->daddr)); } @@ -83,7 +83,6 @@ ah_esp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, static struct ip_vs_conn * ah_esp_conn_out_get(int af, const struct sk_buff *skb, - struct ip_vs_protocol *pp, const struct ip_vs_iphdr *iph, unsigned int proto_off, int inverse) @@ -97,7 +96,7 @@ ah_esp_conn_out_get(int af, const struct sk_buff *skb, IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for inout packet " "%s%s %s->%s\n", inverse ? 
"ICMP+" : "", - pp->name, + ip_vs_proto_get(iph->protocol)->name, IP_VS_DBG_ADDR(af, &iph->saddr), IP_VS_DBG_ADDR(af, &iph->daddr)); } @@ -107,7 +106,7 @@ ah_esp_conn_out_get(int af, const struct sk_buff *skb, static int -ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, +ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, int *verdict, struct ip_vs_conn **cpp) { /* @@ -137,7 +136,6 @@ struct ip_vs_protocol ip_vs_protocol_ah = { .app_conn_bind = NULL, .debug_packet = ip_vs_tcpudp_debug_packet, .timeout_change = NULL, /* ISAKMP */ - .set_state_timeout = NULL, }; #endif diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index f826dd1e4630..19bc37976ea7 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -9,7 +9,7 @@ #include static int -sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, +sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, int *verdict, struct ip_vs_conn **cpp) { struct net *net; @@ -47,10 +47,10 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, * Let the virtual server select a real server for the * incoming connection, and create a connection entry. 
*/ - *cpp = ip_vs_schedule(svc, skb, pp, &ignored); + *cpp = ip_vs_schedule(svc, skb, pd, &ignored); if (!*cpp && ignored <= 0) { if (!ignored) - *verdict = ip_vs_leave(svc, skb, pp); + *verdict = ip_vs_leave(svc, skb, pd); else { ip_vs_service_put(svc); *verdict = NF_DROP; @@ -907,14 +907,13 @@ static const char *sctp_state_name(int state) } static inline int -set_sctp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp, +set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp, int direction, const struct sk_buff *skb) { sctp_chunkhdr_t _sctpch, *sch; unsigned char chunk_type; int event, next_state; int ihl; - struct ip_vs_proto_data *pd; #ifdef CONFIG_IP_VS_IPV6 ihl = cp->af == AF_INET ? ip_hdrlen(skb) : sizeof(struct ipv6hdr); @@ -966,7 +965,7 @@ set_sctp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp, IP_VS_DBG_BUF(8, "%s %s %s:%d->" "%s:%d state: %s->%s conn->refcnt:%d\n", - pp->name, + pd->pp->name, ((direction == IP_VS_DIR_OUTPUT) ? "output " : "input "), IP_VS_DBG_ADDR(cp->af, &cp->daddr), @@ -990,7 +989,6 @@ set_sctp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp, } } } - pd = ip_vs_proto_data_get(&init_net, pp->protocol); /* tmp fix */ if (likely(pd)) cp->timeout = pd->timeout_table[cp->state = next_state]; else /* What to do ? 
*/ @@ -1001,12 +999,12 @@ set_sctp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp, static int sctp_state_transition(struct ip_vs_conn *cp, int direction, - const struct sk_buff *skb, struct ip_vs_protocol *pp) + const struct sk_buff *skb, struct ip_vs_proto_data *pd) { int ret = 0; spin_lock(&cp->lock); - ret = set_sctp_state(pp, cp, direction, skb); + ret = set_sctp_state(pd, cp, direction, skb); spin_unlock(&cp->lock); return ret; diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index 9d9df3d61093..d7c245532798 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c @@ -32,7 +32,7 @@ #include static int -tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, +tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, int *verdict, struct ip_vs_conn **cpp) { struct net *net; @@ -68,10 +68,10 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, * Let the virtual server select a real server for the * incoming connection, and create a connection entry. */ - *cpp = ip_vs_schedule(svc, skb, pp, &ignored); + *cpp = ip_vs_schedule(svc, skb, pd, &ignored); if (!*cpp && ignored <= 0) { if (!ignored) - *verdict = ip_vs_leave(svc, skb, pp); + *verdict = ip_vs_leave(svc, skb, pd); else { ip_vs_service_put(svc); *verdict = NF_DROP; @@ -448,10 +448,7 @@ static struct tcp_states_t tcp_states_dos [] = { /*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }}, }; -static struct tcp_states_t *tcp_state_table = tcp_states; - - -static void tcp_timeout_change(struct ip_vs_protocol *pp, int flags) +static void tcp_timeout_change(struct ip_vs_proto_data *pd, int flags) { int on = (flags & 1); /* secure_tcp */ @@ -461,7 +458,7 @@ static void tcp_timeout_change(struct ip_vs_protocol *pp, int flags) ** for most if not for all of the applications. Something ** like "capabilities" (flags) for each object. */ - tcp_state_table = (on? 
tcp_states_dos : tcp_states); + pd->tcp_state_table = (on ? tcp_states_dos : tcp_states); } static inline int tcp_state_idx(struct tcphdr *th) @@ -478,13 +475,12 @@ static inline int tcp_state_idx(struct tcphdr *th) } static inline void -set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp, +set_tcp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp, int direction, struct tcphdr *th) { int state_idx; int new_state = IP_VS_TCP_S_CLOSE; int state_off = tcp_state_off[direction]; - struct ip_vs_proto_data *pd; /* Temp fix */ /* * Update state offset to INPUT_ONLY if necessary @@ -502,7 +498,8 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp, goto tcp_state_out; } - new_state = tcp_state_table[state_off+state_idx].next_state[cp->state]; + new_state = + pd->tcp_state_table[state_off+state_idx].next_state[cp->state]; tcp_state_out: if (new_state != cp->state) { @@ -510,7 +507,7 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp, IP_VS_DBG_BUF(8, "%s %s [%c%c%c%c] %s:%d->" "%s:%d state: %s->%s conn->refcnt:%d\n", - pp->name, + pd->pp->name, ((state_off == TCP_DIR_OUTPUT) ? "output " : "input "), th->syn ? 'S' : '.', @@ -540,7 +537,6 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp, } } - pd = ip_vs_proto_data_get(&init_net, pp->protocol); if (likely(pd)) cp->timeout = pd->timeout_table[cp->state = new_state]; else /* What to do ? 
*/ @@ -553,7 +549,7 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp, static int tcp_state_transition(struct ip_vs_conn *cp, int direction, const struct sk_buff *skb, - struct ip_vs_protocol *pp) + struct ip_vs_proto_data *pd) { struct tcphdr _tcph, *th; @@ -568,7 +564,7 @@ tcp_state_transition(struct ip_vs_conn *cp, int direction, return 0; spin_lock(&cp->lock); - set_tcp_state(pp, cp, direction, th); + set_tcp_state(pd, cp, direction, th); spin_unlock(&cp->lock); return 1; @@ -691,6 +687,7 @@ static void __ip_vs_tcp_init(struct net *net, struct ip_vs_proto_data *pd) spin_lock_init(&ipvs->tcp_app_lock); pd->timeout_table = ip_vs_create_timeout_table((int *)tcp_timeouts, sizeof(tcp_timeouts)); + pd->tcp_state_table = tcp_states; } static void __ip_vs_tcp_exit(struct net *net, struct ip_vs_proto_data *pd) diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c index 71a4721a8f8a..aa85df2f14a0 100644 --- a/net/netfilter/ipvs/ip_vs_proto_udp.c +++ b/net/netfilter/ipvs/ip_vs_proto_udp.c @@ -29,7 +29,7 @@ #include static int -udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, +udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, int *verdict, struct ip_vs_conn **cpp) { struct net *net; @@ -64,10 +64,10 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, * Let the virtual server select a real server for the * incoming connection, and create a connection entry. 
*/ - *cpp = ip_vs_schedule(svc, skb, pp, &ignored); + *cpp = ip_vs_schedule(svc, skb, pd, &ignored); if (!*cpp && ignored <= 0) { if (!ignored) - *verdict = ip_vs_leave(svc, skb, pp); + *verdict = ip_vs_leave(svc, skb, pd); else { ip_vs_service_put(svc); *verdict = NF_DROP; @@ -457,11 +457,8 @@ static const char * udp_state_name(int state) static int udp_state_transition(struct ip_vs_conn *cp, int direction, const struct sk_buff *skb, - struct ip_vs_protocol *pp) + struct ip_vs_proto_data *pd) { - struct ip_vs_proto_data *pd; /* Temp fix, pp will be replaced by pd */ - - pd = ip_vs_proto_data_get(&init_net, IPPROTO_UDP); if (unlikely(!pd)) { pr_err("UDP no ns data\n"); return 0; diff --git a/net/netfilter/xt_ipvs.c b/net/netfilter/xt_ipvs.c index 9127a3d8aa35..bb10b0717f1b 100644 --- a/net/netfilter/xt_ipvs.c +++ b/net/netfilter/xt_ipvs.c @@ -85,7 +85,7 @@ ipvs_mt(const struct sk_buff *skb, struct xt_action_param *par) /* * Check if the packet belongs to an existing entry */ - cp = pp->conn_out_get(family, skb, pp, &iph, iph.len, 1 /* inverse */); + cp = pp->conn_out_get(family, skb, &iph, iph.len, 1 /* inverse */); if (unlikely(cp == NULL)) { match = false; goto out; -- cgit v1.2.3-59-g8ed1b From 9bbac6a904d0816dae58b454692c54d6773cc20d Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Mon, 3 Jan 2011 14:44:52 +0100 Subject: IPVS: netns, common protocol changes and use of appcnt. appcnt and timeout_table moved from struct ip_vs_protocol to ip_vs proto_data. 
struct net *net added as first param to - register_app() - unregister_app() - app_conn_bind() - ip_vs_conn_new() [horms@verge.net.au: removed cosmetic-change-only hunk] Signed-off-by: Hans Schillstrom Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- include/net/ip_vs.h | 2 -- net/netfilter/ipvs/ip_vs_conn.c | 6 ++-- net/netfilter/ipvs/ip_vs_proto_sctp.c | 4 +-- net/netfilter/ipvs/ip_vs_proto_tcp.c | 5 ++-- net/netfilter/ipvs/ip_vs_proto_udp.c | 4 +-- net/netfilter/ipvs/ip_vs_sync.c | 55 +++++++++++++++++++---------------- 6 files changed, 39 insertions(+), 37 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 464ea365ca07..cc6ae621a9b5 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -360,8 +360,6 @@ struct ip_vs_protocol { u16 protocol; u16 num_states; int dont_defrag; - atomic_t appcnt; /* counter of proto app incs */ - int *timeout_table; /* protocol timeout table */ void (*init)(struct ip_vs_protocol *pp); diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index a7aba6a4697e..b2024c942345 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -804,7 +804,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, struct ip_vs_dest *dest, __u32 fwmark) { struct ip_vs_conn *cp; - struct ip_vs_protocol *pp = ip_vs_proto_get(p->protocol); + struct ip_vs_proto_data *pd = ip_vs_proto_data_get(&init_net, p->protocol); cp = kmem_cache_zalloc(ip_vs_conn_cachep, GFP_ATOMIC); if (cp == NULL) { @@ -863,8 +863,8 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, #endif ip_vs_bind_xmit(cp); - if (unlikely(pp && atomic_read(&pp->appcnt))) - ip_vs_bind_app(cp, pp); + if (unlikely(pd && atomic_read(&pd->appcnt))) + ip_vs_bind_app(cp, pd->pp); /* * Allow conntrack to be preserved. 
By default, conntrack diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index 19bc37976ea7..0f14f793318a 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -1035,7 +1035,7 @@ static int sctp_register_app(struct ip_vs_app *inc) } } list_add(&inc->p_list, &ipvs->sctp_apps[hash]); - atomic_inc(&pd->pp->appcnt); + atomic_inc(&pd->appcnt); out: spin_unlock_bh(&ipvs->sctp_app_lock); @@ -1048,7 +1048,7 @@ static void sctp_unregister_app(struct ip_vs_app *inc) struct ip_vs_proto_data *pd = ip_vs_proto_data_get(&init_net, IPPROTO_SCTP); spin_lock_bh(&ipvs->sctp_app_lock); - atomic_dec(&pd->pp->appcnt); + atomic_dec(&pd->appcnt); list_del(&inc->p_list); spin_unlock_bh(&ipvs->sctp_app_lock); } diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index d7c245532798..290b3803d8ce 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c @@ -596,7 +596,7 @@ static int tcp_register_app(struct ip_vs_app *inc) } } list_add(&inc->p_list, &ipvs->tcp_apps[hash]); - atomic_inc(&pd->pp->appcnt); + atomic_inc(&pd->appcnt); out: spin_unlock_bh(&ipvs->tcp_app_lock); @@ -611,7 +611,7 @@ tcp_unregister_app(struct ip_vs_app *inc) struct ip_vs_proto_data *pd = ip_vs_proto_data_get(&init_net, IPPROTO_TCP); spin_lock_bh(&ipvs->tcp_app_lock); - atomic_dec(&pd->pp->appcnt); + atomic_dec(&pd->appcnt); list_del(&inc->p_list); spin_unlock_bh(&ipvs->tcp_app_lock); } @@ -701,7 +701,6 @@ struct ip_vs_protocol ip_vs_protocol_tcp = { .protocol = IPPROTO_TCP, .num_states = IP_VS_TCP_S_LAST, .dont_defrag = 0, - .appcnt = ATOMIC_INIT(0), .init = NULL, .exit = NULL, .init_netns = __ip_vs_tcp_init, diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c index aa85df2f14a0..3719837a8fdc 100644 --- a/net/netfilter/ipvs/ip_vs_proto_udp.c +++ b/net/netfilter/ipvs/ip_vs_proto_udp.c @@ -373,7 +373,7 @@ static int 
udp_register_app(struct ip_vs_app *inc) } } list_add(&inc->p_list, &ipvs->udp_apps[hash]); - atomic_inc(&pd->pp->appcnt); + atomic_inc(&pd->appcnt); out: spin_unlock_bh(&ipvs->udp_app_lock); @@ -388,7 +388,7 @@ udp_unregister_app(struct ip_vs_app *inc) struct netns_ipvs *ipvs = net_ipvs(&init_net); spin_lock_bh(&ipvs->udp_app_lock); - atomic_dec(&pd->pp->appcnt); + atomic_dec(&pd->appcnt); list_del(&inc->p_list); spin_unlock_bh(&ipvs->udp_app_lock); } diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 662aa2c22a05..6831e8fac8db 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -725,17 +725,16 @@ ip_vs_conn_fill_param_sync(int af, union ip_vs_sync_conn *sc, * Param: ... * timeout is in sec. */ -static void ip_vs_proc_conn(struct ip_vs_conn_param *param, unsigned flags, - unsigned state, unsigned protocol, unsigned type, +static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param, + unsigned int flags, unsigned int state, + unsigned int protocol, unsigned int type, const union nf_inet_addr *daddr, __be16 dport, unsigned long timeout, __u32 fwmark, - struct ip_vs_sync_conn_options *opt, - struct ip_vs_protocol *pp) + struct ip_vs_sync_conn_options *opt) { struct ip_vs_dest *dest; struct ip_vs_conn *cp; - if (!(flags & IP_VS_CONN_F_TEMPLATE)) cp = ip_vs_conn_in_get(param); else @@ -821,17 +820,23 @@ static void ip_vs_proc_conn(struct ip_vs_conn_param *param, unsigned flags, if (timeout > MAX_SCHEDULE_TIMEOUT / HZ) timeout = MAX_SCHEDULE_TIMEOUT / HZ; cp->timeout = timeout*HZ; - } else if (!(flags & IP_VS_CONN_F_TEMPLATE) && pp->timeout_table) - cp->timeout = pp->timeout_table[state]; - else - cp->timeout = (3*60*HZ); + } else { + struct ip_vs_proto_data *pd; + + pd = ip_vs_proto_data_get(net, protocol); + if (!(flags & IP_VS_CONN_F_TEMPLATE) && pd && pd->timeout_table) + cp->timeout = pd->timeout_table[state]; + else + cp->timeout = (3*60*HZ); + } ip_vs_conn_put(cp); } /* * Process 
received multicast message for Version 0 */ -static void ip_vs_process_message_v0(const char *buffer, const size_t buflen) +static void ip_vs_process_message_v0(struct net *net, const char *buffer, + const size_t buflen) { struct ip_vs_sync_mesg_v0 *m = (struct ip_vs_sync_mesg_v0 *)buffer; struct ip_vs_sync_conn_v0 *s; @@ -879,7 +884,6 @@ static void ip_vs_process_message_v0(const char *buffer, const size_t buflen) } } else { /* protocol in templates is not used for state/timeout */ - pp = NULL; if (state > 0) { IP_VS_DBG(2, "BACKUP v0, Invalid template state %u\n", state); @@ -894,9 +898,9 @@ static void ip_vs_process_message_v0(const char *buffer, const size_t buflen) s->vport, &param); /* Send timeout as Zero */ - ip_vs_proc_conn(&param, flags, state, s->protocol, AF_INET, + ip_vs_proc_conn(net, &param, flags, state, s->protocol, AF_INET, (union nf_inet_addr *)&s->daddr, s->dport, - 0, 0, opt, pp); + 0, 0, opt); } } @@ -945,7 +949,7 @@ static int ip_vs_proc_str(__u8 *p, unsigned int plen, unsigned int *data_len, /* * Process a Version 1 sync. connection */ -static inline int ip_vs_proc_sync_conn(__u8 *p, __u8 *msg_end) +static inline int ip_vs_proc_sync_conn(struct net *net, __u8 *p, __u8 *msg_end) { struct ip_vs_sync_conn_options opt; union ip_vs_sync_conn *s; @@ -1043,7 +1047,6 @@ static inline int ip_vs_proc_sync_conn(__u8 *p, __u8 *msg_end) } } else { /* protocol in templates is not used for state/timeout */ - pp = NULL; if (state > 0) { IP_VS_DBG(3, "BACKUP, Invalid template state %u\n", state); @@ -1058,18 +1061,18 @@ static inline int ip_vs_proc_sync_conn(__u8 *p, __u8 *msg_end) } /* If only IPv4, just silent skip IPv6 */ if (af == AF_INET) - ip_vs_proc_conn(&param, flags, state, s->v4.protocol, af, + ip_vs_proc_conn(net, &param, flags, state, s->v4.protocol, af, (union nf_inet_addr *)&s->v4.daddr, s->v4.dport, ntohl(s->v4.timeout), ntohl(s->v4.fwmark), - (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL), - pp); + (opt_flags & IPVS_OPT_F_SEQ_DATA ?
&opt : NULL) + ); #ifdef CONFIG_IP_VS_IPV6 else - ip_vs_proc_conn(&param, flags, state, s->v6.protocol, af, + ip_vs_proc_conn(net, &param, flags, state, s->v6.protocol, af, (union nf_inet_addr *)&s->v6.daddr, s->v6.dport, ntohl(s->v6.timeout), ntohl(s->v6.fwmark), - (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL), - pp); + (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL) + ); #endif return 0; /* Error exit */ @@ -1083,7 +1086,8 @@ out: * ip_vs_conn entries. * Handles Version 0 & 1 */ -static void ip_vs_process_message(__u8 *buffer, const size_t buflen) +static void ip_vs_process_message(struct net *net, __u8 *buffer, + const size_t buflen) { struct ip_vs_sync_mesg *m2 = (struct ip_vs_sync_mesg *)buffer; __u8 *p, *msg_end; @@ -1136,7 +1140,8 @@ static void ip_vs_process_message(__u8 *buffer, const size_t buflen) return; } /* Process a single sync_conn */ - if ((retc=ip_vs_proc_sync_conn(p, msg_end)) < 0) { + retc = ip_vs_proc_sync_conn(net, p, msg_end); + if (retc < 0) { IP_VS_ERR_RL("BACKUP, Dropping buffer, Err: %d in decoding\n", retc); return; @@ -1146,7 +1151,7 @@ static void ip_vs_process_message(__u8 *buffer, const size_t buflen) } } else { /* Old type of message */ - ip_vs_process_message_v0(buffer, buflen); + ip_vs_process_message_v0(net, buffer, buflen); return; } } @@ -1500,7 +1505,7 @@ static int sync_thread_backup(void *data) /* disable bottom half, because it accesses the data shared by softirq while getting/creating conns */ local_bh_disable(); - ip_vs_process_message(tinfo->buf, len); + ip_vs_process_message(&init_net, tinfo->buf, len); local_bh_enable(); } } -- cgit v1.2.3-59-g8ed1b From ab8a5e8408c3df2d654611bffc3aaf04f418b266 Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Mon, 3 Jan 2011 14:44:53 +0100 Subject: IPVS: netns awareness to ip_vs_app All variables moved to struct ipvs, most external changes fixed (i.e.
init_net removed) in ip_vs_protocol param struct net *net added to: - register_app() - unregister_app() This affected almost all proto_xxx.c files Signed-off-by: Hans Schillstrom Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- include/net/ip_vs.h | 12 +++--- include/net/netns/ip_vs.h | 5 +++ net/netfilter/ipvs/ip_vs_app.c | 73 +++++++++++++++++++++-------------- net/netfilter/ipvs/ip_vs_ftp.c | 8 ++-- net/netfilter/ipvs/ip_vs_proto_sctp.c | 12 +++--- net/netfilter/ipvs/ip_vs_proto_tcp.c | 12 +++--- net/netfilter/ipvs/ip_vs_proto_udp.c | 12 +++--- 7 files changed, 76 insertions(+), 58 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index cc6ae621a9b5..0cdd8ce454c2 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -402,9 +402,9 @@ struct ip_vs_protocol { const struct sk_buff *skb, struct ip_vs_proto_data *pd); - int (*register_app)(struct ip_vs_app *inc); + int (*register_app)(struct net *net, struct ip_vs_app *inc); - void (*unregister_app)(struct ip_vs_app *inc); + void (*unregister_app)(struct net *net, struct ip_vs_app *inc); int (*app_conn_bind)(struct ip_vs_conn *cp); @@ -871,12 +871,12 @@ ip_vs_control_add(struct ip_vs_conn *cp, struct ip_vs_conn *ctl_cp) * (from ip_vs_app.c) */ #define IP_VS_APP_MAX_PORTS 8 -extern int register_ip_vs_app(struct ip_vs_app *app); -extern void unregister_ip_vs_app(struct ip_vs_app *app); +extern int register_ip_vs_app(struct net *net, struct ip_vs_app *app); +extern void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app); extern int ip_vs_bind_app(struct ip_vs_conn *cp, struct ip_vs_protocol *pp); extern void ip_vs_unbind_app(struct ip_vs_conn *cp); -extern int -register_ip_vs_app_inc(struct ip_vs_app *app, __u16 proto, __u16 port); +extern int register_ip_vs_app_inc(struct net *net, struct ip_vs_app *app, + __u16 proto, __u16 port); extern int ip_vs_app_inc_get(struct ip_vs_app *inc); extern void ip_vs_app_inc_put(struct ip_vs_app *inc); diff --git 
a/include/net/netns/ip_vs.h b/include/net/netns/ip_vs.h index 58bd3fd85a97..03f7fe1bede6 100644 --- a/include/net/netns/ip_vs.h +++ b/include/net/netns/ip_vs.h @@ -28,6 +28,11 @@ struct netns_ipvs { #define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1) struct list_head rs_table[IP_VS_RTAB_SIZE]; + /* ip_vs_app */ + struct list_head app_list; + struct mutex app_mutex; + struct lock_class_key app_key; /* mutex debuging */ + /* ip_vs_proto */ #define IP_VS_PROTO_TAB_SIZE 32 /* must be power of 2 */ struct ip_vs_proto_data *proto_data_table[IP_VS_PROTO_TAB_SIZE]; diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c index 40b09ccc4896..286f46594e0e 100644 --- a/net/netfilter/ipvs/ip_vs_app.c +++ b/net/netfilter/ipvs/ip_vs_app.c @@ -43,11 +43,6 @@ EXPORT_SYMBOL(register_ip_vs_app); EXPORT_SYMBOL(unregister_ip_vs_app); EXPORT_SYMBOL(register_ip_vs_app_inc); -/* ipvs application list head */ -static LIST_HEAD(ip_vs_app_list); -static DEFINE_MUTEX(__ip_vs_app_mutex); - - /* * Get an ip_vs_app object */ @@ -67,7 +62,8 @@ static inline void ip_vs_app_put(struct ip_vs_app *app) * Allocate/initialize app incarnation and register it in proto apps. 
*/ static int -ip_vs_app_inc_new(struct ip_vs_app *app, __u16 proto, __u16 port) +ip_vs_app_inc_new(struct net *net, struct ip_vs_app *app, __u16 proto, + __u16 port) { struct ip_vs_protocol *pp; struct ip_vs_app *inc; @@ -98,7 +94,7 @@ ip_vs_app_inc_new(struct ip_vs_app *app, __u16 proto, __u16 port) } } - ret = pp->register_app(inc); + ret = pp->register_app(net, inc); if (ret) goto out; @@ -119,7 +115,7 @@ ip_vs_app_inc_new(struct ip_vs_app *app, __u16 proto, __u16 port) * Release app incarnation */ static void -ip_vs_app_inc_release(struct ip_vs_app *inc) +ip_vs_app_inc_release(struct net *net, struct ip_vs_app *inc) { struct ip_vs_protocol *pp; @@ -127,7 +123,7 @@ ip_vs_app_inc_release(struct ip_vs_app *inc) return; if (pp->unregister_app) - pp->unregister_app(inc); + pp->unregister_app(net, inc); IP_VS_DBG(9, "%s App %s:%u unregistered\n", pp->name, inc->name, ntohs(inc->port)); @@ -168,15 +164,17 @@ void ip_vs_app_inc_put(struct ip_vs_app *inc) * Register an application incarnation in protocol applications */ int -register_ip_vs_app_inc(struct ip_vs_app *app, __u16 proto, __u16 port) +register_ip_vs_app_inc(struct net *net, struct ip_vs_app *app, __u16 proto, + __u16 port) { + struct netns_ipvs *ipvs = net_ipvs(net); int result; - mutex_lock(&__ip_vs_app_mutex); + mutex_lock(&ipvs->app_mutex); - result = ip_vs_app_inc_new(app, proto, port); + result = ip_vs_app_inc_new(net, app, proto, port); - mutex_unlock(&__ip_vs_app_mutex); + mutex_unlock(&ipvs->app_mutex); return result; } @@ -185,16 +183,17 @@ register_ip_vs_app_inc(struct ip_vs_app *app, __u16 proto, __u16 port) /* * ip_vs_app registration routine */ -int register_ip_vs_app(struct ip_vs_app *app) +int register_ip_vs_app(struct net *net, struct ip_vs_app *app) { + struct netns_ipvs *ipvs = net_ipvs(net); /* increase the module use count */ ip_vs_use_count_inc(); - mutex_lock(&__ip_vs_app_mutex); + mutex_lock(&ipvs->app_mutex); - list_add(&app->a_list, &ip_vs_app_list); + list_add(&app->a_list, 
&ipvs->app_list); - mutex_unlock(&__ip_vs_app_mutex); + mutex_unlock(&ipvs->app_mutex); return 0; } @@ -204,19 +203,20 @@ int register_ip_vs_app(struct ip_vs_app *app) * ip_vs_app unregistration routine * We are sure there are no app incarnations attached to services */ -void unregister_ip_vs_app(struct ip_vs_app *app) +void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app) { + struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_app *inc, *nxt; - mutex_lock(&__ip_vs_app_mutex); + mutex_lock(&ipvs->app_mutex); list_for_each_entry_safe(inc, nxt, &app->incs_list, a_list) { - ip_vs_app_inc_release(inc); + ip_vs_app_inc_release(net, inc); } list_del(&app->a_list); - mutex_unlock(&__ip_vs_app_mutex); + mutex_unlock(&ipvs->app_mutex); /* decrease the module use count */ ip_vs_use_count_dec(); @@ -226,7 +226,8 @@ void unregister_ip_vs_app(struct ip_vs_app *app) /* * Bind ip_vs_conn to its ip_vs_app (called by cp constructor) */ -int ip_vs_bind_app(struct ip_vs_conn *cp, struct ip_vs_protocol *pp) +int ip_vs_bind_app(struct ip_vs_conn *cp, + struct ip_vs_protocol *pp) { return pp->app_conn_bind(cp); } @@ -481,11 +482,11 @@ int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb) * /proc/net/ip_vs_app entry function */ -static struct ip_vs_app *ip_vs_app_idx(loff_t pos) +static struct ip_vs_app *ip_vs_app_idx(struct netns_ipvs *ipvs, loff_t pos) { struct ip_vs_app *app, *inc; - list_for_each_entry(app, &ip_vs_app_list, a_list) { + list_for_each_entry(app, &ipvs->app_list, a_list) { list_for_each_entry(inc, &app->incs_list, a_list) { if (pos-- == 0) return inc; @@ -497,19 +498,24 @@ static struct ip_vs_app *ip_vs_app_idx(loff_t pos) static void *ip_vs_app_seq_start(struct seq_file *seq, loff_t *pos) { - mutex_lock(&__ip_vs_app_mutex); + struct net *net = seq_file_net(seq); + struct netns_ipvs *ipvs = net_ipvs(net); + + mutex_lock(&ipvs->app_mutex); - return *pos ? ip_vs_app_idx(*pos - 1) : SEQ_START_TOKEN; + return *pos ? 
ip_vs_app_idx(ipvs, *pos - 1) : SEQ_START_TOKEN; } static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct ip_vs_app *inc, *app; struct list_head *e; + struct net *net = seq_file_net(seq); + struct netns_ipvs *ipvs = net_ipvs(net); ++*pos; if (v == SEQ_START_TOKEN) - return ip_vs_app_idx(0); + return ip_vs_app_idx(ipvs, 0); inc = v; app = inc->app; @@ -518,7 +524,7 @@ static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos) return list_entry(e, struct ip_vs_app, a_list); /* go on to next application */ - for (e = app->a_list.next; e != &ip_vs_app_list; e = e->next) { + for (e = app->a_list.next; e != &ipvs->app_list; e = e->next) { app = list_entry(e, struct ip_vs_app, a_list); list_for_each_entry(inc, &app->incs_list, a_list) { return inc; @@ -529,7 +535,9 @@ static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos) static void ip_vs_app_seq_stop(struct seq_file *seq, void *v) { - mutex_unlock(&__ip_vs_app_mutex); + struct netns_ipvs *ipvs = net_ipvs(seq_file_net(seq)); + + mutex_unlock(&ipvs->app_mutex); } static int ip_vs_app_seq_show(struct seq_file *seq, void *v) @@ -557,7 +565,8 @@ static const struct seq_operations ip_vs_app_seq_ops = { static int ip_vs_app_open(struct inode *inode, struct file *file) { - return seq_open(file, &ip_vs_app_seq_ops); + return seq_open_net(inode, file, &ip_vs_app_seq_ops, + sizeof(struct seq_net_private)); } static const struct file_operations ip_vs_app_fops = { @@ -571,9 +580,13 @@ static const struct file_operations ip_vs_app_fops = { static int __net_init __ip_vs_app_init(struct net *net) { + struct netns_ipvs *ipvs = net_ipvs(net); + if (!net_eq(net, &init_net)) /* netns not enabled yet */ return -EPERM; + INIT_LIST_HEAD(&ipvs->app_list); + __mutex_init(&ipvs->app_mutex, "ipvs->app_mutex", &ipvs->app_key); proc_net_fops_create(net, "ip_vs_app", 0, &ip_vs_app_fops); return 0; } diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index 
b38ae941f677..77b0036dcb73 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c @@ -414,14 +414,14 @@ static int __net_init __ip_vs_ftp_init(struct net *net) if (!net_eq(net, &init_net)) /* netns not enabled yet */ return -EPERM; - ret = register_ip_vs_app(app); + ret = register_ip_vs_app(net, app); if (ret) return ret; for (i=0; i<IP_VS_APP_MAX_PORTS; i++) { if (!ports[i]) continue; - ret = register_ip_vs_app_inc(app, app->protocol, ports[i]); + ret = register_ip_vs_app_inc(net, app, app->protocol, ports[i]); if (ret) break; pr_info("%s: loaded support on port[%d] = %d\n", @@ -429,7 +429,7 @@ static int __net_init __ip_vs_ftp_init(struct net *net) } if (ret) - unregister_ip_vs_app(app); + unregister_ip_vs_app(net, app); return ret; } @@ -443,7 +443,7 @@ static void __ip_vs_ftp_exit(struct net *net) if (!net_eq(net, &init_net)) /* netns not enabled yet */ return; - unregister_ip_vs_app(app); + unregister_ip_vs_app(net, app); } static struct pernet_operations ip_vs_ftp_ops = { diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index 0f14f793318a..569e77bf08c4 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -1016,14 +1016,14 @@ static inline __u16 sctp_app_hashkey(__be16 port) & SCTP_APP_TAB_MASK; } -static int sctp_register_app(struct ip_vs_app *inc) +static int sctp_register_app(struct net *net, struct ip_vs_app *inc) { struct ip_vs_app *i; __u16 hash; __be16 port = inc->port; int ret = 0; - struct netns_ipvs *ipvs = net_ipvs(&init_net); - struct ip_vs_proto_data *pd = ip_vs_proto_data_get(&init_net, IPPROTO_SCTP); + struct netns_ipvs *ipvs = net_ipvs(net); + struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_SCTP); hash = sctp_app_hashkey(port); @@ -1042,10 +1042,10 @@ out: return ret; } -static void sctp_unregister_app(struct ip_vs_app *inc) +static void sctp_unregister_app(struct net *net, struct ip_vs_app *inc) { - struct netns_ipvs *ipvs = net_ipvs(&init_net); - struct ip_vs_proto_data *pd = ip_vs_proto_data_get(&init_net,
IPPROTO_SCTP); + struct netns_ipvs *ipvs = net_ipvs(net); + struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_SCTP); spin_lock_bh(&ipvs->sctp_app_lock); atomic_dec(&pd->appcnt); diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index 290b3803d8ce..757aaaf083bb 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c @@ -577,14 +577,14 @@ static inline __u16 tcp_app_hashkey(__be16 port) } -static int tcp_register_app(struct ip_vs_app *inc) +static int tcp_register_app(struct net *net, struct ip_vs_app *inc) { struct ip_vs_app *i; __u16 hash; __be16 port = inc->port; int ret = 0; - struct netns_ipvs *ipvs = net_ipvs(&init_net); - struct ip_vs_proto_data *pd = ip_vs_proto_data_get(&init_net, IPPROTO_TCP); + struct netns_ipvs *ipvs = net_ipvs(net); + struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP); hash = tcp_app_hashkey(port); @@ -605,10 +605,10 @@ static int tcp_register_app(struct ip_vs_app *inc) static void -tcp_unregister_app(struct ip_vs_app *inc) +tcp_unregister_app(struct net *net, struct ip_vs_app *inc) { - struct netns_ipvs *ipvs = net_ipvs(&init_net); - struct ip_vs_proto_data *pd = ip_vs_proto_data_get(&init_net, IPPROTO_TCP); + struct netns_ipvs *ipvs = net_ipvs(net); + struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP); spin_lock_bh(&ipvs->tcp_app_lock); atomic_dec(&pd->appcnt); diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c index 3719837a8fdc..1dc394100fa8 100644 --- a/net/netfilter/ipvs/ip_vs_proto_udp.c +++ b/net/netfilter/ipvs/ip_vs_proto_udp.c @@ -353,14 +353,14 @@ static inline __u16 udp_app_hashkey(__be16 port) } -static int udp_register_app(struct ip_vs_app *inc) +static int udp_register_app(struct net *net, struct ip_vs_app *inc) { struct ip_vs_app *i; __u16 hash; __be16 port = inc->port; int ret = 0; - struct netns_ipvs *ipvs = net_ipvs(&init_net); - struct ip_vs_proto_data *pd 
= ip_vs_proto_data_get(&init_net, IPPROTO_UDP); + struct netns_ipvs *ipvs = net_ipvs(net); + struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_UDP); hash = udp_app_hashkey(port); @@ -382,10 +382,10 @@ static int udp_register_app(struct ip_vs_app *inc) static void -udp_unregister_app(struct ip_vs_app *inc) +udp_unregister_app(struct net *net, struct ip_vs_app *inc) { - struct ip_vs_proto_data *pd = ip_vs_proto_data_get(&init_net, IPPROTO_UDP); - struct netns_ipvs *ipvs = net_ipvs(&init_net); + struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_UDP); + struct netns_ipvs *ipvs = net_ipvs(net); spin_lock_bh(&ipvs->udp_app_lock); atomic_dec(&pd->appcnt); -- cgit v1.2.3-59-g8ed1b From 29c2026fd4980c144d9c746dc1565060f08e5796 Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Mon, 3 Jan 2011 14:44:54 +0100 Subject: IPVS: netns awareness to ip_vs_est All variables moved to struct ipvs, most external changes fixed (i.e. init_net removed) *v3 timer per ns instead of a common timer in estimator. 
Signed-off-by: Hans Schillstrom Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- include/net/ip_vs.h | 4 +- include/net/netns/ip_vs.h | 4 ++ net/netfilter/ipvs/ip_vs_ctl.c | 20 +++++----- net/netfilter/ipvs/ip_vs_est.c | 86 +++++++++++++++++++++++------------------- 4 files changed, 64 insertions(+), 50 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 0cdd8ce454c2..c08927bb1728 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -1004,8 +1004,8 @@ extern void ip_vs_sync_cleanup(void); */ extern int ip_vs_estimator_init(void); extern void ip_vs_estimator_cleanup(void); -extern void ip_vs_new_estimator(struct ip_vs_stats *stats); -extern void ip_vs_kill_estimator(struct ip_vs_stats *stats); +extern void ip_vs_new_estimator(struct net *net, struct ip_vs_stats *stats); +extern void ip_vs_kill_estimator(struct net *net, struct ip_vs_stats *stats); extern void ip_vs_zero_estimator(struct ip_vs_stats *stats); /* diff --git a/include/net/netns/ip_vs.h b/include/net/netns/ip_vs.h index 03f7fe1bede6..db0240198339 100644 --- a/include/net/netns/ip_vs.h +++ b/include/net/netns/ip_vs.h @@ -70,6 +70,10 @@ struct netns_ipvs { int sysctl_lblcr_expiration; struct ctl_table_header *lblcr_ctl_header; struct ctl_table *lblcr_ctl_table; + /* ip_vs_est */ + struct list_head est_list; /* estimator list */ + spinlock_t est_lock; + struct timer_list est_timer; /* Estimation timer */ }; #endif /* IP_VS_H_ */ diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 88474f1e828a..c89beb8eafbb 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -816,7 +816,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, spin_unlock(&dest->dst_lock); if (add) - ip_vs_new_estimator(&dest->stats); + ip_vs_new_estimator(svc->net, &dest->stats); write_lock_bh(&__ip_vs_svc_lock); @@ -1009,9 +1009,9 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct 
ip_vs_dest_user_kern *udest) /* * Delete a destination (must be already unlinked from the service) */ -static void __ip_vs_del_dest(struct ip_vs_dest *dest) +static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest) { - ip_vs_kill_estimator(&dest->stats); + ip_vs_kill_estimator(net, &dest->stats); /* * Remove it from the d-linked list with the real services. @@ -1080,6 +1080,7 @@ static int ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) { struct ip_vs_dest *dest; + struct net *net = svc->net; __be16 dport = udest->port; EnterFunction(2); @@ -1108,7 +1109,7 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) /* * Delete the destination */ - __ip_vs_del_dest(dest); + __ip_vs_del_dest(net, dest); LeaveFunction(2); @@ -1197,7 +1198,7 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, else if (svc->port == 0) atomic_inc(&ip_vs_nullsvc_counter); - ip_vs_new_estimator(&svc->stats); + ip_vs_new_estimator(net, &svc->stats); /* Count only IPv4 services for old get/setsockopt interface */ if (svc->af == AF_INET) @@ -1345,7 +1346,7 @@ static void __ip_vs_del_service(struct ip_vs_service *svc) if (svc->af == AF_INET) ip_vs_num_services--; - ip_vs_kill_estimator(&svc->stats); + ip_vs_kill_estimator(svc->net, &svc->stats); /* Unbind scheduler */ old_sched = svc->scheduler; @@ -1368,7 +1369,7 @@ static void __ip_vs_del_service(struct ip_vs_service *svc) */ list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) { __ip_vs_unlink_dest(svc, dest, 0); - __ip_vs_del_dest(dest); + __ip_vs_del_dest(svc->net, dest); } /* @@ -3460,7 +3461,7 @@ int __net_init __ip_vs_control_init(struct net *net) vs_vars); if (sysctl_header == NULL) goto err_reg; - ip_vs_new_estimator(&ip_vs_stats); + ip_vs_new_estimator(net, &ip_vs_stats); return 0; err_reg: @@ -3472,7 +3473,7 @@ static void __net_exit __ip_vs_control_cleanup(struct net *net) if (!net_eq(net, &init_net)) /* netns not enabled yet */ 
return; - ip_vs_kill_estimator(&ip_vs_stats); + ip_vs_kill_estimator(net, &ip_vs_stats); unregister_net_sysctl_table(sysctl_header); proc_net_remove(net, "ip_vs_stats"); proc_net_remove(net, "ip_vs"); @@ -3536,7 +3537,6 @@ void ip_vs_control_cleanup(void) ip_vs_trash_cleanup(); cancel_delayed_work_sync(&defense_work); cancel_work_sync(&defense_work.work); - ip_vs_kill_estimator(&ip_vs_stats); unregister_pernet_subsys(&ipvs_control_ops); ip_vs_genl_unregister(); nf_unregister_sockopt(&ip_vs_sockopts); diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c index 7417a0c1408b..07d839bef537 100644 --- a/net/netfilter/ipvs/ip_vs_est.c +++ b/net/netfilter/ipvs/ip_vs_est.c @@ -8,8 +8,12 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. * - * Changes: - * + * Changes: Hans Schillstrom + * Network name space (netns) aware. + * Global data moved to netns i.e struct netns_ipvs + * Affected data: est_list and est_lock. + * estimation_timer() runs with timer per netns. + * get_stats()) do the per cpu summing. 
*/ #define KMSG_COMPONENT "IPVS" @@ -48,12 +52,6 @@ */ -static void estimation_timer(unsigned long arg); - -static LIST_HEAD(est_list); -static DEFINE_SPINLOCK(est_lock); -static DEFINE_TIMER(est_timer, estimation_timer, 0, 0); - static void estimation_timer(unsigned long arg) { struct ip_vs_estimator *e; @@ -62,9 +60,12 @@ static void estimation_timer(unsigned long arg) u32 n_inpkts, n_outpkts; u64 n_inbytes, n_outbytes; u32 rate; + struct net *net = (struct net *)arg; + struct netns_ipvs *ipvs; - spin_lock(&est_lock); - list_for_each_entry(e, &est_list, list) { + ipvs = net_ipvs(net); + spin_lock(&ipvs->est_lock); + list_for_each_entry(e, &ipvs->est_list, list) { s = container_of(e, struct ip_vs_stats, est); spin_lock(&s->lock); @@ -75,38 +76,39 @@ static void estimation_timer(unsigned long arg) n_outbytes = s->ustats.outbytes; /* scaled by 2^10, but divided 2 seconds */ - rate = (n_conns - e->last_conns)<<9; + rate = (n_conns - e->last_conns) << 9; e->last_conns = n_conns; - e->cps += ((long)rate - (long)e->cps)>>2; - s->ustats.cps = (e->cps+0x1FF)>>10; + e->cps += ((long)rate - (long)e->cps) >> 2; + s->ustats.cps = (e->cps + 0x1FF) >> 10; - rate = (n_inpkts - e->last_inpkts)<<9; + rate = (n_inpkts - e->last_inpkts) << 9; e->last_inpkts = n_inpkts; - e->inpps += ((long)rate - (long)e->inpps)>>2; - s->ustats.inpps = (e->inpps+0x1FF)>>10; + e->inpps += ((long)rate - (long)e->inpps) >> 2; + s->ustats.inpps = (e->inpps + 0x1FF) >> 10; - rate = (n_outpkts - e->last_outpkts)<<9; + rate = (n_outpkts - e->last_outpkts) << 9; e->last_outpkts = n_outpkts; - e->outpps += ((long)rate - (long)e->outpps)>>2; - s->ustats.outpps = (e->outpps+0x1FF)>>10; + e->outpps += ((long)rate - (long)e->outpps) >> 2; + s->ustats.outpps = (e->outpps + 0x1FF) >> 10; - rate = (n_inbytes - e->last_inbytes)<<4; + rate = (n_inbytes - e->last_inbytes) << 4; e->last_inbytes = n_inbytes; - e->inbps += ((long)rate - (long)e->inbps)>>2; - s->ustats.inbps = (e->inbps+0xF)>>5; + e->inbps += ((long)rate 
- (long)e->inbps) >> 2; + s->ustats.inbps = (e->inbps + 0xF) >> 5; - rate = (n_outbytes - e->last_outbytes)<<4; + rate = (n_outbytes - e->last_outbytes) << 4; e->last_outbytes = n_outbytes; - e->outbps += ((long)rate - (long)e->outbps)>>2; - s->ustats.outbps = (e->outbps+0xF)>>5; + e->outbps += ((long)rate - (long)e->outbps) >> 2; + s->ustats.outbps = (e->outbps + 0xF) >> 5; spin_unlock(&s->lock); } - spin_unlock(&est_lock); - mod_timer(&est_timer, jiffies + 2*HZ); + spin_unlock(&ipvs->est_lock); + mod_timer(&ipvs->est_timer, jiffies + 2*HZ); } -void ip_vs_new_estimator(struct ip_vs_stats *stats) +void ip_vs_new_estimator(struct net *net, struct ip_vs_stats *stats) { + struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_estimator *est = &stats->est; INIT_LIST_HEAD(&est->list); @@ -126,18 +128,19 @@ void ip_vs_new_estimator(struct ip_vs_stats *stats) est->last_outbytes = stats->ustats.outbytes; est->outbps = stats->ustats.outbps<<5; - spin_lock_bh(&est_lock); - list_add(&est->list, &est_list); - spin_unlock_bh(&est_lock); + spin_lock_bh(&ipvs->est_lock); + list_add(&est->list, &ipvs->est_list); + spin_unlock_bh(&ipvs->est_lock); } -void ip_vs_kill_estimator(struct ip_vs_stats *stats) +void ip_vs_kill_estimator(struct net *net, struct ip_vs_stats *stats) { + struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_estimator *est = &stats->est; - spin_lock_bh(&est_lock); + spin_lock_bh(&ipvs->est_lock); list_del(&est->list); - spin_unlock_bh(&est_lock); + spin_unlock_bh(&ipvs->est_lock); } void ip_vs_zero_estimator(struct ip_vs_stats *stats) @@ -159,14 +162,25 @@ void ip_vs_zero_estimator(struct ip_vs_stats *stats) static int __net_init __ip_vs_estimator_init(struct net *net) { + struct netns_ipvs *ipvs = net_ipvs(net); + if (!net_eq(net, &init_net)) /* netns not enabled yet */ return -EPERM; + INIT_LIST_HEAD(&ipvs->est_list); + spin_lock_init(&ipvs->est_lock); + setup_timer(&ipvs->est_timer, estimation_timer, (unsigned long)net); + mod_timer(&ipvs->est_timer, jiffies 
+ 2 * HZ); return 0; } +static void __net_exit __ip_vs_estimator_exit(struct net *net) +{ + del_timer_sync(&net_ipvs(net)->est_timer); +} static struct pernet_operations ip_vs_app_ops = { .init = __ip_vs_estimator_init, + .exit = __ip_vs_estimator_exit, }; int __init ip_vs_estimator_init(void) @@ -174,14 +188,10 @@ int __init ip_vs_estimator_init(void) int rv; rv = register_pernet_subsys(&ip_vs_app_ops); - if (rv < 0) - return rv; - mod_timer(&est_timer, jiffies + 2 * HZ); return rv; } void ip_vs_estimator_cleanup(void) { - del_timer_sync(&est_timer); unregister_pernet_subsys(&ip_vs_app_ops); } -- cgit v1.2.3-59-g8ed1b From f131315fa272d337dfca7dad2f033ff5296dad65 Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Mon, 3 Jan 2011 14:44:55 +0100 Subject: IPVS: netns awareness to ip_vs_sync All global variables moved to struct ipvs, most external changes fixed (i.e. init_net removed) in sync_buf create + 4 replaced by sizeof(struct..) Signed-off-by: Hans Schillstrom Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- include/net/ip_vs.h | 14 +- include/net/netns/ip_vs.h | 16 ++ net/netfilter/ipvs/ip_vs_core.c | 15 +- net/netfilter/ipvs/ip_vs_ctl.c | 52 ++++--- net/netfilter/ipvs/ip_vs_sync.c | 334 +++++++++++++++++++++------------------- 5 files changed, 240 insertions(+), 191 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index c08927bb1728..4265b5e00c94 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -958,7 +958,7 @@ extern struct ip_vs_stats ip_vs_stats; extern const struct ctl_path net_vs_ctl_path[]; extern int sysctl_ip_vs_sync_ver; -extern void ip_vs_sync_switch_mode(int mode); +extern void ip_vs_sync_switch_mode(struct net *net, int mode); extern struct ip_vs_service * ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol, const union nf_inet_addr *vaddr, __be16 vport); @@ -987,14 +987,10 @@ extern struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp); * IPVS sync 
daemon data and function prototypes * (from ip_vs_sync.c) */ -extern volatile int ip_vs_sync_state; -extern volatile int ip_vs_master_syncid; -extern volatile int ip_vs_backup_syncid; -extern char ip_vs_master_mcast_ifn[IP_VS_IFNAME_MAXLEN]; -extern char ip_vs_backup_mcast_ifn[IP_VS_IFNAME_MAXLEN]; -extern int start_sync_thread(int state, char *mcast_ifn, __u8 syncid); -extern int stop_sync_thread(int state); -extern void ip_vs_sync_conn(struct ip_vs_conn *cp); +extern int start_sync_thread(struct net *net, int state, char *mcast_ifn, + __u8 syncid); +extern int stop_sync_thread(struct net *net, int state); +extern void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp); extern int ip_vs_sync_init(void); extern void ip_vs_sync_cleanup(void); diff --git a/include/net/netns/ip_vs.h b/include/net/netns/ip_vs.h index db0240198339..aba78f3c8341 100644 --- a/include/net/netns/ip_vs.h +++ b/include/net/netns/ip_vs.h @@ -74,6 +74,22 @@ struct netns_ipvs { struct list_head est_list; /* estimator list */ spinlock_t est_lock; struct timer_list est_timer; /* Estimation timer */ + /* ip_vs_sync */ + struct list_head sync_queue; + spinlock_t sync_lock; + struct ip_vs_sync_buff *sync_buff; + spinlock_t sync_buff_lock; + struct sockaddr_in sync_mcast_addr; + struct task_struct *master_thread; + struct task_struct *backup_thread; + int send_mesg_maxlen; + int recv_mesg_maxlen; + volatile int sync_state; + volatile int master_syncid; + volatile int backup_syncid; + /* multicast interface name */ + char master_mcast_ifn[IP_VS_IFNAME_MAXLEN]; + char backup_mcast_ifn[IP_VS_IFNAME_MAXLEN]; }; #endif /* IP_VS_H_ */ diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 9317affc5ea1..5531d569aa5e 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1471,12 +1471,13 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum) static unsigned int ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) { 
- struct net *net = NULL; + struct net *net; struct ip_vs_iphdr iph; struct ip_vs_protocol *pp; struct ip_vs_proto_data *pd; struct ip_vs_conn *cp; int ret, restart, pkts; + struct netns_ipvs *ipvs; /* Already marked as IPVS request or reply? */ if (skb->ipvs_property) @@ -1556,7 +1557,8 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) } IP_VS_DBG_PKT(11, af, pp, skb, 0, "Incoming packet"); - + net = skb_net(skb); + ipvs = net_ipvs(net); /* Check the server status */ if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) { /* the destination server is not available */ @@ -1589,12 +1591,13 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) * * For ONE_PKT let ip_vs_sync_conn() do the filter work. */ + if (cp->flags & IP_VS_CONN_F_ONE_PACKET) pkts = sysctl_ip_vs_sync_threshold[0]; else pkts = atomic_add_return(1, &cp->in_pkts); - if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && + if ((ipvs->sync_state & IP_VS_STATE_MASTER) && cp->protocol == IPPROTO_SCTP) { if ((cp->state == IP_VS_SCTP_S_ESTABLISHED && (pkts % sysctl_ip_vs_sync_threshold[1] @@ -1603,13 +1606,13 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) ((cp->state == IP_VS_SCTP_S_CLOSED) || (cp->state == IP_VS_SCTP_S_SHUT_ACK_CLI) || (cp->state == IP_VS_SCTP_S_SHUT_ACK_SER)))) { - ip_vs_sync_conn(cp); + ip_vs_sync_conn(net, cp); goto out; } } /* Keep this block last: TCP and others with pp->num_states <= 1 */ - else if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && + else if ((ipvs->sync_state & IP_VS_STATE_MASTER) && (((cp->protocol != IPPROTO_TCP || cp->state == IP_VS_TCP_S_ESTABLISHED) && (pkts % sysctl_ip_vs_sync_threshold[1] @@ -1619,7 +1622,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) (cp->state == IP_VS_TCP_S_CLOSE) || (cp->state == IP_VS_TCP_S_CLOSE_WAIT) || (cp->state == IP_VS_TCP_S_TIME_WAIT))))) - ip_vs_sync_conn(cp); + ip_vs_sync_conn(net, cp); out: cp->old_state = cp->state; diff --git a/net/netfilter/ipvs/ip_vs_ctl.c 
b/net/netfilter/ipvs/ip_vs_ctl.c index c89beb8eafbb..03f86312b4bb 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1559,7 +1559,8 @@ proc_do_sync_mode(ctl_table *table, int write, /* Restore the correct value */ *valp = val; } else { - ip_vs_sync_switch_mode(val); + struct net *net = current->nsproxy->net_ns; + ip_vs_sync_switch_mode(net, val); } } return rc; @@ -2174,11 +2175,12 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) goto out_unlock; } else if (cmd == IP_VS_SO_SET_STARTDAEMON) { struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg; - ret = start_sync_thread(dm->state, dm->mcast_ifn, dm->syncid); + ret = start_sync_thread(net, dm->state, dm->mcast_ifn, + dm->syncid); goto out_unlock; } else if (cmd == IP_VS_SO_SET_STOPDAEMON) { struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg; - ret = stop_sync_thread(dm->state); + ret = stop_sync_thread(net, dm->state); goto out_unlock; } @@ -2424,6 +2426,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) int ret = 0; unsigned int copylen; struct net *net = sock_net(sk); + struct netns_ipvs *ipvs = net_ipvs(net); BUG_ON(!net); if (!capable(CAP_NET_ADMIN)) @@ -2546,15 +2549,17 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) struct ip_vs_daemon_user d[2]; memset(&d, 0, sizeof(d)); - if (ip_vs_sync_state & IP_VS_STATE_MASTER) { + if (ipvs->sync_state & IP_VS_STATE_MASTER) { d[0].state = IP_VS_STATE_MASTER; - strlcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn, sizeof(d[0].mcast_ifn)); - d[0].syncid = ip_vs_master_syncid; + strlcpy(d[0].mcast_ifn, ipvs->master_mcast_ifn, + sizeof(d[0].mcast_ifn)); + d[0].syncid = ipvs->master_syncid; } - if (ip_vs_sync_state & IP_VS_STATE_BACKUP) { + if (ipvs->sync_state & IP_VS_STATE_BACKUP) { d[1].state = IP_VS_STATE_BACKUP; - strlcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn, sizeof(d[1].mcast_ifn)); - d[1].syncid = ip_vs_backup_syncid; + 
strlcpy(d[1].mcast_ifn, ipvs->backup_mcast_ifn, + sizeof(d[1].mcast_ifn)); + d[1].syncid = ipvs->backup_syncid; } if (copy_to_user(user, &d, sizeof(d)) != 0) ret = -EFAULT; @@ -3061,20 +3066,23 @@ nla_put_failure: static int ip_vs_genl_dump_daemons(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = skb_net(skb); + struct netns_ipvs *ipvs = net_ipvs(net); + mutex_lock(&__ip_vs_mutex); - if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) { + if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) { if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER, - ip_vs_master_mcast_ifn, - ip_vs_master_syncid, cb) < 0) + ipvs->master_mcast_ifn, + ipvs->master_syncid, cb) < 0) goto nla_put_failure; cb->args[0] = 1; } - if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) { + if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) { if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP, - ip_vs_backup_mcast_ifn, - ip_vs_backup_syncid, cb) < 0) + ipvs->backup_mcast_ifn, + ipvs->backup_syncid, cb) < 0) goto nla_put_failure; cb->args[1] = 1; @@ -3086,24 +3094,26 @@ nla_put_failure: return skb->len; } -static int ip_vs_genl_new_daemon(struct nlattr **attrs) +static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs) { if (!(attrs[IPVS_DAEMON_ATTR_STATE] && attrs[IPVS_DAEMON_ATTR_MCAST_IFN] && attrs[IPVS_DAEMON_ATTR_SYNC_ID])) return -EINVAL; - return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]), + return start_sync_thread(net, + nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]), nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]), nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID])); } -static int ip_vs_genl_del_daemon(struct nlattr **attrs) +static int ip_vs_genl_del_daemon(struct net *net, struct nlattr **attrs) { if (!attrs[IPVS_DAEMON_ATTR_STATE]) return -EINVAL; - return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE])); + return stop_sync_thread(net, + nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE])); } static int 
ip_vs_genl_set_config(struct net *net, struct nlattr **attrs) @@ -3159,9 +3169,9 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) } if (cmd == IPVS_CMD_NEW_DAEMON) - ret = ip_vs_genl_new_daemon(daemon_attrs); + ret = ip_vs_genl_new_daemon(net, daemon_attrs); else - ret = ip_vs_genl_del_daemon(daemon_attrs); + ret = ip_vs_genl_del_daemon(net, daemon_attrs); goto out; } else if (cmd == IPVS_CMD_ZERO && !info->attrs[IPVS_CMD_ATTR_SERVICE]) { diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 6831e8fac8db..c29e73d686fb 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -192,6 +192,7 @@ union ip_vs_sync_conn { #define IPVS_OPT_F_PARAM (1 << (IPVS_OPT_PARAM-1)) struct ip_vs_sync_thread_data { + struct net *net; struct socket *sock; char *buf; }; @@ -259,10 +260,6 @@ struct ip_vs_sync_mesg { /* ip_vs_sync_conn entries start here */ }; -/* the maximum length of sync (sending/receiving) message */ -static int sync_send_mesg_maxlen; -static int sync_recv_mesg_maxlen; - struct ip_vs_sync_buff { struct list_head list; unsigned long firstuse; @@ -273,28 +270,6 @@ struct ip_vs_sync_buff { unsigned char *end; }; - -/* the sync_buff list head and the lock */ -static LIST_HEAD(ip_vs_sync_queue); -static DEFINE_SPINLOCK(ip_vs_sync_lock); - -/* current sync_buff for accepting new conn entries */ -static struct ip_vs_sync_buff *curr_sb = NULL; -static DEFINE_SPINLOCK(curr_sb_lock); - -/* ipvs sync daemon state */ -volatile int ip_vs_sync_state = IP_VS_STATE_NONE; -volatile int ip_vs_master_syncid = 0; -volatile int ip_vs_backup_syncid = 0; - -/* multicast interface name */ -char ip_vs_master_mcast_ifn[IP_VS_IFNAME_MAXLEN]; -char ip_vs_backup_mcast_ifn[IP_VS_IFNAME_MAXLEN]; - -/* sync daemon tasks */ -static struct task_struct *sync_master_thread; -static struct task_struct *sync_backup_thread; - /* multicast addr */ static struct sockaddr_in mcast_addr = { .sin_family = AF_INET, @@ 
-324,20 +299,20 @@ static void hton_seq(struct ip_vs_seq *ho, struct ip_vs_seq *no) put_unaligned_be32(ho->previous_delta, &no->previous_delta); } -static inline struct ip_vs_sync_buff *sb_dequeue(void) +static inline struct ip_vs_sync_buff *sb_dequeue(struct netns_ipvs *ipvs) { struct ip_vs_sync_buff *sb; - spin_lock_bh(&ip_vs_sync_lock); - if (list_empty(&ip_vs_sync_queue)) { + spin_lock_bh(&ipvs->sync_lock); + if (list_empty(&ipvs->sync_queue)) { sb = NULL; } else { - sb = list_entry(ip_vs_sync_queue.next, + sb = list_entry(ipvs->sync_queue.next, struct ip_vs_sync_buff, list); list_del(&sb->list); } - spin_unlock_bh(&ip_vs_sync_lock); + spin_unlock_bh(&ipvs->sync_lock); return sb; } @@ -345,25 +320,27 @@ static inline struct ip_vs_sync_buff *sb_dequeue(void) /* * Create a new sync buffer for Version 1 proto. */ -static inline struct ip_vs_sync_buff * ip_vs_sync_buff_create(void) +static inline struct ip_vs_sync_buff * +ip_vs_sync_buff_create(struct netns_ipvs *ipvs) { struct ip_vs_sync_buff *sb; if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC))) return NULL; - if (!(sb->mesg=kmalloc(sync_send_mesg_maxlen, GFP_ATOMIC))) { + sb->mesg = kmalloc(ipvs->send_mesg_maxlen, GFP_ATOMIC); + if (!sb->mesg) { kfree(sb); return NULL; } sb->mesg->reserved = 0; /* old nr_conns i.e. 
must be zeo now */ sb->mesg->version = SYNC_PROTO_VER; - sb->mesg->syncid = ip_vs_master_syncid; + sb->mesg->syncid = ipvs->master_syncid; sb->mesg->size = sizeof(struct ip_vs_sync_mesg); sb->mesg->nr_conns = 0; sb->mesg->spare = 0; sb->head = (unsigned char *)sb->mesg + sizeof(struct ip_vs_sync_mesg); - sb->end = (unsigned char *)sb->mesg + sync_send_mesg_maxlen; + sb->end = (unsigned char *)sb->mesg + ipvs->send_mesg_maxlen; sb->firstuse = jiffies; return sb; @@ -375,14 +352,16 @@ static inline void ip_vs_sync_buff_release(struct ip_vs_sync_buff *sb) kfree(sb); } -static inline void sb_queue_tail(struct ip_vs_sync_buff *sb) +static inline void sb_queue_tail(struct netns_ipvs *ipvs) { - spin_lock(&ip_vs_sync_lock); - if (ip_vs_sync_state & IP_VS_STATE_MASTER) - list_add_tail(&sb->list, &ip_vs_sync_queue); + struct ip_vs_sync_buff *sb = ipvs->sync_buff; + + spin_lock(&ipvs->sync_lock); + if (ipvs->sync_state & IP_VS_STATE_MASTER) + list_add_tail(&sb->list, &ipvs->sync_queue); else ip_vs_sync_buff_release(sb); - spin_unlock(&ip_vs_sync_lock); + spin_unlock(&ipvs->sync_lock); } /* @@ -390,18 +369,18 @@ static inline void sb_queue_tail(struct ip_vs_sync_buff *sb) * than the specified time or the specified time is zero. 
*/ static inline struct ip_vs_sync_buff * -get_curr_sync_buff(unsigned long time) +get_curr_sync_buff(struct netns_ipvs *ipvs, unsigned long time) { struct ip_vs_sync_buff *sb; - spin_lock_bh(&curr_sb_lock); - if (curr_sb && (time == 0 || - time_before(jiffies - curr_sb->firstuse, time))) { - sb = curr_sb; - curr_sb = NULL; + spin_lock_bh(&ipvs->sync_buff_lock); + if (ipvs->sync_buff && (time == 0 || + time_before(jiffies - ipvs->sync_buff->firstuse, time))) { + sb = ipvs->sync_buff; + ipvs->sync_buff = NULL; } else sb = NULL; - spin_unlock_bh(&curr_sb_lock); + spin_unlock_bh(&ipvs->sync_buff_lock); return sb; } @@ -409,33 +388,37 @@ get_curr_sync_buff(unsigned long time) * Switch mode from sending version 0 or 1 * - must handle sync_buf */ -void ip_vs_sync_switch_mode(int mode) { +void ip_vs_sync_switch_mode(struct net *net, int mode) +{ + struct netns_ipvs *ipvs = net_ipvs(net); - if (!ip_vs_sync_state & IP_VS_STATE_MASTER) + if (!ipvs->sync_state & IP_VS_STATE_MASTER) return; - if (mode == sysctl_ip_vs_sync_ver || !curr_sb) + if (mode == sysctl_ip_vs_sync_ver || !ipvs->sync_buff) return; - spin_lock_bh(&curr_sb_lock); + spin_lock_bh(&ipvs->sync_buff_lock); /* Buffer empty ? 
then let buf_create do the job */ - if ( curr_sb->mesg->size <= sizeof(struct ip_vs_sync_mesg)) { - kfree(curr_sb); - curr_sb = NULL; + if (ipvs->sync_buff->mesg->size <= sizeof(struct ip_vs_sync_mesg)) { + kfree(ipvs->sync_buff); + ipvs->sync_buff = NULL; } else { - spin_lock_bh(&ip_vs_sync_lock); - if (ip_vs_sync_state & IP_VS_STATE_MASTER) - list_add_tail(&curr_sb->list, &ip_vs_sync_queue); + spin_lock_bh(&ipvs->sync_lock); + if (ipvs->sync_state & IP_VS_STATE_MASTER) + list_add_tail(&ipvs->sync_buff->list, + &ipvs->sync_queue); else - ip_vs_sync_buff_release(curr_sb); - spin_unlock_bh(&ip_vs_sync_lock); + ip_vs_sync_buff_release(ipvs->sync_buff); + spin_unlock_bh(&ipvs->sync_lock); } - spin_unlock_bh(&curr_sb_lock); + spin_unlock_bh(&ipvs->sync_buff_lock); } /* * Create a new sync buffer for Version 0 proto. */ -static inline struct ip_vs_sync_buff * ip_vs_sync_buff_create_v0(void) +static inline struct ip_vs_sync_buff * +ip_vs_sync_buff_create_v0(struct netns_ipvs *ipvs) { struct ip_vs_sync_buff *sb; struct ip_vs_sync_mesg_v0 *mesg; @@ -443,16 +426,17 @@ static inline struct ip_vs_sync_buff * ip_vs_sync_buff_create_v0(void) if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC))) return NULL; - if (!(sb->mesg=kmalloc(sync_send_mesg_maxlen, GFP_ATOMIC))) { + sb->mesg = kmalloc(ipvs->send_mesg_maxlen, GFP_ATOMIC); + if (!sb->mesg) { kfree(sb); return NULL; } mesg = (struct ip_vs_sync_mesg_v0 *)sb->mesg; mesg->nr_conns = 0; - mesg->syncid = ip_vs_master_syncid; - mesg->size = 4; - sb->head = (unsigned char *)mesg + 4; - sb->end = (unsigned char *)mesg + sync_send_mesg_maxlen; + mesg->syncid = ipvs->master_syncid; + mesg->size = sizeof(struct ip_vs_sync_mesg_v0); + sb->head = (unsigned char *)mesg + sizeof(struct ip_vs_sync_mesg_v0); + sb->end = (unsigned char *)mesg + ipvs->send_mesg_maxlen; sb->firstuse = jiffies; return sb; } @@ -461,8 +445,9 @@ static inline struct ip_vs_sync_buff * ip_vs_sync_buff_create_v0(void) * Version 0 , could be switched in by 
sys_ctl. * Add an ip_vs_conn information into the current sync_buff. */ -void ip_vs_sync_conn_v0(struct ip_vs_conn *cp) +void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp) { + struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_sync_mesg_v0 *m; struct ip_vs_sync_conn_v0 *s; int len; @@ -473,10 +458,12 @@ void ip_vs_sync_conn_v0(struct ip_vs_conn *cp) if (cp->flags & IP_VS_CONN_F_ONE_PACKET) return; - spin_lock(&curr_sb_lock); - if (!curr_sb) { - if (!(curr_sb=ip_vs_sync_buff_create_v0())) { - spin_unlock(&curr_sb_lock); + spin_lock(&ipvs->sync_buff_lock); + if (!ipvs->sync_buff) { + ipvs->sync_buff = + ip_vs_sync_buff_create_v0(ipvs); + if (!ipvs->sync_buff) { + spin_unlock(&ipvs->sync_buff_lock); pr_err("ip_vs_sync_buff_create failed.\n"); return; } @@ -484,8 +471,8 @@ void ip_vs_sync_conn_v0(struct ip_vs_conn *cp) len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? FULL_CONN_SIZE : SIMPLE_CONN_SIZE; - m = (struct ip_vs_sync_mesg_v0 *)curr_sb->mesg; - s = (struct ip_vs_sync_conn_v0 *)curr_sb->head; + m = (struct ip_vs_sync_mesg_v0 *)ipvs->sync_buff->mesg; + s = (struct ip_vs_sync_conn_v0 *)ipvs->sync_buff->head; /* copy members */ s->reserved = 0; @@ -506,18 +493,18 @@ void ip_vs_sync_conn_v0(struct ip_vs_conn *cp) m->nr_conns++; m->size += len; - curr_sb->head += len; + ipvs->sync_buff->head += len; /* check if there is a space for next one */ - if (curr_sb->head + FULL_CONN_SIZE > curr_sb->end) { - sb_queue_tail(curr_sb); - curr_sb = NULL; + if (ipvs->sync_buff->head + FULL_CONN_SIZE > ipvs->sync_buff->end) { + sb_queue_tail(ipvs); + ipvs->sync_buff = NULL; } - spin_unlock(&curr_sb_lock); + spin_unlock(&ipvs->sync_buff_lock); /* synchronize its controller if it has */ if (cp->control) - ip_vs_sync_conn(cp->control); + ip_vs_sync_conn(net, cp->control); } /* @@ -525,8 +512,9 @@ void ip_vs_sync_conn_v0(struct ip_vs_conn *cp) * Called by ip_vs_in. 
* Sending Version 1 messages */ -void ip_vs_sync_conn(struct ip_vs_conn *cp) +void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp) { + struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_sync_mesg *m; union ip_vs_sync_conn *s; __u8 *p; @@ -534,7 +522,7 @@ void ip_vs_sync_conn(struct ip_vs_conn *cp) /* Handle old version of the protocol */ if (sysctl_ip_vs_sync_ver == 0) { - ip_vs_sync_conn_v0(cp); + ip_vs_sync_conn_v0(net, cp); return; } /* Do not sync ONE PACKET */ @@ -551,7 +539,7 @@ sloop: pe_name_len = strnlen(cp->pe->name, IP_VS_PENAME_MAXLEN); } - spin_lock(&curr_sb_lock); + spin_lock(&ipvs->sync_buff_lock); #ifdef CONFIG_IP_VS_IPV6 if (cp->af == AF_INET6) @@ -570,26 +558,27 @@ sloop: /* check if there is a space for this one */ pad = 0; - if (curr_sb) { - pad = (4 - (size_t)curr_sb->head) & 3; - if (curr_sb->head + len + pad > curr_sb->end) { - sb_queue_tail(curr_sb); - curr_sb = NULL; + if (ipvs->sync_buff) { + pad = (4 - (size_t)ipvs->sync_buff->head) & 3; + if (ipvs->sync_buff->head + len + pad > ipvs->sync_buff->end) { + sb_queue_tail(ipvs); + ipvs->sync_buff = NULL; pad = 0; } } - if (!curr_sb) { - if (!(curr_sb=ip_vs_sync_buff_create())) { - spin_unlock(&curr_sb_lock); + if (!ipvs->sync_buff) { + ipvs->sync_buff = ip_vs_sync_buff_create(ipvs); + if (!ipvs->sync_buff) { + spin_unlock(&ipvs->sync_buff_lock); pr_err("ip_vs_sync_buff_create failed.\n"); return; } } - m = curr_sb->mesg; - p = curr_sb->head; - curr_sb->head += pad + len; + m = ipvs->sync_buff->mesg; + p = ipvs->sync_buff->head; + ipvs->sync_buff->head += pad + len; m->size += pad + len; /* Add ev. padding from prev. 
sync_conn */ while (pad--) @@ -647,7 +636,7 @@ sloop: } } - spin_unlock(&curr_sb_lock); + spin_unlock(&ipvs->sync_buff_lock); control: /* synchronize its controller if it has */ @@ -699,7 +688,8 @@ ip_vs_conn_fill_param_sync(int af, union ip_vs_sync_conn *sc, buff[pe_name_len]=0; p->pe = __ip_vs_pe_getbyname(buff); if (!p->pe) { - IP_VS_DBG(3, "BACKUP, no %s engine found/loaded\n", buff); + IP_VS_DBG(3, "BACKUP, no %s engine found/loaded\n", + buff); return 1; } } else { @@ -748,7 +738,7 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param, * If it is not found the connection will remain unbound * but still handled. */ - dest = ip_vs_find_dest(&init_net, type, daddr, dport, param->vaddr, + dest = ip_vs_find_dest(net, type, daddr, dport, param->vaddr, param->vport, protocol, fwmark); /* Set the approprite ativity flag */ @@ -1089,6 +1079,7 @@ out: static void ip_vs_process_message(struct net *net, __u8 *buffer, const size_t buflen) { + struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_sync_mesg *m2 = (struct ip_vs_sync_mesg *)buffer; __u8 *p, *msg_end; int i, nr_conns; @@ -1105,7 +1096,7 @@ static void ip_vs_process_message(struct net *net, __u8 *buffer, return; } /* SyncID sanity check */ - if (ip_vs_backup_syncid != 0 && m2->syncid != ip_vs_backup_syncid) { + if (ipvs->backup_syncid != 0 && m2->syncid != ipvs->backup_syncid) { IP_VS_DBG(7, "BACKUP, Ignoring syncid = %d\n", m2->syncid); return; } @@ -1190,8 +1181,10 @@ static int set_mcast_if(struct sock *sk, char *ifname) { struct net_device *dev; struct inet_sock *inet = inet_sk(sk); + struct net *net = sock_net(sk); - if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL) + dev = __dev_get_by_name(net, ifname); + if (!dev) return -ENODEV; if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if) @@ -1210,30 +1203,33 @@ static int set_mcast_if(struct sock *sk, char *ifname) * Set the maximum length of sync message according to the * specified interface's MTU. 
*/ -static int set_sync_mesg_maxlen(int sync_state) +static int set_sync_mesg_maxlen(struct net *net, int sync_state) { + struct netns_ipvs *ipvs = net_ipvs(net); struct net_device *dev; int num; if (sync_state == IP_VS_STATE_MASTER) { - if ((dev = __dev_get_by_name(&init_net, ip_vs_master_mcast_ifn)) == NULL) + dev = __dev_get_by_name(net, ipvs->master_mcast_ifn); + if (!dev) return -ENODEV; num = (dev->mtu - sizeof(struct iphdr) - sizeof(struct udphdr) - SYNC_MESG_HEADER_LEN - 20) / SIMPLE_CONN_SIZE; - sync_send_mesg_maxlen = SYNC_MESG_HEADER_LEN + + ipvs->send_mesg_maxlen = SYNC_MESG_HEADER_LEN + SIMPLE_CONN_SIZE * min(num, MAX_CONNS_PER_SYNCBUFF); IP_VS_DBG(7, "setting the maximum length of sync sending " - "message %d.\n", sync_send_mesg_maxlen); + "message %d.\n", ipvs->send_mesg_maxlen); } else if (sync_state == IP_VS_STATE_BACKUP) { - if ((dev = __dev_get_by_name(&init_net, ip_vs_backup_mcast_ifn)) == NULL) + dev = __dev_get_by_name(net, ipvs->backup_mcast_ifn); + if (!dev) return -ENODEV; - sync_recv_mesg_maxlen = dev->mtu - + ipvs->recv_mesg_maxlen = dev->mtu - sizeof(struct iphdr) - sizeof(struct udphdr); IP_VS_DBG(7, "setting the maximum length of sync receiving " - "message %d.\n", sync_recv_mesg_maxlen); + "message %d.\n", ipvs->recv_mesg_maxlen); } return 0; @@ -1248,6 +1244,7 @@ static int set_sync_mesg_maxlen(int sync_state) static int join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname) { + struct net *net = sock_net(sk); struct ip_mreqn mreq; struct net_device *dev; int ret; @@ -1255,7 +1252,8 @@ join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname) memset(&mreq, 0, sizeof(mreq)); memcpy(&mreq.imr_multiaddr, addr, sizeof(struct in_addr)); - if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL) + dev = __dev_get_by_name(net, ifname); + if (!dev) return -ENODEV; if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if) return -EINVAL; @@ -1272,11 +1270,13 @@ join_mcast_group(struct sock *sk, struct in_addr 
*addr, char *ifname) static int bind_mcastif_addr(struct socket *sock, char *ifname) { + struct net *net = sock_net(sock->sk); struct net_device *dev; __be32 addr; struct sockaddr_in sin; - if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL) + dev = __dev_get_by_name(net, ifname); + if (!dev) return -ENODEV; addr = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE); @@ -1298,8 +1298,9 @@ static int bind_mcastif_addr(struct socket *sock, char *ifname) /* * Set up sending multicast socket over UDP */ -static struct socket * make_send_sock(void) +static struct socket *make_send_sock(struct net *net) { + struct netns_ipvs *ipvs = net_ipvs(net); struct socket *sock; int result; @@ -1310,7 +1311,7 @@ static struct socket * make_send_sock(void) return ERR_PTR(result); } - result = set_mcast_if(sock->sk, ip_vs_master_mcast_ifn); + result = set_mcast_if(sock->sk, ipvs->master_mcast_ifn); if (result < 0) { pr_err("Error setting outbound mcast interface\n"); goto error; @@ -1319,7 +1320,7 @@ static struct socket * make_send_sock(void) set_mcast_loop(sock->sk, 0); set_mcast_ttl(sock->sk, 1); - result = bind_mcastif_addr(sock, ip_vs_master_mcast_ifn); + result = bind_mcastif_addr(sock, ipvs->master_mcast_ifn); if (result < 0) { pr_err("Error binding address of the mcast interface\n"); goto error; @@ -1343,8 +1344,9 @@ static struct socket * make_send_sock(void) /* * Set up receiving multicast socket over UDP */ -static struct socket * make_receive_sock(void) +static struct socket *make_receive_sock(struct net *net) { + struct netns_ipvs *ipvs = net_ipvs(net); struct socket *sock; int result; @@ -1368,7 +1370,7 @@ static struct socket * make_receive_sock(void) /* join the multicast group */ result = join_mcast_group(sock->sk, (struct in_addr *) &mcast_addr.sin_addr, - ip_vs_backup_mcast_ifn); + ipvs->backup_mcast_ifn); if (result < 0) { pr_err("Error joining to the multicast group\n"); goto error; @@ -1439,20 +1441,21 @@ ip_vs_receive(struct socket *sock, char *buffer, const 
size_t buflen) static int sync_thread_master(void *data) { struct ip_vs_sync_thread_data *tinfo = data; + struct netns_ipvs *ipvs = net_ipvs(tinfo->net); struct ip_vs_sync_buff *sb; pr_info("sync thread started: state = MASTER, mcast_ifn = %s, " "syncid = %d\n", - ip_vs_master_mcast_ifn, ip_vs_master_syncid); + ipvs->master_mcast_ifn, ipvs->master_syncid); while (!kthread_should_stop()) { - while ((sb = sb_dequeue())) { + while ((sb = sb_dequeue(ipvs))) { ip_vs_send_sync_msg(tinfo->sock, sb->mesg); ip_vs_sync_buff_release(sb); } - /* check if entries stay in curr_sb for 2 seconds */ - sb = get_curr_sync_buff(2 * HZ); + /* check if entries stay in ipvs->sync_buff for 2 seconds */ + sb = get_curr_sync_buff(ipvs, 2 * HZ); if (sb) { ip_vs_send_sync_msg(tinfo->sock, sb->mesg); ip_vs_sync_buff_release(sb); @@ -1462,14 +1465,13 @@ static int sync_thread_master(void *data) } /* clean up the sync_buff queue */ - while ((sb=sb_dequeue())) { + while ((sb = sb_dequeue(ipvs))) ip_vs_sync_buff_release(sb); - } /* clean up the current sync_buff */ - if ((sb = get_curr_sync_buff(0))) { + sb = get_curr_sync_buff(ipvs, 0); + if (sb) ip_vs_sync_buff_release(sb); - } /* release the sending multicast socket */ sock_release(tinfo->sock); @@ -1482,11 +1484,12 @@ static int sync_thread_master(void *data) static int sync_thread_backup(void *data) { struct ip_vs_sync_thread_data *tinfo = data; + struct netns_ipvs *ipvs = net_ipvs(tinfo->net); int len; pr_info("sync thread started: state = BACKUP, mcast_ifn = %s, " "syncid = %d\n", - ip_vs_backup_mcast_ifn, ip_vs_backup_syncid); + ipvs->backup_mcast_ifn, ipvs->backup_syncid); while (!kthread_should_stop()) { wait_event_interruptible(*sk_sleep(tinfo->sock->sk), @@ -1496,7 +1499,7 @@ static int sync_thread_backup(void *data) /* do we have data now? 
*/ while (!skb_queue_empty(&(tinfo->sock->sk->sk_receive_queue))) { len = ip_vs_receive(tinfo->sock, tinfo->buf, - sync_recv_mesg_maxlen); + ipvs->recv_mesg_maxlen); if (len <= 0) { pr_err("receiving message error\n"); break; @@ -1505,7 +1508,7 @@ static int sync_thread_backup(void *data) /* disable bottom half, because it accesses the data shared by softirq while getting/creating conns */ local_bh_disable(); - ip_vs_process_message(&init_net, tinfo->buf, len); + ip_vs_process_message(tinfo->net, tinfo->buf, len); local_bh_enable(); } } @@ -1519,11 +1522,12 @@ static int sync_thread_backup(void *data) } -int start_sync_thread(int state, char *mcast_ifn, __u8 syncid) +int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid) { struct ip_vs_sync_thread_data *tinfo; struct task_struct **realtask, *task; struct socket *sock; + struct netns_ipvs *ipvs = net_ipvs(net); char *name, *buf = NULL; int (*threadfn)(void *data); int result = -ENOMEM; @@ -1533,27 +1537,27 @@ int start_sync_thread(int state, char *mcast_ifn, __u8 syncid) sizeof(struct ip_vs_sync_conn_v0)); if (state == IP_VS_STATE_MASTER) { - if (sync_master_thread) + if (ipvs->master_thread) return -EEXIST; - strlcpy(ip_vs_master_mcast_ifn, mcast_ifn, - sizeof(ip_vs_master_mcast_ifn)); - ip_vs_master_syncid = syncid; - realtask = &sync_master_thread; - name = "ipvs_syncmaster"; + strlcpy(ipvs->master_mcast_ifn, mcast_ifn, + sizeof(ipvs->master_mcast_ifn)); + ipvs->master_syncid = syncid; + realtask = &ipvs->master_thread; + name = "ipvs_master:%d"; threadfn = sync_thread_master; - sock = make_send_sock(); + sock = make_send_sock(net); } else if (state == IP_VS_STATE_BACKUP) { - if (sync_backup_thread) + if (ipvs->backup_thread) return -EEXIST; - strlcpy(ip_vs_backup_mcast_ifn, mcast_ifn, - sizeof(ip_vs_backup_mcast_ifn)); - ip_vs_backup_syncid = syncid; - realtask = &sync_backup_thread; - name = "ipvs_syncbackup"; + strlcpy(ipvs->backup_mcast_ifn, mcast_ifn, + 
sizeof(ipvs->backup_mcast_ifn)); + ipvs->backup_syncid = syncid; + realtask = &ipvs->backup_thread; + name = "ipvs_backup:%d"; threadfn = sync_thread_backup; - sock = make_receive_sock(); + sock = make_receive_sock(net); } else { return -EINVAL; } @@ -1563,9 +1567,9 @@ int start_sync_thread(int state, char *mcast_ifn, __u8 syncid) goto out; } - set_sync_mesg_maxlen(state); + set_sync_mesg_maxlen(net, state); if (state == IP_VS_STATE_BACKUP) { - buf = kmalloc(sync_recv_mesg_maxlen, GFP_KERNEL); + buf = kmalloc(ipvs->recv_mesg_maxlen, GFP_KERNEL); if (!buf) goto outsocket; } @@ -1574,10 +1578,11 @@ int start_sync_thread(int state, char *mcast_ifn, __u8 syncid) if (!tinfo) goto outbuf; + tinfo->net = net; tinfo->sock = sock; tinfo->buf = buf; - task = kthread_run(threadfn, tinfo, name); + task = kthread_run(threadfn, tinfo, name, ipvs->gen); if (IS_ERR(task)) { result = PTR_ERR(task); goto outtinfo; @@ -1585,7 +1590,7 @@ int start_sync_thread(int state, char *mcast_ifn, __u8 syncid) /* mark as active */ *realtask = task; - ip_vs_sync_state |= state; + ipvs->sync_state |= state; /* increase the module use count */ ip_vs_use_count_inc(); @@ -1603,16 +1608,18 @@ out: } -int stop_sync_thread(int state) +int stop_sync_thread(struct net *net, int state) { + struct netns_ipvs *ipvs = net_ipvs(net); + IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current)); if (state == IP_VS_STATE_MASTER) { - if (!sync_master_thread) + if (!ipvs->master_thread) return -ESRCH; pr_info("stopping master sync thread %d ...\n", - task_pid_nr(sync_master_thread)); + task_pid_nr(ipvs->master_thread)); /* * The lock synchronizes with sb_queue_tail(), so that we don't @@ -1620,21 +1627,21 @@ int stop_sync_thread(int state) * progress of stopping the master sync daemon. 
*/ - spin_lock_bh(&ip_vs_sync_lock); - ip_vs_sync_state &= ~IP_VS_STATE_MASTER; - spin_unlock_bh(&ip_vs_sync_lock); - kthread_stop(sync_master_thread); - sync_master_thread = NULL; + spin_lock_bh(&ipvs->sync_lock); + ipvs->sync_state &= ~IP_VS_STATE_MASTER; + spin_unlock_bh(&ipvs->sync_lock); + kthread_stop(ipvs->master_thread); + ipvs->master_thread = NULL; } else if (state == IP_VS_STATE_BACKUP) { - if (!sync_backup_thread) + if (!ipvs->backup_thread) return -ESRCH; pr_info("stopping backup sync thread %d ...\n", - task_pid_nr(sync_backup_thread)); + task_pid_nr(ipvs->backup_thread)); - ip_vs_sync_state &= ~IP_VS_STATE_BACKUP; - kthread_stop(sync_backup_thread); - sync_backup_thread = NULL; + ipvs->sync_state &= ~IP_VS_STATE_BACKUP; + kthread_stop(ipvs->backup_thread); + ipvs->backup_thread = NULL; } else { return -EINVAL; } @@ -1650,12 +1657,29 @@ int stop_sync_thread(int state) */ static int __net_init __ip_vs_sync_init(struct net *net) { + struct netns_ipvs *ipvs = net_ipvs(net); + + if (!net_eq(net, &init_net)) /* netns not enabled yet */ + return -EPERM; + + INIT_LIST_HEAD(&ipvs->sync_queue); + spin_lock_init(&ipvs->sync_lock); + spin_lock_init(&ipvs->sync_buff_lock); + + ipvs->sync_mcast_addr.sin_family = AF_INET; + ipvs->sync_mcast_addr.sin_port = cpu_to_be16(IP_VS_SYNC_PORT); + ipvs->sync_mcast_addr.sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP); return 0; } static void __ip_vs_sync_cleanup(struct net *net) { + if (!net_eq(net, &init_net)) /* netns not enabled yet */ + return; + stop_sync_thread(net, IP_VS_STATE_MASTER); + stop_sync_thread(net, IP_VS_STATE_BACKUP); } + static struct pernet_operations ipvs_sync_ops = { .init = __ip_vs_sync_init, .exit = __ip_vs_sync_cleanup, -- cgit v1.2.3-59-g8ed1b From b17fc9963f837ef1acfe36e193108fb16ed58647 Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Mon, 3 Jan 2011 14:44:56 +0100 Subject: IPVS: netns, ip_vs_stats and its procfs The statistic counter locks for every packet are now removed, and that statistic 
is now per CPU, i.e. no locks needed. However summing is made in ip_vs_est into ip_vs_stats struct which is moved to ipvs struct. procfs, ip_vs_stats now has a "per cpu" count and a grand total. A new function seq_file_single_net() in ip_vs.h created for handling of single_open_net() since it does not place net ptr in a struct, like others. /var/lib/lxc # cat /proc/net/ip_vs_stats_percpu Total Incoming Outgoing Incoming Outgoing CPU Conns Packets Packets Bytes Bytes 0 0 3 1 9D 34 1 0 1 2 49 70 2 0 1 2 34 76 3 1 2 2 70 74 ~ 1 7 7 18A 18E Conns/s Pkts/s Pkts/s Bytes/s Bytes/s 0 0 0 0 0 *v3 ip_vs_stats remains as before, instead ip_vs_stats_percpu is added. u64 seq lock added *v4 Bug correction inbytes and outbytes as own vars. per_cpu counter for all stats now as suggested by Julian. [horms@verge.net.au: removed whitespace-change-only hunk] Signed-off-by: Hans Schillstrom Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- include/net/ip_vs.h | 51 ++++++++++++++- include/net/netns/ip_vs.h | 4 ++ net/netfilter/ipvs/ip_vs_core.c | 89 ++++++++++++++------------ net/netfilter/ipvs/ip_vs_ctl.c | 134 ++++++++++++++++++++++++++++++++++------ net/netfilter/ipvs/ip_vs_est.c | 39 ++++++++++++ 5 files changed, 256 insertions(+), 61 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 4265b5e00c94..605d5db81a39 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -90,6 +90,18 @@ static inline struct net *skb_sknet(struct sk_buff *skb) return &init_net; #endif } +/* + * This one needed for single_open_net since net is stored directly in + * private not as a struct i.e. seq_file_net cant be used.
+ */ +static inline struct net *seq_file_single_net(struct seq_file *seq) +{ +#ifdef CONFIG_NET_NS + return (struct net *)seq->private; +#else + return &init_net; +#endif +} /* Connections' size value needed by ip_vs_ctl.c */ extern int ip_vs_conn_tab_size; @@ -320,6 +332,23 @@ struct ip_vs_seq { before last resized pkt */ }; +/* + * counters per cpu + */ +struct ip_vs_counters { + __u32 conns; /* connections scheduled */ + __u32 inpkts; /* incoming packets */ + __u32 outpkts; /* outgoing packets */ + __u64 inbytes; /* incoming bytes */ + __u64 outbytes; /* outgoing bytes */ +}; +/* + * Stats per cpu + */ +struct ip_vs_cpu_stats { + struct ip_vs_counters ustats; + struct u64_stats_sync syncp; +}; /* * IPVS statistics objects @@ -341,12 +370,28 @@ struct ip_vs_estimator { }; struct ip_vs_stats { - struct ip_vs_stats_user ustats; /* statistics */ + struct ip_vs_stats_user ustats; /* statistics */ struct ip_vs_estimator est; /* estimator */ - - spinlock_t lock; /* spin lock */ + struct ip_vs_cpu_stats *cpustats; /* per cpu counters */ + spinlock_t lock; /* spin lock */ }; +/* + * Helper Macros for per cpu + * ipvs->tot_stats->ustats.count + */ +#define IPVS_STAT_INC(ipvs, count) \ + __this_cpu_inc((ipvs)->ustats->count) + +#define IPVS_STAT_ADD(ipvs, count, value) \ + do {\ + write_seqcount_begin(per_cpu_ptr((ipvs)->ustats_seq, \ + raw_smp_processor_id())); \ + __this_cpu_add((ipvs)->ustats->count, value); \ + write_seqcount_end(per_cpu_ptr((ipvs)->ustats_seq, \ + raw_smp_processor_id())); \ + } while (0) + struct dst_entry; struct iphdr; struct ip_vs_conn; diff --git a/include/net/netns/ip_vs.h b/include/net/netns/ip_vs.h index aba78f3c8341..bd1dad872178 100644 --- a/include/net/netns/ip_vs.h +++ b/include/net/netns/ip_vs.h @@ -61,6 +61,10 @@ struct netns_ipvs { struct list_head sctp_apps[SCTP_APP_TAB_SIZE]; spinlock_t sctp_app_lock; #endif + /* ip_vs_ctl */ + struct ip_vs_stats *tot_stats; /* Statistics & est. 
*/ + struct ip_vs_cpu_stats __percpu *cpustats; /* Stats per cpu */ + seqcount_t *ustats_seq; /* u64 read retry */ /* ip_vs_lblc */ int sysctl_lblc_expiration; diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 5531d569aa5e..7e6a2a046bf5 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -115,21 +115,28 @@ static inline void ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb) { struct ip_vs_dest *dest = cp->dest; + struct netns_ipvs *ipvs = net_ipvs(skb_net(skb)); + if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) { - spin_lock(&dest->stats.lock); - dest->stats.ustats.inpkts++; - dest->stats.ustats.inbytes += skb->len; - spin_unlock(&dest->stats.lock); - - spin_lock(&dest->svc->stats.lock); - dest->svc->stats.ustats.inpkts++; - dest->svc->stats.ustats.inbytes += skb->len; - spin_unlock(&dest->svc->stats.lock); - - spin_lock(&ip_vs_stats.lock); - ip_vs_stats.ustats.inpkts++; - ip_vs_stats.ustats.inbytes += skb->len; - spin_unlock(&ip_vs_stats.lock); + struct ip_vs_cpu_stats *s; + + s = this_cpu_ptr(dest->stats.cpustats); + s->ustats.inpkts++; + u64_stats_update_begin(&s->syncp); + s->ustats.inbytes += skb->len; + u64_stats_update_end(&s->syncp); + + s = this_cpu_ptr(dest->svc->stats.cpustats); + s->ustats.inpkts++; + u64_stats_update_begin(&s->syncp); + s->ustats.inbytes += skb->len; + u64_stats_update_end(&s->syncp); + + s = this_cpu_ptr(ipvs->cpustats); + s->ustats.inpkts++; + u64_stats_update_begin(&s->syncp); + s->ustats.inbytes += skb->len; + u64_stats_update_end(&s->syncp); } } @@ -138,21 +145,28 @@ static inline void ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb) { struct ip_vs_dest *dest = cp->dest; + struct netns_ipvs *ipvs = net_ipvs(skb_net(skb)); + if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) { - spin_lock(&dest->stats.lock); - dest->stats.ustats.outpkts++; - dest->stats.ustats.outbytes += skb->len; - spin_unlock(&dest->stats.lock); - - 
spin_lock(&dest->svc->stats.lock); - dest->svc->stats.ustats.outpkts++; - dest->svc->stats.ustats.outbytes += skb->len; - spin_unlock(&dest->svc->stats.lock); - - spin_lock(&ip_vs_stats.lock); - ip_vs_stats.ustats.outpkts++; - ip_vs_stats.ustats.outbytes += skb->len; - spin_unlock(&ip_vs_stats.lock); + struct ip_vs_cpu_stats *s; + + s = this_cpu_ptr(dest->stats.cpustats); + s->ustats.outpkts++; + u64_stats_update_begin(&s->syncp); + s->ustats.outbytes += skb->len; + u64_stats_update_end(&s->syncp); + + s = this_cpu_ptr(dest->svc->stats.cpustats); + s->ustats.outpkts++; + u64_stats_update_begin(&s->syncp); + s->ustats.outbytes += skb->len; + u64_stats_update_end(&s->syncp); + + s = this_cpu_ptr(ipvs->cpustats); + s->ustats.outpkts++; + u64_stats_update_begin(&s->syncp); + s->ustats.outbytes += skb->len; + u64_stats_update_end(&s->syncp); } } @@ -160,17 +174,17 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb) static inline void ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc) { - spin_lock(&cp->dest->stats.lock); - cp->dest->stats.ustats.conns++; - spin_unlock(&cp->dest->stats.lock); + struct netns_ipvs *ipvs = net_ipvs(svc->net); + struct ip_vs_cpu_stats *s; - spin_lock(&svc->stats.lock); - svc->stats.ustats.conns++; - spin_unlock(&svc->stats.lock); + s = this_cpu_ptr(cp->dest->stats.cpustats); + s->ustats.conns++; - spin_lock(&ip_vs_stats.lock); - ip_vs_stats.ustats.conns++; - spin_unlock(&ip_vs_stats.lock); + s = this_cpu_ptr(svc->stats.cpustats); + s->ustats.conns++; + + s = this_cpu_ptr(ipvs->cpustats); + s->ustats.conns++; } @@ -1841,7 +1855,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { }, #endif }; - /* * Initialize IP Virtual Server netns mem. 
*/ diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 03f86312b4bb..cbd58c60e1bf 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -257,8 +257,7 @@ static DECLARE_DELAYED_WORK(defense_work, defense_work_handler); static void defense_work_handler(struct work_struct *work) { - struct net *net = &init_net; - struct netns_ipvs *ipvs = net_ipvs(net); + struct netns_ipvs *ipvs = net_ipvs(&init_net); update_defense_level(ipvs); if (atomic_read(&ip_vs_dropentry)) @@ -519,6 +518,7 @@ __ip_vs_unbind_svc(struct ip_vs_dest *dest) svc->fwmark, IP_VS_DBG_ADDR(svc->af, &svc->addr), ntohs(svc->port), atomic_read(&svc->usecnt)); + free_percpu(svc->stats.cpustats); kfree(svc); } } @@ -722,6 +722,7 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, list_del(&dest->n_list); ip_vs_dst_reset(dest); __ip_vs_unbind_svc(dest); + free_percpu(dest->stats.cpustats); kfree(dest); } } @@ -747,6 +748,7 @@ static void ip_vs_trash_cleanup(void) list_del(&dest->n_list); ip_vs_dst_reset(dest); __ip_vs_unbind_svc(dest); + free_percpu(dest->stats.cpustats); kfree(dest); } } @@ -868,6 +870,11 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest, pr_err("%s(): no memory.\n", __func__); return -ENOMEM; } + dest->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats); + if (!dest->stats.cpustats) { + pr_err("%s() alloc_percpu failed\n", __func__); + goto err_alloc; + } dest->af = svc->af; dest->protocol = svc->protocol; @@ -891,6 +898,10 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest, LeaveFunction(2); return 0; + +err_alloc: + kfree(dest); + return -ENOMEM; } @@ -1037,6 +1048,7 @@ static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest) and only one user context can update virtual service at a time, so the operation here is OK */ atomic_dec(&dest->svc->refcnt); + free_percpu(dest->stats.cpustats); kfree(dest); } else { IP_VS_DBG_BUF(3, 
"Moving dest %s:%u into trash, " @@ -1163,6 +1175,11 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, ret = -ENOMEM; goto out_err; } + svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats); + if (!svc->stats.cpustats) { + pr_err("%s() alloc_percpu failed\n", __func__); + goto out_err; + } /* I'm the first user of the service */ atomic_set(&svc->usecnt, 0); @@ -1212,6 +1229,7 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, *svc_p = svc; return 0; + out_err: if (svc != NULL) { ip_vs_unbind_scheduler(svc); @@ -1220,6 +1238,8 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, ip_vs_app_inc_put(svc->inc); local_bh_enable(); } + if (svc->stats.cpustats) + free_percpu(svc->stats.cpustats); kfree(svc); } ip_vs_scheduler_put(sched); @@ -1388,6 +1408,7 @@ static void __ip_vs_del_service(struct ip_vs_service *svc) svc->fwmark, IP_VS_DBG_ADDR(svc->af, &svc->addr), ntohs(svc->port), atomic_read(&svc->usecnt)); + free_percpu(svc->stats.cpustats); kfree(svc); } @@ -1499,7 +1520,7 @@ static int ip_vs_zero_all(struct net *net) } } - ip_vs_zero_stats(&ip_vs_stats); + ip_vs_zero_stats(net_ipvs(net)->tot_stats); return 0; } @@ -1989,13 +2010,11 @@ static const struct file_operations ip_vs_info_fops = { #endif -struct ip_vs_stats ip_vs_stats = { - .lock = __SPIN_LOCK_UNLOCKED(ip_vs_stats.lock), -}; - #ifdef CONFIG_PROC_FS static int ip_vs_stats_show(struct seq_file *seq, void *v) { + struct net *net = seq_file_single_net(seq); + struct ip_vs_stats *tot_stats = net_ipvs(net)->tot_stats; /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ seq_puts(seq, @@ -2003,22 +2022,22 @@ static int ip_vs_stats_show(struct seq_file *seq, void *v) seq_printf(seq, " Conns Packets Packets Bytes Bytes\n"); - spin_lock_bh(&ip_vs_stats.lock); - seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.ustats.conns, - ip_vs_stats.ustats.inpkts, ip_vs_stats.ustats.outpkts, - (unsigned long long) 
ip_vs_stats.ustats.inbytes, - (unsigned long long) ip_vs_stats.ustats.outbytes); + spin_lock_bh(&tot_stats->lock); + seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", tot_stats->ustats.conns, + tot_stats->ustats.inpkts, tot_stats->ustats.outpkts, + (unsigned long long) tot_stats->ustats.inbytes, + (unsigned long long) tot_stats->ustats.outbytes); /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ seq_puts(seq, " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n"); seq_printf(seq,"%8X %8X %8X %16X %16X\n", - ip_vs_stats.ustats.cps, - ip_vs_stats.ustats.inpps, - ip_vs_stats.ustats.outpps, - ip_vs_stats.ustats.inbps, - ip_vs_stats.ustats.outbps); - spin_unlock_bh(&ip_vs_stats.lock); + tot_stats->ustats.cps, + tot_stats->ustats.inpps, + tot_stats->ustats.outpps, + tot_stats->ustats.inbps, + tot_stats->ustats.outbps); + spin_unlock_bh(&tot_stats->lock); return 0; } @@ -2036,6 +2055,59 @@ static const struct file_operations ip_vs_stats_fops = { .release = single_release, }; +static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v) +{ + struct net *net = seq_file_single_net(seq); + struct ip_vs_stats *tot_stats = net_ipvs(net)->tot_stats; + int i; + +/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ + seq_puts(seq, + " Total Incoming Outgoing Incoming Outgoing\n"); + seq_printf(seq, + "CPU Conns Packets Packets Bytes Bytes\n"); + + for_each_possible_cpu(i) { + struct ip_vs_cpu_stats *u = per_cpu_ptr(net->ipvs->cpustats, i); + seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n", + i, u->ustats.conns, u->ustats.inpkts, + u->ustats.outpkts, (__u64)u->ustats.inbytes, + (__u64)u->ustats.outbytes); + } + + spin_lock_bh(&tot_stats->lock); + seq_printf(seq, " ~ %8X %8X %8X %16LX %16LX\n\n", + tot_stats->ustats.conns, tot_stats->ustats.inpkts, + tot_stats->ustats.outpkts, + (unsigned long long) tot_stats->ustats.inbytes, + (unsigned long long) tot_stats->ustats.outbytes); + +/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ + seq_puts(seq, 
+ " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n"); + seq_printf(seq, " %8X %8X %8X %16X %16X\n", + tot_stats->ustats.cps, + tot_stats->ustats.inpps, + tot_stats->ustats.outpps, + tot_stats->ustats.inbps, + tot_stats->ustats.outbps); + spin_unlock_bh(&tot_stats->lock); + + return 0; +} + +static int ip_vs_stats_percpu_seq_open(struct inode *inode, struct file *file) +{ + return single_open_net(inode, file, ip_vs_stats_percpu_show); +} + +static const struct file_operations ip_vs_stats_percpu_fops = { + .owner = THIS_MODULE, + .open = ip_vs_stats_percpu_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; #endif /* @@ -3461,32 +3533,54 @@ int __net_init __ip_vs_control_init(struct net *net) if (!net_eq(net, &init_net)) /* netns not enabled yet */ return -EPERM; + /* procfs stats */ + ipvs->tot_stats = kzalloc(sizeof(struct ip_vs_stats), GFP_KERNEL); + if (ipvs->tot_stats == NULL) { + pr_err("%s(): no memory.\n", __func__); + return -ENOMEM; + } + ipvs->cpustats = alloc_percpu(struct ip_vs_cpu_stats); + if (!ipvs->cpustats) { + pr_err("%s() alloc_percpu failed\n", __func__); + goto err_alloc; + } + spin_lock_init(&ipvs->tot_stats->lock); for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++) INIT_LIST_HEAD(&ipvs->rs_table[idx]); proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops); proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops); + proc_net_fops_create(net, "ip_vs_stats_percpu", 0, + &ip_vs_stats_percpu_fops); sysctl_header = register_net_sysctl_table(net, net_vs_ctl_path, vs_vars); if (sysctl_header == NULL) goto err_reg; - ip_vs_new_estimator(net, &ip_vs_stats); + ip_vs_new_estimator(net, ipvs->tot_stats); return 0; err_reg: + free_percpu(ipvs->cpustats); +err_alloc: + kfree(ipvs->tot_stats); return -ENOMEM; } static void __net_exit __ip_vs_control_cleanup(struct net *net) { + struct netns_ipvs *ipvs = net_ipvs(net); + if (!net_eq(net, &init_net)) /* netns not enabled yet */ return; - ip_vs_kill_estimator(net, &ip_vs_stats); + 
ip_vs_kill_estimator(net, ipvs->tot_stats); unregister_net_sysctl_table(sysctl_header); + proc_net_remove(net, "ip_vs_stats_percpu"); proc_net_remove(net, "ip_vs_stats"); proc_net_remove(net, "ip_vs"); + free_percpu(ipvs->cpustats); + kfree(ipvs->tot_stats); } static struct pernet_operations ipvs_control_ops = { diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c index 07d839bef537..d13616b138cd 100644 --- a/net/netfilter/ipvs/ip_vs_est.c +++ b/net/netfilter/ipvs/ip_vs_est.c @@ -52,6 +52,43 @@ */ +/* + * Make a summary from each cpu + */ +static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum, + struct ip_vs_cpu_stats *stats) +{ + int i; + + for_each_possible_cpu(i) { + struct ip_vs_cpu_stats *s = per_cpu_ptr(stats, i); + unsigned int start; + __u64 inbytes, outbytes; + if (i) { + sum->conns += s->ustats.conns; + sum->inpkts += s->ustats.inpkts; + sum->outpkts += s->ustats.outpkts; + do { + start = u64_stats_fetch_begin_bh(&s->syncp); + inbytes = s->ustats.inbytes; + outbytes = s->ustats.outbytes; + } while (u64_stats_fetch_retry_bh(&s->syncp, start)); + sum->inbytes += inbytes; + sum->outbytes += outbytes; + } else { + sum->conns = s->ustats.conns; + sum->inpkts = s->ustats.inpkts; + sum->outpkts = s->ustats.outpkts; + do { + start = u64_stats_fetch_begin_bh(&s->syncp); + sum->inbytes = s->ustats.inbytes; + sum->outbytes = s->ustats.outbytes; + } while (u64_stats_fetch_retry_bh(&s->syncp, start)); + } + } +} + + static void estimation_timer(unsigned long arg) { struct ip_vs_estimator *e; @@ -64,10 +101,12 @@ static void estimation_timer(unsigned long arg) struct netns_ipvs *ipvs; ipvs = net_ipvs(net); + ip_vs_read_cpu_stats(&ipvs->tot_stats->ustats, ipvs->cpustats); spin_lock(&ipvs->est_lock); list_for_each_entry(e, &ipvs->est_list, list) { s = container_of(e, struct ip_vs_stats, est); + ip_vs_read_cpu_stats(&s->ustats, s->cpustats); spin_lock(&s->lock); n_conns = s->ustats.conns; n_inpkts = s->ustats.inpkts; -- cgit 
v1.2.3-59-g8ed1b From 6e67e586e7289c144d5a189d6e0fa7141d025746 Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Mon, 3 Jan 2011 14:44:57 +0100 Subject: IPVS: netns, connection hash got net as param. Connection hash table is now name space aware. i.e. net ptr >> 8 is xor:ed to the hash, and this is the first param to be compared. The net struct is 0xa40 in size (a little bit smaller for 32-bit archs) and cache-line aligned, so a ptr >> 5 might be a more clever solution? All lookups where net is compared use net_eq() which returns 1 when netns is disabled, and the compiler seems to do something clever in that case. ip_vs_conn_fill_param() has *net as first param now. Three new inlines added to keep conn struct smaller when name space is disabled. - ip_vs_conn_net() - ip_vs_conn_net_set() - ip_vs_conn_net_eq() *v3 moved net compare to the end in "fast path" Signed-off-by: Hans Schillstrom Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- include/net/ip_vs.h | 53 +++++++++++---- include/net/netns/ip_vs.h | 2 + net/netfilter/ipvs/ip_vs_conn.c | 112 ++++++++++++++++++++------------ net/netfilter/ipvs/ip_vs_core.c | 15 +++-- net/netfilter/ipvs/ip_vs_ftp.c | 14 ++-- net/netfilter/ipvs/ip_vs_nfct.c | 6 +- net/netfilter/ipvs/ip_vs_proto_ah_esp.c | 15 +++-- net/netfilter/ipvs/ip_vs_proto_sctp.c | 2 +- net/netfilter/ipvs/ip_vs_proto_tcp.c | 2 +- net/netfilter/ipvs/ip_vs_proto_udp.c | 2 +- net/netfilter/ipvs/ip_vs_sync.c | 13 ++-- 11 files changed, 153 insertions(+), 83 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 605d5db81a39..f82c0ffdee74 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -477,6 +477,7 @@ extern struct ip_vs_proto_data *ip_vs_proto_data_get(struct net *net, unsigned short proto); struct ip_vs_conn_param { + struct net *net; const union nf_inet_addr *caddr; const union nf_inet_addr *vaddr; __be16 cport; @@ -494,17 +495,19 @@ struct ip_vs_conn_param { */ struct ip_vs_conn {
struct list_head c_list; /* hashed list heads */ - +#ifdef CONFIG_NET_NS + struct net *net; /* Name space */ +#endif /* Protocol, addresses and port numbers */ - u16 af; /* address family */ - union nf_inet_addr caddr; /* client address */ - union nf_inet_addr vaddr; /* virtual address */ - union nf_inet_addr daddr; /* destination address */ - volatile __u32 flags; /* status flags */ - __u32 fwmark; /* Fire wall mark from skb */ - __be16 cport; - __be16 vport; - __be16 dport; + u16 af; /* address family */ + __be16 cport; + __be16 vport; + __be16 dport; + __u32 fwmark; /* Fire wall mark from skb */ + union nf_inet_addr caddr; /* client address */ + union nf_inet_addr vaddr; /* virtual address */ + union nf_inet_addr daddr; /* destination address */ + volatile __u32 flags; /* status flags */ __u16 protocol; /* Which protocol (TCP/UDP) */ /* counter and timer */ @@ -547,6 +550,33 @@ struct ip_vs_conn { __u8 pe_data_len; }; +/* + * To save some memory in conn table when name space is disabled. 
+ */ +static inline struct net *ip_vs_conn_net(const struct ip_vs_conn *cp) +{ +#ifdef CONFIG_NET_NS + return cp->net; +#else + return &init_net; +#endif +} +static inline void ip_vs_conn_net_set(struct ip_vs_conn *cp, struct net *net) +{ +#ifdef CONFIG_NET_NS + cp->net = net; +#endif +} + +static inline int ip_vs_conn_net_eq(const struct ip_vs_conn *cp, + struct net *net) +{ +#ifdef CONFIG_NET_NS + return cp->net == net; +#else + return 1; +#endif +} /* * Extended internal versions of struct ip_vs_service_user and @@ -796,13 +826,14 @@ enum { IP_VS_DIR_LAST, }; -static inline void ip_vs_conn_fill_param(int af, int protocol, +static inline void ip_vs_conn_fill_param(struct net *net, int af, int protocol, const union nf_inet_addr *caddr, __be16 cport, const union nf_inet_addr *vaddr, __be16 vport, struct ip_vs_conn_param *p) { + p->net = net; p->af = af; p->protocol = protocol; p->caddr = caddr; diff --git a/include/net/netns/ip_vs.h b/include/net/netns/ip_vs.h index bd1dad872178..1acfb334e69b 100644 --- a/include/net/netns/ip_vs.h +++ b/include/net/netns/ip_vs.h @@ -66,6 +66,8 @@ struct netns_ipvs { struct ip_vs_cpu_stats __percpu *cpustats; /* Stats per cpu */ seqcount_t *ustats_seq; /* u64 read retry */ + /* ip_vs_conn */ + atomic_t conn_count; /* connection counter */ /* ip_vs_lblc */ int sysctl_lblc_expiration; struct ctl_table_header *lblc_ctl_header; diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index b2024c942345..0d5e4feabc1b 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -64,9 +64,6 @@ static struct list_head *ip_vs_conn_tab __read_mostly; /* SLAB cache for IPVS connections */ static struct kmem_cache *ip_vs_conn_cachep __read_mostly; -/* counter for current IPVS connections */ -static atomic_t ip_vs_conn_count = ATOMIC_INIT(0); - /* counter for no client port connections */ static atomic_t ip_vs_conn_no_cport_cnt = ATOMIC_INIT(0); @@ -76,7 +73,7 @@ static unsigned int ip_vs_conn_rnd 
__read_mostly; /* * Fine locking granularity for big connection hash table */ -#define CT_LOCKARRAY_BITS 4 +#define CT_LOCKARRAY_BITS 5 #define CT_LOCKARRAY_SIZE (1<>8)) & ip_vs_conn_tab_mask; #endif - return jhash_3words((__force u32)addr->ip, (__force u32)port, proto, - ip_vs_conn_rnd) - & ip_vs_conn_tab_mask; + return (jhash_3words((__force u32)addr->ip, (__force u32)port, proto, + ip_vs_conn_rnd) ^ + ((size_t)net>>8)) & ip_vs_conn_tab_mask; } static unsigned int ip_vs_conn_hashkey_param(const struct ip_vs_conn_param *p, @@ -166,15 +163,15 @@ static unsigned int ip_vs_conn_hashkey_param(const struct ip_vs_conn_param *p, port = p->vport; } - return ip_vs_conn_hashkey(p->af, p->protocol, addr, port); + return ip_vs_conn_hashkey(p->net, p->af, p->protocol, addr, port); } static unsigned int ip_vs_conn_hashkey_conn(const struct ip_vs_conn *cp) { struct ip_vs_conn_param p; - ip_vs_conn_fill_param(cp->af, cp->protocol, &cp->caddr, cp->cport, - NULL, 0, &p); + ip_vs_conn_fill_param(ip_vs_conn_net(cp), cp->af, cp->protocol, + &cp->caddr, cp->cport, NULL, 0, &p); if (cp->pe) { p.pe = cp->pe; @@ -186,7 +183,7 @@ static unsigned int ip_vs_conn_hashkey_conn(const struct ip_vs_conn *cp) } /* - * Hashes ip_vs_conn in ip_vs_conn_tab by proto,addr,port. + * Hashes ip_vs_conn in ip_vs_conn_tab by netns,proto,addr,port. * returns bool success. 
*/ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp) @@ -269,11 +266,12 @@ __ip_vs_conn_in_get(const struct ip_vs_conn_param *p) list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { if (cp->af == p->af && + p->cport == cp->cport && p->vport == cp->vport && ip_vs_addr_equal(p->af, p->caddr, &cp->caddr) && ip_vs_addr_equal(p->af, p->vaddr, &cp->vaddr) && - p->cport == cp->cport && p->vport == cp->vport && ((!p->cport) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) && - p->protocol == cp->protocol) { + p->protocol == cp->protocol && + ip_vs_conn_net_eq(cp, p->net)) { /* HIT */ atomic_inc(&cp->refcnt); ct_read_unlock(hash); @@ -313,17 +311,18 @@ ip_vs_conn_fill_param_proto(int af, const struct sk_buff *skb, struct ip_vs_conn_param *p) { __be16 _ports[2], *pptr; + struct net *net = skb_net(skb); pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); if (pptr == NULL) return 1; if (likely(!inverse)) - ip_vs_conn_fill_param(af, iph->protocol, &iph->saddr, pptr[0], - &iph->daddr, pptr[1], p); + ip_vs_conn_fill_param(net, af, iph->protocol, &iph->saddr, + pptr[0], &iph->daddr, pptr[1], p); else - ip_vs_conn_fill_param(af, iph->protocol, &iph->daddr, pptr[1], - &iph->saddr, pptr[0], p); + ip_vs_conn_fill_param(net, af, iph->protocol, &iph->daddr, + pptr[1], &iph->saddr, pptr[0], p); return 0; } @@ -352,6 +351,8 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p) ct_read_lock(hash); list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { + if (!ip_vs_conn_net_eq(cp, p->net)) + continue; if (p->pe_data && p->pe->ct_match) { if (p->pe == cp->pe && p->pe->ct_match(p, cp)) goto out; @@ -403,10 +404,11 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p) list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { if (cp->af == p->af && + p->vport == cp->cport && p->cport == cp->dport && ip_vs_addr_equal(p->af, p->vaddr, &cp->caddr) && ip_vs_addr_equal(p->af, p->caddr, &cp->daddr) && - p->vport == cp->cport && p->cport == 
cp->dport && - p->protocol == cp->protocol) { + p->protocol == cp->protocol && + ip_vs_conn_net_eq(cp, p->net)) { /* HIT */ atomic_inc(&cp->refcnt); ret = cp; @@ -609,8 +611,8 @@ struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp) struct ip_vs_dest *dest; if ((cp) && (!cp->dest)) { - dest = ip_vs_find_dest(&init_net, cp->af, &cp->daddr, cp->dport, - &cp->vaddr, cp->vport, + dest = ip_vs_find_dest(ip_vs_conn_net(cp), cp->af, &cp->daddr, + cp->dport, &cp->vaddr, cp->vport, cp->protocol, cp->fwmark); ip_vs_bind_dest(cp, dest); return dest; @@ -728,6 +730,7 @@ int ip_vs_check_template(struct ip_vs_conn *ct) static void ip_vs_conn_expire(unsigned long data) { struct ip_vs_conn *cp = (struct ip_vs_conn *)data; + struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp)); cp->timeout = 60*HZ; @@ -770,7 +773,7 @@ static void ip_vs_conn_expire(unsigned long data) ip_vs_unbind_dest(cp); if (cp->flags & IP_VS_CONN_F_NO_CPORT) atomic_dec(&ip_vs_conn_no_cport_cnt); - atomic_dec(&ip_vs_conn_count); + atomic_dec(&ipvs->conn_count); kmem_cache_free(ip_vs_conn_cachep, cp); return; @@ -804,7 +807,9 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, struct ip_vs_dest *dest, __u32 fwmark) { struct ip_vs_conn *cp; - struct ip_vs_proto_data *pd = ip_vs_proto_data_get(&init_net, p->protocol); + struct netns_ipvs *ipvs = net_ipvs(p->net); + struct ip_vs_proto_data *pd = ip_vs_proto_data_get(p->net, + p->protocol); cp = kmem_cache_zalloc(ip_vs_conn_cachep, GFP_ATOMIC); if (cp == NULL) { @@ -814,6 +819,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, INIT_LIST_HEAD(&cp->c_list); setup_timer(&cp->timer, ip_vs_conn_expire, (unsigned long)cp); + ip_vs_conn_net_set(cp, p->net); cp->af = p->af; cp->protocol = p->protocol; ip_vs_addr_copy(p->af, &cp->caddr, p->caddr); @@ -844,7 +850,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, atomic_set(&cp->n_control, 0); atomic_set(&cp->in_pkts, 0); - atomic_inc(&ip_vs_conn_count); + atomic_inc(&ipvs->conn_count); if (flags & 
IP_VS_CONN_F_NO_CPORT) atomic_inc(&ip_vs_conn_no_cport_cnt); @@ -886,17 +892,22 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, * /proc/net/ip_vs_conn entries */ #ifdef CONFIG_PROC_FS +struct ip_vs_iter_state { + struct seq_net_private p; + struct list_head *l; +}; static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos) { int idx; struct ip_vs_conn *cp; + struct ip_vs_iter_state *iter = seq->private; for (idx = 0; idx < ip_vs_conn_tab_size; idx++) { ct_read_lock_bh(idx); list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) { if (pos-- == 0) { - seq->private = &ip_vs_conn_tab[idx]; + iter->l = &ip_vs_conn_tab[idx]; return cp; } } @@ -908,14 +919,17 @@ static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos) static void *ip_vs_conn_seq_start(struct seq_file *seq, loff_t *pos) { - seq->private = NULL; + struct ip_vs_iter_state *iter = seq->private; + + iter->l = NULL; return *pos ? ip_vs_conn_array(seq, *pos - 1) :SEQ_START_TOKEN; } static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct ip_vs_conn *cp = v; - struct list_head *e, *l = seq->private; + struct ip_vs_iter_state *iter = seq->private; + struct list_head *e, *l = iter->l; int idx; ++*pos; @@ -932,18 +946,19 @@ static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos) while (++idx < ip_vs_conn_tab_size) { ct_read_lock_bh(idx); list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) { - seq->private = &ip_vs_conn_tab[idx]; + iter->l = &ip_vs_conn_tab[idx]; return cp; } ct_read_unlock_bh(idx); } - seq->private = NULL; + iter->l = NULL; return NULL; } static void ip_vs_conn_seq_stop(struct seq_file *seq, void *v) { - struct list_head *l = seq->private; + struct ip_vs_iter_state *iter = seq->private; + struct list_head *l = iter->l; if (l) ct_read_unlock_bh(l - ip_vs_conn_tab); @@ -957,9 +972,12 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v) "Pro FromIP FPrt ToIP TPrt DestIP DPrt State Expires PEName PEData\n"); else { const 
struct ip_vs_conn *cp = v; + struct net *net = seq_file_net(seq); char pe_data[IP_VS_PENAME_MAXLEN + IP_VS_PEDATA_MAXLEN + 3]; size_t len = 0; + if (!ip_vs_conn_net_eq(cp, net)) + return 0; if (cp->pe_data) { pe_data[0] = ' '; len = strlen(cp->pe->name); @@ -1004,7 +1022,8 @@ static const struct seq_operations ip_vs_conn_seq_ops = { static int ip_vs_conn_open(struct inode *inode, struct file *file) { - return seq_open(file, &ip_vs_conn_seq_ops); + return seq_open_net(inode, file, &ip_vs_conn_seq_ops, + sizeof(struct ip_vs_iter_state)); } static const struct file_operations ip_vs_conn_fops = { @@ -1031,6 +1050,10 @@ static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v) "Pro FromIP FPrt ToIP TPrt DestIP DPrt State Origin Expires\n"); else { const struct ip_vs_conn *cp = v; + struct net *net = seq_file_net(seq); + + if (!ip_vs_conn_net_eq(cp, net)) + return 0; #ifdef CONFIG_IP_VS_IPV6 if (cp->af == AF_INET6) @@ -1067,7 +1090,8 @@ static const struct seq_operations ip_vs_conn_sync_seq_ops = { static int ip_vs_conn_sync_open(struct inode *inode, struct file *file) { - return seq_open(file, &ip_vs_conn_sync_seq_ops); + return seq_open_net(inode, file, &ip_vs_conn_sync_seq_ops, + sizeof(struct ip_vs_iter_state)); } static const struct file_operations ip_vs_conn_sync_fops = { @@ -1168,10 +1192,11 @@ void ip_vs_random_dropentry(void) /* * Flush all the connection entries in the ip_vs_conn_tab */ -static void ip_vs_conn_flush(void) +static void ip_vs_conn_flush(struct net *net) { int idx; struct ip_vs_conn *cp; + struct netns_ipvs *ipvs = net_ipvs(net); flush_again: for (idx = 0; idx < ip_vs_conn_tab_size; idx++) { @@ -1181,7 +1206,8 @@ static void ip_vs_conn_flush(void) ct_write_lock_bh(idx); list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) { - + if (!ip_vs_conn_net_eq(cp, net)) + continue; IP_VS_DBG(4, "del connection\n"); ip_vs_conn_expire_now(cp); if (cp->control) { @@ -1194,7 +1220,7 @@ static void ip_vs_conn_flush(void) /* the counter may be not NULL, 
because maybe some conn entries are run by slow timer handler or unhashed but still referred */ - if (atomic_read(&ip_vs_conn_count) != 0) { + if (atomic_read(&ipvs->conn_count) != 0) { schedule(); goto flush_again; } @@ -1204,8 +1230,11 @@ static void ip_vs_conn_flush(void) */ int __net_init __ip_vs_conn_init(struct net *net) { + struct netns_ipvs *ipvs = net_ipvs(net); + if (!net_eq(net, &init_net)) /* netns not enabled yet */ return -EPERM; + atomic_set(&ipvs->conn_count, 0); proc_net_fops_create(net, "ip_vs_conn", 0, &ip_vs_conn_fops); proc_net_fops_create(net, "ip_vs_conn_sync", 0, &ip_vs_conn_sync_fops); @@ -1217,6 +1246,8 @@ static void __net_exit __ip_vs_conn_cleanup(struct net *net) if (!net_eq(net, &init_net)) /* netns not enabled yet */ return; + /* flush all the connection entries first */ + ip_vs_conn_flush(net); proc_net_remove(net, "ip_vs_conn"); proc_net_remove(net, "ip_vs_conn_sync"); } @@ -1277,9 +1308,6 @@ int __init ip_vs_conn_init(void) void ip_vs_conn_cleanup(void) { unregister_pernet_subsys(&ipvs_conn_ops); - /* flush all the connection entries first */ - ip_vs_conn_flush(); - /* Release the empty cache */ kmem_cache_destroy(ip_vs_conn_cachep); vfree(ip_vs_conn_tab); diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 7e6a2a046bf5..7205b49c56c1 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -205,7 +205,8 @@ ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc, const union nf_inet_addr *vaddr, __be16 vport, struct ip_vs_conn_param *p) { - ip_vs_conn_fill_param(svc->af, protocol, caddr, cport, vaddr, vport, p); + ip_vs_conn_fill_param(svc->net, svc->af, protocol, caddr, cport, vaddr, + vport, p); p->pe = svc->pe; if (p->pe && p->pe->fill_param) return p->pe->fill_param(p, skb); @@ -348,8 +349,8 @@ ip_vs_sched_persist(struct ip_vs_service *svc, /* * Create a new connection according to the template */ - ip_vs_conn_fill_param(svc->af, iph.protocol, &iph.saddr, 
src_port, - &iph.daddr, dst_port, &param); + ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol, &iph.saddr, + src_port, &iph.daddr, dst_port, &param); cp = ip_vs_conn_new(&param, &dest->addr, dport, flags, dest, skb->mark); if (cp == NULL) { @@ -464,8 +465,10 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, */ { struct ip_vs_conn_param p; - ip_vs_conn_fill_param(svc->af, iph.protocol, &iph.saddr, - pptr[0], &iph.daddr, pptr[1], &p); + + ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol, + &iph.saddr, pptr[0], &iph.daddr, pptr[1], + &p); cp = ip_vs_conn_new(&p, &dest->addr, dest->port ? dest->port : pptr[1], flags, dest, skb->mark); @@ -532,7 +535,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__); { struct ip_vs_conn_param p; - ip_vs_conn_fill_param(svc->af, iph.protocol, + ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol, &iph.saddr, pptr[0], &iph.daddr, pptr[1], &p); cp = ip_vs_conn_new(&p, &daddr, 0, diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index 77b0036dcb73..6a04f9ab9d0d 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c @@ -198,13 +198,15 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, */ { struct ip_vs_conn_param p; - ip_vs_conn_fill_param(AF_INET, iph->protocol, - &from, port, &cp->caddr, 0, &p); + ip_vs_conn_fill_param(ip_vs_conn_net(cp), AF_INET, + iph->protocol, &from, port, + &cp->caddr, 0, &p); n_cp = ip_vs_conn_out_get(&p); } if (!n_cp) { struct ip_vs_conn_param p; - ip_vs_conn_fill_param(AF_INET, IPPROTO_TCP, &cp->caddr, + ip_vs_conn_fill_param(ip_vs_conn_net(cp), + AF_INET, IPPROTO_TCP, &cp->caddr, 0, &cp->vaddr, port, &p); n_cp = ip_vs_conn_new(&p, &from, port, IP_VS_CONN_F_NO_CPORT | @@ -361,9 +363,9 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, { struct ip_vs_conn_param p; - ip_vs_conn_fill_param(AF_INET, iph->protocol, &to, port, -
&cp->vaddr, htons(ntohs(cp->vport)-1), - &p); + ip_vs_conn_fill_param(ip_vs_conn_net(cp), AF_INET, + iph->protocol, &to, port, &cp->vaddr, + htons(ntohs(cp->vport)-1), &p); n_cp = ip_vs_conn_in_get(&p); if (!n_cp) { n_cp = ip_vs_conn_new(&p, &cp->daddr, diff --git a/net/netfilter/ipvs/ip_vs_nfct.c b/net/netfilter/ipvs/ip_vs_nfct.c index 4680647cd450..f454c80df0a7 100644 --- a/net/netfilter/ipvs/ip_vs_nfct.c +++ b/net/netfilter/ipvs/ip_vs_nfct.c @@ -141,6 +141,7 @@ static void ip_vs_nfct_expect_callback(struct nf_conn *ct, struct nf_conntrack_tuple *orig, new_reply; struct ip_vs_conn *cp; struct ip_vs_conn_param p; + struct net *net = nf_ct_net(ct); if (exp->tuple.src.l3num != PF_INET) return; @@ -155,7 +156,7 @@ static void ip_vs_nfct_expect_callback(struct nf_conn *ct, /* RS->CLIENT */ orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; - ip_vs_conn_fill_param(exp->tuple.src.l3num, orig->dst.protonum, + ip_vs_conn_fill_param(net, exp->tuple.src.l3num, orig->dst.protonum, &orig->src.u3, orig->src.u.tcp.port, &orig->dst.u3, orig->dst.u.tcp.port, &p); cp = ip_vs_conn_out_get(&p); @@ -268,7 +269,8 @@ void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp) " for conn " FMT_CONN "\n", __func__, ARG_TUPLE(&tuple), ARG_CONN(cp)); - h = nf_conntrack_find_get(&init_net, NF_CT_DEFAULT_ZONE, &tuple); + h = nf_conntrack_find_get(ip_vs_conn_net(cp), NF_CT_DEFAULT_ZONE, + &tuple); if (h) { ct = nf_ct_tuplehash_to_ctrack(h); /* Show what happens instead of calling nf_ct_kill() */ diff --git a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c index 28039cbfcff4..5b8eb8b12c3e 100644 --- a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c +++ b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c @@ -41,15 +41,16 @@ struct isakmp_hdr { #define PORT_ISAKMP 500 static void -ah_esp_conn_fill_param_proto(int af, const struct ip_vs_iphdr *iph, - int inverse, struct ip_vs_conn_param *p) +ah_esp_conn_fill_param_proto(struct net *net, int af, + const struct ip_vs_iphdr *iph, int 
inverse, + struct ip_vs_conn_param *p) { if (likely(!inverse)) - ip_vs_conn_fill_param(af, IPPROTO_UDP, + ip_vs_conn_fill_param(net, af, IPPROTO_UDP, &iph->saddr, htons(PORT_ISAKMP), &iph->daddr, htons(PORT_ISAKMP), p); else - ip_vs_conn_fill_param(af, IPPROTO_UDP, + ip_vs_conn_fill_param(net, af, IPPROTO_UDP, &iph->daddr, htons(PORT_ISAKMP), &iph->saddr, htons(PORT_ISAKMP), p); } @@ -61,8 +62,9 @@ ah_esp_conn_in_get(int af, const struct sk_buff *skb, { struct ip_vs_conn *cp; struct ip_vs_conn_param p; + struct net *net = skb_net(skb); - ah_esp_conn_fill_param_proto(af, iph, inverse, &p); + ah_esp_conn_fill_param_proto(net, af, iph, inverse, &p); cp = ip_vs_conn_in_get(&p); if (!cp) { /* @@ -89,8 +91,9 @@ ah_esp_conn_out_get(int af, const struct sk_buff *skb, { struct ip_vs_conn *cp; struct ip_vs_conn_param p; + struct net *net = skb_net(skb); - ah_esp_conn_fill_param_proto(af, iph, inverse, &p); + ah_esp_conn_fill_param_proto(net, af, iph, inverse, &p); cp = ip_vs_conn_out_get(&p); if (!cp) { IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for inout packet " diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index 569e77bf08c4..550365a690c7 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -1055,7 +1055,7 @@ static void sctp_unregister_app(struct net *net, struct ip_vs_app *inc) static int sctp_app_conn_bind(struct ip_vs_conn *cp) { - struct netns_ipvs *ipvs = net_ipvs(&init_net); + struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp)); int hash; struct ip_vs_app *inc; int result = 0; diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index 757aaaf083bb..d8b3f9f15826 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c @@ -620,7 +620,7 @@ tcp_unregister_app(struct net *net, struct ip_vs_app *inc) static int tcp_app_conn_bind(struct ip_vs_conn *cp) { - struct netns_ipvs *ipvs = net_ipvs(&init_net); + struct 
netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp)); int hash; struct ip_vs_app *inc; int result = 0; diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c index 1dc394100fa8..581157bbded5 100644 --- a/net/netfilter/ipvs/ip_vs_proto_udp.c +++ b/net/netfilter/ipvs/ip_vs_proto_udp.c @@ -396,7 +396,7 @@ udp_unregister_app(struct net *net, struct ip_vs_app *inc) static int udp_app_conn_bind(struct ip_vs_conn *cp) { - struct netns_ipvs *ipvs = net_ipvs(&init_net); + struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp)); int hash; struct ip_vs_app *inc; int result = 0; diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index c29e73d686fb..f85e47daecc3 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -660,21 +660,21 @@ control: * fill_param used by version 1 */ static inline int -ip_vs_conn_fill_param_sync(int af, union ip_vs_sync_conn *sc, +ip_vs_conn_fill_param_sync(struct net *net, int af, union ip_vs_sync_conn *sc, struct ip_vs_conn_param *p, __u8 *pe_data, unsigned int pe_data_len, __u8 *pe_name, unsigned int pe_name_len) { #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) - ip_vs_conn_fill_param(af, sc->v6.protocol, + ip_vs_conn_fill_param(net, af, sc->v6.protocol, (const union nf_inet_addr *)&sc->v6.caddr, sc->v6.cport, (const union nf_inet_addr *)&sc->v6.vaddr, sc->v6.vport, p); else #endif - ip_vs_conn_fill_param(af, sc->v4.protocol, + ip_vs_conn_fill_param(net, af, sc->v4.protocol, (const union nf_inet_addr *)&sc->v4.caddr, sc->v4.cport, (const union nf_inet_addr *)&sc->v4.vaddr, @@ -881,7 +881,7 @@ static void ip_vs_process_message_v0(struct net *net, const char *buffer, } } - ip_vs_conn_fill_param(AF_INET, s->protocol, + ip_vs_conn_fill_param(net, AF_INET, s->protocol, (const union nf_inet_addr *)&s->caddr, s->cport, (const union nf_inet_addr *)&s->vaddr, @@ -1043,9 +1043,8 @@ static inline int ip_vs_proc_sync_conn(struct net *net, __u8 *p, __u8 *msg_end) state = 0; } } 
- if (ip_vs_conn_fill_param_sync(af, s, &param, - pe_data, pe_data_len, - pe_name, pe_name_len)) { + if (ip_vs_conn_fill_param_sync(net, af, s, &param, pe_data, + pe_data_len, pe_name, pe_name_len)) { retc = 50; goto out; } -- cgit v1.2.3-59-g8ed1b From a0840e2e165a370ca24a59545e564e9881a55891 Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Mon, 3 Jan 2011 14:44:58 +0100 Subject: IPVS: netns, ip_vs_ctl local vars moved to ipvs struct. Moving global vars to ipvs struct, except for svc table lock. Next patch for ctl will be drop-rate handling. *v3 __ip_vs_mutex remains global ip_vs_conntrack_enabled(struct netns_ipvs *ipvs) Signed-off-by: Hans Schillstrom Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- include/net/ip_vs.h | 27 ++-- include/net/netns/ip_vs.h | 37 ++++- net/netfilter/ipvs/ip_vs_conn.c | 7 +- net/netfilter/ipvs/ip_vs_core.c | 34 ++-- net/netfilter/ipvs/ip_vs_ctl.c | 291 ++++++++++++++++++---------------- net/netfilter/ipvs/ip_vs_proto_sctp.c | 2 +- net/netfilter/ipvs/ip_vs_proto_tcp.c | 2 +- net/netfilter/ipvs/ip_vs_proto_udp.c | 2 +- net/netfilter/ipvs/ip_vs_sync.c | 9 +- 9 files changed, 230 insertions(+), 181 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index f82c0ffdee74..af9acf44e40a 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -41,7 +41,7 @@ static inline struct netns_ipvs *net_ipvs(struct net* net) * Get net ptr from skb in traffic cases * use skb_sknet when call is from userland (ioctl or netlink) */ -static inline struct net *skb_net(struct sk_buff *skb) +static inline struct net *skb_net(const struct sk_buff *skb) { #ifdef CONFIG_NET_NS #ifdef CONFIG_IP_VS_DEBUG @@ -69,7 +69,7 @@ static inline struct net *skb_net(struct sk_buff *skb) #endif } -static inline struct net *skb_sknet(struct sk_buff *skb) +static inline struct net *skb_sknet(const struct sk_buff *skb) { #ifdef CONFIG_NET_NS #ifdef CONFIG_IP_VS_DEBUG @@ -1023,13 +1023,6 @@ extern int ip_vs_leave(struct
ip_vs_service *svc, struct sk_buff *skb, /* * IPVS control data and functions (from ip_vs_ctl.c) */ -extern int sysctl_ip_vs_cache_bypass; -extern int sysctl_ip_vs_expire_nodest_conn; -extern int sysctl_ip_vs_expire_quiescent_template; -extern int sysctl_ip_vs_sync_threshold[2]; -extern int sysctl_ip_vs_nat_icmp_send; -extern int sysctl_ip_vs_conntrack; -extern int sysctl_ip_vs_snat_reroute; extern struct ip_vs_stats ip_vs_stats; extern const struct ctl_path net_vs_ctl_path[]; extern int sysctl_ip_vs_sync_ver; @@ -1119,11 +1112,13 @@ extern int ip_vs_icmp_xmit_v6 extern int ip_vs_drop_rate; extern int ip_vs_drop_counter; -static __inline__ int ip_vs_todrop(void) +static inline int ip_vs_todrop(struct netns_ipvs *ipvs) { - if (!ip_vs_drop_rate) return 0; - if (--ip_vs_drop_counter > 0) return 0; - ip_vs_drop_counter = ip_vs_drop_rate; + if (!ipvs->drop_rate) + return 0; + if (--ipvs->drop_counter > 0) + return 0; + ipvs->drop_counter = ipvs->drop_rate; return 1; } @@ -1211,9 +1206,9 @@ static inline void ip_vs_notrack(struct sk_buff *skb) * Netfilter connection tracking * (from ip_vs_nfct.c) */ -static inline int ip_vs_conntrack_enabled(void) +static inline int ip_vs_conntrack_enabled(struct netns_ipvs *ipvs) { - return sysctl_ip_vs_conntrack; + return ipvs->sysctl_conntrack; } extern void ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, @@ -1226,7 +1221,7 @@ extern void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp); #else -static inline int ip_vs_conntrack_enabled(void) +static inline int ip_vs_conntrack_enabled(struct netns_ipvs *ipvs) { return 0; } diff --git a/include/net/netns/ip_vs.h b/include/net/netns/ip_vs.h index 1acfb334e69b..c4b1abf258e4 100644 --- a/include/net/netns/ip_vs.h +++ b/include/net/netns/ip_vs.h @@ -61,13 +61,46 @@ struct netns_ipvs { struct list_head sctp_apps[SCTP_APP_TAB_SIZE]; spinlock_t sctp_app_lock; #endif + /* ip_vs_conn */ + atomic_t conn_count; /* connection counter */ + /* ip_vs_ctl */ struct ip_vs_stats 
*tot_stats; /* Statistics & est. */ struct ip_vs_cpu_stats __percpu *cpustats; /* Stats per cpu */ seqcount_t *ustats_seq; /* u64 read retry */ - /* ip_vs_conn */ - atomic_t conn_count; /* connection counter */ + int num_services; /* no of virtual services */ + /* 1/rate drop and drop-entry variables */ + int drop_rate; + int drop_counter; + atomic_t dropentry; + /* locks in ctl.c */ + spinlock_t dropentry_lock; /* drop entry handling */ + spinlock_t droppacket_lock; /* drop packet handling */ + spinlock_t securetcp_lock; /* state and timeout tables */ + rwlock_t rs_lock; /* real services table */ + /* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */ + struct lock_class_key ctl_key; /* ctl_mutex debuging */ + /* sys-ctl struct */ + struct ctl_table_header *sysctl_hdr; + struct ctl_table *sysctl_tbl; + /* sysctl variables */ + int sysctl_amemthresh; + int sysctl_am_droprate; + int sysctl_drop_entry; + int sysctl_drop_packet; + int sysctl_secure_tcp; +#ifdef CONFIG_IP_VS_NFCT + int sysctl_conntrack; +#endif + int sysctl_snat_reroute; + int sysctl_sync_ver; + int sysctl_cache_bypass; + int sysctl_expire_nodest_conn; + int sysctl_expire_quiescent_template; + int sysctl_sync_threshold[2]; + int sysctl_nat_icmp_send; + /* ip_vs_lblc */ int sysctl_lblc_expiration; struct ctl_table_header *lblc_ctl_header; diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 0d5e4feabc1b..5ba205a4d79c 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -686,13 +686,14 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp) int ip_vs_check_template(struct ip_vs_conn *ct) { struct ip_vs_dest *dest = ct->dest; + struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(ct)); /* * Checking the dest server status. 
*/ if ((dest == NULL) || !(dest->flags & IP_VS_DEST_F_AVAILABLE) || - (sysctl_ip_vs_expire_quiescent_template && + (ipvs->sysctl_expire_quiescent_template && (atomic_read(&dest->weight) == 0))) { IP_VS_DBG_BUF(9, "check_template: dest not available for " "protocol %s s:%s:%d v:%s:%d " @@ -879,7 +880,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, * IP_VS_CONN_F_ONE_PACKET too. */ - if (ip_vs_conntrack_enabled()) + if (ip_vs_conntrack_enabled(ipvs)) cp->flags |= IP_VS_CONN_F_NFCT; /* Hash it in the ip_vs_conn_tab finally */ @@ -1198,7 +1199,7 @@ static void ip_vs_conn_flush(struct net *net) struct ip_vs_conn *cp; struct netns_ipvs *ipvs = net_ipvs(net); - flush_again: +flush_again: for (idx = 0; idx < ip_vs_conn_tab_size; idx++) { /* * Lock is actually needed in this loop. diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 7205b49c56c1..a7c59a722af3 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -499,6 +499,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, struct ip_vs_proto_data *pd) { + struct netns_ipvs *ipvs; __be16 _ports[2], *pptr; struct ip_vs_iphdr iph; int unicast; @@ -521,7 +522,8 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, /* if it is fwmark-based service, the cache_bypass sysctl is up and the destination is a non-local unicast, then create a cache_bypass connection entry */ - if (sysctl_ip_vs_cache_bypass && svc->fwmark && unicast) { + ipvs = net_ipvs(skb_net(skb)); + if (ipvs->sysctl_cache_bypass && svc->fwmark && unicast) { int ret, cs; struct ip_vs_conn *cp; unsigned int flags = (svc->flags & IP_VS_SVC_F_ONEPACKET && @@ -733,6 +735,7 @@ static int handle_response_icmp(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, unsigned int offset, unsigned int ihl) { + struct netns_ipvs *ipvs; unsigned int verdict = NF_DROP; if (IP_VS_FWD_METHOD(cp) != 0) { @@ -754,6 +757,8 
@@ static int handle_response_icmp(int af, struct sk_buff *skb, if (!skb_make_writable(skb, offset)) goto out; + ipvs = net_ipvs(skb_net(skb)); + #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) ip_vs_nat_icmp_v6(skb, pp, cp, 1); @@ -763,11 +768,11 @@ static int handle_response_icmp(int af, struct sk_buff *skb, #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) { - if (sysctl_ip_vs_snat_reroute && ip6_route_me_harder(skb) != 0) + if (ipvs->sysctl_snat_reroute && ip6_route_me_harder(skb) != 0) goto out; } else #endif - if ((sysctl_ip_vs_snat_reroute || + if ((ipvs->sysctl_snat_reroute || skb_rtable(skb)->rt_flags & RTCF_LOCAL) && ip_route_me_harder(skb, RTN_LOCAL) != 0) goto out; @@ -979,6 +984,7 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, struct ip_vs_conn *cp, int ihl) { struct ip_vs_protocol *pp = pd->pp; + struct netns_ipvs *ipvs; IP_VS_DBG_PKT(11, af, pp, skb, 0, "Outgoing packet"); @@ -1014,13 +1020,15 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, * if it came from this machine itself. So re-compute * the routing information. 
*/ + ipvs = net_ipvs(skb_net(skb)); + #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) { - if (sysctl_ip_vs_snat_reroute && ip6_route_me_harder(skb) != 0) + if (ipvs->sysctl_snat_reroute && ip6_route_me_harder(skb) != 0) goto drop; } else #endif - if ((sysctl_ip_vs_snat_reroute || + if ((ipvs->sysctl_snat_reroute || skb_rtable(skb)->rt_flags & RTCF_LOCAL) && ip_route_me_harder(skb, RTN_LOCAL) != 0) goto drop; @@ -1057,6 +1065,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) struct ip_vs_protocol *pp; struct ip_vs_proto_data *pd; struct ip_vs_conn *cp; + struct netns_ipvs *ipvs; EnterFunction(11); @@ -1131,10 +1140,11 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) * Check if the packet belongs to an existing entry */ cp = pp->conn_out_get(af, skb, &iph, iph.len, 0); + ipvs = net_ipvs(net); if (likely(cp)) return handle_response(af, skb, pd, cp, iph.len); - if (sysctl_ip_vs_nat_icmp_send && + if (ipvs->sysctl_nat_icmp_send && (pp->protocol == IPPROTO_TCP || pp->protocol == IPPROTO_UDP || pp->protocol == IPPROTO_SCTP)) { @@ -1580,7 +1590,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) { /* the destination server is not available */ - if (sysctl_ip_vs_expire_nodest_conn) { + if (ipvs->sysctl_expire_nodest_conn) { /* try to expire the connection immediately */ ip_vs_conn_expire_now(cp); } @@ -1610,15 +1620,15 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) */ if (cp->flags & IP_VS_CONN_F_ONE_PACKET) - pkts = sysctl_ip_vs_sync_threshold[0]; + pkts = ipvs->sysctl_sync_threshold[0]; else pkts = atomic_add_return(1, &cp->in_pkts); if ((ipvs->sync_state & IP_VS_STATE_MASTER) && cp->protocol == IPPROTO_SCTP) { if ((cp->state == IP_VS_SCTP_S_ESTABLISHED && - (pkts % sysctl_ip_vs_sync_threshold[1] - == sysctl_ip_vs_sync_threshold[0])) || + (pkts % ipvs->sysctl_sync_threshold[1] + == ipvs->sysctl_sync_threshold[0])) || (cp->old_state != cp->state && 
((cp->state == IP_VS_SCTP_S_CLOSED) || (cp->state == IP_VS_SCTP_S_SHUT_ACK_CLI) || @@ -1632,8 +1642,8 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) else if ((ipvs->sync_state & IP_VS_STATE_MASTER) && (((cp->protocol != IPPROTO_TCP || cp->state == IP_VS_TCP_S_ESTABLISHED) && - (pkts % sysctl_ip_vs_sync_threshold[1] - == sysctl_ip_vs_sync_threshold[0])) || + (pkts % ipvs->sysctl_sync_threshold[1] + == ipvs->sysctl_sync_threshold[0])) || ((cp->protocol == IPPROTO_TCP) && (cp->old_state != cp->state) && ((cp->state == IP_VS_TCP_S_FIN_WAIT) || (cp->state == IP_VS_TCP_S_CLOSE) || diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index cbd58c60e1bf..183ac18bded5 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -58,42 +58,7 @@ static DEFINE_MUTEX(__ip_vs_mutex); /* lock for service table */ static DEFINE_RWLOCK(__ip_vs_svc_lock); -/* lock for table with the real services */ -static DEFINE_RWLOCK(__ip_vs_rs_lock); - -/* lock for state and timeout tables */ -static DEFINE_SPINLOCK(ip_vs_securetcp_lock); - -/* lock for drop entry handling */ -static DEFINE_SPINLOCK(__ip_vs_dropentry_lock); - -/* lock for drop packet handling */ -static DEFINE_SPINLOCK(__ip_vs_droppacket_lock); - -/* 1/rate drop and drop-entry variables */ -int ip_vs_drop_rate = 0; -int ip_vs_drop_counter = 0; -static atomic_t ip_vs_dropentry = ATOMIC_INIT(0); - -/* number of virtual services */ -static int ip_vs_num_services = 0; - /* sysctl variables */ -static int sysctl_ip_vs_drop_entry = 0; -static int sysctl_ip_vs_drop_packet = 0; -static int sysctl_ip_vs_secure_tcp = 0; -static int sysctl_ip_vs_amemthresh = 1024; -static int sysctl_ip_vs_am_droprate = 10; -int sysctl_ip_vs_cache_bypass = 0; -int sysctl_ip_vs_expire_nodest_conn = 0; -int sysctl_ip_vs_expire_quiescent_template = 0; -int sysctl_ip_vs_sync_threshold[2] = { 3, 50 }; -int sysctl_ip_vs_nat_icmp_send = 0; -#ifdef CONFIG_IP_VS_NFCT -int sysctl_ip_vs_conntrack; -#endif 
-int sysctl_ip_vs_snat_reroute = 1; -int sysctl_ip_vs_sync_ver = 1; /* Default version of sync proto */ #ifdef CONFIG_IP_VS_DEBUG static int sysctl_ip_vs_debug_level = 0; @@ -142,73 +107,73 @@ static void update_defense_level(struct netns_ipvs *ipvs) /* si_swapinfo(&i); */ /* availmem = availmem - (i.totalswap - i.freeswap); */ - nomem = (availmem < sysctl_ip_vs_amemthresh); + nomem = (availmem < ipvs->sysctl_amemthresh); local_bh_disable(); /* drop_entry */ - spin_lock(&__ip_vs_dropentry_lock); - switch (sysctl_ip_vs_drop_entry) { + spin_lock(&ipvs->dropentry_lock); + switch (ipvs->sysctl_drop_entry) { case 0: - atomic_set(&ip_vs_dropentry, 0); + atomic_set(&ipvs->dropentry, 0); break; case 1: if (nomem) { - atomic_set(&ip_vs_dropentry, 1); - sysctl_ip_vs_drop_entry = 2; + atomic_set(&ipvs->dropentry, 1); + ipvs->sysctl_drop_entry = 2; } else { - atomic_set(&ip_vs_dropentry, 0); + atomic_set(&ipvs->dropentry, 0); } break; case 2: if (nomem) { - atomic_set(&ip_vs_dropentry, 1); + atomic_set(&ipvs->dropentry, 1); } else { - atomic_set(&ip_vs_dropentry, 0); - sysctl_ip_vs_drop_entry = 1; + atomic_set(&ipvs->dropentry, 0); + ipvs->sysctl_drop_entry = 1; }; break; case 3: - atomic_set(&ip_vs_dropentry, 1); + atomic_set(&ipvs->dropentry, 1); break; } - spin_unlock(&__ip_vs_dropentry_lock); + spin_unlock(&ipvs->dropentry_lock); /* drop_packet */ - spin_lock(&__ip_vs_droppacket_lock); - switch (sysctl_ip_vs_drop_packet) { + spin_lock(&ipvs->droppacket_lock); + switch (ipvs->sysctl_drop_packet) { case 0: - ip_vs_drop_rate = 0; + ipvs->drop_rate = 0; break; case 1: if (nomem) { - ip_vs_drop_rate = ip_vs_drop_counter - = sysctl_ip_vs_amemthresh / - (sysctl_ip_vs_amemthresh-availmem); - sysctl_ip_vs_drop_packet = 2; + ipvs->drop_rate = ipvs->drop_counter + = ipvs->sysctl_amemthresh / + (ipvs->sysctl_amemthresh-availmem); + ipvs->sysctl_drop_packet = 2; } else { - ip_vs_drop_rate = 0; + ipvs->drop_rate = 0; } break; case 2: if (nomem) { - ip_vs_drop_rate = ip_vs_drop_counter - 
= sysctl_ip_vs_amemthresh / - (sysctl_ip_vs_amemthresh-availmem); + ipvs->drop_rate = ipvs->drop_counter + = ipvs->sysctl_amemthresh / + (ipvs->sysctl_amemthresh-availmem); } else { - ip_vs_drop_rate = 0; - sysctl_ip_vs_drop_packet = 1; + ipvs->drop_rate = 0; + ipvs->sysctl_drop_packet = 1; } break; case 3: - ip_vs_drop_rate = sysctl_ip_vs_am_droprate; + ipvs->drop_rate = ipvs->sysctl_am_droprate; break; } - spin_unlock(&__ip_vs_droppacket_lock); + spin_unlock(&ipvs->droppacket_lock); /* secure_tcp */ - spin_lock(&ip_vs_securetcp_lock); - switch (sysctl_ip_vs_secure_tcp) { + spin_lock(&ipvs->securetcp_lock); + switch (ipvs->sysctl_secure_tcp) { case 0: if (old_secure_tcp >= 2) to_change = 0; @@ -217,7 +182,7 @@ static void update_defense_level(struct netns_ipvs *ipvs) if (nomem) { if (old_secure_tcp < 2) to_change = 1; - sysctl_ip_vs_secure_tcp = 2; + ipvs->sysctl_secure_tcp = 2; } else { if (old_secure_tcp >= 2) to_change = 0; @@ -230,7 +195,7 @@ static void update_defense_level(struct netns_ipvs *ipvs) } else { if (old_secure_tcp >= 2) to_change = 0; - sysctl_ip_vs_secure_tcp = 1; + ipvs->sysctl_secure_tcp = 1; } break; case 3: @@ -238,11 +203,11 @@ static void update_defense_level(struct netns_ipvs *ipvs) to_change = 1; break; } - old_secure_tcp = sysctl_ip_vs_secure_tcp; + old_secure_tcp = ipvs->sysctl_secure_tcp; if (to_change >= 0) ip_vs_protocol_timeout_change(ipvs, - sysctl_ip_vs_secure_tcp > 1); - spin_unlock(&ip_vs_securetcp_lock); + ipvs->sysctl_secure_tcp > 1); + spin_unlock(&ipvs->securetcp_lock); local_bh_enable(); } @@ -260,7 +225,7 @@ static void defense_work_handler(struct work_struct *work) struct netns_ipvs *ipvs = net_ipvs(&init_net); update_defense_level(ipvs); - if (atomic_read(&ip_vs_dropentry)) + if (atomic_read(&ipvs->dropentry)) ip_vs_random_dropentry(); schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD); @@ -602,7 +567,7 @@ ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol, */ hash = ip_vs_rs_hashkey(af, daddr, 
dport); - read_lock(&__ip_vs_rs_lock); + read_lock(&ipvs->rs_lock); list_for_each_entry(dest, &ipvs->rs_table[hash], d_list) { if ((dest->af == af) && ip_vs_addr_equal(af, &dest->addr, daddr) @@ -610,11 +575,11 @@ ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol, && ((dest->protocol == protocol) || dest->vfwmark)) { /* HIT */ - read_unlock(&__ip_vs_rs_lock); + read_unlock(&ipvs->rs_lock); return dest; } } - read_unlock(&__ip_vs_rs_lock); + read_unlock(&ipvs->rs_lock); return NULL; } @@ -788,9 +753,9 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, * Put the real service in rs_table if not present. * For now only for NAT! */ - write_lock_bh(&__ip_vs_rs_lock); + write_lock_bh(&ipvs->rs_lock); ip_vs_rs_hash(ipvs, dest); - write_unlock_bh(&__ip_vs_rs_lock); + write_unlock_bh(&ipvs->rs_lock); } atomic_set(&dest->conn_flags, conn_flags); @@ -1022,14 +987,16 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) */ static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest) { + struct netns_ipvs *ipvs = net_ipvs(net); + ip_vs_kill_estimator(net, &dest->stats); /* * Remove it from the d-linked list with the real services. 
*/ - write_lock_bh(&__ip_vs_rs_lock); + write_lock_bh(&ipvs->rs_lock); ip_vs_rs_unhash(dest); - write_unlock_bh(&__ip_vs_rs_lock); + write_unlock_bh(&ipvs->rs_lock); /* * Decrease the refcnt of the dest, and free the dest @@ -1092,7 +1059,6 @@ static int ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) { struct ip_vs_dest *dest; - struct net *net = svc->net; __be16 dport = udest->port; EnterFunction(2); @@ -1121,7 +1087,7 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) /* * Delete the destination */ - __ip_vs_del_dest(net, dest); + __ip_vs_del_dest(svc->net, dest); LeaveFunction(2); @@ -1140,6 +1106,7 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, struct ip_vs_scheduler *sched = NULL; struct ip_vs_pe *pe = NULL; struct ip_vs_service *svc = NULL; + struct netns_ipvs *ipvs = net_ipvs(net); /* increase the module use count */ ip_vs_use_count_inc(); @@ -1219,7 +1186,7 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, /* Count only IPv4 services for old get/setsockopt interface */ if (svc->af == AF_INET) - ip_vs_num_services++; + ipvs->num_services++; /* Hash the service into the service table */ write_lock_bh(&__ip_vs_svc_lock); @@ -1359,12 +1326,13 @@ static void __ip_vs_del_service(struct ip_vs_service *svc) struct ip_vs_dest *dest, *nxt; struct ip_vs_scheduler *old_sched; struct ip_vs_pe *old_pe; + struct netns_ipvs *ipvs = net_ipvs(svc->net); pr_info("%s: enter\n", __func__); /* Count only IPv4 services for old get/setsockopt interface */ if (svc->af == AF_INET) - ip_vs_num_services--; + ipvs->num_services--; ip_vs_kill_estimator(svc->net, &svc->stats); @@ -1589,42 +1557,31 @@ proc_do_sync_mode(ctl_table *table, int write, /* * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/) + * Do not change order or insert new entries without + * align with netns init in __ip_vs_control_init() */ static struct ctl_table vs_vars[] = { { .procname = "amemthresh", - 
.data = &sysctl_ip_vs_amemthresh, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, -#ifdef CONFIG_IP_VS_DEBUG - { - .procname = "debug_level", - .data = &sysctl_ip_vs_debug_level, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, -#endif { .procname = "am_droprate", - .data = &sysctl_ip_vs_am_droprate, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "drop_entry", - .data = &sysctl_ip_vs_drop_entry, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_do_defense_mode, }, { .procname = "drop_packet", - .data = &sysctl_ip_vs_drop_packet, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_do_defense_mode, @@ -1632,7 +1589,6 @@ static struct ctl_table vs_vars[] = { #ifdef CONFIG_IP_VS_NFCT { .procname = "conntrack", - .data = &sysctl_ip_vs_conntrack, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec, @@ -1640,25 +1596,62 @@ static struct ctl_table vs_vars[] = { #endif { .procname = "secure_tcp", - .data = &sysctl_ip_vs_secure_tcp, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_do_defense_mode, }, { .procname = "snat_reroute", - .data = &sysctl_ip_vs_snat_reroute, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec, }, { .procname = "sync_version", - .data = &sysctl_ip_vs_sync_ver, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_do_sync_mode, }, + { + .procname = "cache_bypass", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "expire_nodest_conn", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "expire_quiescent_template", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "sync_threshold", + .maxlen = + sizeof(((struct netns_ipvs *)0)->sysctl_sync_threshold), + .mode = 0644, + .proc_handler = proc_do_sync_threshold, + }, + { + .procname = "nat_icmp_send", + .maxlen = 
sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, +#ifdef CONFIG_IP_VS_DEBUG + { + .procname = "debug_level", + .data = &sysctl_ip_vs_debug_level, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, +#endif #if 0 { .procname = "timeout_established", @@ -1745,41 +1738,6 @@ static struct ctl_table vs_vars[] = { .proc_handler = proc_dointvec_jiffies, }, #endif - { - .procname = "cache_bypass", - .data = &sysctl_ip_vs_cache_bypass, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "expire_nodest_conn", - .data = &sysctl_ip_vs_expire_nodest_conn, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "expire_quiescent_template", - .data = &sysctl_ip_vs_expire_quiescent_template, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "sync_threshold", - .data = &sysctl_ip_vs_sync_threshold, - .maxlen = sizeof(sysctl_ip_vs_sync_threshold), - .mode = 0644, - .proc_handler = proc_do_sync_threshold, - }, - { - .procname = "nat_icmp_send", - .data = &sysctl_ip_vs_nat_icmp_send, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, { } }; @@ -1791,8 +1749,6 @@ const struct ctl_path net_vs_ctl_path[] = { }; EXPORT_SYMBOL_GPL(net_vs_ctl_path); -static struct ctl_table_header * sysctl_header; - #ifdef CONFIG_PROC_FS struct ip_vs_iter { @@ -2543,7 +2499,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) struct ip_vs_getinfo info; info.version = IP_VS_VERSION_CODE; info.size = ip_vs_conn_tab_size; - info.num_services = ip_vs_num_services; + info.num_services = ipvs->num_services; if (copy_to_user(user, &info, sizeof(info)) != 0) ret = -EFAULT; } @@ -3014,7 +2970,7 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb, struct ip_vs_service *svc; struct ip_vs_dest *dest; struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1]; - struct net *net; + struct net *net = 
skb_sknet(skb); mutex_lock(&__ip_vs_mutex); @@ -3023,7 +2979,7 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb, IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy)) goto out_err; - net = skb_sknet(skb); + svc = ip_vs_genl_find_service(net, attrs[IPVS_CMD_ATTR_SERVICE]); if (IS_ERR(svc) || svc == NULL) goto out_err; @@ -3215,8 +3171,10 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) int ret = 0, cmd; int need_full_svc = 0, need_full_dest = 0; struct net *net; + struct netns_ipvs *ipvs; net = skb_sknet(skb); + ipvs = net_ipvs(net); cmd = info->genlhdr->cmd; mutex_lock(&__ip_vs_mutex); @@ -3326,8 +3284,10 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info) void *reply; int ret, cmd, reply_cmd; struct net *net; + struct netns_ipvs *ipvs; net = skb_sknet(skb); + ipvs = net_ipvs(net); cmd = info->genlhdr->cmd; if (cmd == IPVS_CMD_GET_SERVICE) @@ -3530,9 +3490,21 @@ int __net_init __ip_vs_control_init(struct net *net) { int idx; struct netns_ipvs *ipvs = net_ipvs(net); + struct ctl_table *tbl; if (!net_eq(net, &init_net)) /* netns not enabled yet */ return -EPERM; + + atomic_set(&ipvs->dropentry, 0); + spin_lock_init(&ipvs->dropentry_lock); + spin_lock_init(&ipvs->droppacket_lock); + spin_lock_init(&ipvs->securetcp_lock); + ipvs->rs_lock = __RW_LOCK_UNLOCKED(ipvs->rs_lock); + + /* Initialize rs_table */ + for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++) + INIT_LIST_HEAD(&ipvs->rs_table[idx]); + /* procfs stats */ ipvs->tot_stats = kzalloc(sizeof(struct ip_vs_stats), GFP_KERNEL); if (ipvs->tot_stats == NULL) { @@ -3553,14 +3525,51 @@ int __net_init __ip_vs_control_init(struct net *net) proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops); proc_net_fops_create(net, "ip_vs_stats_percpu", 0, &ip_vs_stats_percpu_fops); - sysctl_header = register_net_sysctl_table(net, net_vs_ctl_path, + + if (!net_eq(net, &init_net)) { + tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL); + if (tbl == NULL) + goto err_dup; + } else + tbl = 
vs_vars; + /* Initialize sysctl defaults */ + idx = 0; + ipvs->sysctl_amemthresh = 1024; + tbl[idx++].data = &ipvs->sysctl_amemthresh; + ipvs->sysctl_am_droprate = 10; + tbl[idx++].data = &ipvs->sysctl_am_droprate; + tbl[idx++].data = &ipvs->sysctl_drop_entry; + tbl[idx++].data = &ipvs->sysctl_drop_packet; +#ifdef CONFIG_IP_VS_NFCT + tbl[idx++].data = &ipvs->sysctl_conntrack; +#endif + tbl[idx++].data = &ipvs->sysctl_secure_tcp; + ipvs->sysctl_snat_reroute = 1; + tbl[idx++].data = &ipvs->sysctl_snat_reroute; + ipvs->sysctl_sync_ver = 1; + tbl[idx++].data = &ipvs->sysctl_sync_ver; + tbl[idx++].data = &ipvs->sysctl_cache_bypass; + tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn; + tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template; + ipvs->sysctl_sync_threshold[0] = 3; + ipvs->sysctl_sync_threshold[1] = 50; + tbl[idx].data = &ipvs->sysctl_sync_threshold; + tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold); + tbl[idx++].data = &ipvs->sysctl_nat_icmp_send; + + + ipvs->sysctl_hdr = register_net_sysctl_table(net, net_vs_ctl_path, vs_vars); - if (sysctl_header == NULL) + if (ipvs->sysctl_hdr == NULL) goto err_reg; ip_vs_new_estimator(net, ipvs->tot_stats); + ipvs->sysctl_tbl = tbl; return 0; err_reg: + if (!net_eq(net, &init_net)) + kfree(tbl); +err_dup: free_percpu(ipvs->cpustats); err_alloc: kfree(ipvs->tot_stats); @@ -3575,7 +3584,7 @@ static void __net_exit __ip_vs_control_cleanup(struct net *net) return; ip_vs_kill_estimator(net, ipvs->tot_stats); - unregister_net_sysctl_table(sysctl_header); + unregister_net_sysctl_table(ipvs->sysctl_hdr); proc_net_remove(net, "ip_vs_stats_percpu"); proc_net_remove(net, "ip_vs_stats"); proc_net_remove(net, "ip_vs"); diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index 550365a690c7..fb2d04ac5d4e 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -34,7 +34,7 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct 
ip_vs_proto_data *pd, &iph.daddr, sh->dest))) { int ignored; - if (ip_vs_todrop()) { + if (ip_vs_todrop(net_ipvs(net))) { /* * It seems that we are very loaded. * We have to drop this packet :( diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index d8b3f9f15826..c0cc341b840d 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c @@ -54,7 +54,7 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, &iph.daddr, th->dest))) { int ignored; - if (ip_vs_todrop()) { + if (ip_vs_todrop(net_ipvs(net))) { /* * It seems that we are very loaded. * We have to drop this packet :( diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c index 581157bbded5..f1282cbe6fe3 100644 --- a/net/netfilter/ipvs/ip_vs_proto_udp.c +++ b/net/netfilter/ipvs/ip_vs_proto_udp.c @@ -50,7 +50,7 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, if (svc) { int ignored; - if (ip_vs_todrop()) { + if (ip_vs_todrop(net_ipvs(net))) { /* * It seems that we are very loaded. 
* We have to drop this packet :( diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index f85e47daecc3..b1780562c42b 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -394,7 +394,7 @@ void ip_vs_sync_switch_mode(struct net *net, int mode) if (!ipvs->sync_state & IP_VS_STATE_MASTER) return; - if (mode == sysctl_ip_vs_sync_ver || !ipvs->sync_buff) + if (mode == ipvs->sysctl_sync_ver || !ipvs->sync_buff) return; spin_lock_bh(&ipvs->sync_buff_lock); @@ -521,7 +521,7 @@ void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp) unsigned int len, pe_name_len, pad; /* Handle old version of the protocol */ - if (sysctl_ip_vs_sync_ver == 0) { + if (ipvs->sysctl_sync_ver == 0) { ip_vs_sync_conn_v0(net, cp); return; } @@ -650,7 +650,7 @@ control: if (cp->flags & IP_VS_CONN_F_TEMPLATE) { int pkts = atomic_add_return(1, &cp->in_pkts); - if (pkts % sysctl_ip_vs_sync_threshold[1] != 1) + if (pkts % ipvs->sysctl_sync_threshold[1] != 1) return; } goto sloop; @@ -724,6 +724,7 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param, { struct ip_vs_dest *dest; struct ip_vs_conn *cp; + struct netns_ipvs *ipvs = net_ipvs(net); if (!(flags & IP_VS_CONN_F_TEMPLATE)) cp = ip_vs_conn_in_get(param); @@ -794,7 +795,7 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param, if (opt) memcpy(&cp->in_seq, opt, sizeof(*opt)); - atomic_set(&cp->in_pkts, sysctl_ip_vs_sync_threshold[0]); + atomic_set(&cp->in_pkts, ipvs->sysctl_sync_threshold[0]); cp->state = state; cp->old_state = cp->state; /* -- cgit v1.2.3-59-g8ed1b From f6340ee0c6b9498ec918a7bb2f44e20abb8b2833 Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Mon, 3 Jan 2011 14:44:59 +0100 Subject: IPVS: netns, defense work timer. This patch makes defense work timer per name-space, A net ptr had to be added to the ipvs struct, since it's needed by defense_work_handler. 
[ horms@verge.net.au: Use cancel_delayed_work_sync() instead of cancel_rearming_delayed_work(). Found during merge conflict resolution ] Signed-off-by: Hans Schillstrom Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- include/net/ip_vs.h | 2 +- include/net/netns/ip_vs.h | 3 +++ net/netfilter/ipvs/ip_vs_conn.c | 5 +++-- net/netfilter/ipvs/ip_vs_core.c | 1 + net/netfilter/ipvs/ip_vs_ctl.c | 20 +++++++++----------- 5 files changed, 17 insertions(+), 14 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index af9acf44e40a..fbe660f95873 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -877,7 +877,7 @@ extern const char * ip_vs_state_name(__u16 proto, int state); extern void ip_vs_tcp_conn_listen(struct net *net, struct ip_vs_conn *cp); extern int ip_vs_check_template(struct ip_vs_conn *ct); -extern void ip_vs_random_dropentry(void); +extern void ip_vs_random_dropentry(struct net *net); extern int ip_vs_conn_init(void); extern void ip_vs_conn_cleanup(void); diff --git a/include/net/netns/ip_vs.h b/include/net/netns/ip_vs.h index c4b1abf258e4..41332619142c 100644 --- a/include/net/netns/ip_vs.h +++ b/include/net/netns/ip_vs.h @@ -71,6 +71,7 @@ struct netns_ipvs { int num_services; /* no of virtual services */ /* 1/rate drop and drop-entry variables */ + struct delayed_work defense_work; /* Work handler */ int drop_rate; int drop_counter; atomic_t dropentry; @@ -129,6 +130,8 @@ struct netns_ipvs { /* multicast interface name */ char master_mcast_ifn[IP_VS_IFNAME_MAXLEN]; char backup_mcast_ifn[IP_VS_IFNAME_MAXLEN]; + /* net name space ptr */ + struct net *net; /* Needed by timer routines */ }; #endif /* IP_VS_H_ */ diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 5ba205a4d79c..28bdaf7c02f4 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -1138,7 +1138,7 @@ static inline int todrop_entry(struct ip_vs_conn *cp) } /* Called from keventd
and must protect itself from softirqs */ -void ip_vs_random_dropentry(void) +void ip_vs_random_dropentry(struct net *net) { int idx; struct ip_vs_conn *cp; @@ -1158,7 +1158,8 @@ void ip_vs_random_dropentry(void) if (cp->flags & IP_VS_CONN_F_TEMPLATE) /* connection template */ continue; - + if (!ip_vs_conn_net_eq(cp, net)) + continue; if (cp->protocol == IPPROTO_TCP) { switch(cp->state) { case IP_VS_TCP_S_SYN_RECV: diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index a7c59a722af3..bdda346a4f30 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1884,6 +1884,7 @@ static int __net_init __ip_vs_init(struct net *net) pr_err("%s(): no memory.\n", __func__); return -ENOMEM; } + ipvs->net = net; /* Counters used for creating unique names */ ipvs->gen = atomic_read(&ipvs_netns_cnt); atomic_inc(&ipvs_netns_cnt); diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 183ac18bded5..6a963d44df48 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -217,18 +217,16 @@ static void update_defense_level(struct netns_ipvs *ipvs) * Timer for checking the defense */ #define DEFENSE_TIMER_PERIOD 1*HZ -static void defense_work_handler(struct work_struct *work); -static DECLARE_DELAYED_WORK(defense_work, defense_work_handler); static void defense_work_handler(struct work_struct *work) { - struct netns_ipvs *ipvs = net_ipvs(&init_net); + struct netns_ipvs *ipvs = + container_of(work, struct netns_ipvs, defense_work.work); update_defense_level(ipvs); if (atomic_read(&ipvs->dropentry)) - ip_vs_random_dropentry(); - - schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD); + ip_vs_random_dropentry(ipvs->net); + schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD); } int @@ -3564,6 +3562,9 @@ int __net_init __ip_vs_control_init(struct net *net) goto err_reg; ip_vs_new_estimator(net, ipvs->tot_stats); ipvs->sysctl_tbl = tbl; + /* Schedule defense work */ 
+ INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler); + schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD); return 0; err_reg: @@ -3588,6 +3589,8 @@ static void __net_exit __ip_vs_control_cleanup(struct net *net) proc_net_remove(net, "ip_vs_stats_percpu"); proc_net_remove(net, "ip_vs_stats"); proc_net_remove(net, "ip_vs"); + cancel_delayed_work_sync(&ipvs->defense_work); + cancel_work_sync(&ipvs->defense_work.work); free_percpu(ipvs->cpustats); kfree(ipvs->tot_stats); } @@ -3631,9 +3634,6 @@ int __init ip_vs_control_init(void) goto err_net; } - /* Hook the defense timer */ - schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD); - LeaveFunction(2); return 0; @@ -3648,8 +3648,6 @@ void ip_vs_control_cleanup(void) { EnterFunction(2); ip_vs_trash_cleanup(); - cancel_delayed_work_sync(&defense_work); - cancel_work_sync(&defense_work.work); unregister_pernet_subsys(&ipvs_control_ops); ip_vs_genl_unregister(); nf_unregister_sockopt(&ip_vs_sockopts); -- cgit v1.2.3-59-g8ed1b From f2431e6e9255461eb1476340a89ad32ad4b38b03 Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Mon, 3 Jan 2011 14:45:00 +0100 Subject: IPVS: netns, trash handling trash list per namespace, and reordering of some params in dst struct. [ horms@verge.net.au: Use cancel_delayed_work_sync() instead of cancel_rearming_delayed_work().
Found during merge conflict resolution ] Signed-off-by: Hans Schillstrom Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- include/net/ip_vs.h | 4 ++-- include/net/netns/ip_vs.h | 3 +++ net/netfilter/ipvs/ip_vs_ctl.c | 23 +++++++++++------------ 3 files changed, 16 insertions(+), 14 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index fbe660f95873..b23bea62f708 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -662,8 +662,8 @@ struct ip_vs_dest { struct list_head d_list; /* for table with all the dests */ u16 af; /* address family */ - union nf_inet_addr addr; /* IP address of the server */ __be16 port; /* port number of the server */ + union nf_inet_addr addr; /* IP address of the server */ volatile unsigned flags; /* dest status flags */ atomic_t conn_flags; /* flags to copy to conn */ atomic_t weight; /* server weight */ @@ -690,8 +690,8 @@ struct ip_vs_dest { /* for virtual service */ struct ip_vs_service *svc; /* service it belongs to */ __u16 protocol; /* which protocol (TCP/UDP) */ - union nf_inet_addr vaddr; /* virtual IP address */ __be16 vport; /* virtual port number */ + union nf_inet_addr vaddr; /* virtual IP address */ __u32 vfwmark; /* firewall mark of service */ }; diff --git a/include/net/netns/ip_vs.h b/include/net/netns/ip_vs.h index 41332619142c..67ca1cf55af8 100644 --- a/include/net/netns/ip_vs.h +++ b/include/net/netns/ip_vs.h @@ -82,6 +82,9 @@ struct netns_ipvs { rwlock_t rs_lock; /* real services table */ /* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep.
*/ struct lock_class_key ctl_key; /* ctl_mutex debuging */ + /* Trash for destinations */ + struct list_head dest_trash; + /* sys-ctl struct */ struct ctl_table_header *sysctl_hdr; struct ctl_table *sysctl_tbl; diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 6a963d44df48..442edf4be644 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -254,11 +254,6 @@ static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE]; /* the service table hashed by fwmark */ static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE]; -/* - * Trash for destinations - */ -static LIST_HEAD(ip_vs_dest_trash); - /* * FTP & NULL virtual service counters */ @@ -650,11 +645,12 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, __be16 dport) { struct ip_vs_dest *dest, *nxt; + struct netns_ipvs *ipvs = net_ipvs(svc->net); /* * Find the destination in trash */ - list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) { + list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) { IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, " "dest->refcnt=%d\n", dest->vfwmark, @@ -703,11 +699,12 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, * are expired, and the refcnt of each destination in the trash must * be 1, so we simply release them here. 
*/ -static void ip_vs_trash_cleanup(void) +static void ip_vs_trash_cleanup(struct net *net) { struct ip_vs_dest *dest, *nxt; + struct netns_ipvs *ipvs = net_ipvs(net); - list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) { + list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) { list_del(&dest->n_list); ip_vs_dst_reset(dest); __ip_vs_unbind_svc(dest); @@ -1021,7 +1018,7 @@ static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest) IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port), atomic_read(&dest->refcnt)); - list_add(&dest->n_list, &ip_vs_dest_trash); + list_add(&dest->n_list, &ipvs->dest_trash); atomic_inc(&dest->refcnt); } } @@ -3503,6 +3500,8 @@ int __net_init __ip_vs_control_init(struct net *net) for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++) INIT_LIST_HEAD(&ipvs->rs_table[idx]); + INIT_LIST_HEAD(&ipvs->dest_trash); + /* procfs stats */ ipvs->tot_stats = kzalloc(sizeof(struct ip_vs_stats), GFP_KERNEL); if (ipvs->tot_stats == NULL) { @@ -3584,13 +3583,14 @@ static void __net_exit __ip_vs_control_cleanup(struct net *net) if (!net_eq(net, &init_net)) /* netns not enabled yet */ return; + ip_vs_trash_cleanup(net); ip_vs_kill_estimator(net, ipvs->tot_stats); + cancel_delayed_work_sync(&ipvs->defense_work); + cancel_work_sync(&ipvs->defense_work.work); unregister_net_sysctl_table(ipvs->sysctl_hdr); proc_net_remove(net, "ip_vs_stats_percpu"); proc_net_remove(net, "ip_vs_stats"); proc_net_remove(net, "ip_vs"); - cancel_delayed_work_sync(&ipvs->defense_work); - cancel_work_sync(&ipvs->defense_work.work); free_percpu(ipvs->cpustats); kfree(ipvs->tot_stats); } @@ -3647,7 +3647,6 @@ err: void ip_vs_control_cleanup(void) { EnterFunction(2); - ip_vs_trash_cleanup(); unregister_pernet_subsys(&ipvs_control_ops); ip_vs_genl_unregister(); nf_unregister_sockopt(&ip_vs_sockopts); -- cgit v1.2.3-59-g8ed1b From 763f8d0ed4f1ce38b35cc0e05482b7799b82789b Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Mon, 3 Jan 2011 14:45:01 +0100 
Subject: IPVS: netns, svc counters moved in ip_vs_ctl.c Last two global vars to be moved, ip_vs_ftpsvc_counter and ip_vs_nullsvc_counter. [horms@verge.net.au: removed whitespace-change-only hunk] Signed-off-by: Hans Schillstrom Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- include/net/netns/ip_vs.h | 3 +++ net/netfilter/ipvs/ip_vs_ctl.c | 21 +++++++++------------ 2 files changed, 12 insertions(+), 12 deletions(-) (limited to 'include/net') diff --git a/include/net/netns/ip_vs.h b/include/net/netns/ip_vs.h index 67ca1cf55af8..259ebac904bf 100644 --- a/include/net/netns/ip_vs.h +++ b/include/net/netns/ip_vs.h @@ -84,6 +84,9 @@ struct netns_ipvs { struct lock_class_key ctl_key; /* ctl_mutex debuging */ /* Trash for destinations */ struct list_head dest_trash; + /* Service counters */ + atomic_t ftpsvc_counter; + atomic_t nullsvc_counter; /* sys-ctl struct */ struct ctl_table_header *sysctl_hdr; diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 442edf4be644..65f5de405ad2 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -254,12 +254,6 @@ static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE]; /* the service table hashed by fwmark */ static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE]; -/* - * FTP & NULL virtual service counters - */ -static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0); -static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0); - /* * Returns hash value for virtual service @@ -409,6 +403,7 @@ ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol, const union nf_inet_addr *vaddr, __be16 vport) { struct ip_vs_service *svc; + struct netns_ipvs *ipvs = net_ipvs(net); read_lock(&__ip_vs_svc_lock); @@ -427,7 +422,7 @@ ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol, if (svc == NULL && protocol == IPPROTO_TCP - && atomic_read(&ip_vs_ftpsvc_counter) + && atomic_read(&ipvs->ftpsvc_counter) && (vport == FTPDATA || ntohs(vport)
>= PROT_SOCK)) { /* * Check if ftp service entry exists, the packet @@ -437,7 +432,7 @@ ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol, } if (svc == NULL - && atomic_read(&ip_vs_nullsvc_counter)) { + && atomic_read(&ipvs->nullsvc_counter)) { /* * Check if the catch-all port (port zero) exists */ @@ -1173,9 +1168,9 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, /* Update the virtual service counters */ if (svc->port == FTPPORT) - atomic_inc(&ip_vs_ftpsvc_counter); + atomic_inc(&ipvs->ftpsvc_counter); else if (svc->port == 0) - atomic_inc(&ip_vs_nullsvc_counter); + atomic_inc(&ipvs->nullsvc_counter); ip_vs_new_estimator(net, &svc->stats); @@ -1359,9 +1354,9 @@ static void __ip_vs_del_service(struct ip_vs_service *svc) * Update the virtual service counters */ if (svc->port == FTPPORT) - atomic_dec(&ip_vs_ftpsvc_counter); + atomic_dec(&ipvs->ftpsvc_counter); else if (svc->port == 0) - atomic_dec(&ip_vs_nullsvc_counter); + atomic_dec(&ipvs->nullsvc_counter); /* * Free the service if nobody refers to it @@ -3501,6 +3496,8 @@ int __net_init __ip_vs_control_init(struct net *net) INIT_LIST_HEAD(&ipvs->rs_table[idx]); INIT_LIST_HEAD(&ipvs->dest_trash); + atomic_set(&ipvs->ftpsvc_counter, 0); + atomic_set(&ipvs->nullsvc_counter, 0); /* procfs stats */ ipvs->tot_stats = kzalloc(sizeof(struct ip_vs_stats), GFP_KERNEL); -- cgit v1.2.3-59-g8ed1b