From fa86d322d89995fef1bfb5cc768b89d8c22ea0d9 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Mon, 24 Mar 2008 14:48:59 -0700 Subject: [NEIGH]: Fix race between pneigh deletion and ipv6's ndisc_recv_ns (v3). Proxy neighbors do not have any reference counting, so any caller of pneigh_lookup (unless it's a netlink triggered add/del routine) should _not_ perform any actions on the found proxy entry. There's one exception from this rule - the ipv6's ndisc_recv_ns() uses found entry to check the flags for NTF_ROUTER. This creates a race between the ndisc and pneigh_delete - after the pneigh is returned to the caller, the nd_tbl.lock is dropped and the deleting procedure may proceed. One of the fixes would be to add a reference counting, but this problem exists for ndisc only. Besides such a patch would be too big for -rc4. So I propose to introduce a __pneigh_lookup() which is supposed to be called with the lock held and use it in ndisc code to check the flags on alive pneigh entry. Changes from v2: As David noticed, Exported the __pneigh_lookup() to ipv6 module. The checkpatch generates a warning on it, since the EXPORT_SYMBOL does not follow the symbol itself, but in this file all the exports come at the end, so I decided no to break this harmony. Changes from v1: Fixed comments from YOSHIFUJI - indentation of prototype in header and the pndisc_check_router() name - and a compilation fix, pointed by Daniel - the is_routed was (falsely) considered as uninitialized by gcc. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- net/core/neighbour.c | 23 +++++++++++++++++++++++ net/ipv6/ndisc.c | 22 ++++++++++++++++++---- 2 files changed, 41 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/core/neighbour.c b/net/core/neighbour.c index d9a02b2cc289..19b8e003f150 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -466,6 +466,28 @@ out_neigh_release: goto out; } +struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl, + struct net *net, const void *pkey, struct net_device *dev) +{ + struct pneigh_entry *n; + int key_len = tbl->key_len; + u32 hash_val = *(u32 *)(pkey + key_len - 4); + + hash_val ^= (hash_val >> 16); + hash_val ^= hash_val >> 8; + hash_val ^= hash_val >> 4; + hash_val &= PNEIGH_HASHMASK; + + for (n = tbl->phash_buckets[hash_val]; n; n = n->next) { + if (!memcmp(n->key, pkey, key_len) && + (n->net == net) && + (n->dev == dev || !n->dev)) + break; + } + + return n; +} + struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, struct net *net, const void *pkey, struct net_device *dev, int creat) @@ -2803,6 +2825,7 @@ EXPORT_SYMBOL(neigh_table_init_no_netlink); EXPORT_SYMBOL(neigh_update); EXPORT_SYMBOL(pneigh_enqueue); EXPORT_SYMBOL(pneigh_lookup); +EXPORT_SYMBOL_GPL(__pneigh_lookup); #ifdef CONFIG_ARPD EXPORT_SYMBOL(neigh_app_ns); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 51557c27a0cd..452a2ac4eec8 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -676,6 +676,20 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb) } } +static struct pneigh_entry *pndisc_check_router(struct net_device *dev, + struct in6_addr *addr, int *is_router) +{ + struct pneigh_entry *n; + + read_lock_bh(&nd_tbl.lock); + n = __pneigh_lookup(&nd_tbl, &init_net, addr, dev); + if (n != NULL) + *is_router = (n->flags & NTF_ROUTER); + read_unlock_bh(&nd_tbl.lock); + + return n; +} + static void ndisc_recv_ns(struct sk_buff *skb) { struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb); @@ -692,7 +706,7 @@ static void ndisc_recv_ns(struct sk_buff *skb) struct pneigh_entry *pneigh = NULL; int dad = ipv6_addr_any(saddr); int inc; - int is_router; + int is_router = 0; if (ipv6_addr_is_multicast(&msg->target)) { ND_PRINTK2(KERN_WARNING @@ -790,8 +804,8 @@ static void ndisc_recv_ns(struct sk_buff *skb) if (ipv6_chk_acast_addr(dev, &msg->target) || (idev->cnf.forwarding && (ipv6_devconf.proxy_ndp || idev->cnf.proxy_ndp) && - (pneigh = pneigh_lookup(&nd_tbl, &init_net, - &msg->target, dev, 0)) != NULL)) { + (pneigh = pndisc_check_router(dev, &msg->target, + &is_router)) != NULL)) { if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) && skb->pkt_type != PACKET_HOST && inc != 0 && @@ -812,7 +826,7 @@ static void ndisc_recv_ns(struct sk_buff *skb) goto out; } - is_router = !!(pneigh ? pneigh->flags & NTF_ROUTER : idev->cnf.forwarding); + is_router = !!(pneigh ? is_router : idev->cnf.forwarding); if (dad) { struct in6_addr maddr; -- cgit v1.2.3-59-g8ed1b From df9dcb4588aca9cc243cf1f3f454361a84e1cbdb Mon Sep 17 00:00:00 2001 From: Kazunori MIYAZAWA Date: Mon, 24 Mar 2008 14:51:51 -0700 Subject: [IPSEC]: Fix inter address family IPsec tunnel handling. Signed-off-by: Kazunori MIYAZAWA Signed-off-by: David S. Miller --- include/net/xfrm.h | 23 +++++++++++++++++++ net/ipv4/xfrm4_mode_tunnel.c | 2 +- net/ipv4/xfrm4_output.c | 2 +- net/ipv6/xfrm6_mode_tunnel.c | 2 +- net/ipv6/xfrm6_output.c | 2 +- net/key/af_key.c | 2 +- net/xfrm/xfrm_input.c | 22 +++++++++++++++--- net/xfrm/xfrm_output.c | 18 ++++++++++++++- net/xfrm/xfrm_state.c | 54 ++++++++++++++++++++++++++++++++++++++------ net/xfrm/xfrm_user.c | 7 ++---- 10 files changed, 113 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 619c53bc3cd2..4e6f9568cbe7 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -204,6 +204,7 @@ struct xfrm_state * transformer. */ const struct xfrm_type *type; struct xfrm_mode *inner_mode; + struct xfrm_mode *inner_mode_iaf; struct xfrm_mode *outer_mode; /* Security context */ @@ -387,6 +388,27 @@ enum { extern int xfrm_register_mode(struct xfrm_mode *mode, int family); extern int xfrm_unregister_mode(struct xfrm_mode *mode, int family); +static inline int xfrm_af2proto(unsigned int family) +{ + switch(family) { + case AF_INET: + return IPPROTO_IPIP; + case AF_INET6: + return IPPROTO_IPV6; + default: + return 0; + } +} + +static inline struct xfrm_mode *xfrm_ip2inner_mode(struct xfrm_state *x, int ipproto) +{ + if ((ipproto == IPPROTO_IPIP && x->props.family == AF_INET) || + (ipproto == IPPROTO_IPV6 && x->props.family == AF_INET6)) + return x->inner_mode; + else + return x->inner_mode_iaf; +} + struct xfrm_tmpl { /* id in template is interpreted as: @@ -1253,6 +1275,7 @@ extern int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, extern int xfrm_input_resume(struct sk_buff *skb, int nexthdr); extern int xfrm_output_resume(struct sk_buff *skb, int err); extern int xfrm_output(struct sk_buff *skb); +extern int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb); extern int xfrm4_extract_header(struct sk_buff *skb); extern int xfrm4_extract_input(struct xfrm_state *x, struct sk_buff *skb); extern int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi, diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c index 8dee617ee900..584e6d74e3a9 100644 --- a/net/ipv4/xfrm4_mode_tunnel.c +++ b/net/ipv4/xfrm4_mode_tunnel.c @@ -41,7 +41,7 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) top_iph->ihl = 5; top_iph->version = 4; - top_iph->protocol = x->inner_mode->afinfo->proto; + top_iph->protocol = xfrm_af2proto(skb->dst->ops->family); /* DS disclosed */ top_iph->tos = INET_ECN_encapsulate(XFRM_MODE_SKB_CB(skb)->tos, diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index d5a58a818021..8c3180adddbf 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -56,7 +56,7 @@ int xfrm4_prepare_output(struct xfrm_state *x, struct sk_buff *skb) { int err; - err = x->inner_mode->afinfo->extract_output(x, skb); + err = xfrm_inner_extract_output(x, skb); if (err) return err; diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c index 0c742faaa30b..e20529b4c825 100644 --- a/net/ipv6/xfrm6_mode_tunnel.c +++ b/net/ipv6/xfrm6_mode_tunnel.c @@ -45,7 +45,7 @@ static int xfrm6_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) memcpy(top_iph->flow_lbl, XFRM_MODE_SKB_CB(skb)->flow_lbl, sizeof(top_iph->flow_lbl)); - top_iph->nexthdr = x->inner_mode->afinfo->proto; + top_iph->nexthdr = xfrm_af2proto(skb->dst->ops->family); dsfield = XFRM_MODE_SKB_CB(skb)->tos; dsfield = INET_ECN_encapsulate(dsfield, dsfield); diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 79ccfb080733..0af823cf7f1f 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -62,7 +62,7 @@ int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb) { int err; - err = x->inner_mode->afinfo->extract_output(x, skb); + err = xfrm_inner_extract_output(x, skb); if (err) return err; diff --git a/net/key/af_key.c b/net/key/af_key.c index 8b5f486ac80f..e9ef9af4a53b 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -1219,7 +1219,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct sadb_msg *hdr, x->sel.prefixlen_s = addr->sadb_address_prefixlen; } - if (!x->sel.family) + if (x->props.mode == XFRM_MODE_TRANSPORT) x->sel.family = x->props.family; if (ext_hdrs[SADB_X_EXT_NAT_T_TYPE-1]) { diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 62188c6a06dd..75279402ccf4 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -84,14 +84,21 @@ int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32 *seq) int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb) { + struct xfrm_mode *inner_mode = x->inner_mode; int err; err = x->outer_mode->afinfo->extract_input(x, skb); if (err) return err; - skb->protocol = x->inner_mode->afinfo->eth_proto; - return x->inner_mode->input2(x, skb); + if (x->sel.family == AF_UNSPEC) { + inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol); + if (inner_mode == NULL) + return -EAFNOSUPPORT; + } + + skb->protocol = inner_mode->afinfo->eth_proto; + return inner_mode->input2(x, skb); } EXPORT_SYMBOL(xfrm_prepare_input); @@ -101,6 +108,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) __be32 seq; struct xfrm_state *x; xfrm_address_t *daddr; + struct xfrm_mode *inner_mode; unsigned int family; int decaps = 0; int async = 0; @@ -207,7 +215,15 @@ resume: XFRM_MODE_SKB_CB(skb)->protocol = nexthdr; - if (x->inner_mode->input(x, skb)) { + inner_mode = x->inner_mode; + + if (x->sel.family == AF_UNSPEC) { + inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol); + if (inner_mode == NULL) + goto drop; + } + + if (inner_mode->input(x, skb)) { XFRM_INC_STATS(LINUX_MIB_XFRMINSTATEMODEERROR); goto drop; } diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 569d377932c4..2519129c6d21 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -124,7 +124,7 @@ int xfrm_output_resume(struct sk_buff *skb, int err) if (!x) return dst_output(skb); - err = nf_hook(x->inner_mode->afinfo->family, + err = nf_hook(skb->dst->ops->family, NF_INET_POST_ROUTING, skb, NULL, skb->dst->dev, xfrm_output2); if (unlikely(err != 1)) @@ -193,4 +193,20 @@ int xfrm_output(struct sk_buff *skb) return xfrm_output2(skb); } + +int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb) +{ + struct xfrm_mode *inner_mode; + if (x->sel.family == AF_UNSPEC) + inner_mode = xfrm_ip2inner_mode(x, + xfrm_af2proto(skb->dst->ops->family)); + else + inner_mode = x->inner_mode; + + if (inner_mode == NULL) + return -EAFNOSUPPORT; + return inner_mode->afinfo->extract_output(x, skb); +} + EXPORT_SYMBOL_GPL(xfrm_output); +EXPORT_SYMBOL_GPL(xfrm_inner_extract_output); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 7ba65e82941c..58f1f9347b54 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -388,6 +388,8 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x) kfree(x->coaddr); if (x->inner_mode) xfrm_put_mode(x->inner_mode); + if (x->inner_mode_iaf) + xfrm_put_mode(x->inner_mode_iaf); if (x->outer_mode) xfrm_put_mode(x->outer_mode); if (x->type) { @@ -523,6 +525,8 @@ struct xfrm_state *xfrm_state_alloc(void) x->lft.hard_packet_limit = XFRM_INF; x->replay_maxage = 0; x->replay_maxdiff = 0; + x->inner_mode = NULL; + x->inner_mode_iaf = NULL; spin_lock_init(&x->lock); } return x; @@ -796,7 +800,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, selector. */ if (x->km.state == XFRM_STATE_VALID) { - if (!xfrm_selector_match(&x->sel, fl, x->sel.family) || + if ((x->sel.family && !xfrm_selector_match(&x->sel, fl, x->sel.family)) || !security_xfrm_state_pol_flow_match(x, pol, fl)) continue; if (!best || @@ -1944,6 +1948,7 @@ int xfrm_state_mtu(struct xfrm_state *x, int mtu) int xfrm_init_state(struct xfrm_state *x) { struct xfrm_state_afinfo *afinfo; + struct xfrm_mode *inner_mode; int family = x->props.family; int err; @@ -1962,13 +1967,48 @@ int xfrm_init_state(struct xfrm_state *x) goto error; err = -EPROTONOSUPPORT; - x->inner_mode = xfrm_get_mode(x->props.mode, x->sel.family); - if (x->inner_mode == NULL) - goto error; - if (!(x->inner_mode->flags & XFRM_MODE_FLAG_TUNNEL) && - family != x->sel.family) - goto error; + if (x->sel.family != AF_UNSPEC) { + inner_mode = xfrm_get_mode(x->props.mode, x->sel.family); + if (inner_mode == NULL) + goto error; + + if (!(inner_mode->flags & XFRM_MODE_FLAG_TUNNEL) && + family != x->sel.family) { + xfrm_put_mode(inner_mode); + goto error; + } + + x->inner_mode = inner_mode; + } else { + struct xfrm_mode *inner_mode_iaf; + + inner_mode = xfrm_get_mode(x->props.mode, AF_INET); + if (inner_mode == NULL) + goto error; + + if (!(inner_mode->flags & XFRM_MODE_FLAG_TUNNEL)) { + xfrm_put_mode(inner_mode); + goto error; + } + + inner_mode_iaf = xfrm_get_mode(x->props.mode, AF_INET6); + if (inner_mode_iaf == NULL) + goto error; + + if (!(inner_mode_iaf->flags & XFRM_MODE_FLAG_TUNNEL)) { + xfrm_put_mode(inner_mode_iaf); + goto error; + } + + if (x->props.family == AF_INET) { + x->inner_mode = inner_mode; + x->inner_mode_iaf = inner_mode_iaf; + } else { + x->inner_mode = inner_mode_iaf; + x->inner_mode_iaf = inner_mode; + } + } x->type = xfrm_get_type(x->id.proto, family); if (x->type == NULL) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index f971ca5645f8..5d96f2728dc6 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -288,12 +288,9 @@ static void copy_from_user_state(struct xfrm_state *x, struct xfrm_usersa_info * memcpy(&x->props.saddr, &p->saddr, sizeof(x->props.saddr)); x->props.flags = p->flags; - /* - * Set inner address family if the KM left it as zero. - * See comment in validate_tmpl. - */ - if (!x->sel.family) + if (x->props.mode == XFRM_MODE_TRANSPORT) x->sel.family = p->family; + } /* -- cgit v1.2.3-59-g8ed1b From 0ed21b321a13421e2dfeaa70a6c324e05e3e91e6 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 26 Mar 2008 00:15:17 -0700 Subject: [VLAN]: Don't copy ALLMULTI/PROMISC flags from underlying device Changing these flags requires to use dev_set_allmulti/dev_set_promiscuity or dev_change_flags. Setting it directly causes two unwanted effects: - the next dev_change_flags call will notice a difference between dev->gflags and the actual flags, enable promisc/allmulti mode and incorrectly update dev->gflags - this keeps the underlying device in promisc/allmulti mode until the VLAN device is deleted Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/8021q/vlan_dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 8fbcefe10c9f..480ea90e7dcd 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -660,7 +660,7 @@ static int vlan_dev_init(struct net_device *dev) int subclass = 0; /* IFF_BROADCAST|IFF_MULTICAST; ??? */ - dev->flags = real_dev->flags & ~IFF_UP; + dev->flags = real_dev->flags & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI); dev->iflink = real_dev->ifindex; dev->state = (real_dev->state & ((1<<__LINK_STATE_NOCARRIER) | (1<<__LINK_STATE_DORMANT))) | -- cgit v1.2.3-59-g8ed1b From 8c7230f781749cd7261b504c0bfa188bb96e77ee Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 26 Mar 2008 00:55:50 -0700 Subject: [IRDA]: Store irnet_socket termios properly. It should be a "struct ktermios" not a "struct termios". Based upon a build warning reported by Stephen Rothwell. Signed-off-by: David S. Miller --- net/irda/irnet/irnet.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/irda/irnet/irnet.h b/net/irda/irnet/irnet.h index bc2e15ce7004..7873c392ab4c 100644 --- a/net/irda/irnet/irnet.h +++ b/net/irda/irnet/irnet.h @@ -405,7 +405,7 @@ typedef struct irnet_socket /* "pppd" interact directly with us on a /dev/ file */ struct file * file; /* File descriptor of this instance */ /* TTY stuff - to keep "pppd" happy */ - struct termios termios; /* Various tty flags */ + struct ktermios termios; /* Various tty flags */ /* Stuff for the control channel */ int event_index; /* Last read in the event log */ -- cgit v1.2.3-59-g8ed1b From 61ee6bd487b9cc160e533034eb338f2085dc7922 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 26 Mar 2008 02:12:11 -0700 Subject: [NET]: Fix multicast device ioctl checks SIOCADDMULTI/SIOCDELMULTI check whether the driver has a set_multicast_list method to determine whether it supports multicast. Drivers implementing secondary unicast support use set_rx_mode however. Check for both dev->set_multicast_mode and dev->set_rx_mode to determine multicast capabilities. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/core/dev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index fcdf03cf3b3f..460e7f99ce3e 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3329,7 +3329,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) return -EOPNOTSUPP; case SIOCADDMULTI: - if (!dev->set_multicast_list || + if ((!dev->set_multicast_list && !dev->set_rx_mode) || ifr->ifr_hwaddr.sa_family != AF_UNSPEC) return -EINVAL; if (!netif_device_present(dev)) @@ -3338,7 +3338,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) dev->addr_len, 1); case SIOCDELMULTI: - if (!dev->set_multicast_list || + if ((!dev->set_multicast_list && !dev->set_rx_mode) || ifr->ifr_hwaddr.sa_family != AF_UNSPEC) return -EINVAL; if (!netif_device_present(dev)) -- cgit v1.2.3-59-g8ed1b From 5c2e2e239ebe44e3fdc5f2ae270d96c4ceee4e9a Mon Sep 17 00:00:00 2001 From: "Robert P. J. Day" Date: Wed, 26 Mar 2008 02:14:38 -0700 Subject: [AX25]: Remove obsolete references to BKL from TODO file. Given that there are no apparent calls to lock_kernel() or unlock_kernel() under net/ax25, delete the TODO reference related to that. Signed-off-by: Robert P. J. Day Signed-off-by: David S. Miller --- net/ax25/TODO | 4 ---- 1 file changed, 4 deletions(-) (limited to 'net') diff --git a/net/ax25/TODO b/net/ax25/TODO index 4089c49e45cc..69fb4e368d92 100644 --- a/net/ax25/TODO +++ b/net/ax25/TODO @@ -9,10 +9,6 @@ being used. Routes to a device being taken down might be deleted by ax25_rt_device_down but added by somebody else before the device has been deleted fully. -Massive amounts of lock_kernel / unlock_kernel are just a temporary solution to -get around the removal of SOCKOPS_WRAP. A serious locking strategy has to be -implemented. - The ax25_rt_find_route synopsys is pervert but I somehow had to deal with the race caused by the static variable in it's previous implementation. -- cgit v1.2.3-59-g8ed1b From 7c0ecc4c4f8fd90988aab8a95297b9c0038b6160 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 26 Mar 2008 02:27:09 -0700 Subject: [ICMP]: Dst entry leak in icmp_send host re-lookup code (v2). Commit 8b7817f3a959ed99d7443afc12f78a7e1fcc2063 ([IPSEC]: Add ICMP host relookup support) introduced some dst leaks on error paths: the rt pointer can be forgotten to be put. Fix it bu going to a proper label. Found after net namespace's lo refused to unregister :) Many thanks to Den for valuable help during debugging. Herbert pointed out, that xfrm_lookup() will put the rtable in case of error itself, so the first goto fix is redundant. Signed-off-by: Pavel Emelyanov Signed-off-by: Denis V. Lunev Signed-off-by: David S. Miller --- net/ipv4/icmp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index a13c074dac09..a944e8053e28 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -591,7 +591,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) } if (xfrm_decode_session_reverse(skb_in, &fl, AF_INET)) - goto out_unlock; + goto ende; if (inet_addr_type(net, fl.fl4_src) == RTN_LOCAL) err = __ip_route_output_key(net, &rt2, &fl); @@ -601,7 +601,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) fl2.fl4_dst = fl.fl4_src; if (ip_route_output_key(net, &rt2, &fl2)) - goto out_unlock; + goto ende; /* Ugh! */ odst = skb_in->dst; @@ -614,7 +614,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) } if (err) - goto out_unlock; + goto ende; err = xfrm_lookup((struct dst_entry **)&rt2, &fl, NULL, XFRM_LOOKUP_ICMP); -- cgit v1.2.3-59-g8ed1b From 732c8bd590625e8bc0b88313b82930e336b2bec4 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 26 Mar 2008 16:51:09 -0700 Subject: [IPSEC]: Fix BEET output The IPv6 BEET output function is incorrectly including the inner header in the payload to be protected. This causes a crash as the packet doesn't actually have that many bytes for a second header. The IPv4 BEET output on the other hand is broken when it comes to handling an inner IPv6 header since it always assumes an inner IPv4 header. This patch fixes both by making sure that neither BEET output function touches the inner header at all. All access is now done through the protocol-independent cb structure. Two new attributes are added to make this work, the IP header length and the IPv4 option length. They're filled in by the inner mode's output function. Thanks to Joakim Koskela for finding this problem. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/net/xfrm.h | 6 ++++++ net/ipv4/xfrm4_mode_beet.c | 11 +++++------ net/ipv4/xfrm4_state.c | 2 ++ net/ipv6/xfrm6_mode_beet.c | 1 + net/ipv6/xfrm6_state.c | 2 ++ 5 files changed, 16 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 4e6f9568cbe7..0d255ae008b6 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -552,6 +552,9 @@ struct xfrm_mode_skb_cb { __be16 id; __be16 frag_off; + /* IP header length (excluding options or extension headers). */ + u8 ihl; + /* TOS for IPv4, class for IPv6. */ u8 tos; @@ -561,6 +564,9 @@ struct xfrm_mode_skb_cb { /* Protocol for IPv4, NH for IPv6. */ u8 protocol; + /* Option length for IPv4, zero for IPv6. */ + u8 optlen; + /* Used by IPv6 only, zero for IPv4. */ u8 flow_lbl[3]; }; diff --git a/net/ipv4/xfrm4_mode_beet.c b/net/ipv4/xfrm4_mode_beet.c index b47030ba162b..9c798abce736 100644 --- a/net/ipv4/xfrm4_mode_beet.c +++ b/net/ipv4/xfrm4_mode_beet.c @@ -39,13 +39,11 @@ static void xfrm4_beet_make_header(struct sk_buff *skb) static int xfrm4_beet_output(struct xfrm_state *x, struct sk_buff *skb) { struct ip_beet_phdr *ph; - struct iphdr *iph, *top_iph; + struct iphdr *top_iph; int hdrlen, optlen; - iph = ip_hdr(skb); - hdrlen = 0; - optlen = iph->ihl * 4 - sizeof(*iph); + optlen = XFRM_MODE_SKB_CB(skb)->optlen; if (unlikely(optlen)) hdrlen += IPV4_BEET_PHMAXLEN - (optlen & 4); @@ -53,11 +51,12 @@ static int xfrm4_beet_output(struct xfrm_state *x, struct sk_buff *skb) hdrlen); skb->mac_header = skb->network_header + offsetof(struct iphdr, protocol); - skb->transport_header = skb->network_header + sizeof(*iph); + skb->transport_header = skb->network_header + sizeof(*top_iph); xfrm4_beet_make_header(skb); - ph = (struct ip_beet_phdr *)__skb_pull(skb, sizeof(*iph) - hdrlen); + ph = (struct ip_beet_phdr *) + __skb_pull(skb, XFRM_MODE_SKB_CB(skb)->ihl - hdrlen); top_iph = ip_hdr(skb); diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index fdeebe68a379..07735ed280d7 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c @@ -52,10 +52,12 @@ int xfrm4_extract_header(struct sk_buff *skb) { struct iphdr *iph = ip_hdr(skb); + XFRM_MODE_SKB_CB(skb)->ihl = sizeof(*iph); XFRM_MODE_SKB_CB(skb)->id = iph->id; XFRM_MODE_SKB_CB(skb)->frag_off = iph->frag_off; XFRM_MODE_SKB_CB(skb)->tos = iph->tos; XFRM_MODE_SKB_CB(skb)->ttl = iph->ttl; + XFRM_MODE_SKB_CB(skb)->optlen = iph->ihl * 4 - sizeof(*iph); memset(XFRM_MODE_SKB_CB(skb)->flow_lbl, 0, sizeof(XFRM_MODE_SKB_CB(skb)->flow_lbl)); diff --git a/net/ipv6/xfrm6_mode_beet.c b/net/ipv6/xfrm6_mode_beet.c index 0527d11c1ae3..d6ce400f585f 100644 --- a/net/ipv6/xfrm6_mode_beet.c +++ b/net/ipv6/xfrm6_mode_beet.c @@ -45,6 +45,7 @@ static int xfrm6_beet_output(struct xfrm_state *x, struct sk_buff *skb) skb->mac_header = skb->network_header + offsetof(struct ipv6hdr, nexthdr); skb->transport_header = skb->network_header + sizeof(*top_iph); + __skb_pull(skb, XFRM_MODE_SKB_CB(skb)->ihl); xfrm6_beet_make_header(skb); diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index dc817e035e23..ff1e1db8e236 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -174,10 +174,12 @@ int xfrm6_extract_header(struct sk_buff *skb) { struct ipv6hdr *iph = ipv6_hdr(skb); + XFRM_MODE_SKB_CB(skb)->ihl = sizeof(*iph); XFRM_MODE_SKB_CB(skb)->id = 0; XFRM_MODE_SKB_CB(skb)->frag_off = htons(IP_DF); XFRM_MODE_SKB_CB(skb)->tos = ipv6_get_dsfield(iph); XFRM_MODE_SKB_CB(skb)->ttl = iph->hop_limit; + XFRM_MODE_SKB_CB(skb)->optlen = 0; memcpy(XFRM_MODE_SKB_CB(skb)->flow_lbl, iph->flow_lbl, sizeof(XFRM_MODE_SKB_CB(skb)->flow_lbl)); -- cgit v1.2.3-59-g8ed1b From 920fc941a9617f95ccb283037fe6f8a38d95bb69 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Thu, 27 Mar 2008 16:08:03 -0700 Subject: [ESP]: Ensure IV is in linear part of the skb to avoid BUG() due to OOB access ESP does not account for the IV size when calling pskb_may_pull() to ensure everything it accesses directly is within the linear part of a potential fragment. This results in a BUG() being triggered when the both the IPv4 and IPv6 ESP stack is fed with an skb where the first fragment ends between the end of the esp header and the end of the IV. This bug was found by Dirk Nehring . Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- net/ipv4/esp4.c | 2 +- net/ipv6/esp6.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index f3ceca31aa45..4e73e5708e70 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -336,7 +336,7 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) struct scatterlist *asg; int err = -EINVAL; - if (!pskb_may_pull(skb, sizeof(*esph))) + if (!pskb_may_pull(skb, sizeof(*esph) + crypto_aead_ivsize(aead))) goto out; if (elen <= 0) diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 0ec1402320ea..c6bb4c6d24b3 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -282,7 +282,7 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb) struct scatterlist *sg; struct scatterlist *asg; - if (!pskb_may_pull(skb, sizeof(*esph))) { + if (!pskb_may_pull(skb, sizeof(*esph) + crypto_aead_ivsize(aead))) { ret = -EINVAL; goto out; } -- cgit v1.2.3-59-g8ed1b From 8eeee8b152ae6bbe181518efaf62ba8e9c613693 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Thu, 27 Mar 2008 16:55:53 -0700 Subject: [NETFILTER]: Replate direct proc_fops assignment with proc_create call. This elliminates infamous race during module loading when one could lookup proc entry without proc_fops assigned. Signed-off-by: Denis V. Lunev Signed-off-by: David S. Miller --- net/ipv4/netfilter/ip_queue.c | 8 +++----- net/ipv4/netfilter/ipt_CLUSTERIP.c | 5 ++--- net/ipv4/netfilter/ipt_recent.c | 3 +-- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c | 7 ++----- net/ipv6/netfilter/ip6_queue.c | 8 +++----- net/netfilter/nf_conntrack_standalone.c | 9 +++------ net/netfilter/nf_log.c | 8 ++------ net/netfilter/nf_queue.c | 7 ++----- net/netfilter/nfnetlink_log.c | 9 ++------- net/netfilter/nfnetlink_queue.c | 9 ++------- net/netfilter/xt_hashlimit.c | 16 ++++++++-------- 11 files changed, 30 insertions(+), 59 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index fe05da41d6ba..4dc162894cb2 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -588,11 +588,9 @@ static int __init ip_queue_init(void) } #ifdef CONFIG_PROC_FS - proc = create_proc_entry(IPQ_PROC_FS_NAME, 0, init_net.proc_net); - if (proc) { - proc->owner = THIS_MODULE; - proc->proc_fops = &ip_queue_proc_fops; - } else { + proc = proc_create(IPQ_PROC_FS_NAME, 0, init_net.proc_net, + &ip_queue_proc_fops); + if (!proc) { printk(KERN_ERR "ip_queue: failed to create proc entry\n"); goto cleanup_ipqnl; } diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index c6cf84c77611..52926c8e3cc1 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -167,14 +167,13 @@ clusterip_config_init(struct ipt_clusterip_tgt_info *i, __be32 ip, /* create proc dir entry */ sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(ip)); - c->pde = create_proc_entry(buffer, S_IWUSR|S_IRUSR, - clusterip_procdir); + c->pde = proc_create(buffer, S_IWUSR|S_IRUSR, + clusterip_procdir, &clusterip_proc_fops); if (!c->pde) { kfree(c); return NULL; } } - c->pde->proc_fops = &clusterip_proc_fops; c->pde->data = c; #endif diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c index 8e8f0425a8ed..50e06690eb5b 100644 --- a/net/ipv4/netfilter/ipt_recent.c +++ b/net/ipv4/netfilter/ipt_recent.c @@ -276,12 +276,11 @@ recent_mt_check(const char *tablename, const void *ip, for (i = 0; i < ip_list_hash_size; i++) INIT_LIST_HEAD(&t->iphash[i]); #ifdef CONFIG_PROC_FS - t->proc = create_proc_entry(t->name, ip_list_perms, proc_dir); + t->proc = proc_create(t->name, ip_list_perms, proc_dir, &recent_fops); if (t->proc == NULL) { kfree(t); goto out; } - t->proc->proc_fops = &recent_fops; t->proc->uid = ip_list_uid; t->proc->gid = ip_list_gid; t->proc->data = t; diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c index 089252e82c01..f500b0fdaef4 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c @@ -395,13 +395,10 @@ int __init nf_conntrack_ipv4_compat_init(void) if (!proc_exp) goto err2; - proc_stat = create_proc_entry("ip_conntrack", S_IRUGO, init_net.proc_net_stat); + proc_stat = proc_create("ip_conntrack", S_IRUGO, + init_net.proc_net_stat, &ct_cpu_seq_fops); if (!proc_stat) goto err3; - - proc_stat->proc_fops = &ct_cpu_seq_fops; - proc_stat->owner = THIS_MODULE; - return 0; err3: diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index cc2f9afcf808..8d366f7f2a9a 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -591,11 +591,9 @@ static int __init ip6_queue_init(void) } #ifdef CONFIG_PROC_FS - proc = create_proc_entry(IPQ_PROC_FS_NAME, 0, init_net.proc_net); - if (proc) { - proc->owner = THIS_MODULE; - proc->proc_fops = &ip6_queue_proc_fops; - } else { + proc = proc_create(IPQ_PROC_FS_NAME, 0, init_net.proc_net, + &ip6_queue_proc_fops); + if (!proc) { printk(KERN_ERR "ip6_queue: failed to create proc entry\n"); goto cleanup_ipqnl; } diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index e88e96af613d..8599068050ec 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -395,7 +395,7 @@ EXPORT_SYMBOL_GPL(nf_ct_log_invalid); static int __init nf_conntrack_standalone_init(void) { #ifdef CONFIG_PROC_FS - struct proc_dir_entry *proc, *proc_stat; + struct proc_dir_entry *proc; #endif int ret = 0; @@ -407,12 +407,9 @@ static int __init nf_conntrack_standalone_init(void) proc = proc_net_fops_create(&init_net, "nf_conntrack", 0440, &ct_file_ops); if (!proc) goto cleanup_init; - proc_stat = create_proc_entry("nf_conntrack", S_IRUGO, init_net.proc_net_stat); - if (!proc_stat) + if (!proc_create("nf_conntrack", S_IRUGO, + init_net.proc_net_stat, &ct_cpu_seq_fops)) goto cleanup_proc; - - proc_stat->proc_fops = &ct_cpu_seq_fops; - proc_stat->owner = THIS_MODULE; #endif #ifdef CONFIG_SYSCTL nf_ct_sysctl_header = register_sysctl_paths(nf_ct_path, diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index cec9976aecbf..bc11d7092032 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -168,13 +168,9 @@ static const struct file_operations nflog_file_ops = { int __init netfilter_log_init(void) { #ifdef CONFIG_PROC_FS - struct proc_dir_entry *pde; - - pde = create_proc_entry("nf_log", S_IRUGO, proc_net_netfilter); - if (!pde) + if (!proc_create("nf_log", S_IRUGO, + proc_net_netfilter, &nflog_file_ops)) return -1; - - pde->proc_fops = &nflog_file_ops; #endif return 0; } diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index ddc80ea114cd..bbd26893c0c4 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -348,12 +348,9 @@ static const struct file_operations nfqueue_file_ops = { int __init netfilter_queue_init(void) { #ifdef CONFIG_PROC_FS - struct proc_dir_entry *pde; - - pde = create_proc_entry("nf_queue", S_IRUGO, proc_net_netfilter); - if (!pde) + if (!proc_create("nf_queue", S_IRUGO, + proc_net_netfilter, &nfqueue_file_ops)) return -1; - pde->proc_fops = &nfqueue_file_ops; #endif return 0; } diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index bf3f19b21fe4..b8173af8c24a 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -923,9 +923,6 @@ static const struct file_operations nful_file_ops = { static int __init nfnetlink_log_init(void) { int i, status = -ENOMEM; -#ifdef CONFIG_PROC_FS - struct proc_dir_entry *proc_nful; -#endif for (i = 0; i < INSTANCE_BUCKETS; i++) INIT_HLIST_HEAD(&instance_table[i]); @@ -943,11 +940,9 @@ static int __init nfnetlink_log_init(void) } #ifdef CONFIG_PROC_FS - proc_nful = create_proc_entry("nfnetlink_log", 0440, - proc_net_netfilter); - if (!proc_nful) + if (!proc_create("nfnetlink_log", 0440, + proc_net_netfilter, &nful_file_ops)) goto cleanup_subsys; - proc_nful->proc_fops = &nful_file_ops; #endif return status; diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 012cb6910820..10522c04ed24 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -896,9 +896,6 @@ static const struct file_operations nfqnl_file_ops = { static int __init nfnetlink_queue_init(void) { int i, status = -ENOMEM; -#ifdef CONFIG_PROC_FS - struct proc_dir_entry *proc_nfqueue; -#endif for (i = 0; i < INSTANCE_BUCKETS; i++) INIT_HLIST_HEAD(&instance_table[i]); @@ -911,11 +908,9 @@ static int __init nfnetlink_queue_init(void) } #ifdef CONFIG_PROC_FS - proc_nfqueue = create_proc_entry("nfnetlink_queue", 0440, - proc_net_netfilter); - if (!proc_nfqueue) + if (!proc_create("nfnetlink_queue", 0440, + proc_net_netfilter, &nfqnl_file_ops)) goto cleanup_subsys; - proc_nfqueue->proc_fops = &nfqnl_file_ops; #endif register_netdevice_notifier(&nfqnl_dev_notifier); diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 5418ce59ac3a..dc29007c52cd 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -237,14 +237,14 @@ static int htable_create_v0(struct xt_hashlimit_info *minfo, int family) hinfo->family = family; hinfo->rnd_initialized = 0; spin_lock_init(&hinfo->lock); - hinfo->pde = create_proc_entry(minfo->name, 0, - family == AF_INET ? hashlimit_procdir4 : - hashlimit_procdir6); + hinfo->pde = proc_create(minfo->name, 0, + family == AF_INET ? hashlimit_procdir4 : + hashlimit_procdir6, + &dl_file_ops); if (!hinfo->pde) { vfree(hinfo); return -1; } - hinfo->pde->proc_fops = &dl_file_ops; hinfo->pde->data = hinfo; setup_timer(&hinfo->timer, htable_gc, (unsigned long )hinfo); @@ -301,14 +301,14 @@ static int htable_create(struct xt_hashlimit_mtinfo1 *minfo, hinfo->rnd_initialized = 0; spin_lock_init(&hinfo->lock); - hinfo->pde = create_proc_entry(minfo->name, 0, - family == AF_INET ? hashlimit_procdir4 : - hashlimit_procdir6); + hinfo->pde = proc_create(minfo->name, 0, + family == AF_INET ? hashlimit_procdir4 : + hashlimit_procdir6, + &dl_file_ops); if (hinfo->pde == NULL) { vfree(hinfo); return -1; } - hinfo->pde->proc_fops = &dl_file_ops; hinfo->pde->data = hinfo; setup_timer(&hinfo->timer, htable_gc, (unsigned long)hinfo); -- cgit v1.2.3-59-g8ed1b