aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv6
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@ppc970.osdl.org>2005-04-16 15:20:36 -0700
committerLinus Torvalds <torvalds@ppc970.osdl.org>2005-04-16 15:20:36 -0700
commit1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree0bba044c4ce775e45a88a51686b5d9f90697ea9d /net/ipv6
downloadlinux-dev-1da177e4c3f41524e886b7f1b8a0c1fc7321cac2.tar.xz
linux-dev-1da177e4c3f41524e886b7f1b8a0c1fc7321cac2.zip
Linux-2.6.12-rc2
Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip!
Diffstat (limited to 'net/ipv6')
-rw-r--r--net/ipv6/Kconfig79
-rw-r--r--net/ipv6/Makefile25
-rw-r--r--net/ipv6/addrconf.c3615
-rw-r--r--net/ipv6/af_inet6.c867
-rw-r--r--net/ipv6/ah6.c478
-rw-r--r--net/ipv6/anycast.c594
-rw-r--r--net/ipv6/datagram.c600
-rw-r--r--net/ipv6/esp6.c424
-rw-r--r--net/ipv6/exthdrs.c575
-rw-r--r--net/ipv6/exthdrs_core.c109
-rw-r--r--net/ipv6/icmp.c822
-rw-r--r--net/ipv6/ip6_fib.c1225
-rw-r--r--net/ipv6/ip6_flowlabel.c706
-rw-r--r--net/ipv6/ip6_input.c269
-rw-r--r--net/ipv6/ip6_output.c1197
-rw-r--r--net/ipv6/ip6_tunnel.c1163
-rw-r--r--net/ipv6/ipcomp6.c524
-rw-r--r--net/ipv6/ipv6_sockglue.c704
-rw-r--r--net/ipv6/ipv6_syms.c41
-rw-r--r--net/ipv6/mcast.c2499
-rw-r--r--net/ipv6/ndisc.c1690
-rw-r--r--net/ipv6/netfilter/Kconfig242
-rw-r--r--net/ipv6/netfilter/Makefile26
-rw-r--r--net/ipv6/netfilter/ip6_queue.c741
-rw-r--r--net/ipv6/netfilter/ip6_tables.c1970
-rw-r--r--net/ipv6/netfilter/ip6t_LOG.c509
-rw-r--r--net/ipv6/netfilter/ip6t_MARK.c78
-rw-r--r--net/ipv6/netfilter/ip6t_ah.c208
-rw-r--r--net/ipv6/netfilter/ip6t_dst.c298
-rw-r--r--net/ipv6/netfilter/ip6t_esp.c181
-rw-r--r--net/ipv6/netfilter/ip6t_eui64.c101
-rw-r--r--net/ipv6/netfilter/ip6t_frag.c229
-rw-r--r--net/ipv6/netfilter/ip6t_hbh.c298
-rw-r--r--net/ipv6/netfilter/ip6t_hl.c80
-rw-r--r--net/ipv6/netfilter/ip6t_ipv6header.c167
-rw-r--r--net/ipv6/netfilter/ip6t_length.c66
-rw-r--r--net/ipv6/netfilter/ip6t_limit.c147
-rw-r--r--net/ipv6/netfilter/ip6t_mac.c80
-rw-r--r--net/ipv6/netfilter/ip6t_mark.c66
-rw-r--r--net/ipv6/netfilter/ip6t_multiport.c125
-rw-r--r--net/ipv6/netfilter/ip6t_owner.c174
-rw-r--r--net/ipv6/netfilter/ip6t_physdev.c135
-rw-r--r--net/ipv6/netfilter/ip6t_rt.c301
-rw-r--r--net/ipv6/netfilter/ip6table_filter.c214
-rw-r--r--net/ipv6/netfilter/ip6table_mangle.c287
-rw-r--r--net/ipv6/netfilter/ip6table_raw.c182
-rw-r--r--net/ipv6/proc.c303
-rw-r--r--net/ipv6/protocol.c86
-rw-r--r--net/ipv6/raw.c1157
-rw-r--r--net/ipv6/reassembly.c771
-rw-r--r--net/ipv6/route.c2131
-rw-r--r--net/ipv6/sit.c833
-rw-r--r--net/ipv6/sysctl_net_ipv6.c125
-rw-r--r--net/ipv6/tcp_ipv6.c2265
-rw-r--r--net/ipv6/udp.c1075
-rw-r--r--net/ipv6/xfrm6_input.c150
-rw-r--r--net/ipv6/xfrm6_output.c143
-rw-r--r--net/ipv6/xfrm6_policy.c295
-rw-r--r--net/ipv6/xfrm6_state.c136
-rw-r--r--net/ipv6/xfrm6_tunnel.c543
60 files changed, 35124 insertions, 0 deletions
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
new file mode 100644
index 000000000000..e66ca9381cfd
--- /dev/null
+++ b/net/ipv6/Kconfig
@@ -0,0 +1,79 @@
+#
+# IPv6 configuration
+#
+config IPV6_PRIVACY
+ bool "IPv6: Privacy Extensions (RFC 3041) support"
+ depends on IPV6
+ ---help---
+ Privacy Extensions for Stateless Address Autoconfiguration in IPv6
+ support. With this option, additional periodically-altered
+ pseudo-random global-scope unicast address(es) will be assigned to
+ your interface(s).
+
+ By default, the kernel does not generate temporary addresses.
+ To use temporary addresses, do
+
+ echo 2 >/proc/sys/net/ipv6/conf/all/use_tempaddr
+
+ See <file:Documentation/networking/ip-sysctl.txt> for details.
+
+config INET6_AH
+ tristate "IPv6: AH transformation"
+ depends on IPV6
+ select XFRM
+ select CRYPTO
+ select CRYPTO_HMAC
+ select CRYPTO_MD5
+ select CRYPTO_SHA1
+ ---help---
+ Support for IPsec AH.
+
+ If unsure, say Y.
+
+config INET6_ESP
+ tristate "IPv6: ESP transformation"
+ depends on IPV6
+ select XFRM
+ select CRYPTO
+ select CRYPTO_HMAC
+ select CRYPTO_MD5
+ select CRYPTO_SHA1
+ select CRYPTO_DES
+ ---help---
+ Support for IPsec ESP.
+
+ If unsure, say Y.
+
+config INET6_IPCOMP
+ tristate "IPv6: IPComp transformation"
+ depends on IPV6
+ select XFRM
+ select INET6_TUNNEL
+ select CRYPTO
+ select CRYPTO_DEFLATE
+ ---help---
+ Support for IP Payload Compression Protocol (IPComp) (RFC3173),
+ typically needed for IPsec.
+
+ If unsure, say Y.
+
+config INET6_TUNNEL
+ tristate "IPv6: tunnel transformation"
+ depends on IPV6
+ select XFRM
+ ---help---
+ Support for generic IPv6-in-IPv6 tunnel transformation, which is
+ required by the IPv6-in-IPv6 tunneling module as well as tunnel mode
+ IPComp.
+
+ If unsure, say Y.
+
+config IPV6_TUNNEL
+ tristate "IPv6: IPv6-in-IPv6 tunnel"
+ depends on IPV6
+ select INET6_TUNNEL
+ ---help---
+ Support for IPv6-in-IPv6 tunnels described in RFC 2473.
+
+ If unsure, say N.
+
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
new file mode 100644
index 000000000000..b39e04940590
--- /dev/null
+++ b/net/ipv6/Makefile
@@ -0,0 +1,25 @@
+#
+# Makefile for the Linux TCP/IP (INET6) layer.
+#
+
+obj-$(CONFIG_IPV6) += ipv6.o
+
+ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o sit.o \
+ route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o raw.o \
+ protocol.o icmp.o mcast.o reassembly.o tcp_ipv6.o \
+ exthdrs.o sysctl_net_ipv6.o datagram.o proc.o \
+ ip6_flowlabel.o ipv6_syms.o
+
+ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \
+ xfrm6_output.o
+ipv6-objs += $(ipv6-y)
+
+obj-$(CONFIG_INET6_AH) += ah6.o
+obj-$(CONFIG_INET6_ESP) += esp6.o
+obj-$(CONFIG_INET6_IPCOMP) += ipcomp6.o
+obj-$(CONFIG_INET6_TUNNEL) += xfrm6_tunnel.o
+obj-$(CONFIG_NETFILTER) += netfilter/
+
+obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o
+
+obj-y += exthdrs_core.o
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
new file mode 100644
index 000000000000..5ffde14ddc09
--- /dev/null
+++ b/net/ipv6/addrconf.c
@@ -0,0 +1,3615 @@
+/*
+ * IPv6 Address [auto]configuration
+ * Linux INET6 implementation
+ *
+ * Authors:
+ * Pedro Roque <roque@di.fc.ul.pt>
+ * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
+ *
+ * $Id: addrconf.c,v 1.69 2001/10/31 21:55:54 davem Exp $
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+/*
+ * Changes:
+ *
+ * Janos Farkas : delete timer on ifdown
+ * <chexum@bankinf.banki.hu>
+ * Andi Kleen : kill double kfree on module
+ * unload.
+ * Maciej W. Rozycki : FDDI support
+ * sekiya@USAGI : Don't send too many RS
+ * packets.
+ * yoshfuji@USAGI : Fixed interval between DAD
+ * packets.
+ * YOSHIFUJI Hideaki @USAGI : improved accuracy of
+ * address validation timer.
+ * YOSHIFUJI Hideaki @USAGI : Privacy Extensions (RFC3041)
+ * support.
+ * Yuji SEKIYA @USAGI : Don't assign a same IPv6
+ * address on a same interface.
+ * YOSHIFUJI Hideaki @USAGI : ARCnet support
+ * YOSHIFUJI Hideaki @USAGI : convert /proc/net/if_inet6 to
+ * seq_file.
+ */
+
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/sched.h>
+#include <linux/net.h>
+#include <linux/in6.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <linux/if_arcnet.h>
+#include <linux/if_infiniband.h>
+#include <linux/route.h>
+#include <linux/inetdevice.h>
+#include <linux/init.h>
+#ifdef CONFIG_SYSCTL
+#include <linux/sysctl.h>
+#endif
+#include <linux/delay.h>
+#include <linux/notifier.h>
+
+#include <net/sock.h>
+#include <net/snmp.h>
+
+#include <net/ipv6.h>
+#include <net/protocol.h>
+#include <net/ndisc.h>
+#include <net/ip6_route.h>
+#include <net/addrconf.h>
+#include <net/tcp.h>
+#include <net/ip.h>
+#include <linux/if_tunnel.h>
+#include <linux/rtnetlink.h>
+
+#ifdef CONFIG_IPV6_PRIVACY
+#include <linux/random.h>
+#include <linux/crypto.h>
+#include <asm/scatterlist.h>
+#endif
+
+#include <asm/uaccess.h>
+
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+
+/* Set to 3 to get tracing... */
+#define ACONF_DEBUG 2
+
+#if ACONF_DEBUG >= 3
+#define ADBG(x) printk x
+#else
+#define ADBG(x)
+#endif
+
+#define INFINITY_LIFE_TIME 0xFFFFFFFF
+#define TIME_DELTA(a,b) ((unsigned long)((long)(a) - (long)(b)))
+
+#ifdef CONFIG_SYSCTL
+static void addrconf_sysctl_register(struct inet6_dev *idev, struct ipv6_devconf *p);
+static void addrconf_sysctl_unregister(struct ipv6_devconf *p);
+#endif
+
+#ifdef CONFIG_IPV6_PRIVACY
+static int __ipv6_regen_rndid(struct inet6_dev *idev);
+static int __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr);
+static void ipv6_regen_rndid(unsigned long data);
+
+static int desync_factor = MAX_DESYNC_FACTOR * HZ;
+static struct crypto_tfm *md5_tfm;
+static DEFINE_SPINLOCK(md5_tfm_lock);
+#endif
+
+static int ipv6_count_addresses(struct inet6_dev *idev);
+
+/*
+ * Configured unicast address hash table
+ */
+static struct inet6_ifaddr *inet6_addr_lst[IN6_ADDR_HSIZE];
+static DEFINE_RWLOCK(addrconf_hash_lock);
+
+/* Protects inet6 devices */
+DEFINE_RWLOCK(addrconf_lock);
+
+static void addrconf_verify(unsigned long);
+
+static struct timer_list addr_chk_timer =
+ TIMER_INITIALIZER(addrconf_verify, 0, 0);
+static DEFINE_SPINLOCK(addrconf_verify_lock);
+
+static void addrconf_join_anycast(struct inet6_ifaddr *ifp);
+static void addrconf_leave_anycast(struct inet6_ifaddr *ifp);
+
+static int addrconf_ifdown(struct net_device *dev, int how);
+
+static void addrconf_dad_start(struct inet6_ifaddr *ifp, int flags);
+static void addrconf_dad_timer(unsigned long data);
+static void addrconf_dad_completed(struct inet6_ifaddr *ifp);
+static void addrconf_rs_timer(unsigned long data);
+static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa);
+static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa);
+
+static void inet6_prefix_notify(int event, struct inet6_dev *idev,
+ struct prefix_info *pinfo);
+static int ipv6_chk_same_addr(const struct in6_addr *addr, struct net_device *dev);
+
+static struct notifier_block *inet6addr_chain;
+
+struct ipv6_devconf ipv6_devconf = { /* global settings; addrconf_forward_change() copies .forwarding from here to every device */
+ .forwarding = 0,
+ .hop_limit = IPV6_DEFAULT_HOPLIMIT,
+ .mtu6 = IPV6_MIN_MTU,
+ .accept_ra = 1,
+ .accept_redirects = 1,
+ .autoconf = 1,
+ .force_mld_version = 0,
+ .dad_transmits = 1,
+ .rtr_solicits = MAX_RTR_SOLICITATIONS,
+ .rtr_solicit_interval = RTR_SOLICITATION_INTERVAL,
+ .rtr_solicit_delay = MAX_RTR_SOLICITATION_DELAY,
+#ifdef CONFIG_IPV6_PRIVACY
+ .use_tempaddr = 0, /* <= 0 makes ipv6_create_tempaddr() refuse to create temporary addresses */
+ .temp_valid_lft = TEMP_VALID_LIFETIME,
+ .temp_prefered_lft = TEMP_PREFERRED_LIFETIME,
+ .regen_max_retry = REGEN_MAX_RETRY, /* compared against ifp->regen_count in ipv6_create_tempaddr() */
+ .max_desync_factor = MAX_DESYNC_FACTOR,
+#endif
+ .max_addresses = IPV6_MAX_ADDRESSES, /* 0 disables the address-count check in ipv6_create_tempaddr() */
+};
+
+static struct ipv6_devconf ipv6_devconf_dflt = { /* template that ipv6_add_dev() copies into each new inet6_dev */
+ .forwarding = 0,
+ .hop_limit = IPV6_DEFAULT_HOPLIMIT,
+ .mtu6 = IPV6_MIN_MTU, /* ipv6_add_dev() overwrites the copy with dev->mtu */
+ .accept_ra = 1,
+ .accept_redirects = 1,
+ .autoconf = 1,
+ .dad_transmits = 1,
+ .rtr_solicits = MAX_RTR_SOLICITATIONS,
+ .rtr_solicit_interval = RTR_SOLICITATION_INTERVAL,
+ .rtr_solicit_delay = MAX_RTR_SOLICITATION_DELAY,
+#ifdef CONFIG_IPV6_PRIVACY
+ .use_tempaddr = 0, /* ipv6_add_dev() forces the copy to -1 on loopback, tunnel and SIT devices */
+ .temp_valid_lft = TEMP_VALID_LIFETIME,
+ .temp_prefered_lft = TEMP_PREFERRED_LIFETIME,
+ .regen_max_retry = REGEN_MAX_RETRY,
+ .max_desync_factor = MAX_DESYNC_FACTOR,
+#endif
+ .max_addresses = IPV6_MAX_ADDRESSES,
+};
+
+/* IPv6 Wildcard Address and Loopback Address defined by RFC2553 */
+#if 0
+const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT;
+#endif
+const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT;
+
+/*
+ * Classify an IPv6 address by looking at its leading bits.
+ *
+ * Returns a combination of IPV6_ADDR_* flags: the base class
+ * (multicast, unicast, any, mapped, reserved) optionally OR-ed with a
+ * scope/kind flag (loopback, link-local, site-local, v4-compatible).
+ */
+int ipv6_addr_type(const struct in6_addr *addr)
+{
+	u32 st = addr->s6_addr32[0];
+	u32 top3;
+	int type;
+
+	/* ff00::/8 is multicast; the second byte selects the scope flag. */
+	if ((st & htonl(0xFF000000)) == htonl(0xFF000000)) {
+		type = IPV6_ADDR_MULTICAST;
+
+		switch (st & htonl(0x00FF0000)) {
+		case __constant_htonl(0x00010000):
+			return type | IPV6_ADDR_LOOPBACK;
+		case __constant_htonl(0x00020000):
+			return type | IPV6_ADDR_LINKLOCAL;
+		case __constant_htonl(0x00050000):
+			return type | IPV6_ADDR_SITELOCAL;
+		default:
+			return type;
+		}
+	}
+
+	type = IPV6_ADDR_UNICAST;
+
+	/*
+	 * Only prefixes whose first three bits are 000 or 111 need a
+	 * closer look; everything else is plain unicast.
+	 */
+	top3 = st & htonl(0xE0000000);
+	if (top3 != htonl(0x00000000) && top3 != htonl(0xE0000000))
+		return type;
+
+	/* fe80::/10 */
+	if ((st & htonl(0xFFC00000)) == htonl(0xFE800000))
+		return (IPV6_ADDR_LINKLOCAL | type);
+
+	/* fec0::/10 */
+	if ((st & htonl(0xFFC00000)) == htonl(0xFEC00000))
+		return (IPV6_ADDR_SITELOCAL | type);
+
+	/* Upper 64 bits all zero: any, loopback, v4-compatible or v4-mapped. */
+	if ((addr->s6_addr32[0] | addr->s6_addr32[1]) == 0) {
+		if (addr->s6_addr32[2] == htonl(0x0000ffff))
+			return IPV6_ADDR_MAPPED;
+
+		if (addr->s6_addr32[2] == 0) {
+			if (addr->s6_addr32[3] == 0)
+				return IPV6_ADDR_ANY;
+			if (addr->s6_addr32[3] == htonl(0x00000001))
+				return (IPV6_ADDR_LOOPBACK | type);
+			return (IPV6_ADDR_COMPATv4 | type);
+		}
+	}
+
+	/* 00xx::/8 that was not recognised above is reserved. */
+	if ((st & htonl(0xFF000000)) == 0)
+		return IPV6_ADDR_RESERVED;
+
+	/* 0200::/7 is reserved for NSAP, 0400::/7 for IPX. */
+	st &= htonl(0xFE000000);
+	if (st == htonl(0x02000000) || st == htonl(0x04000000))
+		return IPV6_ADDR_RESERVED;
+
+	return type;
+}
+
+/*
+ * Cancel the per-address timer.  If a timer was actually pending, also
+ * drop the address reference that addrconf_mod_timer() took for it.
+ */
+static void addrconf_del_timer(struct inet6_ifaddr *ifp)
+{
+	if (!del_timer(&ifp->timer))
+		return;
+
+	__in6_ifa_put(ifp);
+}
+
+enum addrconf_timer_t /* selects the handler addrconf_mod_timer() installs */
+{
+ AC_NONE, /* leave ifp->timer.function as it is */
+ AC_DAD, /* expire into addrconf_dad_timer() */
+ AC_RS, /* expire into addrconf_rs_timer() */
+};
+
+/*
+ * (Re)arm the per-address timer to fire @when jiffies from now.
+ *
+ * @what picks the handler (AC_DAD or AC_RS); any other value keeps the
+ * handler that is already installed.  An armed timer holds one reference
+ * on @ifp: it is taken here unless a pending timer already held it.
+ */
+static void addrconf_mod_timer(struct inet6_ifaddr *ifp,
+			       enum addrconf_timer_t what,
+			       unsigned long when)
+{
+	if (!del_timer(&ifp->timer))
+		in6_ifa_hold(ifp);
+
+	if (what == AC_DAD)
+		ifp->timer.function = addrconf_dad_timer;
+	else if (what == AC_RS)
+		ifp->timer.function = addrconf_rs_timer;
+
+	ifp->timer.expires = jiffies + when;
+	add_timer(&ifp->timer);
+}
+
+/*
+ * Nobody refers to this device, we may destroy it.
+ *
+ * Final teardown of an inet6_dev.  Both the unicast and the multicast
+ * address lists are expected to be empty already.  The net_device
+ * reference is dropped unconditionally; the structure itself is only
+ * freed when it has been marked dead.
+ */
+
+void in6_dev_finish_destroy(struct inet6_dev *idev)
+{
+ struct net_device *dev = idev->dev;
+ BUG_TRAP(idev->addr_list==NULL);
+ BUG_TRAP(idev->mc_list==NULL);
+#ifdef NET_REFCNT_DEBUG
+ printk(KERN_DEBUG "in6_dev_finish_destroy: %s\n", dev ? dev->name : "NIL");
+#endif
+ dev_put(dev); /* pairs with dev_hold() in ipv6_add_dev() */
+ if (!idev->dead) { /* not marked dead: complain and leave the memory alone */
+ printk("Freeing alive inet6 device %p\n", idev);
+ return;
+ }
+ snmp6_free_dev(idev);
+ kfree(idev);
+}
+
+/*
+ * Allocate an inet6_dev for @dev, initialise it from ipv6_devconf_dflt
+ * and publish it through dev->ip6_ptr.
+ *
+ * Must be called with the RTNL held.  Returns the new inet6_dev, or NULL
+ * if the device MTU is below IPV6_MIN_MTU or any allocation or
+ * registration step fails.
+ */
+static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
+{
+	struct inet6_dev *ndev;
+
+	ASSERT_RTNL();
+
+	if (dev->mtu < IPV6_MIN_MTU)
+		return NULL;
+
+	ndev = kmalloc(sizeof(struct inet6_dev), GFP_KERNEL);
+	if (ndev == NULL)
+		return NULL;
+
+	memset(ndev, 0, sizeof(struct inet6_dev));
+
+	rwlock_init(&ndev->lock);
+	ndev->dev = dev;
+	memcpy(&ndev->cnf, &ipv6_devconf_dflt, sizeof(ndev->cnf));
+	ndev->cnf.mtu6 = dev->mtu;
+	ndev->cnf.sysctl = NULL;
+
+	ndev->nd_parms = neigh_parms_alloc(dev, &nd_tbl);
+	if (ndev->nd_parms == NULL) {
+		/* Nothing else is attached yet, a plain kfree is enough. */
+		kfree(ndev);
+		return NULL;
+	}
+
+	/* We refer to the device */
+	dev_hold(dev);
+
+	if (snmp6_alloc_dev(ndev) < 0) {
+		ADBG((KERN_WARNING
+			"%s(): cannot allocate memory for statistics; dev=%s.\n",
+			__FUNCTION__, dev->name));
+		goto err_release;
+	}
+
+	if (snmp6_register_dev(ndev) < 0) {
+		ADBG((KERN_WARNING
+			"%s(): cannot create /proc/net/dev_snmp6/%s\n",
+			__FUNCTION__, dev->name));
+		goto err_release;
+	}
+
+	/* One reference from device.  We must do this before
+	 * we invoke __ipv6_regen_rndid().
+	 */
+	in6_dev_hold(ndev);
+
+#ifdef CONFIG_IPV6_PRIVACY
+	get_random_bytes(ndev->rndid, sizeof(ndev->rndid));
+	get_random_bytes(ndev->entropy, sizeof(ndev->entropy));
+	init_timer(&ndev->regen_timer);
+	ndev->regen_timer.function = ipv6_regen_rndid;
+	ndev->regen_timer.data = (unsigned long) ndev;
+	if ((dev->flags&IFF_LOOPBACK) ||
+	    dev->type == ARPHRD_TUNNEL ||
+	    dev->type == ARPHRD_SIT) {
+		printk(KERN_INFO
+			"Disabled Privacy Extensions on device %p(%s)\n",
+			dev, dev->name);
+		ndev->cnf.use_tempaddr = -1;
+	} else {
+		/* Extra reference taken before calling the regen handler directly. */
+		in6_dev_hold(ndev);
+		ipv6_regen_rndid((unsigned long) ndev);
+	}
+#endif
+
+	write_lock_bh(&addrconf_lock);
+	dev->ip6_ptr = ndev;
+	write_unlock_bh(&addrconf_lock);
+
+	ipv6_mc_init_dev(ndev);
+	ndev->tstamp = jiffies;
+#ifdef CONFIG_SYSCTL
+	neigh_sysctl_register(dev, ndev->nd_parms, NET_IPV6,
+			      NET_IPV6_NEIGH, "ipv6",
+			      &ndisc_ifinfo_sysctl_change,
+			      NULL);
+	addrconf_sysctl_register(ndev, &ndev->cnf);
+#endif
+	return ndev;
+
+err_release:
+	/* Undo neigh_parms_alloc(); the destroy helper drops the dev ref. */
+	neigh_parms_release(&nd_tbl, ndev->nd_parms);
+	ndev->dead = 1;
+	in6_dev_finish_destroy(ndev);
+	return NULL;
+}
+
+/*
+ * Return the inet6_dev attached to @dev, creating one with
+ * ipv6_add_dev() if none exists yet.  If the device is up, ipv6_mc_up()
+ * is called on the result.  Needs the RTNL; returns NULL if a new
+ * inet6_dev could not be created.
+ */
+static struct inet6_dev * ipv6_find_idev(struct net_device *dev)
+{
+	struct inet6_dev *idev;
+
+	ASSERT_RTNL();
+
+	idev = __in6_dev_get(dev);
+	if (idev == NULL) {
+		idev = ipv6_add_dev(dev);
+		if (idev == NULL)
+			return NULL;
+	}
+
+	if (dev->flags & IFF_UP)
+		ipv6_mc_up(idev);
+
+	return idev;
+}
+
+#ifdef CONFIG_SYSCTL
+/*
+ * Bring one device in line with its current cnf.forwarding value:
+ * join or leave the all-routers multicast group (multicast-capable
+ * devices only) and join or leave the anycast address of every
+ * configured unicast address.  A NULL @idev is ignored.
+ */
+static void dev_forward_change(struct inet6_dev *idev)
+{
+	struct net_device *dev;
+	struct inet6_ifaddr *ifa;
+	struct in6_addr addr;
+
+	if (idev == NULL)
+		return;
+
+	dev = idev->dev;
+	if (dev != NULL && (dev->flags & IFF_MULTICAST) != 0) {
+		ipv6_addr_all_routers(&addr);
+
+		if (idev->cnf.forwarding)
+			ipv6_dev_mc_inc(dev, &addr);
+		else
+			ipv6_dev_mc_dec(dev, &addr);
+	}
+
+	ifa = idev->addr_list;
+	while (ifa != NULL) {
+		if (idev->cnf.forwarding)
+			addrconf_join_anycast(ifa);
+		else
+			addrconf_leave_anycast(ifa);
+		ifa = ifa->if_next;
+	}
+}
+
+
+/*
+ * Copy the global ipv6_devconf.forwarding value to every device that
+ * has an inet6_dev, and call dev_forward_change() on those whose
+ * on/off state actually flipped.
+ */
+static void addrconf_forward_change(void)
+{
+	struct net_device *dev;
+
+	read_lock(&dev_base_lock);
+	for (dev = dev_base; dev != NULL; dev = dev->next) {
+		struct inet6_dev *idev;
+
+		read_lock(&addrconf_lock);
+		idev = __in6_dev_get(dev);
+		if (idev != NULL) {
+			/* Compare as booleans: only an on<->off flip counts. */
+			int was_on = idev->cnf.forwarding != 0;
+			int now_on = ipv6_devconf.forwarding != 0;
+
+			idev->cnf.forwarding = ipv6_devconf.forwarding;
+			if (was_on != now_on)
+				dev_forward_change(idev);
+		}
+		read_unlock(&addrconf_lock);
+	}
+	read_unlock(&dev_base_lock);
+}
+#endif
+
+/*
+ * Nobody refers to this ifaddr, destroy it
+ *
+ * Final teardown of an inet6_ifaddr.  The address is expected to be
+ * unlinked from the device address list and from the hash chain
+ * already.  The inet6_dev reference is dropped unconditionally; the
+ * route reference and the memory are only released when the address
+ * has been marked dead.
+ */
+
+void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp)
+{
+ BUG_TRAP(ifp->if_next==NULL);
+ BUG_TRAP(ifp->lst_next==NULL);
+#ifdef NET_REFCNT_DEBUG
+ printk(KERN_DEBUG "inet6_ifa_finish_destroy\n");
+#endif
+
+ in6_dev_put(ifp->idev); /* pairs with in6_dev_hold() in ipv6_add_addr() */
+
+ if (del_timer(&ifp->timer)) /* a pending timer here is unexpected; it is cancelled and reported */
+ printk("Timer is still running, when freeing ifa=%p\n", ifp);
+
+ if (!ifp->dead) { /* not marked dead: complain and leave the memory alone */
+ printk("Freeing alive inet6 address %p\n", ifp);
+ return;
+ }
+ dst_release(&ifp->rt->u.dst); /* route stored in ifa->rt by ipv6_add_addr() */
+
+ kfree(ifp);
+}
+
+/*
+ * Create the address @addr/@pfxlen on @idev and link it into the global
+ * hash table and the per-device address list (plus the temporary-address
+ * list when IFA_F_TEMPORARY is set in @flags).
+ *
+ * On success it returns ifp with increased reference count: the caller
+ * owns one reference and has to drop it.  The new address always starts
+ * out with IFA_F_TENTATIVE set, and the inet6addr notifier chain is
+ * called with NETDEV_UP.
+ *
+ * On failure an ERR_PTR() is returned:
+ *   -ENODEV   the device is marked dead
+ *   -EEXIST   the same address is already assigned on this device
+ *   -ENOBUFS  the inet6_ifaddr could not be allocated
+ *   or whatever error addrconf_dst_alloc() reported.
+ */
+
+static struct inet6_ifaddr *
+ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
+	      int scope, unsigned flags)
+{
+	struct inet6_ifaddr *ifa = NULL;
+	struct rt6_info *rt;
+	int hash;
+	int err = 0;
+
+	read_lock_bh(&addrconf_lock);
+	if (idev->dead) {
+		err = -ENODEV;			/*XXX*/
+		goto out2;
+	}
+
+	write_lock(&addrconf_hash_lock);
+
+	/* Ignore adding duplicate addresses on an interface */
+	if (ipv6_chk_same_addr(addr, idev->dev)) {
+		ADBG(("ipv6_add_addr: already assigned\n"));
+		err = -EEXIST;
+		goto out;
+	}
+
+	/* Atomic allocation: we are holding locks with BHs disabled. */
+	ifa = kmalloc(sizeof(struct inet6_ifaddr), GFP_ATOMIC);
+
+	if (ifa == NULL) {
+		ADBG(("ipv6_add_addr: malloc failed\n"));
+		err = -ENOBUFS;
+		goto out;
+	}
+
+	rt = addrconf_dst_alloc(idev, addr, 0);
+	if (IS_ERR(rt)) {
+		/* ifa is freed on the common error path below. */
+		err = PTR_ERR(rt);
+		goto out;
+	}
+
+	memset(ifa, 0, sizeof(struct inet6_ifaddr));
+	ipv6_addr_copy(&ifa->addr, addr);
+
+	spin_lock_init(&ifa->lock);
+	init_timer(&ifa->timer);
+	ifa->timer.data = (unsigned long) ifa;
+	ifa->scope = scope;
+	ifa->prefix_len = pfxlen;
+	ifa->flags = flags | IFA_F_TENTATIVE;
+	ifa->cstamp = ifa->tstamp = jiffies;
+
+	ifa->idev = idev;
+	in6_dev_hold(idev);
+	/* For caller */
+	in6_ifa_hold(ifa);
+
+	/* Add to big hash table */
+	hash = ipv6_addr_hash(addr);
+
+	ifa->lst_next = inet6_addr_lst[hash];
+	inet6_addr_lst[hash] = ifa;
+	in6_ifa_hold(ifa);
+	write_unlock(&addrconf_hash_lock);
+
+	write_lock(&idev->lock);
+	/* Add to inet6_dev unicast addr list. */
+	ifa->if_next = idev->addr_list;
+	idev->addr_list = ifa;
+
+#ifdef CONFIG_IPV6_PRIVACY
+	if (ifa->flags&IFA_F_TEMPORARY) {
+		ifa->tmp_next = idev->tempaddr_list;
+		idev->tempaddr_list = ifa;
+		in6_ifa_hold(ifa);
+	}
+#endif
+
+	ifa->rt = rt;
+
+	in6_ifa_hold(ifa);
+	write_unlock(&idev->lock);
+out2:
+	read_unlock_bh(&addrconf_lock);
+
+	/* Success is the common case, so hint the branch accordingly. */
+	if (likely(err == 0))
+		notifier_call_chain(&inet6addr_chain, NETDEV_UP, ifa);
+	else {
+		/* ifa is NULL or a still-unlinked allocation here. */
+		kfree(ifa);
+		ifa = ERR_PTR(err);
+	}
+
+	return ifa;
+out:
+	write_unlock(&addrconf_hash_lock);
+	goto out2;
+}
+
+/*
+ * Remove an address: unlink @ifp from the hash table and from the
+ * device lists, send RTM_DELADDR and NETDEV_DOWN notifications, stop its
+ * timer and purge or re-time the matching prefix route.
+ *
+ * This function wants to get referenced ifp and releases it before return
+ */
+
+static void ipv6_del_addr(struct inet6_ifaddr *ifp)
+{
+	struct inet6_ifaddr *ifa, **ifap;
+	struct inet6_dev *idev = ifp->idev;
+	int hash;
+	/*
+	 * onlink:  1 = another permanent address shares the prefix,
+	 *         -1 = only non-permanent addresses share it,
+	 *          0 = nobody else uses the prefix.
+	 */
+	int deleted = 0, onlink = 0;
+	unsigned long expires = jiffies;
+
+	hash = ipv6_addr_hash(&ifp->addr);
+
+	ifp->dead = 1;
+
+	/* Drop it from the global hash chain (and the chain's reference). */
+	write_lock_bh(&addrconf_hash_lock);
+	for (ifap = &inet6_addr_lst[hash]; (ifa=*ifap) != NULL;
+	     ifap = &ifa->lst_next) {
+		if (ifa == ifp) {
+			*ifap = ifa->lst_next;
+			__in6_ifa_put(ifp);
+			ifa->lst_next = NULL;
+			break;
+		}
+	}
+	write_unlock_bh(&addrconf_hash_lock);
+
+	write_lock_bh(&idev->lock);
+#ifdef CONFIG_IPV6_PRIVACY
+	if (ifp->flags&IFA_F_TEMPORARY) {
+		for (ifap = &idev->tempaddr_list; (ifa=*ifap) != NULL;
+		     ifap = &ifa->tmp_next) {
+			if (ifa == ifp) {
+				*ifap = ifa->tmp_next;
+				if (ifp->ifpub) {
+					in6_ifa_put(ifp->ifpub);
+					ifp->ifpub = NULL;
+				}
+				__in6_ifa_put(ifp);
+				ifa->tmp_next = NULL;
+				break;
+			}
+		}
+	}
+#endif
+
+	/*
+	 * Unlink from the device list.  For a permanent address the walk
+	 * also looks at the other addresses sharing its prefix, to decide
+	 * what happens to the prefix route afterwards.
+	 */
+	for (ifap = &idev->addr_list; (ifa=*ifap) != NULL;
+	     ifap = &ifa->if_next) {
+		if (ifa == ifp) {
+			*ifap = ifa->if_next;
+			__in6_ifa_put(ifp);
+			ifa->if_next = NULL;
+			if (!(ifp->flags & IFA_F_PERMANENT) || onlink > 0)
+				break;
+			deleted = 1;
+		} else if (ifp->flags & IFA_F_PERMANENT) {
+			if (ipv6_prefix_equal(&ifa->addr, &ifp->addr,
+					      ifp->prefix_len)) {
+				if (ifa->flags & IFA_F_PERMANENT) {
+					onlink = 1;
+					if (deleted)
+						break;
+				} else {
+					unsigned long lifetime;
+
+					if (!onlink)
+						onlink = -1;
+
+					spin_lock(&ifa->lock);
+					lifetime = min_t(unsigned long,
+							 ifa->valid_lft, 0x7fffffffUL/HZ);
+					if (time_before(expires,
+							ifa->tstamp + lifetime * HZ))
+						expires = ifa->tstamp + lifetime * HZ;
+					spin_unlock(&ifa->lock);
+				}
+			}
+		}
+	}
+	write_unlock_bh(&idev->lock);
+
+	ipv6_ifa_notify(RTM_DELADDR, ifp);
+
+	notifier_call_chain(&inet6addr_chain,NETDEV_DOWN,ifp);
+
+	addrconf_del_timer(ifp);
+
+	/*
+	 * Purge or update corresponding prefix
+	 *
+	 * 1) we don't purge prefix here if address was not permanent.
+	 *    prefix is managed by its own lifetime.
+	 * 2) if there're no addresses, delete prefix.
+	 * 3) if there're still other permanent address(es),
+	 *    corresponding prefix is still permanent.
+	 * 4) otherwise, update prefix lifetime to the
+	 *    longest valid lifetime among the corresponding
+	 *    addresses on the device.
+	 *    Note: subsequent RA will update lifetime.
+	 *
+	 * --yoshfuji
+	 */
+	if ((ifp->flags & IFA_F_PERMANENT) && onlink < 1) {
+		struct in6_addr prefix;
+		struct rt6_info *rt;
+
+		ipv6_addr_prefix(&prefix, &ifp->addr, ifp->prefix_len);
+		rt = rt6_lookup(&prefix, NULL, ifp->idev->dev->ifindex, 1);
+
+		if (rt && ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0)) {
+			if (onlink == 0) {
+				ip6_del_rt(rt, NULL, NULL);
+				/* Cleared so the route is not released again below. */
+				rt = NULL;
+			} else if (!(rt->rt6i_flags & RTF_EXPIRES)) {
+				rt->rt6i_expires = expires;
+				rt->rt6i_flags |= RTF_EXPIRES;
+			}
+		}
+		/*
+		 * rt may be NULL here (lookup miss, or cleared above).  Do
+		 * not form &rt->u.dst from a NULL pointer and rely on the
+		 * member offset being zero.
+		 */
+		if (rt)
+			dst_release(&rt->u.dst);
+	}
+
+	in6_ifa_put(ifp);
+}
+
+#ifdef CONFIG_IPV6_PRIVACY
+/*
+ * Create a temporary address derived from the public address @ifp.
+ *
+ * The prefix (upper 64 bits) comes from @ifp, the interface identifier
+ * from idev->rndid after __ipv6_try_regen_rndid().  If @ift is given,
+ * its interface identifier is handed to the regeneration step.
+ * The attempt is retried until ifp->regen_count reaches
+ * cnf.regen_max_retry, at which point temporary addresses are switched
+ * off for the device.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, struct inet6_ifaddr *ift)
+{
+	struct inet6_dev *idev = ifp->idev;
+	struct in6_addr addr, *tmpaddr = NULL;
+	unsigned long valid_lft, prefered_lft, cstamp, tstamp;
+	int plen;
+	int limit;
+
+	write_lock(&idev->lock);
+	if (ift) {
+		/* Remember the old interface identifier in the low half. */
+		spin_lock_bh(&ift->lock);
+		memcpy(&addr.s6_addr[8], &ift->addr.s6_addr[8], 8);
+		spin_unlock_bh(&ift->lock);
+		tmpaddr = &addr;
+	}
+
+retry:
+	/* Entered with idev->lock write-held. */
+	in6_dev_hold(idev);
+	if (idev->cnf.use_tempaddr <= 0) {
+		write_unlock(&idev->lock);
+		printk(KERN_INFO
+			"ipv6_create_tempaddr(): use_tempaddr is disabled.\n");
+		in6_dev_put(idev);
+		return -1;
+	}
+
+	spin_lock_bh(&ifp->lock);
+	if (ifp->regen_count++ >= idev->cnf.regen_max_retry) {
+		idev->cnf.use_tempaddr = -1;	/*XXX*/
+		spin_unlock_bh(&ifp->lock);
+		write_unlock(&idev->lock);
+		printk(KERN_WARNING
+			"ipv6_create_tempaddr(): regeneration time exceeded. disabled temporary address support.\n");
+		in6_dev_put(idev);
+		return -1;
+	}
+
+	in6_ifa_hold(ifp);
+	/* Prefix half comes from the public address. */
+	memcpy(addr.s6_addr, ifp->addr.s6_addr, 8);
+	if (__ipv6_try_regen_rndid(idev, tmpaddr) < 0) {
+		spin_unlock_bh(&ifp->lock);
+		write_unlock(&idev->lock);
+		printk(KERN_WARNING
+			"ipv6_create_tempaddr(): regeneration of randomized interface id failed.\n");
+		in6_ifa_put(ifp);
+		in6_dev_put(idev);
+		return -1;
+	}
+
+	/* Interface-identifier half is the (possibly fresh) random id. */
+	memcpy(&addr.s6_addr[8], idev->rndid, 8);
+
+	/* Snapshot everything needed from ifp/idev before unlocking. */
+	valid_lft = min_t(__u32,
+			  ifp->valid_lft,
+			  idev->cnf.temp_valid_lft);
+	prefered_lft = min_t(__u32,
+			     ifp->prefered_lft,
+			     idev->cnf.temp_prefered_lft - desync_factor / HZ);
+	plen = ifp->prefix_len;
+	limit = idev->cnf.max_addresses;
+	cstamp = ifp->cstamp;
+	tstamp = ifp->tstamp;
+	spin_unlock_bh(&ifp->lock);
+
+	write_unlock(&idev->lock);
+
+	/* A limit of 0 means the address count is not checked. */
+	if (!limit || ipv6_count_addresses(idev) < limit)
+		ift = ipv6_add_addr(idev, &addr, plen,
+				    ipv6_addr_type(&addr)&IPV6_ADDR_SCOPE_MASK,
+				    IFA_F_TEMPORARY);
+	else
+		ift = NULL;
+
+	if (!ift || IS_ERR(ift)) {
+		in6_ifa_put(ifp);
+		in6_dev_put(idev);
+		printk(KERN_INFO
+			"ipv6_create_tempaddr(): retry temporary address regeneration.\n");
+		tmpaddr = &addr;
+		write_lock(&idev->lock);
+		goto retry;
+	}
+
+	spin_lock_bh(&ift->lock);
+	ift->ifpub = ifp;
+	ift->valid_lft = valid_lft;
+	ift->prefered_lft = prefered_lft;
+	ift->cstamp = cstamp;
+	ift->tstamp = tstamp;
+	spin_unlock_bh(&ift->lock);
+
+	addrconf_dad_start(ift, 0);
+	in6_ifa_put(ift);
+	in6_dev_put(idev);
+	return 0;
+}
+#endif
+
+/*
+ * Choose an appropriate source address
+ * should do:
+ * i) get an address with an appropriate scope
+ * ii) see if there is a specific route for the destination and use
+ * an address of the attached interface
+ * iii) don't use deprecated addresses
+ */
+/*
+ * Score a candidate source address:
+ *   bit 1 (value 2) - the address is not deprecated
+ *   bit 0 (value 1) - with CONFIG_IPV6_PRIVACY: the address's
+ *                     IFA_F_TEMPORARY flag matches the one in @invpref
+ */
+static int inline ipv6_saddr_pref(const struct inet6_ifaddr *ifp, u8 invpref)
+{
+	int pref = 0;
+
+	if (!(ifp->flags & IFA_F_DEPRECATED))
+		pref = 2;
+#ifdef CONFIG_IPV6_PRIVACY
+	if (!((ifp->flags ^ invpref) & IFA_F_TEMPORARY))
+		pref |= 1;
+#endif
+	return pref;
+}
+
+#ifdef CONFIG_IPV6_PRIVACY
+#define IPV6_GET_SADDR_MAXSCORE(score) ((score) == 3)
+#else
+#define IPV6_GET_SADDR_MAXSCORE(score) (score)
+#endif
+
+int ipv6_dev_get_saddr(struct net_device *dev,
+ struct in6_addr *daddr, struct in6_addr *saddr)
+{
+ struct inet6_ifaddr *ifp = NULL;
+ struct inet6_ifaddr *match = NULL; /* best candidate so far; we hold a reference on it */
+ struct inet6_dev *idev;
+ int scope;
+ int err;
+ int hiscore = -1, score; /* -1 so that any non-tentative candidate wins the first comparison */
+
+ scope = ipv6_addr_scope(daddr);
+
+ /*
+ * Pick a source address for daddr and copy it to saddr.
+ * Returns 0 on success, -EADDRNOTAVAIL if nothing suitable exists.
+ * Candidates must have the same scope as daddr and must not be
+ * tentative; the one with the highest ipv6_saddr_pref() score wins.
+ *
+ * First pass:
+ * known dev
+ * search dev and walk through dev addresses
+ */
+
+ if (dev) {
+ if (dev->flags & IFF_LOOPBACK)
+ scope = IFA_HOST;
+
+ read_lock(&addrconf_lock);
+ idev = __in6_dev_get(dev);
+ if (idev) {
+ read_lock_bh(&idev->lock);
+ for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) {
+ if (ifp->scope == scope) {
+ if (ifp->flags&IFA_F_TENTATIVE)
+ continue;
+#ifdef CONFIG_IPV6_PRIVACY
+ score = ipv6_saddr_pref(ifp, idev->cnf.use_tempaddr > 1 ? IFA_F_TEMPORARY : 0);
+#else
+ score = ipv6_saddr_pref(ifp, 0);
+#endif
+ if (score <= hiscore)
+ continue;
+
+ if (match)
+ in6_ifa_put(match);
+ match = ifp;
+ hiscore = score;
+ in6_ifa_hold(ifp);
+
+ if (IPV6_GET_SADDR_MAXSCORE(score)) { /* cannot do better: stop searching */
+ read_unlock_bh(&idev->lock);
+ read_unlock(&addrconf_lock);
+ goto out;
+ }
+ }
+ }
+ read_unlock_bh(&idev->lock);
+ }
+ read_unlock(&addrconf_lock);
+ }
+
+ if (scope == IFA_LINK) /* link scope: do not look at other devices */
+ goto out;
+
+ /*
+ * Second pass:
+ * dev == NULL or search failed for specified dev
+ * Walk every device; hiscore/match carry over from the first pass,
+ * so only a strictly better candidate replaces the current one.
+ */
+
+ read_lock(&dev_base_lock);
+ read_lock(&addrconf_lock);
+ for (dev = dev_base; dev; dev=dev->next) {
+ idev = __in6_dev_get(dev);
+ if (idev) {
+ read_lock_bh(&idev->lock);
+ for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) {
+ if (ifp->scope == scope) {
+ if (ifp->flags&IFA_F_TENTATIVE)
+ continue;
+#ifdef CONFIG_IPV6_PRIVACY
+ score = ipv6_saddr_pref(ifp, idev->cnf.use_tempaddr > 1 ? IFA_F_TEMPORARY : 0);
+#else
+ score = ipv6_saddr_pref(ifp, 0);
+#endif
+ if (score <= hiscore)
+ continue;
+
+ if (match)
+ in6_ifa_put(match);
+ match = ifp;
+ hiscore = score;
+ in6_ifa_hold(ifp);
+
+ if (IPV6_GET_SADDR_MAXSCORE(score)) {
+ read_unlock_bh(&idev->lock);
+ goto out_unlock_base;
+ }
+ }
+ }
+ read_unlock_bh(&idev->lock);
+ }
+ }
+
+out_unlock_base:
+ read_unlock(&addrconf_lock);
+ read_unlock(&dev_base_lock);
+
+out:
+ err = -EADDRNOTAVAIL;
+ if (match) {
+ ipv6_addr_copy(saddr, &match->addr);
+ err = 0;
+ in6_ifa_put(match); /* drop the reference taken when match was selected */
+ }
+
+ return err;
+}
+
+
+/*
+ * Wrapper around ipv6_dev_get_saddr(): use the device behind @dst's
+ * route as the preferred device, or no preferred device if @dst is NULL.
+ */
+int ipv6_get_saddr(struct dst_entry *dst,
+		   struct in6_addr *daddr, struct in6_addr *saddr)
+{
+	struct net_device *dev = NULL;
+
+	if (dst)
+		dev = ((struct rt6_info *)dst)->rt6i_idev->dev;
+
+	return ipv6_dev_get_saddr(dev, daddr, saddr);
+}
+
+
+/*
+ * Copy the first non-tentative link-scope address of @dev into @addr.
+ * Returns 0 on success, -EADDRNOTAVAIL if the device has no inet6_dev
+ * or no such address.
+ */
+int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr)
+{
+	struct inet6_dev *idev;
+	struct inet6_ifaddr *ifp;
+	int err = -EADDRNOTAVAIL;
+
+	read_lock(&addrconf_lock);
+	idev = __in6_dev_get(dev);
+	if (idev == NULL)
+		goto unlock;
+
+	read_lock_bh(&idev->lock);
+	for (ifp = idev->addr_list; ifp != NULL; ifp = ifp->if_next) {
+		if (ifp->scope != IFA_LINK || (ifp->flags & IFA_F_TENTATIVE))
+			continue;
+
+		ipv6_addr_copy(addr, &ifp->addr);
+		err = 0;
+		break;
+	}
+	read_unlock_bh(&idev->lock);
+unlock:
+	read_unlock(&addrconf_lock);
+	return err;
+}
+
+/* Count the entries on @idev's unicast address list (takes idev->lock for reading). */
+static int ipv6_count_addresses(struct inet6_dev *idev)
+{
+	struct inet6_ifaddr *ifp;
+	int cnt = 0;
+
+	read_lock_bh(&idev->lock);
+	ifp = idev->addr_list;
+	while (ifp != NULL) {
+		cnt++;
+		ifp = ifp->if_next;
+	}
+	read_unlock_bh(&idev->lock);
+
+	return cnt;
+}
+
+/*
+ * Check whether @addr is configured as a non-tentative address.
+ *
+ * A hash-table entry matches if @dev is NULL, if it belongs to @dev, or
+ * if it is neither link- nor host-scoped and @strict is zero.
+ * Returns 1 if a matching entry exists, 0 otherwise.
+ */
+int ipv6_chk_addr(struct in6_addr *addr, struct net_device *dev, int strict)
+{
+	struct inet6_ifaddr *ifp;
+	u8 hash = ipv6_addr_hash(addr);
+	int found = 0;
+
+	read_lock_bh(&addrconf_hash_lock);
+	for (ifp = inet6_addr_lst[hash]; ifp != NULL; ifp = ifp->lst_next) {
+		if (!ipv6_addr_equal(&ifp->addr, addr))
+			continue;
+		if (ifp->flags & IFA_F_TENTATIVE)
+			continue;
+
+		if (dev == NULL || ifp->idev->dev == dev ||
+		    (!(ifp->scope & (IFA_LINK|IFA_HOST)) && !strict)) {
+			found = 1;
+			break;
+		}
+	}
+	read_unlock_bh(&addrconf_hash_lock);
+
+	return found;
+}
+
+/*
+ * Return 1 if @addr is already in the hash table for @dev (or for any
+ * device when @dev is NULL), tentative or not; 0 otherwise.
+ *
+ * Takes no lock itself; ipv6_add_addr() calls it with
+ * addrconf_hash_lock write-held.
+ */
+static
+int ipv6_chk_same_addr(const struct in6_addr *addr, struct net_device *dev)
+{
+	struct inet6_ifaddr *ifp;
+	u8 hash = ipv6_addr_hash(addr);
+
+	for (ifp = inet6_addr_lst[hash]; ifp != NULL; ifp = ifp->lst_next) {
+		if (!ipv6_addr_equal(&ifp->addr, addr))
+			continue;
+		if (dev == NULL || ifp->idev->dev == dev)
+			return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Look up the inet6_ifaddr for @addr, tentative addresses included.
+ *
+ * An entry matches if @dev is NULL, if it belongs to @dev, or if it is
+ * neither link- nor host-scoped and @strict is zero.  Returns the entry
+ * with a reference held for the caller, or NULL if there is none.
+ */
+struct inet6_ifaddr * ipv6_get_ifaddr(struct in6_addr *addr, struct net_device *dev, int strict)
+{
+	struct inet6_ifaddr *ifp;
+	u8 hash = ipv6_addr_hash(addr);
+
+	read_lock_bh(&addrconf_hash_lock);
+	for (ifp = inet6_addr_lst[hash]; ifp != NULL; ifp = ifp->lst_next) {
+		if (!ipv6_addr_equal(&ifp->addr, addr))
+			continue;
+
+		/* Wrong device, and scoped or strict matching requested. */
+		if (dev != NULL && ifp->idev->dev != dev &&
+		    ((ifp->scope & (IFA_LINK|IFA_HOST)) || strict))
+			continue;
+
+		in6_ifa_hold(ifp);
+		break;
+	}
+	read_unlock_bh(&addrconf_hash_lock);
+
+	return ifp;
+}
+
+/*
+ * Decide whether the receive addresses of @sk and @sk2 overlap.
+ * Both the IPv6 and the IPv4 receive addresses are considered, as are
+ * the sockets' IPv6-only settings.  Returns 1 if they overlap, else 0.
+ */
+int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
+{
+	const struct in6_addr *addr6 = &inet6_sk(sk)->rcv_saddr;
+	const struct in6_addr *addr6_2 = tcp_v6_rcv_saddr(sk2);
+	u32 addr4 = inet_sk(sk)->rcv_saddr;
+	u32 addr4_2 = tcp_v4_rcv_saddr(sk2);
+	int v6only = ipv6_only_sock(sk);
+	int v6only_2 = tcp_v6_ipv6only(sk2);
+	int type = ipv6_addr_type(addr6);
+	/* A socket without an IPv6 receive address is treated as v4-mapped. */
+	int type2 = addr6_2 ? ipv6_addr_type(addr6_2) : IPV6_ADDR_MAPPED;
+
+	/* sk2 has no IPv4 receive address and sk also accepts IPv4. */
+	if (!addr4_2 && !v6only)
+		return 1;
+
+	/* sk2 is an IPv6 wildcard, unless it is v6-only and sk is v4-mapped. */
+	if (type2 == IPV6_ADDR_ANY &&
+	    (!v6only_2 || type != IPV6_ADDR_MAPPED))
+		return 1;
+
+	/* Same test with the roles swapped. */
+	if (type == IPV6_ADDR_ANY &&
+	    (!v6only || type2 != IPV6_ADDR_MAPPED))
+		return 1;
+
+	/* Identical IPv6 receive addresses. */
+	if (addr6_2 &&
+	    ipv6_addr_equal(addr6, addr6_2))
+		return 1;
+
+	/* v4-mapped sk against sk2 that accepts IPv4: compare IPv4 addresses. */
+	if (type == IPV6_ADDR_MAPPED &&
+	    !v6only_2 &&
+	    (!addr4_2 || !addr4 || addr4 == addr4_2))
+		return 1;
+
+	return 0;
+}
+
+/*
+ * Handle a Duplicate Address Detection failure for @ifp.
+ *
+ * Gets referenced address, destroys ifaddr: every branch below ends by
+ * either dropping the reference with in6_ifa_put() or handing @ifp to
+ * ipv6_del_addr().
+ *
+ *  - IFA_F_PERMANENT: the address is kept, its timer is stopped and
+ *    IFA_F_TENTATIVE is set again.
+ *  - IFA_F_TEMPORARY (CONFIG_IPV6_PRIVACY only): if a public address is
+ *    linked through ifp->ifpub, ipv6_create_tempaddr() is called with
+ *    it before @ifp is deleted.
+ *  - anything else: the address is deleted.
+ */
+
+void addrconf_dad_failure(struct inet6_ifaddr *ifp)
+{
+ if (net_ratelimit())
+ printk(KERN_INFO "%s: duplicate address detected!\n", ifp->idev->dev->name);
+ if (ifp->flags&IFA_F_PERMANENT) {
+ spin_lock_bh(&ifp->lock);
+ addrconf_del_timer(ifp);
+ ifp->flags |= IFA_F_TENTATIVE;
+ spin_unlock_bh(&ifp->lock);
+ in6_ifa_put(ifp);
+#ifdef CONFIG_IPV6_PRIVACY
+ } else if (ifp->flags&IFA_F_TEMPORARY) {
+ struct inet6_ifaddr *ifpub;
+ spin_lock_bh(&ifp->lock);
+ ifpub = ifp->ifpub;
+ if (ifpub) {
+ in6_ifa_hold(ifpub); /* keep ifpub alive once ifp->lock is dropped */
+ spin_unlock_bh(&ifp->lock);
+ ipv6_create_tempaddr(ifpub, ifp);
+ in6_ifa_put(ifpub);
+ } else {
+ spin_unlock_bh(&ifp->lock);
+ }
+ ipv6_del_addr(ifp);
+#endif
+ } else
+ ipv6_del_addr(ifp);
+}
+
+
+/*
+ * Join the solicited-node multicast group derived from @addr on @dev.
+ * Nothing is done for devices flagged IFF_LOOPBACK or IFF_NOARP.
+ */
+
+void addrconf_join_solict(struct net_device *dev, struct in6_addr *addr)
+{
+	struct in6_addr solicited;
+
+	if (!(dev->flags & (IFF_LOOPBACK|IFF_NOARP))) {
+		addrconf_addr_solict_mult(addr, &solicited);
+		ipv6_dev_mc_inc(dev, &solicited);
+	}
+}
+
+/*
+ * Leave the solicited-node multicast group derived from @addr on the
+ * device behind @idev.  Nothing is done for devices flagged
+ * IFF_LOOPBACK or IFF_NOARP.
+ */
+void addrconf_leave_solict(struct inet6_dev *idev, struct in6_addr *addr)
+{
+	struct in6_addr solicited;
+
+	if (!(idev->dev->flags & (IFF_LOOPBACK|IFF_NOARP))) {
+		addrconf_addr_solict_mult(addr, &solicited);
+		__ipv6_dev_mc_dec(idev, &solicited);
+	}
+}
+
+/*
+ * Join the anycast address formed by the first ifp->prefix_len bits of
+ * ifp->addr on the address's device.  An all-zero prefix is ignored.
+ */
+void addrconf_join_anycast(struct inet6_ifaddr *ifp)
+{
+	struct in6_addr prefix;
+
+	ipv6_addr_prefix(&prefix, &ifp->addr, ifp->prefix_len);
+	if (!ipv6_addr_any(&prefix))
+		ipv6_dev_ac_inc(ifp->idev->dev, &prefix);
+}
+
+/*
+ * Leave the anycast address formed by the first ifp->prefix_len bits of
+ * ifp->addr.  An all-zero prefix is ignored.
+ */
+void addrconf_leave_anycast(struct inet6_ifaddr *ifp)
+{
+	struct in6_addr prefix;
+
+	ipv6_addr_prefix(&prefix, &ifp->addr, ifp->prefix_len);
+	if (!ipv6_addr_any(&prefix))
+		__ipv6_dev_ac_dec(ifp->idev, &prefix);
+}
+
+/*
+ * Build an 8-byte interface identifier for @dev into @eui.
+ *
+ * Supported device types are Ethernet, FDDI, Token Ring, ARCnet and
+ * InfiniBand; each requires the matching dev->addr_len.
+ *
+ * Returns 0 on success, -1 for an unsupported type or an unexpected
+ * hardware address length.
+ */
+static int ipv6_generate_eui64(u8 *eui, struct net_device *dev)
+{
+	const unsigned char *hw = dev->dev_addr;
+
+	if (dev->type == ARPHRD_ETHER ||
+	    dev->type == ARPHRD_FDDI ||
+	    dev->type == ARPHRD_IEEE802_TR) {
+		if (dev->addr_len != ETH_ALEN)
+			return -1;
+
+		/* upper and lower halves of the MAC around a 16-bit filler */
+		memcpy(eui, hw, 3);
+		memcpy(eui + 5, hw + 3, 3);
+
+		/*
+		 * The zSeries OSA network cards can be shared among various
+		 * OS instances, but the OSA cards have only one MAC address.
+		 * This leads to duplicate address conflicts in conjunction
+		 * with IPv6 if more than one instance uses the same card.
+		 *
+		 * The driver for these cards can deliver a unique 16-bit
+		 * identifier for each instance sharing the same card.  It is
+		 * placed instead of 0xFFFE in the interface identifier.  The
+		 * "u" bit of the interface identifier is not inverted in this
+		 * case.  Hence the resulting interface identifier has local
+		 * scope according to RFC2373.
+		 */
+		if (dev->dev_id) {
+			eui[3] = (dev->dev_id >> 8) & 0xFF;
+			eui[4] = dev->dev_id & 0xFF;
+		} else {
+			eui[3] = 0xFF;
+			eui[4] = 0xFE;
+			eui[0] ^= 2;
+		}
+		return 0;
+	}
+
+	if (dev->type == ARPHRD_ARCNET) {
+		/* XXX: inherit EUI-64 from other interface -- yoshfuji */
+		if (dev->addr_len != ARCNET_ALEN)
+			return -1;
+		memset(eui, 0, 7);
+		eui[7] = hw[0];
+		return 0;
+	}
+
+	if (dev->type == ARPHRD_INFINIBAND) {
+		if (dev->addr_len != INFINIBAND_ALEN)
+			return -1;
+		/* identifier is taken from hardware address bytes 12..19 */
+		memcpy(eui, hw + 12, 8);
+		eui[0] |= 2;
+		return 0;
+	}
+
+	return -1;
+}
+
+/*
+ * Copy the interface identifier (low 8 bytes) of the first non-tentative
+ * IFA_LINK-scoped address on @idev into @eui.
+ *
+ * Returns 0 when such an address was found, -1 otherwise.
+ */
+static int ipv6_inherit_eui64(u8 *eui, struct inet6_dev *idev)
+{
+	struct inet6_ifaddr *ifa;
+	int rc = -1;
+
+	read_lock_bh(&idev->lock);
+	ifa = idev->addr_list;
+	while (ifa != NULL) {
+		if (ifa->scope == IFA_LINK &&
+		    (ifa->flags & IFA_F_TENTATIVE) == 0) {
+			memcpy(eui, ifa->addr.s6_addr + 8, 8);
+			rc = 0;
+			break;
+		}
+		ifa = ifa->if_next;
+	}
+	read_unlock_bh(&idev->lock);
+	return rc;
+}
+
+#ifdef CONFIG_IPV6_PRIVACY
+/*
+ * (re)generation of randomized interface identifier (RFC 3041 3.2, 3.5)
+ *
+ * Hashes 8 bytes of idev->entropy followed by 8 bytes of
+ * idev->work_eui64 with md5_tfm (serialised by md5_tfm_lock).  The
+ * first 8 digest bytes become idev->rndid, with bit 0x02 of byte 0
+ * cleared, and the next 8 digest bytes replace idev->entropy.  When the
+ * result matches one of the excluded patterns listed further down, the
+ * hash is simply run again over the updated entropy.
+ *
+ * work_eui64 is filled from ipv6_generate_eui64(); if that fails,
+ * random bytes are used instead.
+ *
+ * Returns 0 on success, -1 when md5_tfm is NULL.
+ */
+static int __ipv6_regen_rndid(struct inet6_dev *idev)
+{
+ struct net_device *dev;
+ struct scatterlist sg[2];
+
+ sg[0].page = virt_to_page(idev->entropy);
+ sg[0].offset = offset_in_page(idev->entropy);
+ sg[0].length = 8;
+ sg[1].page = virt_to_page(idev->work_eui64);
+ sg[1].offset = offset_in_page(idev->work_eui64);
+ sg[1].length = 8;
+
+ dev = idev->dev;
+
+ if (ipv6_generate_eui64(idev->work_eui64, dev)) {
+ printk(KERN_INFO
+ "__ipv6_regen_rndid(idev=%p): cannot get EUI64 identifier; use random bytes.\n",
+ idev);
+ get_random_bytes(idev->work_eui64, sizeof(idev->work_eui64));
+ }
+regen:
+ spin_lock(&md5_tfm_lock);
+ if (unlikely(md5_tfm == NULL)) {
+ spin_unlock(&md5_tfm_lock);
+ return -1;
+ }
+ crypto_digest_init(md5_tfm);
+ crypto_digest_update(md5_tfm, sg, 2);
+ crypto_digest_final(md5_tfm, idev->work_digest);
+ spin_unlock(&md5_tfm_lock);
+
+ memcpy(idev->rndid, &idev->work_digest[0], 8);
+ idev->rndid[0] &= ~0x02;
+ memcpy(idev->entropy, &idev->work_digest[8], 8); /* feeds the next round */
+
+ /*
+ * <draft-ietf-ipngwg-temp-addresses-v2-00.txt>:
+ * check if generated address is not inappropriate
+ *
+ * - Reserved subnet anycast (RFC 2526)
+ * 11111101 11....11 1xxxxxxx
+ * - ISATAP (draft-ietf-ngtrans-isatap-13.txt) 5.1
+ * 00-00-5E-FE-xx-xx-xx-xx
+ * - value 0
+ * - XXX: already assigned to an address on the device
+ */
+ if (idev->rndid[0] == 0xfd &&
+ (idev->rndid[1]&idev->rndid[2]&idev->rndid[3]&idev->rndid[4]&idev->rndid[5]&idev->rndid[6]) == 0xff &&
+ (idev->rndid[7]&0x80))
+ goto regen;
+ if ((idev->rndid[0]|idev->rndid[1]) == 0) {
+ if (idev->rndid[2] == 0x5e && idev->rndid[3] == 0xfe)
+ goto regen;
+ if ((idev->rndid[2]|idev->rndid[3]|idev->rndid[4]|idev->rndid[5]|idev->rndid[6]|idev->rndid[7]) == 0x00)
+ goto regen;
+ }
+
+ return 0;
+}
+/*
+ * Timer-style callback that regenerates the random interface identifier
+ * of the inet6_dev passed (cast to unsigned long) in @data.
+ *
+ * Runs under addrconf_lock (read) and idev->lock (write).  Nothing is
+ * regenerated for a dead idev.  After a successful regeneration the
+ * next expiry is computed from cnf.temp_prefered_lft, cnf.regen_max_retry,
+ * cnf.dad_transmits, the neighbour retransmit time and desync_factor;
+ * if that moment is already in the past the timer is left disabled and
+ * a warning is printed.
+ *
+ * The function always ends with in6_dev_put(idev) -- presumably this
+ * releases the reference that was held on behalf of the timer; confirm
+ * against the code that arms regen_timer.
+ */
+static void ipv6_regen_rndid(unsigned long data)
+{
+ struct inet6_dev *idev = (struct inet6_dev *) data;
+ unsigned long expires;
+
+ read_lock_bh(&addrconf_lock);
+ write_lock_bh(&idev->lock);
+
+ if (idev->dead)
+ goto out;
+
+ if (__ipv6_regen_rndid(idev) < 0)
+ goto out;
+
+ expires = jiffies +
+ idev->cnf.temp_prefered_lft * HZ -
+ idev->cnf.regen_max_retry * idev->cnf.dad_transmits * idev->nd_parms->retrans_time - desync_factor;
+ if (time_before(expires, jiffies)) {
+ printk(KERN_WARNING
+ "ipv6_regen_rndid(): too short regeneration interval; timer disabled for %s.\n",
+ idev->dev->name);
+ goto out;
+ }
+
+ if (!mod_timer(&idev->regen_timer, expires)) /* zero return: take a reference for the re-armed timer */
+ in6_dev_hold(idev);
+
+out:
+ write_unlock_bh(&idev->lock);
+ read_unlock_bh(&addrconf_lock);
+ in6_dev_put(idev);
+}
+
+/*
+ * Regenerate idev->rndid only when @tmpaddr is given and its interface
+ * identifier (bytes 8..15) equals the current idev->rndid.
+ *
+ * Returns 0 when no regeneration was needed, otherwise the result of
+ * __ipv6_regen_rndid().
+ */
+static int __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr)
+{
+	if (tmpaddr == NULL)
+		return 0;
+	if (memcmp(idev->rndid, &tmpaddr->s6_addr[8], 8) != 0)
+		return 0;
+	return __ipv6_regen_rndid(idev);
+}
+#endif
+
+/*
+ *	Add prefix route.
+ *
+ *	Installs a route for @pfx/@plen through @dev with metric
+ *	IP6_RT_PRIO_ADDRCONF.  @expires is passed in rtmsg_info and @flags
+ *	are OR-ed into RTF_UP.  The result of ip6_route_add() is ignored.
+ */
+
+static void
+addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev,
+		      unsigned long expires, unsigned flags)
+{
+	struct in6_rtmsg req;
+	unsigned rt_flags = RTF_UP | flags;
+
+	/* Prevent useless cloning on PtP SIT.
+	   This thing is done here expecting that the whole
+	   class of non-broadcast devices need not cloning.
+	 */
+	if (dev->type == ARPHRD_SIT && (dev->flags & IFF_POINTOPOINT))
+		rt_flags |= RTF_NONEXTHOP;
+
+	memset(&req, 0, sizeof(req));
+	req.rtmsg_type = RTMSG_NEWROUTE;
+	req.rtmsg_flags = rt_flags;
+	req.rtmsg_metric = IP6_RT_PRIO_ADDRCONF;
+	req.rtmsg_ifindex = dev->ifindex;
+	req.rtmsg_info = expires;
+	req.rtmsg_dst_len = plen;
+	ipv6_addr_copy(&req.rtmsg_dst, pfx);
+
+	ip6_route_add(&req, NULL, NULL);
+}
+
+/*
+ * Create "default" multicast route to the interface: ff00::/8 via @dev
+ * with metric IP6_RT_PRIO_ADDRCONF.
+ */
+
+static void addrconf_add_mroute(struct net_device *dev)
+{
+	struct in6_rtmsg req;
+
+	memset(&req, 0, sizeof(req));
+	req.rtmsg_type = RTMSG_NEWROUTE;
+	req.rtmsg_flags = RTF_UP;
+	req.rtmsg_metric = IP6_RT_PRIO_ADDRCONF;
+	req.rtmsg_ifindex = dev->ifindex;
+	req.rtmsg_dst_len = 8;
+	ipv6_addr_set(&req.rtmsg_dst, htonl(0xFF000000), 0, 0, 0);
+
+	ip6_route_add(&req, NULL, NULL);
+}
+
+/*
+ * Add the ::/96 route through SIT device @dev.  The destination is left
+ * all-zero by the memset; the route is flagged RTF_UP|RTF_NONEXTHOP.
+ */
+static void sit_route_add(struct net_device *dev)
+{
+	struct in6_rtmsg req;
+
+	memset(&req, 0, sizeof(req));
+
+	req.rtmsg_ifindex = dev->ifindex;
+	req.rtmsg_flags = RTF_UP|RTF_NONEXTHOP;
+	req.rtmsg_metric = IP6_RT_PRIO_ADDRCONF;
+	req.rtmsg_type = RTMSG_NEWROUTE;
+
+	/* prefix length - 96 bits "::d.d.d.d" */
+	req.rtmsg_dst_len = 96;
+
+	ip6_route_add(&req, NULL, NULL);
+}
+
+/*
+ * Add the link-local prefix route fe80::/64 through @dev, with no
+ * expiry and no extra route flags.
+ */
+static void addrconf_add_lroute(struct net_device *dev)
+{
+	struct in6_addr ll_prefix;
+
+	ipv6_addr_set(&ll_prefix, htonl(0xFE800000), 0, 0, 0);
+	addrconf_prefix_route(&ll_prefix, 64, dev, 0, 0);
+}
+
+/*
+ * Obtain the inet6_dev for @dev through ipv6_find_idev() and, when that
+ * succeeds, install the default multicast route and the link-local
+ * route on the device.  Must be called with the RTNL held.
+ *
+ * Returns the inet6_dev, or NULL when ipv6_find_idev() fails.
+ */
+static struct inet6_dev *addrconf_add_dev(struct net_device *dev)
+{
+	struct inet6_dev *idev;
+
+	ASSERT_RTNL();
+
+	idev = ipv6_find_idev(dev);
+	if (idev != NULL) {
+		/* Add default multicast route */
+		addrconf_add_mroute(dev);
+
+		/* Add link local route */
+		addrconf_add_lroute(dev);
+	}
+	return idev;
+}
+/*
+ * Process one Prefix Information option (@opt, @len bytes) received on
+ * @dev.
+ *
+ * The option is ignored when it is shorter than struct prefix_info,
+ * when the prefix is multicast or link-local, when the preferred
+ * lifetime exceeds the valid lifetime, or when @dev has no inet6_dev.
+ *
+ * On-link flag set:
+ *   - an existing non-gateway, non-default route carrying RTF_EXPIRES is
+ *     deleted (valid lifetime 0) or has its expiry refreshed;
+ *   - otherwise, for a non-zero valid lifetime, a new prefix route is
+ *     added.
+ *
+ * Autonomous flag set and cnf.autoconf enabled (64-bit prefixes only):
+ *   - the address prefix + EUI-64 is looked up, or created (subject to
+ *     cnf.max_addresses) and DAD is started;
+ *   - lifetimes are updated following RFC2462 5.5.3 e;
+ *   - with CONFIG_IPV6_PRIVACY, lifetimes of temporary addresses are
+ *     lowered and a temporary address is created for a new public one.
+ *
+ * The normal path ends with inet6_prefix_notify(RTM_NEWPREFIX); the
+ * early returns inside the autoconf branch skip that notification.
+ */
+void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len)
+{
+ struct prefix_info *pinfo;
+ __u32 valid_lft;
+ __u32 prefered_lft;
+ int addr_type;
+ unsigned long rt_expires;
+ struct inet6_dev *in6_dev;
+
+ pinfo = (struct prefix_info *) opt;
+
+ if (len < sizeof(struct prefix_info)) {
+ ADBG(("addrconf: prefix option too short\n"));
+ return;
+ }
+
+ /*
+ * Validation checks ([ADDRCONF], page 19)
+ */
+
+ addr_type = ipv6_addr_type(&pinfo->prefix);
+
+ if (addr_type & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL))
+ return;
+
+ valid_lft = ntohl(pinfo->valid);
+ prefered_lft = ntohl(pinfo->prefered);
+
+ if (prefered_lft > valid_lft) {
+ if (net_ratelimit())
+ printk(KERN_WARNING "addrconf: prefix option has invalid lifetime\n");
+ return;
+ }
+
+ in6_dev = in6_dev_get(dev);
+
+ if (in6_dev == NULL) {
+ if (net_ratelimit())
+ printk(KERN_DEBUG "addrconf: device %s not configured\n", dev->name);
+ return;
+ }
+
+ /*
+ * Two things going on here:
+ * 1) Add routes for on-link prefixes
+ * 2) Configure prefixes with the auto flag set
+ */
+
+ /* Avoid arithmetic overflow. Really, we could
+ save rt_expires in seconds, like valid_lft,
+ but it would require division in fib gc, which is
+ not good.
+ */
+ if (valid_lft >= 0x7FFFFFFF/HZ)
+ rt_expires = 0;
+ else
+ rt_expires = jiffies + valid_lft * HZ;
+
+ if (pinfo->onlink) {
+ struct rt6_info *rt;
+ rt = rt6_lookup(&pinfo->prefix, NULL, dev->ifindex, 1);
+
+ if (rt && ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0)) {
+ if (rt->rt6i_flags&RTF_EXPIRES) {
+ if (valid_lft == 0) {
+ ip6_del_rt(rt, NULL, NULL);
+ rt = NULL; /* do not dst_release() below */
+ } else {
+ rt->rt6i_expires = rt_expires;
+ }
+ }
+ } else if (valid_lft) { /* no usable route found: add one */
+ addrconf_prefix_route(&pinfo->prefix, pinfo->prefix_len,
+ dev, rt_expires, RTF_ADDRCONF|RTF_EXPIRES|RTF_PREFIX_RT);
+ }
+ if (rt)
+ dst_release(&rt->u.dst);
+ }
+
+ /* Try to figure out our local address for this prefix */
+
+ if (pinfo->autoconf && in6_dev->cnf.autoconf) {
+ struct inet6_ifaddr * ifp;
+ struct in6_addr addr;
+ int create = 0, update_lft = 0;
+
+ if (pinfo->prefix_len == 64) {
+ memcpy(&addr, &pinfo->prefix, 8);
+ if (ipv6_generate_eui64(addr.s6_addr + 8, dev) &&
+ ipv6_inherit_eui64(addr.s6_addr + 8, in6_dev)) { /* both sources failed */
+ in6_dev_put(in6_dev);
+ return;
+ }
+ goto ok;
+ }
+ if (net_ratelimit())
+ printk(KERN_DEBUG "IPv6 addrconf: prefix with wrong length %d\n",
+ pinfo->prefix_len);
+ in6_dev_put(in6_dev);
+ return;
+
+ok:
+
+ ifp = ipv6_get_ifaddr(&addr, dev, 1);
+
+ if (ifp == NULL && valid_lft) {
+ int max_addresses = in6_dev->cnf.max_addresses;
+
+ /* Do not allow creating too many autoconfigured
+ * addresses; this would be too easy a way to crash the kernel.
+ */
+ if (!max_addresses ||
+ ipv6_count_addresses(in6_dev) < max_addresses)
+ ifp = ipv6_add_addr(in6_dev, &addr, pinfo->prefix_len,
+ addr_type&IPV6_ADDR_SCOPE_MASK, 0);
+
+ if (!ifp || IS_ERR(ifp)) { /* limit reached or creation failed */
+ in6_dev_put(in6_dev);
+ return;
+ }
+
+ update_lft = create = 1;
+ ifp->cstamp = jiffies;
+ addrconf_dad_start(ifp, RTF_ADDRCONF|RTF_PREFIX_RT);
+ }
+
+ if (ifp) {
+ int flags;
+ unsigned long now;
+#ifdef CONFIG_IPV6_PRIVACY
+ struct inet6_ifaddr *ift;
+#endif
+ u32 stored_lft;
+
+ /* update lifetime (RFC2462 5.5.3 e) */
+ spin_lock(&ifp->lock);
+ now = jiffies;
+ if (ifp->valid_lft > (now - ifp->tstamp) / HZ)
+ stored_lft = ifp->valid_lft - (now - ifp->tstamp) / HZ; /* remaining valid lifetime, seconds */
+ else
+ stored_lft = 0;
+ if (!update_lft && stored_lft) {
+ if (valid_lft > MIN_VALID_LIFETIME ||
+ valid_lft > stored_lft)
+ update_lft = 1;
+ else if (stored_lft <= MIN_VALID_LIFETIME) {
+ /* valid_lft <= stored_lft is always true */
+ /* XXX: IPsec */
+ update_lft = 0;
+ } else {
+ valid_lft = MIN_VALID_LIFETIME;
+ if (valid_lft < prefered_lft)
+ prefered_lft = valid_lft;
+ update_lft = 1;
+ }
+ }
+
+ if (update_lft) {
+ ifp->valid_lft = valid_lft;
+ ifp->prefered_lft = prefered_lft;
+ ifp->tstamp = now;
+ flags = ifp->flags;
+ ifp->flags &= ~IFA_F_DEPRECATED;
+ spin_unlock(&ifp->lock);
+
+ if (!(flags&IFA_F_TENTATIVE))
+ ipv6_ifa_notify(0, ifp);
+ } else
+ spin_unlock(&ifp->lock);
+
+#ifdef CONFIG_IPV6_PRIVACY
+ read_lock_bh(&in6_dev->lock);
+ /* update all temporary addresses in the list */
+ for (ift=in6_dev->tempaddr_list; ift; ift=ift->tmp_next) {
+ /*
+ * When adjusting the lifetimes of an existing
+ * temporary address, only lower the lifetimes.
+ * Implementations must not increase the
+ * lifetimes of an existing temporary address
+ * when processing a Prefix Information Option.
+ */
+ spin_lock(&ift->lock);
+ flags = ift->flags;
+ if (ift->valid_lft > valid_lft &&
+ ift->valid_lft - valid_lft > (jiffies - ift->tstamp) / HZ)
+ ift->valid_lft = valid_lft + (jiffies - ift->tstamp) / HZ;
+ if (ift->prefered_lft > prefered_lft &&
+ ift->prefered_lft - prefered_lft > (jiffies - ift->tstamp) / HZ)
+ ift->prefered_lft = prefered_lft + (jiffies - ift->tstamp) / HZ;
+ spin_unlock(&ift->lock);
+ if (!(flags&IFA_F_TENTATIVE))
+ ipv6_ifa_notify(0, ift);
+ }
+
+ if (create && in6_dev->cnf.use_tempaddr > 0) {
+ /*
+ * When a new public address is created as described in [ADDRCONF],
+ * also create a new temporary address.
+ */
+ read_unlock_bh(&in6_dev->lock);
+ ipv6_create_tempaddr(ifp, NULL);
+ } else {
+ read_unlock_bh(&in6_dev->lock);
+ }
+#endif
+ in6_ifa_put(ifp);
+ addrconf_verify(0);
+ }
+ }
+ inet6_prefix_notify(RTM_NEWPREFIX, in6_dev, pinfo);
+ in6_dev_put(in6_dev);
+}
+
+/*
+ * Set destination address.
+ * Special case for SIT interfaces where we create a new "virtual"
+ * device.
+ *
+ * @arg points to a user-space struct in6_ifreq.  The whole operation
+ * runs under the RTNL.  For an ARPHRD_SIT device the address must be
+ * IPv4-compatible; its last 32 bits become the tunnel's IPv4
+ * destination, the device's SIOCADDTUNNEL ioctl is invoked with a
+ * kernel-space parameter block, and the resulting device (looked up by
+ * p.name) is opened.
+ *
+ * Returns 0 or the ioctl/dev_open() result on the SIT path, -EFAULT
+ * when the request cannot be copied in, -ENODEV when the device does
+ * not exist, -EADDRNOTAVAIL for a non-compatible address and -ENOBUFS
+ * when the new tunnel device cannot be found.  Note that for a device
+ * that exists but is not ARPHRD_SIT, err still holds -ENODEV when the
+ * function returns.
+ */
+int addrconf_set_dstaddr(void __user *arg)
+{
+ struct in6_ifreq ireq;
+ struct net_device *dev;
+ int err = -EINVAL; /* overwritten before any use */
+
+ rtnl_lock();
+
+ err = -EFAULT;
+ if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq)))
+ goto err_exit;
+
+ dev = __dev_get_by_index(ireq.ifr6_ifindex);
+
+ err = -ENODEV;
+ if (dev == NULL)
+ goto err_exit;
+
+ if (dev->type == ARPHRD_SIT) {
+ struct ifreq ifr;
+ mm_segment_t oldfs;
+ struct ip_tunnel_parm p;
+
+ err = -EADDRNOTAVAIL;
+ if (!(ipv6_addr_type(&ireq.ifr6_addr) & IPV6_ADDR_COMPATv4))
+ goto err_exit;
+
+ memset(&p, 0, sizeof(p));
+ p.iph.daddr = ireq.ifr6_addr.s6_addr32[3];
+ p.iph.saddr = 0;
+ p.iph.version = 4;
+ p.iph.ihl = 5;
+ p.iph.protocol = IPPROTO_IPV6;
+ p.iph.ttl = 64;
+ ifr.ifr_ifru.ifru_data = (void __user *)&p;
+
+ oldfs = get_fs(); set_fs(KERNEL_DS); /* p lives in kernel space */
+ err = dev->do_ioctl(dev, &ifr, SIOCADDTUNNEL);
+ set_fs(oldfs);
+
+ if (err == 0) {
+ err = -ENOBUFS;
+ if ((dev = __dev_get_by_name(p.name)) == NULL)
+ goto err_exit;
+ err = dev_open(dev);
+ }
+ }
+
+err_exit:
+ rtnl_unlock();
+ return err;
+}
+
+/*
+ *	Manual configuration of address on an interface
+ *
+ *	Adds @pfx/@plen as a permanent address to the device with index
+ *	@ifindex and starts DAD on it.  Must be called with the RTNL held.
+ *
+ *	Returns 0 on success, -ENODEV when the device does not exist,
+ *	-ENETDOWN when it is not up, -ENOBUFS when no inet6_dev can be
+ *	obtained, or the error encoded by ipv6_add_addr().
+ */
+static int inet6_addr_add(int ifindex, struct in6_addr *pfx, int plen)
+{
+	struct net_device *dev;
+	struct inet6_dev *idev;
+	struct inet6_ifaddr *ifa;
+
+	ASSERT_RTNL();
+
+	dev = __dev_get_by_index(ifindex);
+	if (dev == NULL)
+		return -ENODEV;
+
+	if ((dev->flags & IFF_UP) == 0)
+		return -ENETDOWN;
+
+	idev = addrconf_add_dev(dev);
+	if (idev == NULL)
+		return -ENOBUFS;
+
+	ifa = ipv6_add_addr(idev, pfx, plen, ipv6_addr_scope(pfx),
+			    IFA_F_PERMANENT);
+	if (IS_ERR(ifa))
+		return PTR_ERR(ifa);
+
+	addrconf_dad_start(ifa, 0);
+	in6_ifa_put(ifa);
+	return 0;
+}
+/*
+ * Delete the address @pfx with prefix length exactly @plen from the
+ * device with index @ifindex.  When this leaves the device without any
+ * address, IPv6 is shut down on it via addrconf_ifdown(dev, 1).
+ *
+ * Returns 0 on success, -ENODEV when the device does not exist, -ENXIO
+ * when it has no inet6_dev and -EADDRNOTAVAIL when the address is not
+ * found.
+ */
+static int inet6_addr_del(int ifindex, struct in6_addr *pfx, int plen)
+{
+ struct inet6_ifaddr *ifp;
+ struct inet6_dev *idev;
+ struct net_device *dev;
+
+ if ((dev = __dev_get_by_index(ifindex)) == NULL)
+ return -ENODEV;
+
+ if ((idev = __in6_dev_get(dev)) == NULL)
+ return -ENXIO;
+
+ read_lock_bh(&idev->lock);
+ for (ifp = idev->addr_list; ifp; ifp=ifp->if_next) {
+ if (ifp->prefix_len == plen &&
+ ipv6_addr_equal(pfx, &ifp->addr)) {
+ in6_ifa_hold(ifp); /* pin the entry before dropping idev->lock */
+ read_unlock_bh(&idev->lock);
+
+ ipv6_del_addr(ifp);
+
+ /* If the last address is deleted administratively,
+ disable IPv6 on this interface.
+ */
+ if (idev->addr_list == NULL)
+ addrconf_ifdown(idev->dev, 1);
+ return 0;
+ }
+ }
+ read_unlock_bh(&idev->lock);
+ return -EADDRNOTAVAIL;
+}
+
+
+/*
+ * ioctl-style entry point: add the address described by the user-space
+ * struct in6_ifreq at @arg.  Requires CAP_NET_ADMIN.
+ *
+ * Returns -EPERM without the capability, -EFAULT when the request
+ * cannot be copied in, otherwise the result of inet6_addr_add().
+ */
+int addrconf_add_ifaddr(void __user *arg)
+{
+	struct in6_ifreq req;
+	int rc;
+
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	if (copy_from_user(&req, arg, sizeof(req)) != 0)
+		return -EFAULT;
+
+	rtnl_lock();
+	rc = inet6_addr_add(req.ifr6_ifindex, &req.ifr6_addr,
+			    req.ifr6_prefixlen);
+	rtnl_unlock();
+
+	return rc;
+}
+
+/*
+ * ioctl-style entry point: delete the address described by the
+ * user-space struct in6_ifreq at @arg.  Requires CAP_NET_ADMIN.
+ *
+ * Returns -EPERM without the capability, -EFAULT when the request
+ * cannot be copied in, otherwise the result of inet6_addr_del().
+ */
+int addrconf_del_ifaddr(void __user *arg)
+{
+	struct in6_ifreq req;
+	int rc;
+
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	if (copy_from_user(&req, arg, sizeof(req)) != 0)
+		return -EFAULT;
+
+	rtnl_lock();
+	rc = inet6_addr_del(req.ifr6_ifindex, &req.ifr6_addr,
+			    req.ifr6_prefixlen);
+	rtnl_unlock();
+
+	return rc;
+}
+/*
+ * Give the SIT device behind @idev its IPv6 addresses derived from IPv4
+ * addresses.  Must be called with the RTNL held.
+ *
+ * The candidate address is built from the first four bytes of the
+ * device's hardware address placed in the low 32 bits.  Point-to-point
+ * devices use an fe80:: prefix and IFA_LINK scope; others use
+ * IPV6_ADDR_COMPATv4 as the scope value.
+ *
+ * If those four bytes are non-zero, that single /128 address is added
+ * and the function returns.  Otherwise every IPv4 address of every
+ * device that is up is considered: RT_SCOPE_LINK addresses are skipped,
+ * addresses with scope >= RT_SCOPE_HOST are skipped on point-to-point
+ * tunnels and marked IFA_HOST elsewhere.  The prefix length is 64 for
+ * point-to-point tunnels and 96 otherwise.
+ *
+ * Each address that is added is marked non-tentative immediately
+ * (no DAD) and announced with RTM_NEWADDR.
+ */
+static void sit_add_v4_addrs(struct inet6_dev *idev)
+{
+ struct inet6_ifaddr * ifp;
+ struct in6_addr addr;
+ struct net_device *dev;
+ int scope;
+
+ ASSERT_RTNL();
+
+ memset(&addr, 0, sizeof(struct in6_addr));
+ memcpy(&addr.s6_addr32[3], idev->dev->dev_addr, 4);
+
+ if (idev->dev->flags&IFF_POINTOPOINT) {
+ addr.s6_addr32[0] = htonl(0xfe800000);
+ scope = IFA_LINK;
+ } else {
+ scope = IPV6_ADDR_COMPATv4;
+ }
+
+ if (addr.s6_addr32[3]) {
+ ifp = ipv6_add_addr(idev, &addr, 128, scope, IFA_F_PERMANENT);
+ if (!IS_ERR(ifp)) {
+ spin_lock_bh(&ifp->lock);
+ ifp->flags &= ~IFA_F_TENTATIVE;
+ spin_unlock_bh(&ifp->lock);
+ ipv6_ifa_notify(RTM_NEWADDR, ifp);
+ in6_ifa_put(ifp);
+ }
+ return;
+ }
+
+ for (dev = dev_base; dev != NULL; dev = dev->next) {
+ struct in_device * in_dev = __in_dev_get(dev);
+ if (in_dev && (dev->flags & IFF_UP)) {
+ struct in_ifaddr * ifa;
+
+ int flag = scope; /* NOTE(review): set once per device, so IFA_HOST sticks for later addresses of the same device -- confirm intended */
+
+ for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
+ int plen;
+
+ addr.s6_addr32[3] = ifa->ifa_local;
+
+ if (ifa->ifa_scope == RT_SCOPE_LINK)
+ continue;
+ if (ifa->ifa_scope >= RT_SCOPE_HOST) {
+ if (idev->dev->flags&IFF_POINTOPOINT)
+ continue;
+ flag |= IFA_HOST;
+ }
+ if (idev->dev->flags&IFF_POINTOPOINT)
+ plen = 64;
+ else
+ plen = 96;
+
+ ifp = ipv6_add_addr(idev, &addr, plen, flag,
+ IFA_F_PERMANENT);
+ if (!IS_ERR(ifp)) {
+ spin_lock_bh(&ifp->lock);
+ ifp->flags &= ~IFA_F_TENTATIVE;
+ spin_unlock_bh(&ifp->lock);
+ ipv6_ifa_notify(RTM_NEWADDR, ifp);
+ in6_ifa_put(ifp);
+ }
+ }
+ }
+ }
+}
+
+/*
+ * Configure the loopback device @dev with the permanent host-scope
+ * address ::1/128.  The address is marked non-tentative right away and
+ * announced with RTM_NEWADDR.  Must be called with the RTNL held.
+ */
+static void init_loopback(struct net_device *dev)
+{
+	struct inet6_dev *idev;
+	struct inet6_ifaddr *lo;
+
+	ASSERT_RTNL();
+
+	idev = ipv6_find_idev(dev);
+	if (idev == NULL) {
+		printk(KERN_DEBUG "init loopback: add_dev failed\n");
+		return;
+	}
+
+	/* ::1 */
+	lo = ipv6_add_addr(idev, &in6addr_loopback, 128, IFA_HOST, IFA_F_PERMANENT);
+	if (IS_ERR(lo))
+		return;
+
+	spin_lock_bh(&lo->lock);
+	lo->flags &= ~IFA_F_TENTATIVE;
+	spin_unlock_bh(&lo->lock);
+
+	ipv6_ifa_notify(RTM_NEWADDR, lo);
+	in6_ifa_put(lo);
+}
+
+/*
+ * Add @addr as a permanent link-scope /64 address on @idev and start
+ * DAD for it.  Failure of ipv6_add_addr() is silently ignored.
+ */
+static void addrconf_add_linklocal(struct inet6_dev *idev, struct in6_addr *addr)
+{
+	struct inet6_ifaddr *ll;
+
+	ll = ipv6_add_addr(idev, addr, 64, IFA_LINK, IFA_F_PERMANENT);
+	if (IS_ERR(ll))
+		return;
+
+	addrconf_dad_start(ll, 0);
+	in6_ifa_put(ll);
+}
+
+/*
+ * Autoconfigure @dev: attach an inet6_dev (with its default routes) and
+ * add the link-local address fe80:: + EUI-64.  Only Ethernet, FDDI,
+ * Token Ring, ARCnet and InfiniBand devices are handled.  Must be
+ * called with the RTNL held.
+ */
+static void addrconf_dev_config(struct net_device *dev)
+{
+	struct inet6_dev *idev;
+	struct in6_addr ll;
+
+	ASSERT_RTNL();
+
+	switch (dev->type) {
+	case ARPHRD_ETHER:
+	case ARPHRD_FDDI:
+	case ARPHRD_IEEE802_TR:
+	case ARPHRD_ARCNET:
+	case ARPHRD_INFINIBAND:
+		break;
+	default:
+		/* Alas, we support only Ethernet autoconfiguration. */
+		return;
+	}
+
+	idev = addrconf_add_dev(dev);
+	if (idev == NULL)
+		return;
+
+	memset(&ll, 0, sizeof(ll));
+	ll.s6_addr32[0] = htonl(0xFE800000);
+
+	if (ipv6_generate_eui64(ll.s6_addr + 8, dev) != 0)
+		return;
+
+	addrconf_add_linklocal(idev, &ll);
+}
+
+/*
+ * Configure SIT tunnel device @dev: derive its addresses from IPv4
+ * addresses, then add the multicast and link-local routes for
+ * point-to-point tunnels or the ::/96 route otherwise.  Must be called
+ * with the RTNL held.
+ */
+static void addrconf_sit_config(struct net_device *dev)
+{
+	struct inet6_dev *idev;
+
+	ASSERT_RTNL();
+
+	/*
+	 * Configure the tunnel with one of our IPv4
+	 * addresses... we should configure all of
+	 * our v4 addrs in the tunnel
+	 */
+
+	idev = ipv6_find_idev(dev);
+	if (idev == NULL) {
+		printk(KERN_DEBUG "init sit: add_dev failed\n");
+		return;
+	}
+
+	sit_add_v4_addrs(idev);
+
+	if (!(dev->flags & IFF_POINTOPOINT)) {
+		sit_route_add(dev);
+		return;
+	}
+
+	addrconf_add_mroute(dev);
+	addrconf_add_lroute(dev);
+}
+
+/*
+ * Copy the link-local address of @link_dev onto @idev.
+ *
+ * Returns 0 when @link_dev provided a link-local address (which is then
+ * added to @idev), -1 otherwise.
+ */
+static inline int
+ipv6_inherit_linklocal(struct inet6_dev *idev, struct net_device *link_dev)
+{
+	struct in6_addr ll;
+
+	if (ipv6_get_lladdr(link_dev, &ll) != 0)
+		return -1;
+
+	addrconf_add_linklocal(idev, &ll);
+	return 0;
+}
+
+/*
+ * Give the tunnel behind @idev a link-local address borrowed from
+ * another device: first the device named by iflink (if any), then the
+ * first device on dev_base that has one.  A debug message is logged
+ * when no device can supply an address.
+ */
+static void ip6_tnl_add_linklocal(struct inet6_dev *idev)
+{
+	struct net_device *cand = NULL;
+
+	/* first try to inherit the link-local address from the link device */
+	if (idev->dev->iflink)
+		cand = __dev_get_by_index(idev->dev->iflink);
+	if (cand != NULL && ipv6_inherit_linklocal(idev, cand) == 0)
+		return;
+
+	/* then try to inherit it from any device */
+	for (cand = dev_base; cand != NULL; cand = cand->next) {
+		if (ipv6_inherit_linklocal(idev, cand) == 0)
+			return;
+	}
+
+	printk(KERN_DEBUG "init ip6-ip6: add_linklocal failed\n");
+}
+
+/*
+ * Autoconfigure tunnel with a link-local address so routing protocols,
+ * DHCPv6, MLD etc. can be run over the virtual link.
+ *
+ * After the link-local address has been set up, the default multicast
+ * route is added once more.  Must be called with the RTNL held.
+ */
+
+static void addrconf_ip6_tnl_config(struct net_device *dev)
+{
+	struct inet6_dev *idev;
+
+	ASSERT_RTNL();
+
+	idev = addrconf_add_dev(dev);
+	if (idev == NULL) {
+		printk(KERN_DEBUG "init ip6-ip6: add_dev failed\n");
+		return;
+	}
+
+	ip6_tnl_add_linklocal(idev);
+	addrconf_add_mroute(dev);
+}
+/*
+ * Netdevice notifier callback; @data is the struct net_device the
+ * @event refers to.  Always returns NOTIFY_OK.
+ *
+ * Note that idev is looked up once on entry, i.e. before the NETDEV_UP
+ * configuration helpers below have had a chance to create it; the
+ * MTU/notification part of NETDEV_UP therefore only runs for devices
+ * that already had an inet6_dev.
+ */
+static int addrconf_notify(struct notifier_block *this, unsigned long event,
+ void * data)
+{
+ struct net_device *dev = (struct net_device *) data;
+ struct inet6_dev *idev = __in6_dev_get(dev);
+
+ switch(event) {
+ case NETDEV_UP:
+ switch(dev->type) {
+ case ARPHRD_SIT:
+ addrconf_sit_config(dev);
+ break;
+ case ARPHRD_TUNNEL6:
+ addrconf_ip6_tnl_config(dev);
+ break;
+ case ARPHRD_LOOPBACK:
+ init_loopback(dev);
+ break;
+
+ default:
+ addrconf_dev_config(dev);
+ break;
+ };
+ if (idev) {
+ /* If the MTU changed while the interface was down, then
+ when the interface comes up the new MTU must be reflected
+ in the idev as well as in the routes.
+ */
+ if (idev->cnf.mtu6 != dev->mtu && dev->mtu >= IPV6_MIN_MTU) {
+ rt6_mtu_change(dev, dev->mtu);
+ idev->cnf.mtu6 = dev->mtu;
+ }
+ idev->tstamp = jiffies;
+ inet6_ifinfo_notify(RTM_NEWLINK, idev);
+ /* If the MTU changed during down is lower than IPV6_MIN_MTU,
+ stop IPv6 on this interface.
+ */
+ if (dev->mtu < IPV6_MIN_MTU)
+ addrconf_ifdown(dev, event != NETDEV_DOWN); /* event is NETDEV_UP here, so how == 1 */
+ }
+ break;
+
+ case NETDEV_CHANGEMTU:
+ if ( idev && dev->mtu >= IPV6_MIN_MTU) {
+ rt6_mtu_change(dev, dev->mtu);
+ idev->cnf.mtu6 = dev->mtu;
+ break;
+ }
+
+ /* MTU fell below IPV6_MIN_MTU. Stop IPv6 on this interface: fall through. */
+
+ case NETDEV_DOWN:
+ case NETDEV_UNREGISTER:
+ /*
+ * Remove all addresses from this interface.
+ */
+ addrconf_ifdown(dev, event != NETDEV_DOWN);
+ break;
+ case NETDEV_CHANGE:
+ break;
+ case NETDEV_CHANGENAME:
+#ifdef CONFIG_SYSCTL
+ if (idev) { /* re-register the sysctl entries after the rename */
+ addrconf_sysctl_unregister(&idev->cnf);
+ neigh_sysctl_unregister(idev->nd_parms);
+ neigh_sysctl_register(dev, idev->nd_parms,
+ NET_IPV6, NET_IPV6_NEIGH, "ipv6",
+ &ndisc_ifinfo_sysctl_change,
+ NULL);
+ addrconf_sysctl_register(idev, &idev->cnf);
+ }
+#endif
+ break;
+ };
+
+ return NOTIFY_OK;
+}
+
+/*
+ * addrconf module should be notified of a device going up
+ *
+ * Notifier block that routes netdevice events to addrconf_notify() at
+ * priority 0.  Its registration is not part of this section.
+ */
+static struct notifier_block ipv6_dev_notf = {
+ .notifier_call = addrconf_notify,
+ .priority = 0
+};
+/*
+ * Tear down IPv6 state on @dev.  Must be called with the RTNL held.
+ *
+ * @how == 1 requests full destruction: the inet6_dev is detached from
+ * the device, marked dead and finally released.  Any other value only
+ * clears addresses, the RS/RA state flags and multicast state.  For
+ * loopback_dev a request of 1 is downgraded to 0.
+ *
+ * Routes and neighbour entries for @dev are flushed before the
+ * inet6_dev lookup, i.e. even when the function then returns -ENODEV.
+ *
+ * Returns 0 on success, -ENODEV when @dev has no inet6_dev.
+ */
+static int addrconf_ifdown(struct net_device *dev, int how)
+{
+ struct inet6_dev *idev;
+ struct inet6_ifaddr *ifa, **bifa;
+ int i;
+
+ ASSERT_RTNL();
+
+ if (dev == &loopback_dev && how == 1)
+ how = 0;
+
+ rt6_ifdown(dev);
+ neigh_ifdown(&nd_tbl, dev);
+
+ idev = __in6_dev_get(dev);
+ if (idev == NULL)
+ return -ENODEV;
+
+ /* Step 1: remove reference to ipv6 device from parent device.
+ Do not dev_put!
+ */
+ if (how == 1) {
+ write_lock_bh(&addrconf_lock);
+ dev->ip6_ptr = NULL;
+ idev->dead = 1;
+ write_unlock_bh(&addrconf_lock);
+
+ /* Step 1.5: remove snmp6 entry */
+ snmp6_unregister_dev(idev);
+
+ }
+
+ /* Step 2: clear hash table */
+ for (i=0; i<IN6_ADDR_HSIZE; i++) {
+ bifa = &inet6_addr_lst[i];
+
+ write_lock_bh(&addrconf_hash_lock);
+ while ((ifa = *bifa) != NULL) {
+ if (ifa->idev == idev) {
+ *bifa = ifa->lst_next; /* unlink; bifa stays on the same slot */
+ ifa->lst_next = NULL;
+ addrconf_del_timer(ifa);
+ in6_ifa_put(ifa);
+ continue;
+ }
+ bifa = &ifa->lst_next;
+ }
+ write_unlock_bh(&addrconf_hash_lock);
+ }
+
+ write_lock_bh(&idev->lock);
+
+ /* Step 3: clear flags for stateless addrconf */
+ if (how != 1)
+ idev->if_flags &= ~(IF_RS_SENT|IF_RA_RCVD);
+
+ /* Step 4: clear address list */
+#ifdef CONFIG_IPV6_PRIVACY
+ if (how == 1 && del_timer(&idev->regen_timer))
+ in6_dev_put(idev);
+
+ /* clear tempaddr list */
+ while ((ifa = idev->tempaddr_list) != NULL) {
+ idev->tempaddr_list = ifa->tmp_next;
+ ifa->tmp_next = NULL;
+ ifa->dead = 1;
+ write_unlock_bh(&idev->lock); /* idev->lock is dropped around the per-address work */
+ spin_lock_bh(&ifa->lock);
+
+ if (ifa->ifpub) {
+ in6_ifa_put(ifa->ifpub);
+ ifa->ifpub = NULL;
+ }
+ spin_unlock_bh(&ifa->lock);
+ in6_ifa_put(ifa);
+ write_lock_bh(&idev->lock);
+ }
+#endif
+ while ((ifa = idev->addr_list) != NULL) {
+ idev->addr_list = ifa->if_next;
+ ifa->if_next = NULL;
+ ifa->dead = 1;
+ addrconf_del_timer(ifa);
+ write_unlock_bh(&idev->lock);
+
+ __ipv6_ifa_notify(RTM_DELADDR, ifa);
+ in6_ifa_put(ifa);
+
+ write_lock_bh(&idev->lock);
+ }
+ write_unlock_bh(&idev->lock);
+
+ /* Step 5: Discard multicast list */
+
+ if (how == 1)
+ ipv6_mc_destroy_dev(idev);
+ else
+ ipv6_mc_down(idev);
+
+ /* Step 6: netlink notification of this interface */
+ idev->tstamp = jiffies;
+ inet6_ifinfo_notify(RTM_NEWLINK, idev);
+
+ /* Step 7: shoot the device (if unregistered) */
+
+ if (how == 1) {
+#ifdef CONFIG_SYSCTL
+ addrconf_sysctl_unregister(&idev->cnf);
+ neigh_sysctl_unregister(idev->nd_parms);
+#endif
+ neigh_parms_release(&nd_tbl, idev->nd_parms);
+ neigh_ifdown(&nd_tbl, dev);
+ in6_dev_put(idev);
+ }
+ return 0;
+}
+/*
+ * Router Solicitation timer callback; @data is the inet6_ifaddr (cast
+ * to unsigned long) the timer belongs to.
+ *
+ * Nothing is sent when forwarding is enabled on the device or when a
+ * Router Advertisement has already been received (IF_RA_RCVD).
+ * Otherwise, while fewer than cnf.rtr_solicits probes have been sent,
+ * the timer is re-armed and another solicitation goes to the
+ * all-routers address; once the limit is reached a debug message is
+ * logged instead.
+ *
+ * Every path ends with in6_ifa_put(ifp).
+ */
+static void addrconf_rs_timer(unsigned long data)
+{
+ struct inet6_ifaddr *ifp = (struct inet6_ifaddr *) data;
+
+ if (ifp->idev->cnf.forwarding)
+ goto out;
+
+ if (ifp->idev->if_flags & IF_RA_RCVD) {
+ /*
+ * Announcement received after solicitation
+ * was sent
+ */
+ goto out;
+ }
+
+ spin_lock(&ifp->lock);
+ if (ifp->probes++ < ifp->idev->cnf.rtr_solicits) {
+ struct in6_addr all_routers;
+
+ /* The wait after the last probe can be shorter */
+ addrconf_mod_timer(ifp, AC_RS,
+ (ifp->probes == ifp->idev->cnf.rtr_solicits) ?
+ ifp->idev->cnf.rtr_solicit_delay :
+ ifp->idev->cnf.rtr_solicit_interval);
+ spin_unlock(&ifp->lock);
+
+ ipv6_addr_all_routers(&all_routers);
+
+ ndisc_send_rs(ifp->idev->dev, &ifp->addr, &all_routers);
+ } else {
+ spin_unlock(&ifp->lock);
+ /*
+ * Note: we do not support deprecated "all on-link"
+ * assumption any longer.
+ */
+ printk(KERN_DEBUG "%s: no IPv6 routers present\n",
+ ifp->idev->dev->name);
+ }
+
+out:
+ in6_ifa_put(ifp);
+}
+
+/*
+ * Duplicate Address Detection
+ *
+ * addrconf_dad_start() joins the solicited-node multicast group for
+ * ifp->addr and, for permanent addresses whose prefix length is not
+ * 128, adds the prefix route with route flags @flags.
+ *
+ * If the device has IFF_NOARP or IFF_LOOPBACK set, or the address is
+ * not tentative, DAD is skipped: IFA_F_TENTATIVE is cleared and
+ * addrconf_dad_completed() is called directly.  Otherwise the probe
+ * counter is loaded from cnf.dad_transmits and the AC_DAD timer is
+ * armed after a pseudo-random delay smaller than cnf.rtr_solicit_delay.
+ * Nothing is armed when @ifp is already marked dead.
+ */
+static void addrconf_dad_start(struct inet6_ifaddr *ifp, int flags)
+{
+ struct inet6_dev *idev = ifp->idev;
+ struct net_device *dev = idev->dev;
+ unsigned long rand_num;
+
+ addrconf_join_solict(dev, &ifp->addr);
+
+ if (ifp->prefix_len != 128 && (ifp->flags&IFA_F_PERMANENT))
+ addrconf_prefix_route(&ifp->addr, ifp->prefix_len, dev, 0,
+ flags);
+
+ net_srandom(ifp->addr.s6_addr32[3]);
+ rand_num = net_random() % (idev->cnf.rtr_solicit_delay ? : 1); /* "?: 1" avoids a modulo by zero */
+
+ read_lock_bh(&idev->lock);
+ if (ifp->dead)
+ goto out;
+ spin_lock_bh(&ifp->lock);
+
+ if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) ||
+ !(ifp->flags&IFA_F_TENTATIVE)) {
+ ifp->flags &= ~IFA_F_TENTATIVE;
+ spin_unlock_bh(&ifp->lock);
+ read_unlock_bh(&idev->lock);
+
+ addrconf_dad_completed(ifp);
+ return;
+ }
+
+ ifp->probes = idev->cnf.dad_transmits;
+ addrconf_mod_timer(ifp, AC_DAD, rand_num);
+
+ spin_unlock_bh(&ifp->lock);
+out:
+ read_unlock_bh(&idev->lock);
+}
+/*
+ * DAD timer callback; @data is the inet6_ifaddr (cast to unsigned long)
+ * being probed.
+ *
+ * Nothing is done for a dead inet6_dev.  When the probe counter has
+ * reached zero DAD is considered successful: IFA_F_TENTATIVE is cleared
+ * and addrconf_dad_completed() runs.  Otherwise the counter is
+ * decremented, the timer is re-armed with the neighbour retransmit time
+ * and a Neighbour Solicitation for the address is sent from the
+ * unspecified address to the solicited-node multicast address.
+ *
+ * Every path ends with in6_ifa_put(ifp).
+ */
+static void addrconf_dad_timer(unsigned long data)
+{
+ struct inet6_ifaddr *ifp = (struct inet6_ifaddr *) data;
+ struct inet6_dev *idev = ifp->idev;
+ struct in6_addr unspec;
+ struct in6_addr mcaddr;
+
+ read_lock_bh(&idev->lock);
+ if (idev->dead) {
+ read_unlock_bh(&idev->lock);
+ goto out;
+ }
+ spin_lock_bh(&ifp->lock);
+ if (ifp->probes == 0) {
+ /*
+ * DAD was successful
+ */
+
+ ifp->flags &= ~IFA_F_TENTATIVE;
+ spin_unlock_bh(&ifp->lock);
+ read_unlock_bh(&idev->lock);
+
+ addrconf_dad_completed(ifp);
+
+ goto out;
+ }
+
+ ifp->probes--;
+ addrconf_mod_timer(ifp, AC_DAD, ifp->idev->nd_parms->retrans_time);
+ spin_unlock_bh(&ifp->lock);
+ read_unlock_bh(&idev->lock);
+
+ /* send a neighbour solicitation for our addr */
+ memset(&unspec, 0, sizeof(unspec));
+ addrconf_addr_solict_mult(&ifp->addr, &mcaddr);
+ ndisc_send_ns(ifp->idev->dev, NULL, &ifp->addr, &mcaddr, &unspec);
+out:
+ in6_ifa_put(ifp);
+}
+/*
+ * Called once DAD for @ifp has finished (or was skipped): announces the
+ * address with RTM_NEWADDR and, for a link-local address on a
+ * non-loopback device with forwarding off and cnf.rtr_solicits > 0,
+ * sends the first Router Solicitation, sets IF_RS_SENT and arms the
+ * AC_RS timer with cnf.rtr_solicit_interval.
+ */
+static void addrconf_dad_completed(struct inet6_ifaddr *ifp)
+{
+ struct net_device * dev = ifp->idev->dev;
+
+ /*
+ * Configure the address for reception. Now it is valid.
+ */
+
+ ipv6_ifa_notify(RTM_NEWADDR, ifp);
+
+ /* If added prefix is link local and forwarding is off,
+ start sending router solicitations.
+ */
+
+ if (ifp->idev->cnf.forwarding == 0 &&
+ ifp->idev->cnf.rtr_solicits > 0 &&
+ (dev->flags&IFF_LOOPBACK) == 0 &&
+ (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)) {
+ struct in6_addr all_routers;
+
+ ipv6_addr_all_routers(&all_routers);
+
+ /*
+ * If a host has already performed a random delay
+ * [...] as part of DAD [...] there is no need
+ * to delay again before sending the first RS
+ */
+ ndisc_send_rs(ifp->idev->dev, &ifp->addr, &all_routers);
+
+ spin_lock_bh(&ifp->lock);
+ ifp->probes = 1; /* the solicitation above counts as the first probe */
+ ifp->idev->if_flags |= IF_RS_SENT;
+ addrconf_mod_timer(ifp, AC_RS, ifp->idev->cnf.rtr_solicit_interval);
+ spin_unlock_bh(&ifp->lock);
+ }
+}
+
+#ifdef CONFIG_PROC_FS
+struct if6_iter_state { /* per-open cursor for the if_inet6 seq_file */
+ int bucket; /* index into inet6_addr_lst[] currently being walked */
+};
+
+/*
+ * Position the iterator on the first non-empty hash bucket and return
+ * its head entry, or NULL when every bucket is empty (state->bucket is
+ * then IN6_ADDR_HSIZE).
+ */
+static struct inet6_ifaddr *if6_get_first(struct seq_file *seq)
+{
+	struct if6_iter_state *st = seq->private;
+	struct inet6_ifaddr *head = NULL;
+
+	st->bucket = 0;
+	while (st->bucket < IN6_ADDR_HSIZE) {
+		head = inet6_addr_lst[st->bucket];
+		if (head != NULL)
+			break;
+		++st->bucket;
+	}
+	return head;
+}
+
+/*
+ * Return the entry following @ifa: the next one on the same chain, or
+ * the head of the next non-empty bucket.  Returns NULL after the last
+ * bucket has been exhausted.
+ */
+static struct inet6_ifaddr *if6_get_next(struct seq_file *seq, struct inet6_ifaddr *ifa)
+{
+	struct if6_iter_state *st = seq->private;
+	struct inet6_ifaddr *nxt = ifa->lst_next;
+
+	while (nxt == NULL && ++st->bucket < IN6_ADDR_HSIZE)
+		nxt = inet6_addr_lst[st->bucket];
+
+	return nxt;
+}
+
+/*
+ * Return the entry at zero-based position @pos in iteration order, or
+ * NULL when fewer than @pos + 1 entries exist.
+ */
+static struct inet6_ifaddr *if6_get_idx(struct seq_file *seq, loff_t pos)
+{
+	struct inet6_ifaddr *cur = if6_get_first(seq);
+
+	while (cur != NULL && pos != 0) {
+		cur = if6_get_next(seq, cur);
+		if (cur != NULL)
+			--pos;
+	}
+	return pos ? NULL : cur;
+}
+
+/*
+ * seq_file ->start: take addrconf_hash_lock for reading (released in
+ * if6_seq_stop()) and return the entry at position *@pos.
+ */
+static void *if6_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	loff_t idx = *pos;
+
+	read_lock_bh(&addrconf_hash_lock);
+	return if6_get_idx(seq, idx);
+}
+
+/*
+ * seq_file ->next: advance *@pos and return the entry after @v, or
+ * NULL at the end of the table.
+ */
+static void *if6_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	++*pos;
+	return if6_get_next(seq, v);
+}
+/* seq_file ->stop: release the lock taken in if6_seq_start(). */
+static void if6_seq_stop(struct seq_file *seq, void *v)
+{
+ read_unlock_bh(&addrconf_hash_lock);
+}
+
+ /*
+  * seq_file ->show: print one address as a line of hex fields --
+  * the address itself, then interface index, prefix length, scope and
+  * flags -- followed by the interface name.  Always returns 0.
+  */
+ static int if6_seq_show(struct seq_file *seq, void *v)
+ {
+ 	struct inet6_ifaddr *ifa = v;
+ 	struct net_device *dev = ifa->idev->dev;
+
+ 	seq_printf(seq,
+ 		   "%04x%04x%04x%04x%04x%04x%04x%04x %02x %02x %02x %02x %8s\n",
+ 		   NIP6(ifa->addr),
+ 		   dev->ifindex, ifa->prefix_len, ifa->scope, ifa->flags,
+ 		   dev->name);
+ 	return 0;
+ }
+
+ /* Iterator callbacks handed to seq_open() by if6_seq_open(). */
+ static struct seq_operations if6_seq_ops = {
+ .start = if6_seq_start,
+ .next = if6_seq_next,
+ .show = if6_seq_show,
+ .stop = if6_seq_stop,
+ };
+
+ /*
+  * open() handler for the proc entry: allocate a zeroed iterator state,
+  * open the seq_file and attach the state as seq->private.
+  *
+  * Returns 0 on success, -ENOMEM if the state cannot be allocated, or the
+  * error from seq_open() (the state is freed again in that case).
+  */
+ static int if6_seq_open(struct inode *inode, struct file *file)
+ {
+ 	struct if6_iter_state *state;
+ 	struct seq_file *seq;
+ 	int rc;
+
+ 	state = kmalloc(sizeof(*state), GFP_KERNEL);
+ 	if (!state)
+ 		return -ENOMEM;
+ 	memset(state, 0, sizeof(*state));
+
+ 	rc = seq_open(file, &if6_seq_ops);
+ 	if (rc) {
+ 		kfree(state);
+ 		return rc;
+ 	}
+
+ 	seq = file->private_data;
+ 	seq->private = state;
+ 	return 0;
+ }
+
+ /*
+ * File operations for the "if_inet6" proc entry.  seq_release_private is
+ * used for ->release; presumably it also frees the seq->private state
+ * allocated in if6_seq_open() -- confirm against the seq_file API.
+ */
+ static struct file_operations if6_fops = {
+ .owner = THIS_MODULE,
+ .open = if6_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release_private,
+ };
+
+ /*
+  * Create the read-only "if_inet6" proc entry.
+  * Returns 0 on success or -ENOMEM if the entry could not be created.
+  */
+ int __init if6_proc_init(void)
+ {
+ 	return proc_net_fops_create("if_inet6", S_IRUGO, &if6_fops) ? 0 : -ENOMEM;
+ }
+
+ /* Remove the "if_inet6" proc entry created by if6_proc_init(). */
+ void if6_proc_exit(void)
+ {
+ proc_net_remove("if_inet6");
+ }
+#endif /* CONFIG_PROC_FS */
+
+/*
+ * Periodic address status verification
+ */
+
+ /*
+ * Walk every bucket of inet6_addr_lst and age the non-permanent addresses:
+ * - an address whose age has reached valid_lft is deleted;
+ * - an address whose age has reached prefered_lft gets IFA_F_DEPRECATED
+ * and, the first time, a notification;
+ * - with CONFIG_IPV6_PRIVACY, a non-tentative temporary address that is
+ * within regen_advance seconds of its preferred lifetime triggers
+ * ipv6_create_tempaddr() once (guarded by regen_count).
+ * While scanning, "next" tracks the earliest upcoming lifetime event;
+ * addr_chk_timer is finally re-armed for it, but never earlier than
+ * jiffies + HZ.
+ *
+ * NOTE(review): the "foo" argument is unused; presumably this function is
+ * also the addr_chk_timer handler -- the timer setup is outside this chunk.
+ */
+ static void addrconf_verify(unsigned long foo)
+ {
+ struct inet6_ifaddr *ifp;
+ unsigned long now, next;
+ int i;
+
+ spin_lock_bh(&addrconf_verify_lock);
+ now = jiffies;
+ next = now + ADDR_CHECK_FREQUENCY;
+
+ del_timer(&addr_chk_timer);
+
+ for (i=0; i < IN6_ADDR_HSIZE; i++) {
+
+ /*
+ * Every path below that drops addrconf_hash_lock jumps back here and
+ * rescans bucket i from its head.
+ */
+ restart:
+ write_lock(&addrconf_hash_lock);
+ for (ifp=inet6_addr_lst[i]; ifp; ifp=ifp->lst_next) {
+ unsigned long age;
+ #ifdef CONFIG_IPV6_PRIVACY
+ unsigned long regen_advance;
+ #endif
+
+ if (ifp->flags & IFA_F_PERMANENT)
+ continue;
+
+ spin_lock(&ifp->lock);
+ /* Age in seconds since the lifetimes were last stamped. */
+ age = (now - ifp->tstamp) / HZ;
+
+ #ifdef CONFIG_IPV6_PRIVACY
+ regen_advance = ifp->idev->cnf.regen_max_retry *
+ ifp->idev->cnf.dad_transmits *
+ ifp->idev->nd_parms->retrans_time / HZ;
+ #endif
+
+ if (age >= ifp->valid_lft) {
+ /* Valid lifetime expired: delete the address. */
+ spin_unlock(&ifp->lock);
+ in6_ifa_hold(ifp);
+ write_unlock(&addrconf_hash_lock);
+ ipv6_del_addr(ifp);
+ goto restart;
+ } else if (age >= ifp->prefered_lft) {
+ /* jiffies - ifp->tstamp > age >= ifp->prefered_lft */
+ int deprecate = 0;
+
+ if (!(ifp->flags&IFA_F_DEPRECATED)) {
+ deprecate = 1;
+ ifp->flags |= IFA_F_DEPRECATED;
+ }
+
+ if (time_before(ifp->tstamp + ifp->valid_lft * HZ, next))
+ next = ifp->tstamp + ifp->valid_lft * HZ;
+
+ spin_unlock(&ifp->lock);
+
+ /* Notify only on the transition to deprecated. */
+ if (deprecate) {
+ in6_ifa_hold(ifp);
+ write_unlock(&addrconf_hash_lock);
+
+ ipv6_ifa_notify(0, ifp);
+ in6_ifa_put(ifp);
+ goto restart;
+ }
+ #ifdef CONFIG_IPV6_PRIVACY
+ } else if ((ifp->flags&IFA_F_TEMPORARY) &&
+ !(ifp->flags&IFA_F_TENTATIVE)) {
+ if (age >= ifp->prefered_lft - regen_advance) {
+ struct inet6_ifaddr *ifpub = ifp->ifpub;
+ if (time_before(ifp->tstamp + ifp->prefered_lft * HZ, next))
+ next = ifp->tstamp + ifp->prefered_lft * HZ;
+ /* regen_count ensures a replacement is requested only once. */
+ if (!ifp->regen_count && ifpub) {
+ ifp->regen_count++;
+ in6_ifa_hold(ifp);
+ in6_ifa_hold(ifpub);
+ spin_unlock(&ifp->lock);
+ write_unlock(&addrconf_hash_lock);
+ ipv6_create_tempaddr(ifpub, ifp);
+ in6_ifa_put(ifpub);
+ in6_ifa_put(ifp);
+ goto restart;
+ }
+ } else if (time_before(ifp->tstamp + ifp->prefered_lft * HZ - regen_advance * HZ, next))
+ next = ifp->tstamp + ifp->prefered_lft * HZ - regen_advance * HZ;
+ spin_unlock(&ifp->lock);
+ #endif
+ } else {
+ /* ifp->prefered_lft <= ifp->valid_lft */
+ if (time_before(ifp->tstamp + ifp->prefered_lft * HZ, next))
+ next = ifp->tstamp + ifp->prefered_lft * HZ;
+ spin_unlock(&ifp->lock);
+ }
+ }
+ write_unlock(&addrconf_hash_lock);
+ }
+
+ /* Re-arm for the earliest event, at least one second from now. */
+ addr_chk_timer.expires = time_before(next, jiffies + HZ) ? jiffies + HZ : next;
+ add_timer(&addr_chk_timer);
+ spin_unlock_bh(&addrconf_verify_lock);
+ }
+
+ /*
+  * RTM_DELADDR handler: remove an IPv6 address from an interface.
+  *
+  * The address is taken from IFA_ADDRESS and/or IFA_LOCAL; when both are
+  * present they must be identical.  @arg is the parsed attribute array.
+  *
+  * Returns the result of inet6_addr_del(), or -EINVAL when no address
+  * attribute is present, an attribute is shorter than an IPv6 address,
+  * or the two attributes disagree.
+  */
+ static int
+ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+ {
+ 	struct rtattr **rta = arg;
+ 	struct ifaddrmsg *ifm = NLMSG_DATA(nlh);
+ 	struct in6_addr *pfx = NULL;
+
+ 	if (rta[IFA_ADDRESS-1]) {
+ 		if (RTA_PAYLOAD(rta[IFA_ADDRESS-1]) < sizeof(*pfx))
+ 			return -EINVAL;
+ 		pfx = RTA_DATA(rta[IFA_ADDRESS-1]);
+ 	}
+ 	if (rta[IFA_LOCAL-1]) {
+ 		/*
+ 		 * The payload comes from userspace: check its length before
+ 		 * comparing or using it, exactly as for IFA_ADDRESS, so a
+ 		 * short attribute cannot cause an over-read.
+ 		 */
+ 		if (RTA_PAYLOAD(rta[IFA_LOCAL-1]) < sizeof(*pfx))
+ 			return -EINVAL;
+ 		if (pfx && memcmp(pfx, RTA_DATA(rta[IFA_LOCAL-1]), sizeof(*pfx)))
+ 			return -EINVAL;
+ 		pfx = RTA_DATA(rta[IFA_LOCAL-1]);
+ 	}
+ 	if (pfx == NULL)
+ 		return -EINVAL;
+
+ 	return inet6_addr_del(ifm->ifa_index, pfx, ifm->ifa_prefixlen);
+ }
+
+ /*
+  * RTM_NEWADDR handler: add an IPv6 address to an interface.
+  *
+  * The address is taken from IFA_ADDRESS and/or IFA_LOCAL; when both are
+  * present they must be identical.  @arg is the parsed attribute array.
+  *
+  * Returns the result of inet6_addr_add(), or -EINVAL when no address
+  * attribute is present, an attribute is shorter than an IPv6 address,
+  * or the two attributes disagree.
+  */
+ static int
+ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+ {
+ 	struct rtattr **rta = arg;
+ 	struct ifaddrmsg *ifm = NLMSG_DATA(nlh);
+ 	struct in6_addr *pfx = NULL;
+
+ 	if (rta[IFA_ADDRESS-1]) {
+ 		if (RTA_PAYLOAD(rta[IFA_ADDRESS-1]) < sizeof(*pfx))
+ 			return -EINVAL;
+ 		pfx = RTA_DATA(rta[IFA_ADDRESS-1]);
+ 	}
+ 	if (rta[IFA_LOCAL-1]) {
+ 		/*
+ 		 * The payload comes from userspace: check its length before
+ 		 * comparing or using it, exactly as for IFA_ADDRESS, so a
+ 		 * short attribute cannot cause an over-read.
+ 		 */
+ 		if (RTA_PAYLOAD(rta[IFA_LOCAL-1]) < sizeof(*pfx))
+ 			return -EINVAL;
+ 		if (pfx && memcmp(pfx, RTA_DATA(rta[IFA_LOCAL-1]), sizeof(*pfx)))
+ 			return -EINVAL;
+ 		pfx = RTA_DATA(rta[IFA_LOCAL-1]);
+ 	}
+ 	if (pfx == NULL)
+ 		return -EINVAL;
+
+ 	return inet6_addr_add(ifm->ifa_index, pfx, ifm->ifa_prefixlen);
+ }
+
+ /*
+  * Append one netlink address message describing @ifa to @skb.
+  *
+  * @pid/@seq/@event go into the netlink header; a non-zero @pid marks the
+  * message NLM_F_MULTI.  The message carries an IFA_ADDRESS attribute and
+  * an IFA_CACHEINFO attribute with the remaining lifetimes and the
+  * creation/update stamps in hundredths of a second.
+  *
+  * Returns skb->len on success.  If @skb runs out of room the partial
+  * message is trimmed off again and -1 is returned.
+  */
+ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
+ 			     u32 pid, u32 seq, int event)
+ {
+ 	struct ifaddrmsg *ifm;
+ 	struct nlmsghdr *nlh;
+ 	struct ifa_cacheinfo ci;
+ 	unsigned char *b = skb->tail;
+
+ 	/* NLMSG_PUT/RTA_PUT jump to the failure labels below when skb is full. */
+ 	nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*ifm));
+ 	if (pid)
+ 		nlh->nlmsg_flags |= NLM_F_MULTI;
+ 	ifm = NLMSG_DATA(nlh);
+ 	ifm->ifa_family = AF_INET6;
+ 	ifm->ifa_prefixlen = ifa->prefix_len;
+ 	ifm->ifa_flags = ifa->flags;
+ 	ifm->ifa_scope = RT_SCOPE_UNIVERSE;
+ 	if (ifa->scope&IFA_HOST)
+ 		ifm->ifa_scope = RT_SCOPE_HOST;
+ 	else if (ifa->scope&IFA_LINK)
+ 		ifm->ifa_scope = RT_SCOPE_LINK;
+ 	else if (ifa->scope&IFA_SITE)
+ 		ifm->ifa_scope = RT_SCOPE_SITE;
+ 	ifm->ifa_index = ifa->idev->dev->ifindex;
+ 	RTA_PUT(skb, IFA_ADDRESS, 16, &ifa->addr);
+ 	if (!(ifa->flags&IFA_F_PERMANENT)) {
+ 		ci.ifa_prefered = ifa->prefered_lft;
+ 		ci.ifa_valid = ifa->valid_lft;
+ 		if (ci.ifa_prefered != INFINITY_LIFE_TIME) {
+ 			unsigned long tval = (jiffies - ifa->tstamp)/HZ;
+
+ 			/*
+ 			 * Report the time remaining, clamped at zero.  A
+ 			 * deprecated address is older than its preferred
+ 			 * lifetime, and a plain subtraction would wrap to a
+ 			 * huge bogus value.
+ 			 */
+ 			if (ci.ifa_prefered > tval)
+ 				ci.ifa_prefered -= tval;
+ 			else
+ 				ci.ifa_prefered = 0;
+ 			if (ci.ifa_valid != INFINITY_LIFE_TIME) {
+ 				if (ci.ifa_valid > tval)
+ 					ci.ifa_valid -= tval;
+ 				else
+ 					ci.ifa_valid = 0;
+ 			}
+ 		}
+ 	} else {
+ 		ci.ifa_prefered = INFINITY_LIFE_TIME;
+ 		ci.ifa_valid = INFINITY_LIFE_TIME;
+ 	}
+ 	/* Stamps are converted from jiffies to 1/100 s since INITIAL_JIFFIES. */
+ 	ci.cstamp = (__u32)(TIME_DELTA(ifa->cstamp, INITIAL_JIFFIES) / HZ * 100
+ 		    + TIME_DELTA(ifa->cstamp, INITIAL_JIFFIES) % HZ * 100 / HZ);
+ 	ci.tstamp = (__u32)(TIME_DELTA(ifa->tstamp, INITIAL_JIFFIES) / HZ * 100
+ 		    + TIME_DELTA(ifa->tstamp, INITIAL_JIFFIES) % HZ * 100 / HZ);
+ 	RTA_PUT(skb, IFA_CACHEINFO, sizeof(ci), &ci);
+ 	nlh->nlmsg_len = skb->tail - b;
+ 	return skb->len;
+
+ nlmsg_failure:
+ rtattr_failure:
+ 	skb_trim(skb, b - skb->data);
+ 	return -1;
+ }
+
+ /*
+  * Append one netlink address message describing the multicast address
+  * @ifmca to @skb.  The entry is reported as a permanent /128 with
+  * infinite lifetimes; scope is "site" when the address scope has
+  * IFA_SITE set and "universe" otherwise.
+  *
+  * Returns skb->len on success, or -1 after trimming the partial message
+  * when @skb has no room left.
+  */
+ static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca,
+ 			       u32 pid, u32 seq, int event)
+ {
+ 	unsigned char *start = skb->tail;
+ 	struct ifa_cacheinfo ci;
+ 	struct nlmsghdr *nlh;
+ 	struct ifaddrmsg *ifm;
+
+ 	/* NLMSG_PUT/RTA_PUT jump to the failure labels below when skb is full. */
+ 	nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*ifm));
+ 	if (pid)
+ 		nlh->nlmsg_flags |= NLM_F_MULTI;
+
+ 	ifm = NLMSG_DATA(nlh);
+ 	ifm->ifa_family = AF_INET6;
+ 	ifm->ifa_prefixlen = 128;
+ 	ifm->ifa_flags = IFA_F_PERMANENT;
+ 	if (ipv6_addr_scope(&ifmca->mca_addr) & IFA_SITE)
+ 		ifm->ifa_scope = RT_SCOPE_SITE;
+ 	else
+ 		ifm->ifa_scope = RT_SCOPE_UNIVERSE;
+ 	ifm->ifa_index = ifmca->idev->dev->ifindex;
+ 	RTA_PUT(skb, IFA_MULTICAST, 16, &ifmca->mca_addr);
+
+ 	ci.ifa_prefered = INFINITY_LIFE_TIME;
+ 	ci.ifa_valid = INFINITY_LIFE_TIME;
+ 	/* Stamps are converted from jiffies to 1/100 s since INITIAL_JIFFIES. */
+ 	ci.cstamp = (__u32)(TIME_DELTA(ifmca->mca_cstamp, INITIAL_JIFFIES) / HZ * 100
+ 		    + TIME_DELTA(ifmca->mca_cstamp, INITIAL_JIFFIES) % HZ * 100 / HZ);
+ 	ci.tstamp = (__u32)(TIME_DELTA(ifmca->mca_tstamp, INITIAL_JIFFIES) / HZ * 100
+ 		    + TIME_DELTA(ifmca->mca_tstamp, INITIAL_JIFFIES) % HZ * 100 / HZ);
+ 	RTA_PUT(skb, IFA_CACHEINFO, sizeof(ci), &ci);
+
+ 	nlh->nlmsg_len = skb->tail - start;
+ 	return skb->len;
+
+ nlmsg_failure:
+ rtattr_failure:
+ 	skb_trim(skb, start - skb->data);
+ 	return -1;
+ }
+
+ /*
+  * Append one netlink address message describing the anycast address
+  * @ifaca to @skb.  The entry is reported as a permanent /128 with
+  * infinite lifetimes; scope is "site" when the address scope has
+  * IFA_SITE set and "universe" otherwise.
+  *
+  * Returns skb->len on success, or -1 after trimming the partial message
+  * when @skb has no room left.
+  */
+ static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca,
+ 			       u32 pid, u32 seq, int event)
+ {
+ 	unsigned char *start = skb->tail;
+ 	struct ifa_cacheinfo ci;
+ 	struct nlmsghdr *nlh;
+ 	struct ifaddrmsg *ifm;
+
+ 	/* NLMSG_PUT/RTA_PUT jump to the failure labels below when skb is full. */
+ 	nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*ifm));
+ 	if (pid)
+ 		nlh->nlmsg_flags |= NLM_F_MULTI;
+
+ 	ifm = NLMSG_DATA(nlh);
+ 	ifm->ifa_family = AF_INET6;
+ 	ifm->ifa_prefixlen = 128;
+ 	ifm->ifa_flags = IFA_F_PERMANENT;
+ 	if (ipv6_addr_scope(&ifaca->aca_addr) & IFA_SITE)
+ 		ifm->ifa_scope = RT_SCOPE_SITE;
+ 	else
+ 		ifm->ifa_scope = RT_SCOPE_UNIVERSE;
+ 	ifm->ifa_index = ifaca->aca_idev->dev->ifindex;
+ 	RTA_PUT(skb, IFA_ANYCAST, 16, &ifaca->aca_addr);
+
+ 	ci.ifa_prefered = INFINITY_LIFE_TIME;
+ 	ci.ifa_valid = INFINITY_LIFE_TIME;
+ 	/* Stamps are converted from jiffies to 1/100 s since INITIAL_JIFFIES. */
+ 	ci.cstamp = (__u32)(TIME_DELTA(ifaca->aca_cstamp, INITIAL_JIFFIES) / HZ * 100
+ 		    + TIME_DELTA(ifaca->aca_cstamp, INITIAL_JIFFIES) % HZ * 100 / HZ);
+ 	ci.tstamp = (__u32)(TIME_DELTA(ifaca->aca_tstamp, INITIAL_JIFFIES) / HZ * 100
+ 		    + TIME_DELTA(ifaca->aca_tstamp, INITIAL_JIFFIES) % HZ * 100 / HZ);
+ 	RTA_PUT(skb, IFA_CACHEINFO, sizeof(ci), &ci);
+
+ 	nlh->nlmsg_len = skb->tail - start;
+ 	return skb->len;
+
+ nlmsg_failure:
+ rtattr_failure:
+ 	skb_trim(skb, start - skb->data);
+ 	return -1;
+ }
+
+ /* Selects which per-device address list inet6_dump_addr() walks. */
+ enum addr_type_t
+ {
+ UNICAST_ADDR,
+ MULTICAST_ADDR,
+ ANYCAST_ADDR,
+ };
+
+ /*
+ * Common netlink dump routine for unicast, multicast and anycast addresses.
+ *
+ * Resumes at device index cb->args[0] and, within that device, at address
+ * index cb->args[1].  For every address of the requested @type one message
+ * is appended to @skb through the matching inet6_fill_*() helper; the walk
+ * stops at the first helper call that returns <= 0.  The position reached
+ * is stored back into cb->args[] and skb->len is returned.
+ */
+ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
+ enum addr_type_t type)
+ {
+ int idx, ip_idx;
+ int s_idx, s_ip_idx;
+ int err = 1;
+ struct net_device *dev;
+ struct inet6_dev *idev = NULL;
+ struct inet6_ifaddr *ifa;
+ struct ifmcaddr6 *ifmca;
+ struct ifacaddr6 *ifaca;
+
+ s_idx = cb->args[0];
+ s_ip_idx = ip_idx = cb->args[1];
+ read_lock(&dev_base_lock);
+
+ for (dev = dev_base, idx = 0; dev; dev = dev->next, idx++) {
+ if (idx < s_idx)
+ continue;
+ /* Only the device the dump resumes on skips already-sent addresses. */
+ if (idx > s_idx)
+ s_ip_idx = 0;
+ ip_idx = 0;
+ if ((idev = in6_dev_get(dev)) == NULL)
+ continue;
+ read_lock_bh(&idev->lock);
+ switch (type) {
+ case UNICAST_ADDR:
+ /* unicast address */
+ for (ifa = idev->addr_list; ifa;
+ ifa = ifa->if_next, ip_idx++) {
+ if (ip_idx < s_ip_idx)
+ continue;
+ if ((err = inet6_fill_ifaddr(skb, ifa,
+ NETLINK_CB(cb->skb).pid,
+ cb->nlh->nlmsg_seq, RTM_NEWADDR)) <= 0)
+ goto done;
+ }
+ /* temp addr */
+ #ifdef CONFIG_IPV6_PRIVACY
+ /* ip_idx keeps counting across the unicast and temporary lists. */
+ for (ifa = idev->tempaddr_list; ifa;
+ ifa = ifa->tmp_next, ip_idx++) {
+ if (ip_idx < s_ip_idx)
+ continue;
+ if ((err = inet6_fill_ifaddr(skb, ifa,
+ NETLINK_CB(cb->skb).pid,
+ cb->nlh->nlmsg_seq, RTM_NEWADDR)) <= 0)
+ goto done;
+ }
+ #endif
+ break;
+ case MULTICAST_ADDR:
+ /* multicast address */
+ for (ifmca = idev->mc_list; ifmca;
+ ifmca = ifmca->next, ip_idx++) {
+ if (ip_idx < s_ip_idx)
+ continue;
+ if ((err = inet6_fill_ifmcaddr(skb, ifmca,
+ NETLINK_CB(cb->skb).pid,
+ cb->nlh->nlmsg_seq, RTM_GETMULTICAST)) <= 0)
+ goto done;
+ }
+ break;
+ case ANYCAST_ADDR:
+ /* anycast address */
+ for (ifaca = idev->ac_list; ifaca;
+ ifaca = ifaca->aca_next, ip_idx++) {
+ if (ip_idx < s_ip_idx)
+ continue;
+ if ((err = inet6_fill_ifacaddr(skb, ifaca,
+ NETLINK_CB(cb->skb).pid,
+ cb->nlh->nlmsg_seq, RTM_GETANYCAST)) <= 0)
+ goto done;
+ }
+ break;
+ default:
+ break;
+ }
+ read_unlock_bh(&idev->lock);
+ in6_dev_put(idev);
+ }
+ done:
+ /*
+ * err <= 0 only when a fill helper failed and jumped here from inside
+ * the loop, i.e. with idev->lock still read-held and the in6_dev_get()
+ * reference still owned; release both.  On normal loop exit err is > 0.
+ */
+ if (err <= 0) {
+ read_unlock_bh(&idev->lock);
+ in6_dev_put(idev);
+ }
+ read_unlock(&dev_base_lock);
+ cb->args[0] = idx;
+ cb->args[1] = ip_idx;
+ return skb->len;
+ }
+
+ /* Netlink dump callback for unicast (and temporary) addresses. */
+ static int inet6_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
+ {
+ 	return inet6_dump_addr(skb, cb, UNICAST_ADDR);
+ }
+
+ /* Netlink dump callback for multicast addresses. */
+ static int inet6_dump_ifmcaddr(struct sk_buff *skb, struct netlink_callback *cb)
+ {
+ 	return inet6_dump_addr(skb, cb, MULTICAST_ADDR);
+ }
+
+
+ /* Netlink dump callback for anycast addresses. */
+ static int inet6_dump_ifacaddr(struct sk_buff *skb, struct netlink_callback *cb)
+ {
+ 	return inet6_dump_addr(skb, cb, ANYCAST_ADDR);
+ }
+
+ /*
+  * Broadcast an address event for @ifa to the RTMGRP_IPV6_IFADDR netlink
+  * group.  If the message cannot be allocated (ENOBUFS) or built (EINVAL)
+  * the error is reported to the group through netlink_set_err() instead.
+  */
+ static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa)
+ {
+ 	int size = NLMSG_SPACE(sizeof(struct ifaddrmsg)+128);
+ 	struct sk_buff *skb;
+ 	int err;
+
+ 	skb = alloc_skb(size, GFP_ATOMIC);
+ 	if (!skb) {
+ 		err = ENOBUFS;
+ 		goto errout;
+ 	}
+ 	if (inet6_fill_ifaddr(skb, ifa, 0, 0, event) < 0) {
+ 		kfree_skb(skb);
+ 		err = EINVAL;
+ 		goto errout;
+ 	}
+ 	NETLINK_CB(skb).dst_groups = RTMGRP_IPV6_IFADDR;
+ 	netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV6_IFADDR, GFP_ATOMIC);
+ 	return;
+
+ errout:
+ 	netlink_set_err(rtnl, 0, RTMGRP_IPV6_IFADDR, err);
+ }
+
+ /*
+  * Export the settings in @cnf into @array, indexed by the DEVCONF_*
+  * constants.  The first @bytes bytes of @array are zeroed beforehand, so
+  * slots that are not written here (e.g. the privacy entries when
+  * CONFIG_IPV6_PRIVACY is off) read as 0.
+  */
+ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
+ 				      __s32 *array, int bytes)
+ {
+ 	memset(array, 0, bytes);
+
+ 	array[DEVCONF_FORWARDING]		= cnf->forwarding;
+ 	array[DEVCONF_HOPLIMIT]			= cnf->hop_limit;
+ 	array[DEVCONF_MTU6]			= cnf->mtu6;
+ 	array[DEVCONF_ACCEPT_RA]		= cnf->accept_ra;
+ 	array[DEVCONF_ACCEPT_REDIRECTS]		= cnf->accept_redirects;
+ 	array[DEVCONF_AUTOCONF]			= cnf->autoconf;
+ 	array[DEVCONF_DAD_TRANSMITS]		= cnf->dad_transmits;
+ 	array[DEVCONF_RTR_SOLICITS]		= cnf->rtr_solicits;
+ 	array[DEVCONF_RTR_SOLICIT_INTERVAL]	= cnf->rtr_solicit_interval;
+ 	array[DEVCONF_RTR_SOLICIT_DELAY]	= cnf->rtr_solicit_delay;
+ 	array[DEVCONF_FORCE_MLD_VERSION]	= cnf->force_mld_version;
+ #ifdef CONFIG_IPV6_PRIVACY
+ 	array[DEVCONF_USE_TEMPADDR]		= cnf->use_tempaddr;
+ 	array[DEVCONF_TEMP_VALID_LFT]		= cnf->temp_valid_lft;
+ 	array[DEVCONF_TEMP_PREFERED_LFT]	= cnf->temp_prefered_lft;
+ 	array[DEVCONF_REGEN_MAX_RETRY]		= cnf->regen_max_retry;
+ 	array[DEVCONF_MAX_DESYNC_FACTOR]	= cnf->max_desync_factor;
+ #endif
+ 	array[DEVCONF_MAX_ADDRESSES]		= cnf->max_addresses;
+ }
+
+ /*
+  * Append one netlink link message describing the IPv6 state of @idev's
+  * device to @skb: name, link-layer address (if any), MTU, link index (if
+  * different from the device's own), and an IFLA_PROTINFO attribute that
+  * nests the inet6 flags, cache info and the devconf array.
+  *
+  * Returns skb->len on success.  On lack of skb room or allocation
+  * failure the partial message is trimmed off and -1 is returned.
+  */
+ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev,
+ 			     u32 pid, u32 seq, int event)
+ {
+ 	struct net_device *dev = idev->dev;
+ 	unsigned char *start = skb->tail;
+ 	struct ifla_cacheinfo ci;
+ 	struct rtattr *protinfo;
+ 	struct nlmsghdr *nlh;
+ 	struct ifinfomsg *r;
+ 	__s32 *conf = NULL;
+ 	__u32 mtu = dev->mtu;
+
+ 	/* NLMSG_PUT/RTA_PUT jump to the failure labels below when skb is full. */
+ 	nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*r));
+ 	if (pid)
+ 		nlh->nlmsg_flags |= NLM_F_MULTI;
+
+ 	r = NLMSG_DATA(nlh);
+ 	r->ifi_family = AF_INET6;
+ 	r->ifi_type = dev->type;
+ 	r->ifi_index = dev->ifindex;
+ 	r->ifi_flags = dev_get_flags(dev);
+ 	r->ifi_change = 0;
+
+ 	RTA_PUT(skb, IFLA_IFNAME, strlen(dev->name)+1, dev->name);
+
+ 	if (dev->addr_len)
+ 		RTA_PUT(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr);
+
+ 	RTA_PUT(skb, IFLA_MTU, sizeof(mtu), &mtu);
+ 	if (dev->ifindex != dev->iflink)
+ 		RTA_PUT(skb, IFLA_LINK, sizeof(int), &dev->iflink);
+
+ 	/* Open the nested attribute; its length is patched once it is filled. */
+ 	protinfo = (struct rtattr*)skb->tail;
+ 	RTA_PUT(skb, IFLA_PROTINFO, 0, NULL);
+
+ 	/* return the device flags */
+ 	RTA_PUT(skb, IFLA_INET6_FLAGS, sizeof(__u32), &idev->if_flags);
+
+ 	/* return interface cacheinfo */
+ 	ci.max_reasm_len = IPV6_MAXPLEN;
+ 	ci.tstamp = (__u32)(TIME_DELTA(idev->tstamp, INITIAL_JIFFIES) / HZ * 100
+ 		    + TIME_DELTA(idev->tstamp, INITIAL_JIFFIES) % HZ * 100 / HZ);
+ 	ci.reachable_time = idev->nd_parms->reachable_time;
+ 	ci.retrans_time = idev->nd_parms->retrans_time;
+ 	RTA_PUT(skb, IFLA_INET6_CACHEINFO, sizeof(ci), &ci);
+
+ 	/* return the device sysctl params */
+ 	conf = kmalloc(DEVCONF_MAX * sizeof(*conf), GFP_ATOMIC);
+ 	if (conf == NULL)
+ 		goto rtattr_failure;
+ 	ipv6_store_devconf(&idev->cnf, conf, DEVCONF_MAX * sizeof(*conf));
+ 	RTA_PUT(skb, IFLA_INET6_CONF, DEVCONF_MAX * sizeof(*conf), conf);
+
+ 	/* XXX - Statistics/MC not implemented */
+ 	protinfo->rta_len = skb->tail - (u8*)protinfo;
+
+ 	nlh->nlmsg_len = skb->tail - start;
+ 	kfree(conf);
+ 	return skb->len;
+
+ nlmsg_failure:
+ rtattr_failure:
+ 	/* conf is still NULL if we fail before the allocation; kfree(NULL) is a no-op. */
+ 	kfree(conf);
+ 	skb_trim(skb, start - skb->data);
+ 	return -1;
+ }
+
+ /*
+  * Netlink dump callback for per-device IPv6 link information.
+  *
+  * Resumes at device index cb->args[0], emits one RTM_NEWLINK message for
+  * every device that has IPv6 state, and stops at the first message that
+  * cannot be built.  The index reached is saved in cb->args[0] and
+  * skb->len is returned.
+  */
+ static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
+ {
+ 	int s_idx = cb->args[0];
+ 	struct inet6_dev *idev;
+ 	struct net_device *dev;
+ 	int idx = 0;
+ 	int err;
+
+ 	read_lock(&dev_base_lock);
+ 	for (dev = dev_base; dev; dev = dev->next, idx++) {
+ 		if (idx < s_idx)
+ 			continue;
+
+ 		idev = in6_dev_get(dev);
+ 		if (idev == NULL)
+ 			continue;
+
+ 		err = inet6_fill_ifinfo(skb, idev, NETLINK_CB(cb->skb).pid,
+ 					cb->nlh->nlmsg_seq, RTM_NEWLINK);
+ 		in6_dev_put(idev);
+ 		if (err <= 0)
+ 			break;
+ 	}
+ 	read_unlock(&dev_base_lock);
+
+ 	cb->args[0] = idx;
+ 	return skb->len;
+ }
+
+ /*
+  * Broadcast a link event for @idev to the RTMGRP_IPV6_IFINFO netlink
+  * group.  If the message cannot be allocated (ENOBUFS) or built (EINVAL)
+  * the error is reported to the group through netlink_set_err() instead.
+  */
+ void inet6_ifinfo_notify(int event, struct inet6_dev *idev)
+ {
+ 	/* 128 bytes ?? */
+ 	int size = NLMSG_SPACE(sizeof(struct ifinfomsg)+128);
+ 	struct sk_buff *skb;
+ 	int err;
+
+ 	skb = alloc_skb(size, GFP_ATOMIC);
+ 	if (!skb) {
+ 		err = ENOBUFS;
+ 		goto errout;
+ 	}
+ 	if (inet6_fill_ifinfo(skb, idev, 0, 0, event) < 0) {
+ 		kfree_skb(skb);
+ 		err = EINVAL;
+ 		goto errout;
+ 	}
+ 	NETLINK_CB(skb).dst_groups = RTMGRP_IPV6_IFINFO;
+ 	netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV6_IFINFO, GFP_ATOMIC);
+ 	return;
+
+ errout:
+ 	netlink_set_err(rtnl, 0, RTMGRP_IPV6_IFINFO, err);
+ }
+
+ /*
+  * Append one netlink prefix message for @pinfo, as seen on @idev's
+  * device, to @skb.  The on-link and autoconf bits of @pinfo become
+  * IF_PREFIX_ONLINK / IF_PREFIX_AUTOCONF flags; the preferred and valid
+  * times are converted with ntohl() before being reported.
+  *
+  * Returns skb->len on success, or -1 after trimming the partial message
+  * when @skb has no room left.
+  */
+ static int inet6_fill_prefix(struct sk_buff *skb, struct inet6_dev *idev,
+ 			     struct prefix_info *pinfo, u32 pid, u32 seq, int event)
+ {
+ 	unsigned char *start = skb->tail;
+ 	struct prefix_cacheinfo ci;
+ 	struct nlmsghdr *nlh;
+ 	struct prefixmsg *pmsg;
+
+ 	/* NLMSG_PUT/RTA_PUT jump to the failure labels below when skb is full. */
+ 	nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*pmsg));
+ 	if (pid)
+ 		nlh->nlmsg_flags |= NLM_F_MULTI;
+
+ 	pmsg = NLMSG_DATA(nlh);
+ 	pmsg->prefix_family = AF_INET6;
+ 	pmsg->prefix_ifindex = idev->dev->ifindex;
+ 	pmsg->prefix_len = pinfo->prefix_len;
+ 	pmsg->prefix_type = pinfo->type;
+ 	pmsg->prefix_flags = (pinfo->onlink ? IF_PREFIX_ONLINK : 0) |
+ 			     (pinfo->autoconf ? IF_PREFIX_AUTOCONF : 0);
+
+ 	RTA_PUT(skb, PREFIX_ADDRESS, sizeof(pinfo->prefix), &pinfo->prefix);
+
+ 	ci.preferred_time = ntohl(pinfo->prefered);
+ 	ci.valid_time = ntohl(pinfo->valid);
+ 	RTA_PUT(skb, PREFIX_CACHEINFO, sizeof(ci), &ci);
+
+ 	nlh->nlmsg_len = skb->tail - start;
+ 	return skb->len;
+
+ nlmsg_failure:
+ rtattr_failure:
+ 	skb_trim(skb, start - skb->data);
+ 	return -1;
+ }
+
+ /*
+  * Broadcast a prefix event for @pinfo on @idev to the RTMGRP_IPV6_PREFIX
+  * netlink group.  If the message cannot be allocated (ENOBUFS) or built
+  * (EINVAL) the error is reported to the group through netlink_set_err().
+  */
+ static void inet6_prefix_notify(int event, struct inet6_dev *idev,
+ 				struct prefix_info *pinfo)
+ {
+ 	int size = NLMSG_SPACE(sizeof(struct prefixmsg)+128);
+ 	struct sk_buff *skb;
+ 	int err;
+
+ 	skb = alloc_skb(size, GFP_ATOMIC);
+ 	if (!skb) {
+ 		err = ENOBUFS;
+ 		goto errout;
+ 	}
+ 	if (inet6_fill_prefix(skb, idev, pinfo, 0, 0, event) < 0) {
+ 		kfree_skb(skb);
+ 		err = EINVAL;
+ 		goto errout;
+ 	}
+ 	NETLINK_CB(skb).dst_groups = RTMGRP_IPV6_PREFIX;
+ 	netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV6_PREFIX, GFP_ATOMIC);
+ 	return;
+
+ errout:
+ 	netlink_set_err(rtnl, 0, RTMGRP_IPV6_PREFIX, err);
+ }
+
+ /*
+ * rtnetlink dispatch table for PF_INET6, indexed by message type relative
+ * to RTM_BASE; installed into rtnetlink_links[] by addrconf_init().
+ * ".doit" handles a single request, ".dumpit" produces a multi-part dump.
+ */
+ static struct rtnetlink_link inet6_rtnetlink_table[RTM_MAX - RTM_BASE + 1] = {
+ [RTM_GETLINK - RTM_BASE] = { .dumpit = inet6_dump_ifinfo, },
+ [RTM_NEWADDR - RTM_BASE] = { .doit = inet6_rtm_newaddr, },
+ [RTM_DELADDR - RTM_BASE] = { .doit = inet6_rtm_deladdr, },
+ [RTM_GETADDR - RTM_BASE] = { .dumpit = inet6_dump_ifaddr, },
+ [RTM_GETMULTICAST - RTM_BASE] = { .dumpit = inet6_dump_ifmcaddr, },
+ [RTM_GETANYCAST - RTM_BASE] = { .dumpit = inet6_dump_ifacaddr, },
+ [RTM_NEWROUTE - RTM_BASE] = { .doit = inet6_rtm_newroute, },
+ [RTM_DELROUTE - RTM_BASE] = { .doit = inet6_rtm_delroute, },
+ [RTM_GETROUTE - RTM_BASE] = { .doit = inet6_rtm_getroute,
+ .dumpit = inet6_dump_fib, },
+ };
+
+ /*
+  * Announce an address event over netlink (event 0 is announced as
+  * RTM_NEWADDR) and perform the matching routing/anycast bookkeeping:
+  *  - RTM_NEWADDR: insert the address' route and, when forwarding is
+  *    enabled on the device, join the anycast address;
+  *  - RTM_DELADDR: leave the anycast (when forwarding) and solicited-node
+  *    groups and remove the address' route.
+  * Event 0 triggers the announcement only.
+  */
+ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
+ {
+ 	inet6_ifa_notify(event ? event : RTM_NEWADDR, ifp);
+
+ 	if (event == RTM_NEWADDR) {
+ 		dst_hold(&ifp->rt->u.dst);
+ 		if (ip6_ins_rt(ifp->rt, NULL, NULL))
+ 			dst_release(&ifp->rt->u.dst);
+ 		if (ifp->idev->cnf.forwarding)
+ 			addrconf_join_anycast(ifp);
+ 	} else if (event == RTM_DELADDR) {
+ 		if (ifp->idev->cnf.forwarding)
+ 			addrconf_leave_anycast(ifp);
+ 		addrconf_leave_solict(ifp->idev, &ifp->addr);
+ 		dst_hold(&ifp->rt->u.dst);
+ 		if (ip6_del_rt(ifp->rt, NULL, NULL))
+ 			dst_free(&ifp->rt->u.dst);
+ 		else
+ 			dst_release(&ifp->rt->u.dst);
+ 	}
+ }
+
+ /*
+  * Locked wrapper around __ipv6_ifa_notify(): runs it under the
+  * addrconf_lock read lock and skips it when the owning inet6_dev is
+  * already marked dead.
+  */
+ static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
+ {
+ 	read_lock_bh(&addrconf_lock);
+ 	if (likely(!ifp->idev->dead))
+ 		__ipv6_ifa_notify(event, ifp);
+ 	read_unlock_bh(&addrconf_lock);
+ }
+
+#ifdef CONFIG_SYSCTL
+
+ /*
+  * /proc handler for the "forwarding" sysctl entries.
+  *
+  * The value itself is read or written by proc_dointvec(); on a write the
+  * side effects depend on which variable the entry points at:
+  *  - the "default" entry: nothing more to do;
+  *  - the "all" entry (ipv6_devconf): copy the value into the default
+  *    settings and call addrconf_forward_change();
+  *  - a per-device entry: call dev_forward_change() if the value toggled
+  *    between zero and non-zero.
+  * In the last two cases default routers are purged when forwarding ends
+  * up enabled.  Returns the proc_dointvec() result.
+  */
+ static
+ int addrconf_sysctl_forward(ctl_table *ctl, int write, struct file * filp,
+ 			    void __user *buffer, size_t *lenp, loff_t *ppos)
+ {
+ 	int *valp = ctl->data;
+ 	int old = *valp;
+ 	int ret;
+
+ 	ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+
+ 	if (!write || valp == &ipv6_devconf_dflt.forwarding)
+ 		return ret;
+
+ 	if (valp == &ipv6_devconf.forwarding) {
+ 		ipv6_devconf_dflt.forwarding = ipv6_devconf.forwarding;
+ 		addrconf_forward_change();
+ 	} else if (!*valp != !old) {
+ 		struct inet6_dev *idev = ctl->extra1;
+
+ 		if (idev == NULL)
+ 			return ret;
+ 		dev_forward_change(idev);
+ 	}
+
+ 	if (*valp)
+ 		rt6_purge_dflt_routers();
+
+ 	return ret;
+ }
+
+ /*
+ * Binary sysctl strategy routine for the "forwarding" entries.
+ *
+ * Returns 0 without side effects when no new value is supplied or the new
+ * value equals the current one, -EINVAL if newlen is not sizeof(int),
+ * -EFAULT on a failed user copy and -ENODEV if a per-device entry has no
+ * inet6_dev attached.  Otherwise the old value is copied out (if asked
+ * for), the new value is stored, the same forwarding side effects as in
+ * the /proc handler are applied, and 1 is returned.
+ *
+ * NOTE(review): returning 1 presumably tells the sysctl core that the
+ * value has already been stored here -- confirm against the sysctl core.
+ */
+ static int addrconf_sysctl_forward_strategy(ctl_table *table,
+ int __user *name, int nlen,
+ void __user *oldval,
+ size_t __user *oldlenp,
+ void __user *newval, size_t newlen,
+ void **context)
+ {
+ int *valp = table->data;
+ int new;
+
+ if (!newval || !newlen)
+ return 0;
+ if (newlen != sizeof(int))
+ return -EINVAL;
+ if (get_user(new, (int __user *)newval))
+ return -EFAULT;
+ if (new == *valp)
+ return 0;
+ /* Hand the old value back before it is overwritten below. */
+ if (oldval && oldlenp) {
+ size_t len;
+ if (get_user(len, oldlenp))
+ return -EFAULT;
+ if (len) {
+ if (len > table->maxlen)
+ len = table->maxlen;
+ if (copy_to_user(oldval, valp, len))
+ return -EFAULT;
+ if (put_user(len, oldlenp))
+ return -EFAULT;
+ }
+ }
+
+ if (valp != &ipv6_devconf_dflt.forwarding) {
+ if (valp != &ipv6_devconf.forwarding) {
+ struct inet6_dev *idev = (struct inet6_dev *)table->extra1;
+ int changed;
+ if (unlikely(idev == NULL))
+ return -ENODEV;
+ /* Only a zero <-> non-zero transition counts as a change. */
+ changed = (!*valp) ^ (!new);
+ *valp = new;
+ if (changed)
+ dev_forward_change(idev);
+ } else {
+ *valp = new;
+ addrconf_forward_change();
+ }
+
+ if (*valp)
+ rt6_purge_dflt_routers();
+ } else
+ *valp = new;
+
+ return 1;
+ }
+
+ /*
+ * Template sysctl tree net/ipv6/conf/all/<variable>, whose variables point
+ * into the global ipv6_devconf.  addrconf_sysctl_register() copies this
+ * template and rebases the .data pointers to build the tree for another
+ * ipv6_devconf instance.  Each directory level is a two-entry array: the
+ * directory itself plus a zero sentinel.
+ */
+ static struct addrconf_sysctl_table
+ {
+ struct ctl_table_header *sysctl_header;
+ ctl_table addrconf_vars[__NET_IPV6_MAX];
+ ctl_table addrconf_dev[2];
+ ctl_table addrconf_conf_dir[2];
+ ctl_table addrconf_proto_dir[2];
+ ctl_table addrconf_root_dir[2];
+ } addrconf_sysctl = {
+ .sysctl_header = NULL,
+ .addrconf_vars = {
+ /* "forwarding" is the only entry with custom handlers. */
+ {
+ .ctl_name = NET_IPV6_FORWARDING,
+ .procname = "forwarding",
+ .data = &ipv6_devconf.forwarding,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &addrconf_sysctl_forward,
+ .strategy = &addrconf_sysctl_forward_strategy,
+ },
+ {
+ .ctl_name = NET_IPV6_HOP_LIMIT,
+ .procname = "hop_limit",
+ .data = &ipv6_devconf.hop_limit,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .ctl_name = NET_IPV6_MTU,
+ .procname = "mtu",
+ .data = &ipv6_devconf.mtu6,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+ {
+ .ctl_name = NET_IPV6_ACCEPT_RA,
+ .procname = "accept_ra",
+ .data = &ipv6_devconf.accept_ra,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+ {
+ .ctl_name = NET_IPV6_ACCEPT_REDIRECTS,
+ .procname = "accept_redirects",
+ .data = &ipv6_devconf.accept_redirects,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+ {
+ .ctl_name = NET_IPV6_AUTOCONF,
+ .procname = "autoconf",
+ .data = &ipv6_devconf.autoconf,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+ {
+ .ctl_name = NET_IPV6_DAD_TRANSMITS,
+ .procname = "dad_transmits",
+ .data = &ipv6_devconf.dad_transmits,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+ {
+ .ctl_name = NET_IPV6_RTR_SOLICITS,
+ .procname = "router_solicitations",
+ .data = &ipv6_devconf.rtr_solicits,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+ /* The two router-solicitation timing entries use the jiffies handlers. */
+ {
+ .ctl_name = NET_IPV6_RTR_SOLICIT_INTERVAL,
+ .procname = "router_solicitation_interval",
+ .data = &ipv6_devconf.rtr_solicit_interval,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ .strategy = &sysctl_jiffies,
+ },
+ {
+ .ctl_name = NET_IPV6_RTR_SOLICIT_DELAY,
+ .procname = "router_solicitation_delay",
+ .data = &ipv6_devconf.rtr_solicit_delay,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_jiffies,
+ .strategy = &sysctl_jiffies,
+ },
+ {
+ .ctl_name = NET_IPV6_FORCE_MLD_VERSION,
+ .procname = "force_mld_version",
+ .data = &ipv6_devconf.force_mld_version,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+ #ifdef CONFIG_IPV6_PRIVACY
+ {
+ .ctl_name = NET_IPV6_USE_TEMPADDR,
+ .procname = "use_tempaddr",
+ .data = &ipv6_devconf.use_tempaddr,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+ {
+ .ctl_name = NET_IPV6_TEMP_VALID_LFT,
+ .procname = "temp_valid_lft",
+ .data = &ipv6_devconf.temp_valid_lft,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+ {
+ .ctl_name = NET_IPV6_TEMP_PREFERED_LFT,
+ .procname = "temp_prefered_lft",
+ .data = &ipv6_devconf.temp_prefered_lft,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+ {
+ .ctl_name = NET_IPV6_REGEN_MAX_RETRY,
+ .procname = "regen_max_retry",
+ .data = &ipv6_devconf.regen_max_retry,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+ {
+ .ctl_name = NET_IPV6_MAX_DESYNC_FACTOR,
+ .procname = "max_desync_factor",
+ .data = &ipv6_devconf.max_desync_factor,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+ #endif
+ {
+ .ctl_name = NET_IPV6_MAX_ADDRESSES,
+ .procname = "max_addresses",
+ .data = &ipv6_devconf.max_addresses,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+ {
+ .ctl_name = 0, /* sentinel */
+ }
+ },
+ /* Directory chain, innermost first: all -> conf -> ipv6 -> net. */
+ .addrconf_dev = {
+ {
+ .ctl_name = NET_PROTO_CONF_ALL,
+ .procname = "all",
+ .mode = 0555,
+ .child = addrconf_sysctl.addrconf_vars,
+ },
+ {
+ .ctl_name = 0, /* sentinel */
+ }
+ },
+ .addrconf_conf_dir = {
+ {
+ .ctl_name = NET_IPV6_CONF,
+ .procname = "conf",
+ .mode = 0555,
+ .child = addrconf_sysctl.addrconf_dev,
+ },
+ {
+ .ctl_name = 0, /* sentinel */
+ }
+ },
+ .addrconf_proto_dir = {
+ {
+ .ctl_name = NET_IPV6,
+ .procname = "ipv6",
+ .mode = 0555,
+ .child = addrconf_sysctl.addrconf_conf_dir,
+ },
+ {
+ .ctl_name = 0, /* sentinel */
+ }
+ },
+ .addrconf_root_dir = {
+ {
+ .ctl_name = CTL_NET,
+ .procname = "net",
+ .mode = 0555,
+ .child = addrconf_sysctl.addrconf_proto_dir,
+ },
+ {
+ .ctl_name = 0, /* sentinel */
+ }
+ },
+ };
+
+ /*
+  * Build and register a private sysctl tree for the settings block @p.
+  *
+  * The addrconf_sysctl template is copied, every variable's .data pointer
+  * is moved from ipv6_devconf to the same field inside @p, and the
+  * directory is named after @idev's device (ctl_name = ifindex), or
+  * "default" when @idev is NULL.  On success the table is remembered in
+  * p->sysctl; on any allocation or registration failure everything is
+  * freed again and the function returns silently.
+  */
+ static void addrconf_sysctl_register(struct inet6_dev *idev, struct ipv6_devconf *p)
+ {
+ 	struct net_device *dev = idev ? idev->dev : NULL;
+ 	struct addrconf_sysctl_table *t;
+ 	char *dev_name;
+ 	int i;
+
+ 	t = kmalloc(sizeof(*t), GFP_KERNEL);
+ 	if (!t)
+ 		return;
+ 	memcpy(t, &addrconf_sysctl, sizeof(*t));
+
+ 	for (i = 0; t->addrconf_vars[i].data; i++) {
+ 		ctl_table *var = &t->addrconf_vars[i];
+
+ 		/* Same field offset, but inside *p instead of ipv6_devconf. */
+ 		var->data += (char*)p - (char*)&ipv6_devconf;
+ 		var->de = NULL;
+ 		var->extra1 = idev; /* embedded; no ref */
+ 	}
+
+ 	if (dev) {
+ 		dev_name = dev->name;
+ 		t->addrconf_dev[0].ctl_name = dev->ifindex;
+ 	} else {
+ 		dev_name = "default";
+ 		t->addrconf_dev[0].ctl_name = NET_PROTO_CONF_DEFAULT;
+ 	}
+
+ 	/*
+ 	 * Make a copy of dev_name, because '.procname' is regarded as const
+ 	 * by sysctl and we wouldn't want anyone to change it under our feet
+ 	 * (see SIOCSIFNAME).
+ 	 */
+ 	dev_name = net_sysctl_strdup(dev_name);
+ 	if (!dev_name) {
+ 		kfree(t);
+ 		return;
+ 	}
+ 	t->addrconf_dev[0].procname = dev_name;
+
+ 	/* Re-link the directory chain so it points into the copy. */
+ 	t->addrconf_dev[0].child = t->addrconf_vars;
+ 	t->addrconf_dev[0].de = NULL;
+ 	t->addrconf_conf_dir[0].child = t->addrconf_dev;
+ 	t->addrconf_conf_dir[0].de = NULL;
+ 	t->addrconf_proto_dir[0].child = t->addrconf_conf_dir;
+ 	t->addrconf_proto_dir[0].de = NULL;
+ 	t->addrconf_root_dir[0].child = t->addrconf_proto_dir;
+ 	t->addrconf_root_dir[0].de = NULL;
+
+ 	t->sysctl_header = register_sysctl_table(t->addrconf_root_dir, 0);
+ 	if (t->sysctl_header == NULL) {
+ 		kfree(dev_name);
+ 		kfree(t);
+ 		return;
+ 	}
+
+ 	p->sysctl = t;
+ }
+
+ /*
+  * Tear down the sysctl tree attached to @p, if any: unregister it and
+  * free the duplicated directory name and the table itself.
+  */
+ static void addrconf_sysctl_unregister(struct ipv6_devconf *p)
+ {
+ 	struct addrconf_sysctl_table *t = p->sysctl;
+
+ 	if (!t)
+ 		return;
+
+ 	p->sysctl = NULL;
+ 	unregister_sysctl_table(t->sysctl_header);
+ 	kfree(t->addrconf_dev[0].procname);
+ 	kfree(t);
+ }
+
+
+#endif
+
+/*
+ * Device notifier
+ */
+
+ /* Add @nb to inet6addr_chain; returns the notifier_chain_register() result. */
+ int register_inet6addr_notifier(struct notifier_block *nb)
+ {
+ return notifier_chain_register(&inet6addr_chain, nb);
+ }
+
+ /* Remove @nb from inet6addr_chain; returns the notifier_chain_unregister() result. */
+ int unregister_inet6addr_notifier(struct notifier_block *nb)
+ {
+ return notifier_chain_unregister(&inet6addr_chain,nb);
+ }
+
+/*
+ * Init / cleanup code
+ */
+
+ /*
+ * Initialise address autoconfiguration: set up the loopback device's IPv6
+ * state, register the netdevice notifier, allocate the md5 transform
+ * (CONFIG_IPV6_PRIVACY), run a first addrconf_verify() pass (which also
+ * arms addr_chk_timer), install the PF_INET6 rtnetlink table and register
+ * the sysctl trees (CONFIG_SYSCTL).
+ *
+ * Returns 0, or -ENOMEM if ipv6_add_dev() fails for the loopback device.
+ */
+ int __init addrconf_init(void)
+ {
+ int err = 0;
+
+ /* The addrconf netdev notifier requires that loopback_dev
+ * has its ipv6 private information allocated and setup
+ * before it can bring up and give link-local addresses
+ * to other devices which are up.
+ *
+ * Unfortunately, loopback_dev is not necessarily the first
+ * entry in the global dev_base list of net devices. In fact,
+ * it is likely to be the very last entry on that list.
+ * So this causes the notifier registry below to try and
+ * give link-local addresses to all devices besides loopback_dev
+ * first, then loopback_dev, which causes all the non-loopback_dev
+ * devices to fail to get a link-local address.
+ *
+ * So, as a temporary fix, allocate the ipv6 structure for
+ * loopback_dev first by hand.
+ * Longer term, all of the dependencies ipv6 has upon the loopback
+ * device and it being up should be removed.
+ */
+ rtnl_lock();
+ if (!ipv6_add_dev(&loopback_dev))
+ err = -ENOMEM;
+ rtnl_unlock();
+ if (err)
+ return err;
+
+ register_netdevice_notifier(&ipv6_dev_notf);
+
+ #ifdef CONFIG_IPV6_PRIVACY
+ /* A missing md5 transform is only warned about; init carries on. */
+ md5_tfm = crypto_alloc_tfm("md5", 0);
+ if (unlikely(md5_tfm == NULL))
+ printk(KERN_WARNING
+ "failed to load transform for md5\n");
+ #endif
+
+ addrconf_verify(0);
+ rtnetlink_links[PF_INET6] = inet6_rtnetlink_table;
+ #ifdef CONFIG_SYSCTL
+ /* The static template serves as the "all" tree; "default" gets a copy. */
+ addrconf_sysctl.sysctl_header =
+ register_sysctl_table(addrconf_sysctl.addrconf_root_dir, 0);
+ addrconf_sysctl_register(NULL, &ipv6_devconf_dflt);
+ #endif
+
+ return 0;
+ }
+
+ /*
+  * Undo addrconf_init(): unregister the netdevice notifier, rtnetlink
+  * table and sysctl trees, take down the IPv6 state of every device,
+  * report any address still left in the hash table, stop the address
+  * check timer, free the md5 transform and remove the proc entry.
+  */
+ void __exit addrconf_cleanup(void)
+ {
+ 	struct inet6_ifaddr *leaked;
+ 	struct net_device *dev;
+ 	int bucket;
+
+ 	unregister_netdevice_notifier(&ipv6_dev_notf);
+
+ 	rtnetlink_links[PF_INET6] = NULL;
+ #ifdef CONFIG_SYSCTL
+ 	addrconf_sysctl_unregister(&ipv6_devconf_dflt);
+ 	addrconf_sysctl_unregister(&ipv6_devconf);
+ #endif
+
+ 	rtnl_lock();
+
+ 	/*
+ 	 *	clean dev list.
+ 	 */
+
+ 	for (dev = dev_base; dev; dev = dev->next) {
+ 		if (__in6_dev_get(dev) != NULL)
+ 			addrconf_ifdown(dev, 1);
+ 	}
+ 	addrconf_ifdown(&loopback_dev, 2);
+
+ 	/*
+ 	 *	Check hash table.
+ 	 */
+
+ 	write_lock_bh(&addrconf_hash_lock);
+ 	for (bucket = 0; bucket < IN6_ADDR_HSIZE; bucket++) {
+ 		for (leaked = inet6_addr_lst[bucket]; leaked; leaked = leaked->lst_next) {
+ 			printk(KERN_DEBUG "bug: IPv6 address leakage detected: ifa=%p\n", leaked);
+ 			/* Do not free it; something is wrong.
+ 			   Now we can investigate it with debugger.
+ 			 */
+ 		}
+ 	}
+ 	write_unlock_bh(&addrconf_hash_lock);
+
+ 	del_timer(&addr_chk_timer);
+
+ 	rtnl_unlock();
+
+ #ifdef CONFIG_IPV6_PRIVACY
+ 	if (likely(md5_tfm != NULL)) {
+ 		crypto_free_tfm(md5_tfm);
+ 		md5_tfm = NULL;
+ 	}
+ #endif
+
+ #ifdef CONFIG_PROC_FS
+ 	proc_net_remove("if_inet6");
+ #endif
+ }
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
new file mode 100644
index 000000000000..768b11703daf
--- /dev/null
+++ b/net/ipv6/af_inet6.c
@@ -0,0 +1,867 @@
+/*
+ * PF_INET6 socket protocol family
+ * Linux INET6 implementation
+ *
+ * Authors:
+ * Pedro Roque <roque@di.fc.ul.pt>
+ *
+ * Adapted from linux/net/ipv4/af_inet.c
+ *
+ * $Id: af_inet6.c,v 1.66 2002/02/01 22:01:04 davem Exp $
+ *
+ * Fixes:
+ * piggy, Karl Knutson : Socket protocol table
+ * Hideaki YOSHIFUJI : sin6_scope_id support
+ * Arnaldo Melo : check proc_net_create return, cleanups
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+
+#include <linux/module.h>
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/kernel.h>
+#include <linux/major.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/string.h>
+#include <linux/sockios.h>
+#include <linux/net.h>
+#include <linux/fcntl.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/proc_fs.h>
+#include <linux/stat.h>
+#include <linux/init.h>
+
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/icmpv6.h>
+#include <linux/smp_lock.h>
+
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/udp.h>
+#include <net/tcp.h>
+#include <net/ipip.h>
+#include <net/protocol.h>
+#include <net/inet_common.h>
+#include <net/transp_v6.h>
+#include <net/ip6_route.h>
+#include <net/addrconf.h>
+#ifdef CONFIG_IPV6_TUNNEL
+#include <net/ip6_tunnel.h>
+#endif
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+
+MODULE_AUTHOR("Cast of dozens");
+MODULE_DESCRIPTION("IPv6 protocol stack for Linux");
+MODULE_LICENSE("GPL");
+
+/* IPv6 procfs goodies... */
+
+#ifdef CONFIG_PROC_FS
+extern int raw6_proc_init(void);
+extern void raw6_proc_exit(void);
+extern int tcp6_proc_init(void);
+extern void tcp6_proc_exit(void);
+extern int udp6_proc_init(void);
+extern void udp6_proc_exit(void);
+extern int ipv6_misc_proc_init(void);
+extern void ipv6_misc_proc_exit(void);
+extern int ac6_proc_init(void);
+extern void ac6_proc_exit(void);
+extern int if6_proc_init(void);
+extern void if6_proc_exit(void);
+#endif
+
+int sysctl_ipv6_bindv6only;
+
+#ifdef INET_REFCNT_DEBUG
+atomic_t inet6_sock_nr;
+#endif
+
+/* The inetsw table contains everything that inet_create needs to
+ * build a new socket.
+ */
+static struct list_head inetsw6[SOCK_MAX];
+static DEFINE_SPINLOCK(inetsw6_lock);
+/*
+ * sk_destruct hook installed by inet6_create(): runs the common
+ * inet_sock_destruct() and, when INET_REFCNT_DEBUG is defined, drops
+ * the inet6_sock_nr counter.
+ */
+static void inet6_sock_destruct(struct sock *sk)
+{
+ inet_sock_destruct(sk);
+
+#ifdef INET_REFCNT_DEBUG
+ atomic_dec(&inet6_sock_nr);
+#endif
+}
+/*
+ * Return a pointer to the last sizeof(struct ipv6_pinfo) bytes of the
+ * socket object, whose total size is taken from sk->sk_prot->obj_size.
+ */
+static __inline__ struct ipv6_pinfo *inet6_sk_generic(struct sock *sk)
+{
+ const int offset = sk->sk_prot->obj_size - sizeof(struct ipv6_pinfo);
+
+ return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
+}
+
+/*
+ * inet6_create - create a PF_INET6 socket
+ * @sock: socket being set up; sock->type selects the inetsw6[] list
+ * @protocol: requested protocol, IPPROTO_IP acting as a wildcard
+ *
+ * Looks up the matching inet_protosw entry under rcu_read_lock(),
+ * allocates the sock and initialises both the IPv6 and the IPv4 part
+ * of it.
+ *
+ * Returns 0 on success, -ESOCKTNOSUPPORT when no entry matches, -EPERM
+ * when the entry's capability is missing, -EPROTONOSUPPORT when the
+ * protocol is still IPPROTO_IP after the lookup, -ENOBUFS when
+ * sk_alloc() fails, or the error returned by the protocol's init hook.
+ */
+static int inet6_create(struct socket *sock, int protocol)
+{
+	struct inet_sock *inet;
+	struct ipv6_pinfo *np;
+	struct sock *sk;
+	struct list_head *p;
+	struct inet_protosw *answer;
+	struct proto *answer_prot;
+	unsigned char answer_flags;
+	char answer_no_check;
+	int rc;
+
+	/* Look for the requested type/protocol pair. */
+	answer = NULL;
+	rcu_read_lock();
+	list_for_each_rcu(p, &inetsw6[sock->type]) {
+		answer = list_entry(p, struct inet_protosw, list);
+
+		/* Check the non-wild match. */
+		if (protocol == answer->protocol) {
+			if (protocol != IPPROTO_IP)
+				break;
+		} else {
+			/* Check for the two wild cases. */
+			if (IPPROTO_IP == protocol) {
+				protocol = answer->protocol;
+				break;
+			}
+			if (IPPROTO_IP == answer->protocol)
+				break;
+		}
+		answer = NULL;
+	}
+
+	rc = -ESOCKTNOSUPPORT;
+	if (!answer)
+		goto out_rcu_unlock;
+	rc = -EPERM;
+	if (answer->capability > 0 && !capable(answer->capability))
+		goto out_rcu_unlock;
+	rc = -EPROTONOSUPPORT;
+	if (!protocol)
+		goto out_rcu_unlock;
+
+	sock->ops = answer->ops;
+
+	/*
+	 * Copy everything needed from the entry while still inside the
+	 * RCU read-side section; "answer" must not be dereferenced once
+	 * rcu_read_unlock() has run.
+	 */
+	answer_prot = answer->prot;
+	answer_no_check = answer->no_check;
+	answer_flags = answer->flags;
+	rcu_read_unlock();
+
+	BUG_TRAP(answer_prot->slab != NULL);
+
+	rc = -ENOBUFS;
+	sk = sk_alloc(PF_INET6, GFP_KERNEL, answer_prot, 1);
+	if (sk == NULL)
+		goto out;
+
+	sock_init_data(sock, sk);
+
+	rc = 0;
+	sk->sk_no_check = answer_no_check;
+	if (INET_PROTOSW_REUSE & answer_flags)
+		sk->sk_reuse = 1;
+
+	inet = inet_sk(sk);
+
+	if (SOCK_RAW == sock->type) {
+		inet->num = protocol;
+		if (IPPROTO_RAW == protocol)
+			inet->hdrincl = 1;
+	}
+
+	sk->sk_destruct = inet6_sock_destruct;
+	sk->sk_family = PF_INET6;
+	sk->sk_protocol = protocol;
+
+	/* Use the saved proto pointer, not the RCU-protected entry. */
+	sk->sk_backlog_rcv = answer_prot->backlog_rcv;
+
+	inet_sk(sk)->pinet6 = np = inet6_sk_generic(sk);
+	np->hop_limit = -1;
+	np->mcast_hops = -1;
+	np->mc_loop = 1;
+	np->pmtudisc = IPV6_PMTUDISC_WANT;
+	np->ipv6only = sysctl_ipv6_bindv6only;
+
+	/* Init the ipv4 part of the socket since we can have sockets
+	 * using v6 API for ipv4.
+	 */
+	inet->uc_ttl = -1;
+
+	inet->mc_loop = 1;
+	inet->mc_ttl = 1;
+	inet->mc_index = 0;
+	inet->mc_list = NULL;
+
+	if (ipv4_config.no_pmtu_disc)
+		inet->pmtudisc = IP_PMTUDISC_DONT;
+	else
+		inet->pmtudisc = IP_PMTUDISC_WANT;
+
+#ifdef INET_REFCNT_DEBUG
+	atomic_inc(&inet6_sock_nr);
+	atomic_inc(&inet_sock_nr);
+#endif
+	if (inet->num) {
+		/* It assumes that any protocol which allows
+		 * the user to assign a number at socket
+		 * creation time automatically shares.
+		 */
+		/* host-order num to network-order sport */
+		inet->sport = htons(inet->num);
+		sk->sk_prot->hash(sk);
+	}
+	if (sk->sk_prot->init) {
+		rc = sk->sk_prot->init(sk);
+		if (rc) {
+			sk_common_release(sk);
+			goto out;
+		}
+	}
+out:
+	return rc;
+out_rcu_unlock:
+	rcu_read_unlock();
+	goto out;
+}
+
+
+/*
+ * inet6_bind - bind a PF_INET6 socket to a local address and port
+ * @sock: socket to bind
+ * @uaddr: address to bind to, read as a struct sockaddr_in6
+ * @addr_len: length of @uaddr, at least SIN6_LEN_RFC2133
+ *
+ * Delegates to the protocol's own bind hook when it has one. Otherwise
+ * checks that the address is local (v4-mapped addresses are checked
+ * with inet_addr_type(), others with ipv6_chk_addr()), requires an
+ * interface for link-local addresses, and reserves the port through
+ * the protocol's get_port hook.
+ *
+ * Returns 0 on success or -EINVAL, -EACCES, -EADDRNOTAVAIL, -ENODEV or
+ * -EADDRINUSE.
+ */
+int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
+{
+	struct sockaddr_in6 *addr=(struct sockaddr_in6 *)uaddr;
+	struct sock *sk = sock->sk;
+	struct inet_sock *inet = inet_sk(sk);
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	__u32 v4addr = 0;
+	unsigned short snum;
+	int addr_type = 0;
+	int err = 0;
+
+	/* If the socket has its own bind function then use it. */
+	if (sk->sk_prot->bind)
+		return sk->sk_prot->bind(sk, uaddr, addr_len);
+
+	if (addr_len < SIN6_LEN_RFC2133)
+		return -EINVAL;
+	addr_type = ipv6_addr_type(&addr->sin6_addr);
+	if ((addr_type & IPV6_ADDR_MULTICAST) && sock->type == SOCK_STREAM)
+		return -EINVAL;
+
+	snum = ntohs(addr->sin6_port);
+	if (snum && snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
+		return -EACCES;
+
+	lock_sock(sk);
+
+	/* Check these errors (active socket, double bind). */
+	if (sk->sk_state != TCP_CLOSE || inet->num) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	/* Check if the address belongs to the host. */
+	if (addr_type == IPV6_ADDR_MAPPED) {
+		v4addr = addr->sin6_addr.s6_addr32[3];
+		if (inet_addr_type(v4addr) != RTN_LOCAL) {
+			err = -EADDRNOTAVAIL;
+			goto out;
+		}
+	} else {
+		if (addr_type != IPV6_ADDR_ANY) {
+			struct net_device *dev = NULL;
+
+			if (addr_type & IPV6_ADDR_LINKLOCAL) {
+				if (addr_len >= sizeof(struct sockaddr_in6) &&
+				    addr->sin6_scope_id) {
+					/* Override any existing binding, if another one
+					 * is supplied by user.
+					 */
+					sk->sk_bound_dev_if = addr->sin6_scope_id;
+				}
+
+				/* Binding to link-local address requires an interface */
+				if (!sk->sk_bound_dev_if) {
+					err = -EINVAL;
+					goto out;
+				}
+				dev = dev_get_by_index(sk->sk_bound_dev_if);
+				if (!dev) {
+					err = -ENODEV;
+					goto out;
+				}
+			}
+
+			/* ipv4 addr of the socket is invalid. Only the
+			 * unspecified and mapped address have a v4 equivalent.
+			 */
+			v4addr = LOOPBACK4_IPV6;
+			if (!(addr_type & IPV6_ADDR_MULTICAST)) {
+				if (!ipv6_chk_addr(&addr->sin6_addr, dev, 0)) {
+					if (dev)
+						dev_put(dev);
+					err = -EADDRNOTAVAIL;
+					goto out;
+				}
+			}
+			/* dev is only held for the address check above */
+			if (dev)
+				dev_put(dev);
+		}
+	}
+
+	inet->rcv_saddr = v4addr;
+	inet->saddr = v4addr;
+
+	ipv6_addr_copy(&np->rcv_saddr, &addr->sin6_addr);
+
+	if (!(addr_type & IPV6_ADDR_MULTICAST))
+		ipv6_addr_copy(&np->saddr, &addr->sin6_addr);
+
+	/* Make sure we are allowed to bind here. */
+	if (sk->sk_prot->get_port(sk, snum)) {
+		inet_reset_saddr(sk);
+		err = -EADDRINUSE;
+		goto out;
+	}
+
+	if (addr_type != IPV6_ADDR_ANY)
+		sk->sk_userlocks |= SOCK_BINDADDR_LOCK;
+	if (snum)
+		sk->sk_userlocks |= SOCK_BINDPORT_LOCK;
+	/* host-order num to network-order sport */
+	inet->sport = htons(inet->num);
+	inet->dport = 0;
+	inet->daddr = 0;
+out:
+	release_sock(sk);
+	return err;
+}
+/*
+ * release hook for PF_INET6 sockets: closes the socket's multicast and
+ * anycast lists, then hands over to inet_release(). Returns -EINVAL
+ * when the socket has no sock attached, else inet_release()'s result.
+ */
+int inet6_release(struct socket *sock)
+{
+ struct sock *sk = sock->sk;
+
+ if (sk == NULL)
+ return -EINVAL;
+
+ /* Free mc lists */
+ ipv6_sock_mc_close(sk);
+
+ /* Free ac lists */
+ ipv6_sock_ac_close(sk);
+
+ return inet_release(sock);
+}
+/*
+ * Release the IPv6 state attached to @sk: the cached destination entry,
+ * the stored rx options skb, the flow label list and the tx options.
+ * Always returns 0.
+ */
+int inet6_destroy_sock(struct sock *sk)
+{
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct sk_buff *skb;
+ struct ipv6_txoptions *opt;
+
+ /*
+ * Release destination entry
+ */
+
+ sk_dst_reset(sk);
+
+ /* Release rx options */
+
+ if ((skb = xchg(&np->pktoptions, NULL)) != NULL) /* xchg() detaches the pointer before it is freed */
+ kfree_skb(skb);
+
+ /* Free flowlabels */
+ fl6_free_socklist(sk);
+
+ /* Free tx options */
+
+ if ((opt = xchg(&np->opt, NULL)) != NULL)
+ sock_kfree_s(sk, opt, opt->tot_len);
+
+ return 0;
+}
+
+/*
+ * inet6_getname - fill in the local or the peer address of a socket
+ * @sock: socket to query
+ * @uaddr: output buffer, written as a struct sockaddr_in6
+ * @uaddr_len: set to sizeof(struct sockaddr_in6) on success
+ * @peer: zero for the local name, non-zero for the peer name
+ *
+ * Serves both getsockname and getpeername. Returns 0, or -ENOTCONN when
+ * the peer is requested but there is no destination port, or when
+ * @peer is 1 and the socket is in the CLOSE or SYN_SENT state.
+ */
+int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
+		  int *uaddr_len, int peer)
+{
+	struct sockaddr_in6 *sin = (struct sockaddr_in6 *)uaddr;
+	struct sock *sk = sock->sk;
+	struct inet_sock *inet = inet_sk(sk);
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct in6_addr *local;
+
+	sin->sin6_family = AF_INET6;
+	sin->sin6_flowinfo = 0;
+	sin->sin6_scope_id = 0;
+
+	if (!peer) {
+		/* Fall back to saddr while rcv_saddr is unspecified. */
+		local = ipv6_addr_any(&np->rcv_saddr) ? &np->saddr
+						      : &np->rcv_saddr;
+		ipv6_addr_copy(&sin->sin6_addr, local);
+		sin->sin6_port = inet->sport;
+	} else {
+		if (!inet->dport)
+			return -ENOTCONN;
+		if (peer == 1 &&
+		    ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT)))
+			return -ENOTCONN;
+
+		sin->sin6_port = inet->dport;
+		ipv6_addr_copy(&sin->sin6_addr, &np->daddr);
+		if (np->sndflow)
+			sin->sin6_flowinfo = np->flow_label;
+	}
+
+	/* Link-local addresses carry the bound interface as scope id. */
+	if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL)
+		sin->sin6_scope_id = sk->sk_bound_dev_if;
+
+	*uaddr_len = sizeof(*sin);
+	return 0;
+}
+
+/*
+ * inet6_ioctl - ioctl entry point for PF_INET6 sockets
+ * @sock: socket the ioctl was issued on
+ * @cmd: ioctl command
+ * @arg: user-space argument
+ *
+ * Timestamp, route and address commands are dispatched directly. Any
+ * other command goes to the protocol's ioctl hook and falls back to
+ * dev_ioctl() when there is no hook or it returns -ENOIOCTLCMD.
+ */
+int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+{
+	struct sock *sk = sock->sk;
+	void __user *uarg = (void __user *)arg;
+	int rc;
+
+	switch (cmd) {
+	case SIOCGSTAMP:
+		return sock_get_timestamp(sk, (struct timeval __user *)arg);
+	case SIOCADDRT:
+	case SIOCDELRT:
+		return ipv6_route_ioctl(cmd, uarg);
+	case SIOCSIFADDR:
+		return addrconf_add_ifaddr(uarg);
+	case SIOCDIFADDR:
+		return addrconf_del_ifaddr(uarg);
+	case SIOCSIFDSTADDR:
+		return addrconf_set_dstaddr(uarg);
+	default:
+		break;
+	}
+
+	/* Not handled above: try the protocol, then the device layer. */
+	if (!sk->sk_prot->ioctl)
+		return dev_ioctl(cmd, uarg);
+
+	rc = sk->sk_prot->ioctl(sk, cmd, arg);
+	if (rc == -ENOIOCTLCMD)
+		return dev_ioctl(cmd, uarg);
+	return rc;
+}
+/*
+ * Socket operations for PF_INET6 stream sockets. Only release, bind,
+ * getname and ioctl are inet6_* handlers; the remaining entries reuse
+ * the inet_*, sock_* and tcp_* handlers.
+ */
+struct proto_ops inet6_stream_ops = {
+ .family = PF_INET6,
+ .owner = THIS_MODULE,
+ .release = inet6_release,
+ .bind = inet6_bind,
+ .connect = inet_stream_connect, /* ok */
+ .socketpair = sock_no_socketpair, /* a do nothing */
+ .accept = inet_accept, /* ok */
+ .getname = inet6_getname,
+ .poll = tcp_poll, /* ok */
+ .ioctl = inet6_ioctl, /* must change */
+ .listen = inet_listen, /* ok */
+ .shutdown = inet_shutdown, /* ok */
+ .setsockopt = sock_common_setsockopt, /* ok */
+ .getsockopt = sock_common_getsockopt, /* ok */
+ .sendmsg = inet_sendmsg, /* ok */
+ .recvmsg = sock_common_recvmsg, /* ok */
+ .mmap = sock_no_mmap,
+ .sendpage = tcp_sendpage
+};
+/*
+ * Socket operations for PF_INET6 datagram sockets. accept, listen and
+ * sendpage are the sock_no_* stubs and poll is udp_poll.
+ */
+struct proto_ops inet6_dgram_ops = {
+ .family = PF_INET6,
+ .owner = THIS_MODULE,
+ .release = inet6_release,
+ .bind = inet6_bind,
+ .connect = inet_dgram_connect, /* ok */
+ .socketpair = sock_no_socketpair, /* a do nothing */
+ .accept = sock_no_accept, /* a do nothing */
+ .getname = inet6_getname,
+ .poll = udp_poll, /* ok */
+ .ioctl = inet6_ioctl, /* must change */
+ .listen = sock_no_listen, /* ok */
+ .shutdown = inet_shutdown, /* ok */
+ .setsockopt = sock_common_setsockopt, /* ok */
+ .getsockopt = sock_common_getsockopt, /* ok */
+ .sendmsg = inet_sendmsg, /* ok */
+ .recvmsg = sock_common_recvmsg, /* ok */
+ .mmap = sock_no_mmap,
+ .sendpage = sock_no_sendpage,
+};
+/*
+ * Address family descriptor handed to sock_register() by inet6_init();
+ * its create hook is inet6_create().
+ */
+static struct net_proto_family inet6_family_ops = {
+ .family = PF_INET6,
+ .create = inet6_create,
+ .owner = THIS_MODULE,
+};
+
+#ifdef CONFIG_SYSCTL
+extern void ipv6_sysctl_register(void);
+extern void ipv6_sysctl_unregister(void);
+#endif
+
+/*
+ * Socket operations for PF_INET6 raw sockets: the same table as
+ * inet6_dgram_ops except that poll is datagram_poll instead of
+ * udp_poll.
+ */
+static struct proto_ops inet6_sockraw_ops = {
+ .family = PF_INET6,
+ .owner = THIS_MODULE,
+ .release = inet6_release,
+ .bind = inet6_bind,
+ .connect = inet_dgram_connect, /* ok */
+ .socketpair = sock_no_socketpair, /* a do nothing */
+ .accept = sock_no_accept, /* a do nothing */
+ .getname = inet6_getname,
+ .poll = datagram_poll, /* ok */
+ .ioctl = inet6_ioctl, /* must change */
+ .listen = sock_no_listen, /* ok */
+ .shutdown = inet_shutdown, /* ok */
+ .setsockopt = sock_common_setsockopt, /* ok */
+ .getsockopt = sock_common_getsockopt, /* ok */
+ .sendmsg = inet_sendmsg, /* ok */
+ .recvmsg = sock_common_recvmsg, /* ok */
+ .mmap = sock_no_mmap,
+ .sendpage = sock_no_sendpage,
+};
+/*
+ * Protocol switch entry for SOCK_RAW, registered by inet6_init(). Its
+ * protocol is IPPROTO_IP, which inet6_create() treats as a wildcard,
+ * and creating such a socket requires CAP_NET_RAW.
+ */
+static struct inet_protosw rawv6_protosw = {
+ .type = SOCK_RAW,
+ .protocol = IPPROTO_IP, /* wild card */
+ .prot = &rawv6_prot,
+ .ops = &inet6_sockraw_ops,
+ .capability = CAP_NET_RAW,
+ .no_check = UDP_CSUM_DEFAULT,
+ .flags = INET_PROTOSW_REUSE,
+};
+/*
+ * inet6_register_protosw - add @p to the inetsw6[] list of its type
+ *
+ * Runs under inetsw6_lock. The request is refused with a KERN_ERR
+ * message when p->type is not below SOCK_MAX, or when an entry flagged
+ * INET_PROTOSW_PERMANENT already uses the same protocol number.
+ * Otherwise @p is linked in after the last permanent entry of the list.
+ * Nothing is returned, so callers cannot detect a refused request.
+ */
+void
+inet6_register_protosw(struct inet_protosw *p)
+{
+ struct list_head *lh;
+ struct inet_protosw *answer;
+ int protocol = p->protocol;
+ struct list_head *last_perm;
+
+ spin_lock_bh(&inetsw6_lock);
+
+ if (p->type >= SOCK_MAX)
+ goto out_illegal;
+
+ /* If we are trying to override a permanent protocol, bail. */
+ answer = NULL;
+ last_perm = &inetsw6[p->type]; /* list head: used when there is no permanent entry */
+ list_for_each(lh, &inetsw6[p->type]) {
+ answer = list_entry(lh, struct inet_protosw, list);
+
+ /* Check only the non-wild match. */
+ if (INET_PROTOSW_PERMANENT & answer->flags) {
+ if (protocol == answer->protocol)
+ break;
+ last_perm = lh;
+ }
+
+ answer = NULL;
+ }
+ if (answer) /* non-NULL only when the loop broke on a permanent match */
+ goto out_permanent;
+
+ /* Add the new entry after the last permanent entry if any, so that
+ * the new entry does not override a permanent entry when matched with
+ * a wild-card protocol. But it is allowed to override any existing
+ * non-permanent entry. This means that when we remove this entry, the
+ * system automatically returns to the old behavior.
+ */
+ list_add_rcu(&p->list, last_perm);
+out:
+ spin_unlock_bh(&inetsw6_lock);
+ return;
+
+out_permanent:
+ printk(KERN_ERR "Attempt to override permanent protocol %d.\n",
+ protocol);
+ goto out;
+
+out_illegal:
+ printk(KERN_ERR
+ "Ignoring attempt to register invalid socket type %d.\n",
+ p->type);
+ goto out;
+}
+/*
+ * inet6_unregister_protosw - remove @p from its inetsw6[] list
+ *
+ * Entries flagged INET_PROTOSW_PERMANENT are not removed; a KERN_ERR
+ * message is logged instead. For other entries the function unlinks
+ * @p under inetsw6_lock and then calls synchronize_net() before
+ * returning.
+ */
+void
+inet6_unregister_protosw(struct inet_protosw *p)
+{
+ if (INET_PROTOSW_PERMANENT & p->flags) {
+ printk(KERN_ERR
+ "Attempt to unregister permanent protocol %d.\n",
+ p->protocol);
+ } else {
+ spin_lock_bh(&inetsw6_lock);
+ list_del_rcu(&p->list);
+ spin_unlock_bh(&inetsw6_lock);
+
+ synchronize_net();
+ }
+}
+
+/*
+ * snmp6_mib_init - allocate the two per-cpu areas of a MIB
+ * @ptr: two-element array receiving the allocations
+ * @mibsize: size passed to __alloc_percpu()
+ * @mibalign: alignment passed to __alloc_percpu()
+ *
+ * Returns 0 on success, -EINVAL when @ptr is NULL and -ENOMEM when an
+ * allocation fails. If the second allocation fails the first one is
+ * freed again and ptr[0] is reset to NULL.
+ */
+int
+snmp6_mib_init(void *ptr[2], size_t mibsize, size_t mibalign)
+{
+	if (!ptr)
+		return -EINVAL;
+
+	ptr[0] = __alloc_percpu(mibsize, mibalign);
+	if (ptr[0] == NULL)
+		return -ENOMEM;
+
+	ptr[1] = __alloc_percpu(mibsize, mibalign);
+	if (ptr[1] != NULL)
+		return 0;
+
+	/* Second area failed: undo the first one. */
+	free_percpu(ptr[0]);
+	ptr[0] = NULL;
+	return -ENOMEM;
+}
+
+/*
+ * snmp6_mib_free - free both per-cpu areas of a MIB
+ * @ptr: two-element array filled by snmp6_mib_init(); may be NULL
+ *
+ * Slots that are NULL are skipped; both slots are NULL afterwards.
+ */
+void
+snmp6_mib_free(void *ptr[2])
+{
+	int slot;
+
+	if (!ptr)
+		return;
+
+	for (slot = 0; slot < 2; slot++) {
+		if (ptr[slot])
+			free_percpu(ptr[slot]);
+		ptr[slot] = NULL;
+	}
+}
+/*
+ * Allocate the IPv6, ICMPv6 and UDPv6 statistics via snmp6_mib_init().
+ * If one allocation fails, the MIBs allocated before it are freed
+ * again. Returns 0 on success and -ENOMEM on any failure.
+ */
+static int __init init_ipv6_mibs(void)
+{
+ if (snmp6_mib_init((void **)ipv6_statistics, sizeof (struct ipstats_mib),
+ __alignof__(struct ipstats_mib)) < 0)
+ goto err_ip_mib;
+ if (snmp6_mib_init((void **)icmpv6_statistics, sizeof (struct icmpv6_mib),
+ __alignof__(struct icmpv6_mib)) < 0)
+ goto err_icmp_mib;
+ if (snmp6_mib_init((void **)udp_stats_in6, sizeof (struct udp_mib),
+ __alignof__(struct udp_mib)) < 0)
+ goto err_udp_mib;
+ return 0;
+
+err_udp_mib:
+ snmp6_mib_free((void **)icmpv6_statistics);
+err_icmp_mib:
+ snmp6_mib_free((void **)ipv6_statistics);
+err_ip_mib:
+ return -ENOMEM;
+
+}
+/* Free the three statistics MIBs allocated by init_ipv6_mibs(). */
+static void cleanup_ipv6_mibs(void)
+{
+ snmp6_mib_free((void **)ipv6_statistics);
+ snmp6_mib_free((void **)icmpv6_statistics);
+ snmp6_mib_free((void **)udp_stats_in6);
+}
+
+extern int ipv6_misc_proc_init(void);
+
+/*
+ * inet6_init - bring up the IPv6 stack
+ *
+ * Registers the TCPv6, UDPv6 and raw protos, the raw protosw entry and
+ * the PF_INET6 socket family, then initialises the MIBs, sysctls,
+ * ICMPv6, neighbour discovery, MLD, the /proc entries, routing, flow
+ * labels, addrconf, sit, the extension header handlers and finally the
+ * transport protocols.
+ *
+ * Every failure unwinds what was set up before it, in reverse order,
+ * including the socket family registration, so that a failed init does
+ * not leave PF_INET6 registered. Returns 0 or a negative errno.
+ */
+static int __init inet6_init(void)
+{
+	struct sk_buff *dummy_skb;
+	struct list_head *r;
+	int err;
+
+#ifdef MODULE
+#if 0 /* FIXME --RR */
+	if (!mod_member_present(&__this_module, can_unload))
+		return -EINVAL;
+
+	__this_module.can_unload = &ipv6_unload;
+#endif
+#endif
+
+	/* dummy_skb is only used for sizeof; it is never dereferenced. */
+	if (sizeof(struct inet6_skb_parm) > sizeof(dummy_skb->cb)) {
+		printk(KERN_CRIT "inet6_proto_init: size fault\n");
+		return -EINVAL;
+	}
+
+	err = proto_register(&tcpv6_prot, 1);
+	if (err)
+		goto out;
+
+	err = proto_register(&udpv6_prot, 1);
+	if (err)
+		goto out_unregister_tcp_proto;
+
+	err = proto_register(&rawv6_prot, 1);
+	if (err)
+		goto out_unregister_udp_proto;
+
+
+	/* Register the socket-side information for inet6_create. */
+	for(r = &inetsw6[0]; r < &inetsw6[SOCK_MAX]; ++r)
+		INIT_LIST_HEAD(r);
+
+	/* We MUST register RAW sockets before we create the ICMP6,
+	 * IGMP6, or NDISC control sockets.
+	 */
+	inet6_register_protosw(&rawv6_protosw);
+
+	/* Register the family here so that the init calls below will
+	 * be able to create sockets. (?? is this dangerous ??)
+	 *
+	 * A failure here is fatal: without the family the control
+	 * sockets below cannot be created.
+	 */
+	err = sock_register(&inet6_family_ops);
+	if (err)
+		goto out_unregister_raw_proto;
+
+	/* Initialise ipv6 mibs */
+	err = init_ipv6_mibs();
+	if (err)
+		goto out_sock_unregister;
+
+	/*
+	 * ipngwg API draft makes clear that the correct semantics
+	 * for TCP and UDP is to consider one TCP and UDP instance
+	 * in a host available by both INET and INET6 APIs and
+	 * able to communicate via both network protocols.
+	 */
+
+#ifdef CONFIG_SYSCTL
+	ipv6_sysctl_register();
+#endif
+	err = icmpv6_init(&inet6_family_ops);
+	if (err)
+		goto icmp_fail;
+	err = ndisc_init(&inet6_family_ops);
+	if (err)
+		goto ndisc_fail;
+	err = igmp6_init(&inet6_family_ops);
+	if (err)
+		goto igmp_fail;
+	/* Create /proc/foo6 entries. */
+#ifdef CONFIG_PROC_FS
+	err = -ENOMEM;
+	if (raw6_proc_init())
+		goto proc_raw6_fail;
+	if (tcp6_proc_init())
+		goto proc_tcp6_fail;
+	if (udp6_proc_init())
+		goto proc_udp6_fail;
+	if (ipv6_misc_proc_init())
+		goto proc_misc6_fail;
+
+	if (ac6_proc_init())
+		goto proc_anycast6_fail;
+	if (if6_proc_init())
+		goto proc_if6_fail;
+#endif
+	ipv6_packet_init();
+	ip6_route_init();
+	ip6_flowlabel_init();
+	err = addrconf_init();
+	if (err)
+		goto addrconf_fail;
+	sit_init();
+
+	/* Init v6 extension headers. */
+	ipv6_rthdr_init();
+	ipv6_frag_init();
+	ipv6_nodata_init();
+	ipv6_destopt_init();
+
+	/* Init v6 transport protocols. */
+	udpv6_init();
+	tcpv6_init();
+	err = 0;
+out:
+	return err;
+
+addrconf_fail:
+	ip6_flowlabel_cleanup();
+	ip6_route_cleanup();
+	ipv6_packet_cleanup();
+#ifdef CONFIG_PROC_FS
+	if6_proc_exit();
+proc_if6_fail:
+	ac6_proc_exit();
+proc_anycast6_fail:
+	ipv6_misc_proc_exit();
+proc_misc6_fail:
+	udp6_proc_exit();
+proc_udp6_fail:
+	tcp6_proc_exit();
+proc_tcp6_fail:
+	raw6_proc_exit();
+proc_raw6_fail:
+#endif
+	igmp6_cleanup();
+igmp_fail:
+	ndisc_cleanup();
+ndisc_fail:
+	icmpv6_cleanup();
+icmp_fail:
+#ifdef CONFIG_SYSCTL
+	ipv6_sysctl_unregister();
+#endif
+	cleanup_ipv6_mibs();
+out_sock_unregister:
+	/* Only reached once sock_register() has succeeded. */
+	sock_unregister(PF_INET6);
+out_unregister_raw_proto:
+	proto_unregister(&rawv6_prot);
+out_unregister_udp_proto:
+	proto_unregister(&udpv6_prot);
+out_unregister_tcp_proto:
+	proto_unregister(&tcpv6_prot);
+	goto out;
+}
+module_init(inet6_init);
+/*
+ * Module exit: unregisters the PF_INET6 socket family first, removes
+ * the /proc entries, runs the cleanup routine of each subsystem and
+ * finally unregisters the raw, UDPv6 and TCPv6 protos.
+ */
+static void __exit inet6_exit(void)
+{
+ /* First of all disallow new sockets creation. */
+ sock_unregister(PF_INET6);
+#ifdef CONFIG_PROC_FS
+ if6_proc_exit();
+ ac6_proc_exit();
+ ipv6_misc_proc_exit();
+ udp6_proc_exit();
+ tcp6_proc_exit();
+ raw6_proc_exit();
+#endif
+ /* Cleanup code parts. */
+ sit_cleanup();
+ ip6_flowlabel_cleanup();
+ addrconf_cleanup();
+ ip6_route_cleanup();
+ ipv6_packet_cleanup();
+ igmp6_cleanup();
+ ndisc_cleanup();
+ icmpv6_cleanup();
+#ifdef CONFIG_SYSCTL
+ ipv6_sysctl_unregister();
+#endif
+ cleanup_ipv6_mibs();
+ proto_unregister(&rawv6_prot);
+ proto_unregister(&udpv6_prot);
+ proto_unregister(&tcpv6_prot);
+}
+module_exit(inet6_exit);
+
+MODULE_ALIAS_NETPROTO(PF_INET6);
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
new file mode 100644
index 000000000000..e3ecf626cbf7
--- /dev/null
+++ b/net/ipv6/ah6.c
@@ -0,0 +1,478 @@
+/*
+ * Copyright (C)2002 USAGI/WIDE Project
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Authors
+ *
+ * Mitsuru KANDA @USAGI : IPv6 Support
+ * Kazunori MIYAZAWA @USAGI :
+ * Kunihiro Ishiguro <kunihiro@ipinfusion.com>
+ *
+ * This file is derived from net/ipv4/ah.c.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <net/ip.h>
+#include <net/ah.h>
+#include <linux/crypto.h>
+#include <linux/pfkeyv2.h>
+#include <linux/string.h>
+#include <net/icmp.h>
+#include <net/ipv6.h>
+#include <net/xfrm.h>
+#include <asm/scatterlist.h>
+
+/*
+ * zero_out_mutable_opts - clear the data of mutable TLV options
+ * @opthdr: hop-by-hop or destination options header to process
+ *
+ * Walks the options that follow the two-byte header. Pad0 is a single
+ * byte; every other option is type, length and data. The data of an
+ * option whose type has bit 0x20 set is overwritten with zeroes.
+ *
+ * Returns 1 when the options exactly fill the header and 0 when an
+ * option runs past its end.
+ */
+static int zero_out_mutable_opts(struct ipv6_opt_hdr *opthdr)
+{
+	u8 *cur = (u8 *)opthdr + 2;
+	int remaining = ipv6_optlen(opthdr) - 2;
+
+	while (remaining > 0) {
+		int step;
+
+		if (*cur == IPV6_TLV_PAD0) {
+			step = 1;
+		} else {
+			/* Need the length byte before it can be read. */
+			if (remaining < 2)
+				return 0;
+			step = cur[1] + 2;
+			if (remaining < step)
+				return 0;
+			if (*cur & 0x20)
+				memset(cur + 2, 0, cur[1]);
+		}
+
+		cur += step;
+		remaining -= step;
+	}
+
+	return remaining == 0;
+}
+
+/**
+ * ipv6_rearrange_rthdr - rearrange IPv6 routing header
+ * @iph: IPv6 header
+ * @rthdr: routing header
+ *
+ * Rearrange the destination address in @iph and the addresses in @rthdr
+ * so that they appear in the order they will at the final destination.
+ * See Appendix A2 of RFC 2402 for details.
+ *
+ * Does nothing when segments_left is already zero; otherwise
+ * segments_left is set to zero as part of the rearrangement.
+ */
+static void ipv6_rearrange_rthdr(struct ipv6hdr *iph, struct ipv6_rt_hdr *rthdr)
+{
+ int segments, segments_left;
+ struct in6_addr *addrs;
+ struct in6_addr final_addr;
+
+ segments_left = rthdr->segments_left;
+ if (segments_left == 0)
+ return;
+ rthdr->segments_left = 0;
+
+ /* The value of rthdr->hdrlen has been verified either by the system
+ * call if it is locally generated, or by ipv6_rthdr_rcv() for incoming
+ * packets. So we can assume that it is even and that segments is
+ * greater than or equal to segments_left.
+ *
+ * For the same reason we can assume that this option is of type 0.
+ */
+ segments = rthdr->hdrlen >> 1;
+
+ addrs = ((struct rt0_hdr *)rthdr)->addr;
+ ipv6_addr_copy(&final_addr, addrs + segments - 1); /* last listed address */
+
+ addrs += segments - segments_left; /* first address not yet visited */
+ memmove(addrs + 1, addrs, (segments_left - 1) * sizeof(*addrs)); /* shift the rest up one slot */
+
+ ipv6_addr_copy(addrs, &iph->daddr); /* current destination fills the freed slot */
+ ipv6_addr_copy(&iph->daddr, &final_addr);
+}
+/*
+ * ipv6_clear_mutable_options - prepare extension headers for the ICV
+ * @iph: IPv6 header; the extension headers start right after it
+ * @len: number of bytes, counted from @iph, to walk
+ *
+ * Hop-by-hop and destination option headers have their mutable options
+ * zeroed, routing headers are rearranged. The walk stops at the first
+ * header of any other type or when @len bytes have been covered.
+ *
+ * Returns 0, or -EINVAL when an options header is malformed.
+ */
+static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len)
+{
+ union {
+ struct ipv6hdr *iph;
+ struct ipv6_opt_hdr *opth;
+ struct ipv6_rt_hdr *rth;
+ char *raw;
+ } exthdr = { .iph = iph };
+ char *end = exthdr.raw + len;
+ int nexthdr = iph->nexthdr;
+
+ exthdr.iph++; /* step over the fixed IPv6 header */
+
+ while (exthdr.raw < end) {
+ switch (nexthdr) {
+ case NEXTHDR_HOP:
+ case NEXTHDR_DEST:
+ if (!zero_out_mutable_opts(exthdr.opth)) {
+ LIMIT_NETDEBUG(printk(
+ KERN_WARNING "overrun %sopts\n",
+ nexthdr == NEXTHDR_HOP ?
+ "hop" : "dest"));
+ return -EINVAL;
+ }
+ break;
+
+ case NEXTHDR_ROUTING:
+ ipv6_rearrange_rthdr(iph, exthdr.rth);
+ break;
+
+ default :
+ return 0;
+ }
+
+ nexthdr = exthdr.opth->nexthdr; /* NOTE(review): read without checking that this header lies fully before end */
+ exthdr.raw += ipv6_optlen(exthdr.opth);
+ }
+
+ return 0;
+}
+/*
+ * ah6_output - add the authentication header to an outgoing packet
+ * @x: xfrm state; x->data holds the struct ah_data
+ * @skb: packet whose data starts at the IPv6 header
+ *
+ * Saves the first 8 bytes of the IPv6 header and, when extension
+ * headers are present, the destination address plus those headers.
+ * The mutable fields are then cleared, the AH fields filled in and the
+ * ICV computed through ahp->icv, after which the saved bytes are
+ * copied back.
+ *
+ * Returns 0, -ENOMEM when the save buffer cannot be allocated, or the
+ * error from ipv6_clear_mutable_options().
+ */
+static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
+{
+ int err;
+ int extlen;
+ struct ipv6hdr *top_iph;
+ struct ip_auth_hdr *ah;
+ struct ah_data *ahp;
+ u8 nexthdr;
+ char tmp_base[8];
+ struct {
+ struct in6_addr daddr;
+ char hdrs[0];
+ } *tmp_ext;
+
+ top_iph = (struct ipv6hdr *)skb->data;
+ top_iph->payload_len = htons(skb->len - sizeof(*top_iph));
+
+ nexthdr = *skb->nh.raw; /* remembered for ah->nexthdr below */
+ *skb->nh.raw = IPPROTO_AH;
+
+ /* When there are no extension headers, we only need to save the first
+ * 8 bytes of the base IP header.
+ */
+ memcpy(tmp_base, top_iph, sizeof(tmp_base));
+
+ tmp_ext = NULL;
+ extlen = skb->h.raw - (unsigned char *)(top_iph + 1);
+ if (extlen) {
+ extlen += sizeof(*tmp_ext); /* from here on extlen also covers the saved daddr */
+ tmp_ext = kmalloc(extlen, GFP_ATOMIC);
+ if (!tmp_ext) {
+ err = -ENOMEM;
+ goto error;
+ }
+ memcpy(tmp_ext, &top_iph->daddr, extlen);
+ err = ipv6_clear_mutable_options(top_iph,
+ extlen - sizeof(*tmp_ext) +
+ sizeof(*top_iph));
+ if (err)
+ goto error_free_iph;
+ }
+
+ ah = (struct ip_auth_hdr *)skb->h.raw;
+ ah->nexthdr = nexthdr;
+
+ top_iph->priority = 0;
+ top_iph->flow_lbl[0] = 0;
+ top_iph->flow_lbl[1] = 0;
+ top_iph->flow_lbl[2] = 0;
+ top_iph->hop_limit = 0;
+
+ ahp = x->data;
+ ah->hdrlen = (XFRM_ALIGN8(sizeof(struct ipv6_auth_hdr) +
+ ahp->icv_trunc_len) >> 2) - 2;
+
+ ah->reserved = 0;
+ ah->spi = x->id.spi;
+ ah->seq_no = htonl(++x->replay.oseq);
+ ahp->icv(ahp, skb, ah->auth_data);
+
+ err = 0;
+
+ memcpy(top_iph, tmp_base, sizeof(tmp_base)); /* restore the fields cleared above */
+ if (tmp_ext) {
+ memcpy(&top_iph->daddr, tmp_ext, extlen);
+error_free_iph: /* also entered by goto from the failure path above, skipping the restore */
+ kfree(tmp_ext);
+ }
+
+error:
+ return err;
+}
+
+/*
+ * ah6_input - verify and strip the authentication header
+ * @x: xfrm state; x->data holds the struct ah_data
+ * @decap: unused here
+ * @skb: packet whose data starts at the AH header
+ *
+ * Keeps a copy of the headers in front of AH, clears their mutable
+ * fields, recomputes the ICV and compares it with the received one. On
+ * success the AH header is removed and the saved headers are copied
+ * back in front of the payload.
+ *
+ * Returns the next-header value taken from the AH header, or -EINVAL
+ * on any failure. The temporary header copy is freed on every path.
+ */
+static int ah6_input(struct xfrm_state *x, struct xfrm_decap_state *decap, struct sk_buff *skb)
+{
+	/*
+	 * Before process AH
+	 * [IPv6][Ext1][Ext2][AH][Dest][Payload]
+	 * |<-------------->| hdr_len
+	 *
+	 * To erase AH:
+	 * Keeping copy of cleared headers. After AH processing,
+	 * Moving the pointer of skb->nh.raw by using skb_pull as long as AH
+	 * header length. Then copy back the copy as long as hdr_len
+	 * If destination header following AH exists, copy it into after [Ext2].
+	 *
+	 * |<>|[IPv6][Ext1][Ext2][Dest][Payload]
+	 * There is offset of AH before IPv6 header after the process.
+	 */
+
+	struct ipv6_auth_hdr *ah;
+	struct ah_data *ahp;
+	unsigned char *tmp_hdr = NULL;
+	u16 hdr_len;
+	u16 ah_hlen;
+	int nexthdr;
+
+	if (!pskb_may_pull(skb, sizeof(struct ip_auth_hdr)))
+		goto out;
+
+	/* We are going to _remove_ AH header to keep sockets happy,
+	 * so... Later this can change. */
+	if (skb_cloned(skb) &&
+	    pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+		goto out;
+
+	hdr_len = skb->data - skb->nh.raw;
+	ah = (struct ipv6_auth_hdr*)skb->data;
+	ahp = x->data;
+	nexthdr = ah->nexthdr;
+	ah_hlen = (ah->hdrlen + 2) << 2;
+
+	if (ah_hlen != XFRM_ALIGN8(sizeof(struct ipv6_auth_hdr) + ahp->icv_full_len) &&
+	    ah_hlen != XFRM_ALIGN8(sizeof(struct ipv6_auth_hdr) + ahp->icv_trunc_len))
+		goto out;
+
+	if (!pskb_may_pull(skb, ah_hlen))
+		goto out;
+
+	/* pskb_may_pull() may have moved the packet data; reload. */
+	ah = (struct ipv6_auth_hdr*)skb->data;
+
+	tmp_hdr = kmalloc(hdr_len, GFP_ATOMIC);
+	if (!tmp_hdr)
+		goto out;
+	memcpy(tmp_hdr, skb->nh.raw, hdr_len);
+	/* tmp_hdr is allocated from here on, so leave through free_out. */
+	if (ipv6_clear_mutable_options(skb->nh.ipv6h, hdr_len))
+		goto free_out;
+	skb->nh.ipv6h->priority = 0;
+	skb->nh.ipv6h->flow_lbl[0] = 0;
+	skb->nh.ipv6h->flow_lbl[1] = 0;
+	skb->nh.ipv6h->flow_lbl[2] = 0;
+	skb->nh.ipv6h->hop_limit = 0;
+
+	{
+		u8 auth_data[MAX_AH_AUTH_LEN];
+
+		/* Keep the received ICV and zero the field for hashing. */
+		memcpy(auth_data, ah->auth_data, ahp->icv_trunc_len);
+		memset(ah->auth_data, 0, ahp->icv_trunc_len);
+		skb_push(skb, skb->data - skb->nh.raw);
+		ahp->icv(ahp, skb, ah->auth_data);
+		if (memcmp(ah->auth_data, auth_data, ahp->icv_trunc_len)) {
+			LIMIT_NETDEBUG(
+				printk(KERN_WARNING "ipsec ah authentication error\n"));
+			x->stats.integrity_failed++;
+			goto free_out;
+		}
+	}
+
+	/* Drop AH and put the saved headers in front of the payload. */
+	skb->nh.raw = skb_pull(skb, ah_hlen);
+	memcpy(skb->nh.raw, tmp_hdr, hdr_len);
+	skb->nh.ipv6h->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
+	skb_pull(skb, hdr_len);
+	skb->h.raw = skb->data;
+
+
+	kfree(tmp_hdr);
+
+	return nexthdr;
+
+free_out:
+	kfree(tmp_hdr);
+out:
+	return -EINVAL;
+}
+/*
+ * ICMPv6 error handler for AH. Only ICMPV6_DEST_UNREACH and
+ * ICMPV6_PKT_TOOBIG are considered: the SA matching the destination
+ * address and SPI is looked up, a debug message is logged and the SA
+ * reference is dropped again. No other action is taken here.
+ */
+static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ int type, int code, int offset, __u32 info)
+{
+ struct ipv6hdr *iph = (struct ipv6hdr*)skb->data;
+ struct ip_auth_hdr *ah = (struct ip_auth_hdr*)(skb->data+offset);
+ struct xfrm_state *x;
+
+ if (type != ICMPV6_DEST_UNREACH &&
+ type != ICMPV6_PKT_TOOBIG)
+ return;
+
+ x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET6);
+ if (!x)
+ return;
+
+ NETDEBUG(printk(KERN_DEBUG "pmtu discovery on SA AH/%08x/"
+ "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
+ ntohl(ah->spi), NIP6(iph->daddr)));
+
+ xfrm_state_put(x);
+}
+/*
+ * ah6_init_state - set up the AH private data of an xfrm state
+ * @x: state to initialise; must carry an auth algorithm, no encap
+ * @args: unused here
+ *
+ * Allocates the struct ah_data, the crypto transform and the work ICV
+ * buffer, checks the digest size against the xfrm algorithm table and
+ * fills in x->props.header_len and x->data.
+ *
+ * Returns 0 on success, -ENOMEM when the ah_data allocation fails and
+ * -EINVAL for every other failure.
+ */
+static int ah6_init_state(struct xfrm_state *x, void *args)
+{
+ struct ah_data *ahp = NULL;
+ struct xfrm_algo_desc *aalg_desc;
+
+ if (!x->aalg)
+ goto error;
+
+ /* null auth can use a zero length key */
+ if (x->aalg->alg_key_len > 512)
+ goto error;
+
+ if (x->encap)
+ goto error;
+
+ ahp = kmalloc(sizeof(*ahp), GFP_KERNEL);
+ if (ahp == NULL)
+ return -ENOMEM;
+
+ memset(ahp, 0, sizeof(*ahp));
+
+ ahp->key = x->aalg->alg_key;
+ ahp->key_len = (x->aalg->alg_key_len+7)/8; /* presumably bits rounded up to bytes; confirm the unit of alg_key_len */
+ ahp->tfm = crypto_alloc_tfm(x->aalg->alg_name, 0);
+ if (!ahp->tfm)
+ goto error;
+ ahp->icv = ah_hmac_digest;
+
+ /*
+ * Lookup the algorithm description maintained by xfrm_algo,
+ * verify crypto transform properties, and store information
+ * we need for AH processing. This lookup cannot fail here
+ * after a successful crypto_alloc_tfm().
+ */
+ aalg_desc = xfrm_aalg_get_byname(x->aalg->alg_name, 0);
+ BUG_ON(!aalg_desc);
+
+ if (aalg_desc->uinfo.auth.icv_fullbits/8 !=
+ crypto_tfm_alg_digestsize(ahp->tfm)) {
+ printk(KERN_INFO "AH: %s digestsize %u != %hu\n",
+ x->aalg->alg_name, crypto_tfm_alg_digestsize(ahp->tfm),
+ aalg_desc->uinfo.auth.icv_fullbits/8);
+ goto error;
+ }
+
+ ahp->icv_full_len = aalg_desc->uinfo.auth.icv_fullbits/8;
+ ahp->icv_trunc_len = aalg_desc->uinfo.auth.icv_truncbits/8;
+
+ BUG_ON(ahp->icv_trunc_len > MAX_AH_AUTH_LEN); /* ah6_input() copies icv_trunc_len bytes into a MAX_AH_AUTH_LEN buffer */
+
+ ahp->work_icv = kmalloc(ahp->icv_full_len, GFP_KERNEL);
+ if (!ahp->work_icv) /* NOTE(review): this allocation failure is reported as -EINVAL, not -ENOMEM */
+ goto error;
+
+ x->props.header_len = XFRM_ALIGN8(sizeof(struct ipv6_auth_hdr) + ahp->icv_trunc_len);
+ if (x->props.mode)
+ x->props.header_len += sizeof(struct ipv6hdr);
+ x->data = ahp;
+
+ return 0;
+
+error:
+ if (ahp) {
+ if (ahp->work_icv)
+ kfree(ahp->work_icv);
+ if (ahp->tfm)
+ crypto_free_tfm(ahp->tfm);
+ kfree(ahp);
+ }
+ return -EINVAL;
+}
+/*
+ * Destructor for the AH private data: frees the work ICV buffer, the
+ * crypto transform and the struct ah_data itself. Does nothing when
+ * x->data is NULL. x->data itself is left unchanged.
+ */
+static void ah6_destroy(struct xfrm_state *x)
+{
+ struct ah_data *ahp = x->data;
+
+ if (!ahp)
+ return;
+
+ if (ahp->work_icv) {
+ kfree(ahp->work_icv);
+ ahp->work_icv = NULL;
+ }
+ if (ahp->tfm) {
+ crypto_free_tfm(ahp->tfm);
+ ahp->tfm = NULL;
+ }
+ kfree(ahp);
+}
+/*
+ * xfrm type descriptor for AH over IPv6; registered for AF_INET6 by
+ * ah6_init() and removed by ah6_fini().
+ */
+static struct xfrm_type ah6_type =
+{
+ .description = "AH6",
+ .owner = THIS_MODULE,
+ .proto = IPPROTO_AH,
+ .init_state = ah6_init_state,
+ .destructor = ah6_destroy,
+ .input = ah6_input,
+ .output = ah6_output
+};
+/*
+ * inet6 protocol entry added for IPPROTO_AH by ah6_init(): packets go
+ * to xfrm6_rcv(), ICMPv6 errors to ah6_err().
+ */
+static struct inet6_protocol ah6_protocol = {
+ .handler = xfrm6_rcv,
+ .err_handler = ah6_err,
+ .flags = INET6_PROTO_NOPOLICY,
+};
+/*
+ * Module init: registers the AH xfrm type and then the IPPROTO_AH
+ * protocol handler. Returns 0, or -EAGAIN when either step fails; the
+ * xfrm type is unregistered again if the second step fails.
+ */
+static int __init ah6_init(void)
+{
+ if (xfrm_register_type(&ah6_type, AF_INET6) < 0) {
+ printk(KERN_INFO "ipv6 ah init: can't add xfrm type\n");
+ return -EAGAIN;
+ }
+
+ if (inet6_add_protocol(&ah6_protocol, IPPROTO_AH) < 0) {
+ printk(KERN_INFO "ipv6 ah init: can't add protocol\n");
+ xfrm_unregister_type(&ah6_type, AF_INET6);
+ return -EAGAIN;
+ }
+
+ return 0;
+}
+/*
+ * Module exit: removes the protocol handler and the xfrm type in the
+ * reverse order of ah6_init(). Failures are only logged.
+ */
+static void __exit ah6_fini(void)
+{
+ if (inet6_del_protocol(&ah6_protocol, IPPROTO_AH) < 0)
+ printk(KERN_INFO "ipv6 ah close: can't remove protocol\n");
+
+ if (xfrm_unregister_type(&ah6_type, AF_INET6) < 0)
+ printk(KERN_INFO "ipv6 ah close: can't remove xfrm type\n");
+
+}
+
+module_init(ah6_init);
+module_exit(ah6_fini);
+
+MODULE_LICENSE("GPL");
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
new file mode 100644
index 000000000000..5d22ca3cca2e
--- /dev/null
+++ b/net/ipv6/anycast.c
@@ -0,0 +1,594 @@
+/*
+ * Anycast support for IPv6
+ * Linux INET6 implementation
+ *
+ * Authors:
+ * David L Stevens (dlstevens@us.ibm.com)
+ *
+ * based heavily on net/ipv6/mcast.c
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/random.h>
+#include <linux/string.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/sched.h>
+#include <linux/net.h>
+#include <linux/in6.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <linux/route.h>
+#include <linux/init.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+
+#include <net/sock.h>
+#include <net/snmp.h>
+
+#include <net/ipv6.h>
+#include <net/protocol.h>
+#include <net/if_inet6.h>
+#include <net/ndisc.h>
+#include <net/addrconf.h>
+#include <net/ip6_route.h>
+
+#include <net/checksum.h>
+
+static int ipv6_dev_ac_dec(struct net_device *dev, struct in6_addr *addr);
+
+/* Big ac list lock for all the sockets */
+static DEFINE_RWLOCK(ipv6_sk_ac_lock);
+
+/*
+ * Return non-zero when addr lies inside the prefix of one of the
+ * addresses configured on dev, and zero otherwise (including when dev
+ * has no inet6_dev attached).
+ */
+static int
+ip6_onlink(struct in6_addr *addr, struct net_device *dev)
+{
+	struct inet6_ifaddr *cur;
+	struct inet6_dev *in6dev;
+	int match = 0;
+
+	read_lock(&addrconf_lock);
+	in6dev = __in6_dev_get(dev);
+	if (in6dev != NULL) {
+		read_lock_bh(&in6dev->lock);
+		cur = in6dev->addr_list;
+		while (cur != NULL) {
+			match = ipv6_prefix_equal(addr, &cur->addr,
+						  cur->prefix_len);
+			if (match)
+				break;
+			cur = cur->if_next;
+		}
+		read_unlock_bh(&in6dev->lock);
+	}
+	read_unlock(&addrconf_lock);
+
+	return match;
+}
+
+/*
+ * socket join an anycast group
+ *
+ * Requires CAP_NET_ADMIN.  Multicast addresses, and addresses for which
+ * ipv6_chk_addr() reports a match, are rejected with -EINVAL.
+ * With ifindex == 0 the device is taken from a route lookup on addr; if
+ * there is no route, a host fails with -EADDRNOTAVAIL while a router
+ * falls back to dev_get_by_flags().  On success the membership is
+ * linked at the head of the socket's ipv6_ac_list and 0 is returned;
+ * otherwise a negative errno is returned and nothing stays allocated.
+ */
+
+int ipv6_sock_ac_join(struct sock *sk, int ifindex, struct in6_addr *addr)
+{
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct net_device *dev = NULL;
+ struct inet6_dev *idev;
+ struct ipv6_ac_socklist *pac;
+ int ishost = !ipv6_devconf.forwarding; /* global setting; refined per device below */
+ int err = 0;
+
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+ if (ipv6_addr_is_multicast(addr))
+ return -EINVAL;
+ if (ipv6_chk_addr(addr, NULL, 0))
+ return -EINVAL;
+
+ pac = sock_kmalloc(sk, sizeof(struct ipv6_ac_socklist), GFP_KERNEL);
+ if (pac == NULL)
+ return -ENOMEM;
+ pac->acl_next = NULL;
+ ipv6_addr_copy(&pac->acl_addr, addr);
+
+ if (ifindex == 0) {
+ struct rt6_info *rt;
+
+ rt = rt6_lookup(addr, NULL, 0, 0);
+ if (rt) {
+ dev = rt->rt6i_dev;
+ dev_hold(dev); /* hold the device before the route reference is dropped */
+ dst_release(&rt->u.dst);
+ } else if (ishost) {
+ err = -EADDRNOTAVAIL;
+ goto out_free_pac;
+ } else {
+ /* router, no matching interface: just pick one */
+
+ dev = dev_get_by_flags(IFF_UP, IFF_UP|IFF_LOOPBACK);
+ }
+ } else
+ dev = dev_get_by_index(ifindex);
+
+ if (dev == NULL) {
+ err = -ENODEV;
+ goto out_free_pac;
+ }
+
+ idev = in6_dev_get(dev);
+ if (!idev) {
+ if (ifindex)
+ err = -ENODEV;
+ else
+ err = -EADDRNOTAVAIL;
+ goto out_dev_put;
+ }
+ /* reset ishost, now that we have a specific device */
+ ishost = !idev->cnf.forwarding;
+ in6_dev_put(idev); /* idev was only needed to read cnf.forwarding */
+
+ pac->acl_ifindex = dev->ifindex;
+
+ /* XXX
+ * For hosts, allow link-local or matching prefix anycasts.
+ * This obviates the need for propagating anycast routes while
+ * still allowing some non-router anycast participation.
+ */
+ if (!ip6_onlink(addr, dev)) {
+ if (ishost)
+ err = -EADDRNOTAVAIL;
+ if (err) /* err is still 0 for a router, which falls through */
+ goto out_dev_put;
+ }
+
+ err = ipv6_dev_ac_inc(dev, addr);
+ if (err)
+ goto out_dev_put;
+
+ write_lock_bh(&ipv6_sk_ac_lock);
+ pac->acl_next = np->ipv6_ac_list;
+ np->ipv6_ac_list = pac;
+ write_unlock_bh(&ipv6_sk_ac_lock);
+
+ dev_put(dev);
+
+ return 0;
+
+out_dev_put:
+ dev_put(dev);
+out_free_pac:
+ sock_kfree_s(sk, pac, sizeof(*pac));
+ return err;
+}
+
+/*
+ * socket leave an anycast group
+ *
+ * Finds the first membership on the socket matching addr (and ifindex,
+ * unless ifindex is 0), unlinks it, drops the device-level user count
+ * if the device still exists, and frees the entry.  Returns 0, or
+ * -ENOENT when the socket holds no such membership.
+ */
+int ipv6_sock_ac_drop(struct sock *sk, int ifindex, struct in6_addr *addr)
+{
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct ipv6_ac_socklist **link;
+	struct ipv6_ac_socklist *entry;
+	struct net_device *dev;
+
+	write_lock_bh(&ipv6_sk_ac_lock);
+	for (link = &np->ipv6_ac_list; (entry = *link) != NULL;
+	     link = &entry->acl_next) {
+		if (ifindex != 0 && entry->acl_ifindex != ifindex)
+			continue;
+		if (ipv6_addr_equal(&entry->acl_addr, addr))
+			break;
+	}
+	if (entry == NULL) {
+		write_unlock_bh(&ipv6_sk_ac_lock);
+		return -ENOENT;
+	}
+	/* link points at whichever pointer referenced the entry */
+	*link = entry->acl_next;
+	write_unlock_bh(&ipv6_sk_ac_lock);
+
+	dev = dev_get_by_index(entry->acl_ifindex);
+	if (dev != NULL) {
+		ipv6_dev_ac_dec(dev, &entry->acl_addr);
+		dev_put(dev);
+	}
+	sock_kfree_s(sk, entry, sizeof(*entry));
+
+	return 0;
+}
+
+/*
+ * Drop every anycast membership held by the socket.  The list is
+ * detached under ipv6_sk_ac_lock and then walked without the lock;
+ * the device lookup is reused while consecutive entries share an
+ * ifindex.
+ */
+void ipv6_sock_ac_close(struct sock *sk)
+{
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct ipv6_ac_socklist *entry, *following;
+	struct net_device *cached_dev = NULL;
+	int cached_ifindex = 0;
+
+	write_lock_bh(&ipv6_sk_ac_lock);
+	entry = np->ipv6_ac_list;
+	np->ipv6_ac_list = NULL;
+	write_unlock_bh(&ipv6_sk_ac_lock);
+
+	for (; entry != NULL; entry = following) {
+		following = entry->acl_next;
+
+		if (entry->acl_ifindex != cached_ifindex) {
+			if (cached_dev != NULL)
+				dev_put(cached_dev);
+			cached_dev = dev_get_by_index(entry->acl_ifindex);
+			cached_ifindex = entry->acl_ifindex;
+		}
+		if (cached_dev != NULL)
+			ipv6_dev_ac_dec(cached_dev, &entry->acl_addr);
+		sock_kfree_s(sk, entry, sizeof(*entry));
+	}
+
+	if (cached_dev != NULL)
+		dev_put(cached_dev);
+}
+
+#if 0
+/* This function is not used, which is funny. Apparently the author
+ * intended to use it to filter out datagrams inside udp/raw but forgot.
+ *
+ * That is OK: anycasts are not special compared to delivery to unicasts.
+ */
+
+int inet6_ac_check(struct sock *sk, struct in6_addr *addr, int ifindex)
+{
+ struct ipv6_ac_socklist *pac;
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ int found;
+
+ found = 0;
+ read_lock(&ipv6_sk_ac_lock);
+ for (pac=np->ipv6_ac_list; pac; pac=pac->acl_next) {
+ if (ifindex && pac->acl_ifindex != ifindex)
+ continue;
+ found = ipv6_addr_equal(&pac->acl_addr, addr);
+ if (found)
+ break;
+ }
+ read_unlock(&ipv6_sk_ac_lock);
+
+ return found;
+}
+
+#endif
+
+/*
+ * Drop one reference on an anycast entry.  The final put releases the
+ * inet6_dev and route references stored in the entry and frees it.
+ */
+static void aca_put(struct ifacaddr6 *ac)
+{
+	if (!atomic_dec_and_test(&ac->aca_refcnt))
+		return;
+
+	in6_dev_put(ac->aca_idev);
+	dst_release(&ac->aca_rt->u.dst);
+	kfree(ac);
+}
+
+/*
+ * device anycast group inc (add if not found)
+ *
+ * If addr is already on the device's ac_list only its user count is
+ * bumped.  Otherwise a new entry with its own route is created, linked
+ * at the head of the list, the route is inserted and the
+ * solicited-node group is joined.  Returns 0 on success, -EINVAL when
+ * dev has no inet6_dev, -ENODEV when the inet6_dev is dead, -ENOMEM on
+ * allocation failure, or the error encoded by addrconf_dst_alloc().
+ */
+int ipv6_dev_ac_inc(struct net_device *dev, struct in6_addr *addr)
+{
+ struct ifacaddr6 *aca;
+ struct inet6_dev *idev;
+ struct rt6_info *rt;
+ int err;
+
+ idev = in6_dev_get(dev); /* put at "out", or kept in aca->aca_idev on the new-entry path */
+
+ if (idev == NULL)
+ return -EINVAL;
+
+ write_lock_bh(&idev->lock);
+ if (idev->dead) {
+ err = -ENODEV;
+ goto out;
+ }
+
+ for (aca = idev->ac_list; aca; aca = aca->aca_next) {
+ if (ipv6_addr_equal(&aca->aca_addr, addr)) {
+ aca->aca_users++;
+ err = 0;
+ goto out;
+ }
+ }
+
+ /*
+ * not found: create a new one.
+ */
+
+ aca = kmalloc(sizeof(struct ifacaddr6), GFP_ATOMIC); /* idev->lock is write-held here */
+
+ if (aca == NULL) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ rt = addrconf_dst_alloc(idev, addr, 1);
+ if (IS_ERR(rt)) {
+ kfree(aca);
+ err = PTR_ERR(rt);
+ goto out;
+ }
+
+ memset(aca, 0, sizeof(struct ifacaddr6));
+
+ ipv6_addr_copy(&aca->aca_addr, addr);
+ aca->aca_idev = idev;
+ aca->aca_rt = rt;
+ aca->aca_users = 1;
+ /* aca_tstamp should be updated upon changes */
+ aca->aca_cstamp = aca->aca_tstamp = jiffies;
+ atomic_set(&aca->aca_refcnt, 2); /* one of the two is dropped by aca_put() below */
+ spin_lock_init(&aca->aca_lock);
+
+ aca->aca_next = idev->ac_list;
+ idev->ac_list = aca;
+ write_unlock_bh(&idev->lock);
+
+ dst_hold(&rt->u.dst);
+ if (ip6_ins_rt(rt, NULL, NULL)) /* non-zero return: give back the hold just taken */
+ dst_release(&rt->u.dst);
+
+ addrconf_join_solict(dev, &aca->aca_addr);
+
+ aca_put(aca);
+ return 0;
+out:
+ write_unlock_bh(&idev->lock);
+ in6_dev_put(idev);
+ return err;
+}
+
+/*
+ * device anycast group decrement
+ *
+ * Decrements the user count of addr on idev.  When it reaches zero the
+ * entry is unlinked, the solicited-node group is left, the entry's
+ * route is deleted and the list's reference on the entry is dropped.
+ * Returns 0, or -ENOENT when addr is not on the device's list.
+ */
+int __ipv6_dev_ac_dec(struct inet6_dev *idev, struct in6_addr *addr)
+{
+	struct ifacaddr6 *victim, *before = NULL;
+	int ret = 0;
+
+	write_lock_bh(&idev->lock);
+	victim = idev->ac_list;
+	while (victim != NULL && !ipv6_addr_equal(&victim->aca_addr, addr)) {
+		before = victim;
+		victim = victim->aca_next;
+	}
+	if (victim == NULL) {
+		ret = -ENOENT;
+		goto out_unlock;
+	}
+	if (--victim->aca_users > 0)
+		goto out_unlock;
+
+	if (before != NULL)
+		before->aca_next = victim->aca_next;
+	else
+		idev->ac_list = victim->aca_next;
+	write_unlock_bh(&idev->lock);
+
+	addrconf_leave_solict(idev, &victim->aca_addr);
+
+	dst_hold(&victim->aca_rt->u.dst);
+	if (ip6_del_rt(victim->aca_rt, NULL, NULL))
+		dst_free(&victim->aca_rt->u.dst);
+	else
+		dst_release(&victim->aca_rt->u.dst);
+
+	aca_put(victim);
+	return 0;
+
+out_unlock:
+	write_unlock_bh(&idev->lock);
+	return ret;
+}
+
+/*
+ * net_device flavour of __ipv6_dev_ac_dec(): looks up the device's
+ * inet6_dev, holding a reference for the duration of the call.
+ * Returns -ENODEV when the device has no inet6_dev.
+ */
+static int ipv6_dev_ac_dec(struct net_device *dev, struct in6_addr *addr)
+{
+	struct inet6_dev *in6dev;
+	int rc;
+
+	in6dev = in6_dev_get(dev);
+	if (!in6dev)
+		return -ENODEV;
+
+	rc = __ipv6_dev_ac_dec(in6dev, addr);
+	in6_dev_put(in6dev);
+
+	return rc;
+}
+
+/*
+ * check if the interface has this anycast address
+ *
+ * Returns 1 when addr is on the device's anycast list, 0 otherwise
+ * (including when the device has no inet6_dev).
+ */
+static int ipv6_chk_acast_dev(struct net_device *dev, struct in6_addr *addr)
+{
+	struct inet6_dev *in6dev = in6_dev_get(dev);
+	struct ifacaddr6 *cur;
+	int found = 0;
+
+	if (in6dev == NULL)
+		return 0;
+
+	read_lock_bh(&in6dev->lock);
+	for (cur = in6dev->ac_list; cur != NULL; cur = cur->aca_next) {
+		if (ipv6_addr_equal(&cur->aca_addr, addr)) {
+			found = 1;
+			break;
+		}
+	}
+	read_unlock_bh(&in6dev->lock);
+	in6_dev_put(in6dev);
+
+	return found;
+}
+
+/*
+ * check if given interface (or any, if dev==0) has this anycast address
+ *
+ * With dev == NULL every device on dev_base is checked under
+ * dev_base_lock and the scan stops at the first match.
+ */
+int ipv6_chk_acast_addr(struct net_device *dev, struct in6_addr *addr)
+{
+	struct net_device *cur;
+	int found = 0;
+
+	if (dev != NULL)
+		return ipv6_chk_acast_dev(dev, addr);
+
+	read_lock(&dev_base_lock);
+	for (cur = dev_base; cur != NULL && !found; cur = cur->next)
+		found = ipv6_chk_acast_dev(cur, addr) != 0;
+	read_unlock(&dev_base_lock);
+
+	return found;
+}
+
+
+#ifdef CONFIG_PROC_FS
+struct ac6_iter_state { /* iterator state kept in seq->private by ac6_seq_open() */
+ struct net_device *dev; /* device the walk is currently positioned on */
+ struct inet6_dev *idev; /* inet6_dev whose lock and reference the walk holds, or NULL */
+};
+
+#define ac6_seq_private(seq) ((struct ac6_iter_state *)(seq)->private)
+
+/*
+ * Position the iterator on the first anycast entry of the first device
+ * on dev_base that has one.
+ *
+ * On success the owning inet6_dev is stored in state->idev with its
+ * lock read-held and a reference taken; ac6_get_next() and
+ * ac6_seq_stop() release both.  Returns NULL, with state->idev == NULL,
+ * when no device has an anycast entry.
+ */
+static inline struct ifacaddr6 *ac6_get_first(struct seq_file *seq)
+{
+	struct ifacaddr6 *im = NULL;
+	struct ac6_iter_state *state = ac6_seq_private(seq);
+
+	for (state->dev = dev_base, state->idev = NULL;
+	     state->dev;
+	     state->dev = state->dev->next) {
+		struct inet6_dev *idev;
+
+		idev = in6_dev_get(state->dev);
+		if (!idev)
+			continue;
+		read_lock_bh(&idev->lock);
+		im = idev->ac_list;
+		if (im) {
+			/* hand lock and reference over to the iterator */
+			state->idev = idev;
+			break;
+		}
+		read_unlock_bh(&idev->lock);
+		/*
+		 * Nothing on this device: drop the reference taken by
+		 * in6_dev_get() above.  state->idev is not set on this
+		 * path, so no later put would balance it.
+		 */
+		in6_dev_put(idev);
+	}
+	return im;
+}
+/*
+ * Advance the iterator past im.  When the current device's list is
+ * exhausted, the lock and reference held through state->idev are
+ * released and the walk moves to the following devices until one with
+ * anycast entries is found.  Returns the next entry, or NULL (with
+ * state->idev == NULL) at the end of dev_base.
+ */
+static struct ifacaddr6 *ac6_get_next(struct seq_file *seq, struct ifacaddr6 *im)
+{
+ struct ac6_iter_state *state = ac6_seq_private(seq);
+
+ im = im->aca_next;
+ while (!im) {
+ if (likely(state->idev != NULL)) { /* NULL when the previous device had no inet6_dev */
+ read_unlock_bh(&state->idev->lock);
+ in6_dev_put(state->idev);
+ }
+ state->dev = state->dev->next;
+ if (!state->dev) {
+ state->idev = NULL; /* tells ac6_seq_stop() nothing is held */
+ break;
+ }
+ state->idev = in6_dev_get(state->dev);
+ if (!state->idev)
+ continue;
+ read_lock_bh(&state->idev->lock);
+ im = state->idev->ac_list; /* may be NULL: the loop then releases and moves on */
+ }
+ return im;
+}
+
+/*
+ * Return the entry at zero-based position pos of the walk, or NULL when
+ * the walk has fewer entries than that.
+ */
+static struct ifacaddr6 *ac6_get_idx(struct seq_file *seq, loff_t pos)
+{
+	struct ifacaddr6 *cur = ac6_get_first(seq);
+
+	if (cur == NULL)
+		return NULL;
+
+	while (pos != 0) {
+		cur = ac6_get_next(seq, cur);
+		if (cur == NULL)
+			break;
+		--pos;
+	}
+
+	return pos ? NULL : cur;
+}
+
+/*
+ * seq_file start: take dev_base_lock (released in ac6_seq_stop()) and
+ * seek to entry *pos.
+ */
+static void *ac6_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	struct ifacaddr6 *entry;
+
+	read_lock(&dev_base_lock);
+	entry = ac6_get_idx(seq, *pos);
+
+	return entry;
+}
+
+/* seq_file next: step to the entry after v and bump the position */
+static void *ac6_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct ifacaddr6 *following = ac6_get_next(seq, v);
+
+	*pos += 1;
+	return following;
+}
+
+/*
+ * seq_file stop: release whatever inet6_dev the walk still holds
+ * (lock and reference), then dev_base_lock taken in ac6_seq_start().
+ */
+static void ac6_seq_stop(struct seq_file *seq, void *v)
+{
+	struct ac6_iter_state *state = ac6_seq_private(seq);
+	struct inet6_dev *held = state->idev;
+
+	if (likely(held != NULL)) {
+		read_unlock_bh(&held->lock);
+		in6_dev_put(held);
+	}
+
+	read_unlock(&dev_base_lock);
+}
+
+/*
+ * seq_file show: print one line per anycast entry -- interface index,
+ * interface name, the address as hex groups and the user count.
+ */
+static int ac6_seq_show(struct seq_file *seq, void *v)
+{
+	struct ac6_iter_state *state = ac6_seq_private(seq);
+	struct net_device *dev = state->dev;
+	struct ifacaddr6 *entry = v;
+
+	seq_printf(seq,
+		   "%-4d %-15s "
+		   "%04x%04x%04x%04x%04x%04x%04x%04x "
+		   "%5d\n",
+		   dev->ifindex, dev->name,
+		   NIP6(entry->aca_addr),
+		   entry->aca_users);
+
+	return 0;
+}
+
+/* seq_file iterator callbacks handed to seq_open() by ac6_seq_open() */
+static struct seq_operations ac6_seq_ops = {
+	.start	= ac6_seq_start,
+	.stop	= ac6_seq_stop,
+	.next	= ac6_seq_next,
+	.show	= ac6_seq_show,
+};
+
+/*
+ * open(): allocate a zeroed ac6_iter_state and attach it to the
+ * seq_file as private data.  Returns -ENOMEM when the allocation
+ * fails, or the seq_open() result otherwise.
+ */
+static int ac6_seq_open(struct inode *inode, struct file *file)
+{
+	struct ac6_iter_state *state;
+	struct seq_file *seq;
+	int rc;
+
+	state = kmalloc(sizeof(*state), GFP_KERNEL);
+	if (state == NULL)
+		return -ENOMEM;
+
+	rc = seq_open(file, &ac6_seq_ops);
+	if (rc) {
+		kfree(state);
+		return rc;
+	}
+
+	memset(state, 0, sizeof(*state));
+	seq = file->private_data;
+	seq->private = state;
+
+	return rc;
+}
+
+/* file operations of the "anycast6" proc_net entry */
+static struct file_operations ac6_seq_fops = {
+	.owner		= THIS_MODULE,
+	.open		= ac6_seq_open,
+	.release	= seq_release_private,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+};
+
+/*
+ * Create the read-only "anycast6" proc_net entry.  Returns 0 on
+ * success and -ENOMEM when the entry could not be created.
+ */
+int __init ac6_proc_init(void)
+{
+	if (proc_net_fops_create("anycast6", S_IRUGO, &ac6_seq_fops))
+		return 0;
+
+	return -ENOMEM;
+}
+/* Remove the "anycast6" proc_net entry created by ac6_proc_init(). */
+void ac6_proc_exit(void)
+{
+ proc_net_remove("anycast6");
+}
+#endif
+
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
new file mode 100644
index 000000000000..65b9375df57d
--- /dev/null
+++ b/net/ipv6/datagram.c
@@ -0,0 +1,600 @@
+/*
+ * common UDP/RAW code
+ * Linux INET6 implementation
+ *
+ * Authors:
+ * Pedro Roque <roque@di.fc.ul.pt>
+ *
+ * $Id: datagram.c,v 1.24 2002/02/01 22:01:04 davem Exp $
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/in6.h>
+#include <linux/ipv6.h>
+#include <linux/route.h>
+
+#include <net/ipv6.h>
+#include <net/ndisc.h>
+#include <net/addrconf.h>
+#include <net/transp_v6.h>
+#include <net/ip6_route.h>
+
+#include <linux/errqueue.h>
+#include <asm/uaccess.h>
+/*
+ * Connect a datagram socket to the peer described by uaddr.
+ *
+ * AF_INET sockaddrs and v4-mapped IPv6 addresses are handed to
+ * ip4_datagram_connect(); on success the IPv4 addresses are mirrored
+ * into the IPv6 socket fields in v4-mapped form.  For other IPv6 peers
+ * the destination, flow label and port are stored on the socket, a
+ * route is looked up with ip6_dst_lookup() and xfrm_lookup() and cached
+ * with ip6_dst_store(), and sk_state becomes TCP_ESTABLISHED.
+ *
+ * Returns err: 0 when every step succeeded, a negative errno on the
+ * failure paths below.
+ *
+ * NOTE(review): usin->sin6_family is read before addr_len is checked
+ * in this function -- confirm the caller guarantees a minimum length.
+ */
+int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
+{
+ struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
+ struct inet_sock *inet = inet_sk(sk);
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct in6_addr *daddr, *final_p = NULL, final;
+ struct dst_entry *dst;
+ struct flowi fl;
+ struct ip6_flowlabel *flowlabel = NULL;
+ int addr_type;
+ int err;
+
+ if (usin->sin6_family == AF_INET) {
+ if (__ipv6_only_sock(sk))
+ return -EAFNOSUPPORT;
+ err = ip4_datagram_connect(sk, uaddr, addr_len);
+ goto ipv4_connected; /* label sits inside the v4-mapped block below */
+ }
+
+ if (addr_len < SIN6_LEN_RFC2133)
+ return -EINVAL;
+
+ if (usin->sin6_family != AF_INET6)
+ return -EAFNOSUPPORT;
+
+ memset(&fl, 0, sizeof(fl));
+ if (np->sndflow) {
+ fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
+ if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
+ flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
+ if (flowlabel == NULL)
+ return -EINVAL;
+ ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst); /* the label's destination replaces the caller's */
+ }
+ }
+
+ addr_type = ipv6_addr_type(&usin->sin6_addr);
+
+ if (addr_type == IPV6_ADDR_ANY) {
+ /*
+ * connect to self
+ */
+ usin->sin6_addr.s6_addr[15] = 0x01; /* the all-zero address becomes ::1 */
+ }
+
+ daddr = &usin->sin6_addr;
+
+ if (addr_type == IPV6_ADDR_MAPPED) {
+ struct sockaddr_in sin;
+
+ if (__ipv6_only_sock(sk)) {
+ err = -ENETUNREACH;
+ goto out;
+ }
+ sin.sin_family = AF_INET;
+ sin.sin_addr.s_addr = daddr->s6_addr32[3];
+ sin.sin_port = usin->sin6_port;
+
+ err = ip4_datagram_connect(sk,
+ (struct sockaddr*) &sin,
+ sizeof(sin));
+
+ipv4_connected:
+ if (err)
+ goto out;
+
+ ipv6_addr_set(&np->daddr, 0, 0, htonl(0x0000ffff), inet->daddr);
+
+ if (ipv6_addr_any(&np->saddr)) {
+ ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000ffff),
+ inet->saddr);
+ }
+
+ if (ipv6_addr_any(&np->rcv_saddr)) {
+ ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000ffff),
+ inet->rcv_saddr);
+ }
+ goto out;
+ }
+
+ if (addr_type&IPV6_ADDR_LINKLOCAL) {
+ if (addr_len >= sizeof(struct sockaddr_in6) &&
+ usin->sin6_scope_id) {
+ if (sk->sk_bound_dev_if &&
+ sk->sk_bound_dev_if != usin->sin6_scope_id) {
+ err = -EINVAL;
+ goto out;
+ }
+ sk->sk_bound_dev_if = usin->sin6_scope_id;
+ if (!sk->sk_bound_dev_if && /* NOTE(review): cannot be true here, scope_id is non-zero in this branch */
+ (addr_type & IPV6_ADDR_MULTICAST))
+ fl.oif = np->mcast_oif;
+ }
+
+ /* Connect to link-local address requires an interface */
+ if (!sk->sk_bound_dev_if) {
+ err = -EINVAL;
+ goto out;
+ }
+ }
+
+ ipv6_addr_copy(&np->daddr, daddr);
+ np->flow_label = fl.fl6_flowlabel;
+
+ inet->dport = usin->sin6_port;
+
+ /*
+ * Check for a route to destination and obtain the
+ * destination cache for it.
+ */
+
+ fl.proto = sk->sk_protocol;
+ ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
+ ipv6_addr_copy(&fl.fl6_src, &np->saddr);
+ fl.oif = sk->sk_bound_dev_if;
+ fl.fl_ip_dport = inet->dport;
+ fl.fl_ip_sport = inet->sport;
+
+ if (!fl.oif && (addr_type&IPV6_ADDR_MULTICAST))
+ fl.oif = np->mcast_oif;
+
+ if (flowlabel) {
+ if (flowlabel->opt && flowlabel->opt->srcrt) {
+ struct rt0_hdr *rt0 = (struct rt0_hdr *) flowlabel->opt->srcrt;
+ ipv6_addr_copy(&final, &fl.fl6_dst); /* remember the real destination */
+ ipv6_addr_copy(&fl.fl6_dst, rt0->addr); /* look up the route via rt0->addr instead */
+ final_p = &final;
+ }
+ } else if (np->opt && np->opt->srcrt) {
+ struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
+ ipv6_addr_copy(&final, &fl.fl6_dst);
+ ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
+ final_p = &final;
+ }
+
+ err = ip6_dst_lookup(sk, &dst, &fl);
+ if (err)
+ goto out;
+ if (final_p)
+ ipv6_addr_copy(&fl.fl6_dst, final_p); /* restore the destination saved above */
+
+ if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
+ dst_release(dst);
+ goto out;
+ }
+
+ /* source address lookup done in ip6_dst_lookup */
+
+ if (ipv6_addr_any(&np->saddr))
+ ipv6_addr_copy(&np->saddr, &fl.fl6_src);
+
+ if (ipv6_addr_any(&np->rcv_saddr)) {
+ ipv6_addr_copy(&np->rcv_saddr, &fl.fl6_src);
+ inet->rcv_saddr = LOOPBACK4_IPV6;
+ }
+
+ ip6_dst_store(sk, dst,
+ ipv6_addr_equal(&fl.fl6_dst, &np->daddr) ?
+ &np->daddr : NULL);
+
+ sk->sk_state = TCP_ESTABLISHED;
+out:
+ fl6_sock_release(flowlabel); /* flowlabel is still NULL unless the lookup above succeeded */
+ return err;
+}
+
+/*
+ * Queue an ICMPv6-originated error on the socket's error queue.
+ *
+ * Does nothing unless the socket has recverr set.  The skb is cloned,
+ * the extended error is filled in from the ICMPv6 header found at
+ * skb->h.raw, and the clone is pulled up to payload before queueing.
+ * addr_offset records where the daddr of the IPv6 header following the
+ * ICMPv6 header lies, relative to nh.raw.
+ */
+void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
+		     u16 port, u32 info, u8 *payload)
+{
+	struct icmp6hdr *icmph = (struct icmp6hdr *)skb->h.raw;
+	struct ipv6hdr *inner = (struct ipv6hdr *)(icmph + 1);
+	struct sock_exterr_skb *serr;
+	struct sk_buff *clone;
+
+	if (!inet6_sk(sk)->recverr)
+		return;
+
+	clone = skb_clone(skb, GFP_ATOMIC);
+	if (clone == NULL)
+		return;
+
+	serr = SKB_EXT_ERR(clone);
+	serr->ee.ee_errno	= err;
+	serr->ee.ee_origin	= SO_EE_ORIGIN_ICMP6;
+	serr->ee.ee_type	= icmph->icmp6_type;
+	serr->ee.ee_code	= icmph->icmp6_code;
+	serr->ee.ee_pad		= 0;
+	serr->ee.ee_info	= info;
+	serr->ee.ee_data	= 0;
+	serr->addr_offset	= (u8 *)&inner->daddr - clone->nh.raw;
+	serr->port		= port;
+
+	clone->h.raw = payload;
+	__skb_pull(clone, payload - clone->data);
+
+	if (sock_queue_err_skb(sk, clone))
+		kfree_skb(clone);
+}
+
+/*
+ * Queue a locally generated error on the socket's error queue.
+ *
+ * Does nothing unless the socket has recverr set.  A fresh skb holding
+ * only an IPv6 header is built; just its daddr is filled in (from the
+ * flow) so that addr_offset has an address to point at, and the skb is
+ * then pulled to zero length before queueing.
+ */
+void ipv6_local_error(struct sock *sk, int err, struct flowi *fl, u32 info)
+{
+	struct sock_exterr_skb *serr;
+	struct sk_buff *skb;
+	struct ipv6hdr *iph;
+
+	if (!inet6_sk(sk)->recverr)
+		return;
+
+	skb = alloc_skb(sizeof(*iph), GFP_ATOMIC);
+	if (skb == NULL)
+		return;
+
+	iph = (struct ipv6hdr *)skb_put(skb, sizeof(*iph));
+	skb->nh.ipv6h = iph;
+	ipv6_addr_copy(&iph->daddr, &fl->fl6_dst);
+
+	serr = SKB_EXT_ERR(skb);
+	serr->ee.ee_errno	= err;
+	serr->ee.ee_origin	= SO_EE_ORIGIN_LOCAL;
+	serr->ee.ee_type	= 0;
+	serr->ee.ee_code	= 0;
+	serr->ee.ee_pad		= 0;
+	serr->ee.ee_info	= info;
+	serr->ee.ee_data	= 0;
+	serr->addr_offset	= (u8 *)&iph->daddr - skb->nh.raw;
+	serr->port		= fl->fl_ip_dport;
+
+	skb->h.raw = skb->tail;
+	__skb_pull(skb, skb->tail - skb->data);
+
+	if (sock_queue_err_skb(sk, skb))
+		kfree_skb(skb);
+}
+
+/*
+ * Handle MSG_ERRQUEUE
+ *
+ * Dequeues one skb from sk->sk_error_queue, copies up to len bytes of
+ * it into msg (setting MSG_TRUNC when it does not fit), fills
+ * msg->msg_name, when supplied, with the address recorded at
+ * serr->addr_offset, and attaches an IPV6_RECVERR control message
+ * carrying the extended error plus the offender address.  sk_err is
+ * then regenerated from the next queued error, if any.
+ *
+ * Returns the number of bytes copied, -EAGAIN when the error queue is
+ * empty, or the error returned by skb_copy_datagram_iovec().
+ */
+int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)
+{
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct sock_exterr_skb *serr;
+	struct sk_buff *skb, *skb2;
+	struct sockaddr_in6 *sin;
+	struct {
+		struct sock_extended_err ee;
+		struct sockaddr_in6 offender;
+	} errhdr;
+	int err;
+	int copied;
+
+	err = -EAGAIN;
+	skb = skb_dequeue(&sk->sk_error_queue);
+	if (skb == NULL)
+		goto out;
+
+	copied = skb->len;
+	if (copied > len) {
+		msg->msg_flags |= MSG_TRUNC;
+		copied = len;
+	}
+	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+	if (err)
+		goto out_free_skb;
+
+	sock_recv_timestamp(msg, sk, skb);
+
+	serr = SKB_EXT_ERR(skb);
+
+	sin = (struct sockaddr_in6 *)msg->msg_name;
+	if (sin) {
+		sin->sin6_family = AF_INET6;
+		sin->sin6_flowinfo = 0;
+		sin->sin6_port = serr->port;
+		sin->sin6_scope_id = 0;
+		if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP6) {
+			ipv6_addr_copy(&sin->sin6_addr,
+			  (struct in6_addr *)(skb->nh.raw + serr->addr_offset));
+			/* 24 bytes before daddr is the first word of that IPv6 header */
+			if (np->sndflow)
+				sin->sin6_flowinfo = *(u32*)(skb->nh.raw + serr->addr_offset - 24) & IPV6_FLOWINFO_MASK;
+			if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL)
+				sin->sin6_scope_id = IP6CB(skb)->iif;
+		} else {
+			/* report the 32-bit address in v4-mapped form */
+			ipv6_addr_set(&sin->sin6_addr, 0, 0,
+				      htonl(0xffff),
+				      *(u32*)(skb->nh.raw + serr->addr_offset));
+		}
+	}
+
+	/*
+	 * errhdr is copied to user space in full by put_cmsg() below, but
+	 * not every field of the offender is assigned on every path:
+	 * sin6_port never is, and a SO_EE_ORIGIN_LOCAL error only sets the
+	 * family.  Clear the whole structure first so that no stale kernel
+	 * stack bytes are exposed.
+	 */
+	memset(&errhdr, 0, sizeof(errhdr));
+
+	memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err));
+	sin = &errhdr.offender;
+	sin->sin6_family = AF_UNSPEC;
+	if (serr->ee.ee_origin != SO_EE_ORIGIN_LOCAL) {
+		sin->sin6_family = AF_INET6;
+		sin->sin6_flowinfo = 0;
+		sin->sin6_scope_id = 0;
+		if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP6) {
+			ipv6_addr_copy(&sin->sin6_addr, &skb->nh.ipv6h->saddr);
+			if (np->rxopt.all)
+				datagram_recv_ctl(sk, msg, skb);
+			if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL)
+				sin->sin6_scope_id = IP6CB(skb)->iif;
+		} else {
+			struct inet_sock *inet = inet_sk(sk);
+
+			ipv6_addr_set(&sin->sin6_addr, 0, 0,
+				      htonl(0xffff),
+				      skb->nh.iph->saddr);
+			if (inet->cmsg_flags)
+				ip_cmsg_recv(msg, skb);
+		}
+	}
+
+	put_cmsg(msg, SOL_IPV6, IPV6_RECVERR, sizeof(errhdr), &errhdr);
+
+	/* Now we could try to dump offended packet options */
+
+	msg->msg_flags |= MSG_ERRQUEUE;
+	err = copied;
+
+	/* Reset and regenerate socket error */
+	spin_lock_irq(&sk->sk_error_queue.lock);
+	sk->sk_err = 0;
+	if ((skb2 = skb_peek(&sk->sk_error_queue)) != NULL) {
+		sk->sk_err = SKB_EXT_ERR(skb2)->ee.ee_errno;
+		spin_unlock_irq(&sk->sk_error_queue.lock);
+		sk->sk_error_report(sk);
+	} else {
+		spin_unlock_irq(&sk->sk_error_queue.lock);
+	}
+
+out_free_skb:
+	kfree_skb(skb);
+out:
+	return err;
+}
+
+
+
+/*
+ * Emit the SOL_IPV6 ancillary data the socket asked for in np->rxopt:
+ * packet info, hop limit, flow info, and the hop-by-hop, destination
+ * and routing extension headers recorded in the skb's control block.
+ * Always returns 0.
+ */
+int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
+{
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct inet6_skb_parm *cb = IP6CB(skb);
+
+	if (np->rxopt.bits.rxinfo) {
+		struct in6_pktinfo pktinfo;
+
+		pktinfo.ipi6_ifindex = cb->iif;
+		ipv6_addr_copy(&pktinfo.ipi6_addr, &skb->nh.ipv6h->daddr);
+		put_cmsg(msg, SOL_IPV6, IPV6_PKTINFO, sizeof(pktinfo), &pktinfo);
+	}
+
+	if (np->rxopt.bits.rxhlim) {
+		int hlim = skb->nh.ipv6h->hop_limit;
+
+		put_cmsg(msg, SOL_IPV6, IPV6_HOPLIMIT, sizeof(hlim), &hlim);
+	}
+
+	if (np->rxopt.bits.rxflow) {
+		u32 flowinfo = *(u32 *)skb->nh.raw & IPV6_FLOWINFO_MASK;
+
+		/* only reported when non-zero */
+		if (flowinfo)
+			put_cmsg(msg, SOL_IPV6, IPV6_FLOWINFO, sizeof(flowinfo), &flowinfo);
+	}
+
+	/* extension headers: byte 1 holds the length in 8-octet units, minus one */
+	if (np->rxopt.bits.hopopts && cb->hop) {
+		u8 *hdr = skb->nh.raw + cb->hop;
+
+		put_cmsg(msg, SOL_IPV6, IPV6_HOPOPTS, (hdr[1]+1)<<3, hdr);
+	}
+
+	if (np->rxopt.bits.dstopts && cb->dst0) {
+		u8 *hdr = skb->nh.raw + cb->dst0;
+
+		put_cmsg(msg, SOL_IPV6, IPV6_DSTOPTS, (hdr[1]+1)<<3, hdr);
+	}
+
+	if (np->rxopt.bits.srcrt && cb->srcrt) {
+		struct ipv6_rt_hdr *rthdr;
+
+		rthdr = (struct ipv6_rt_hdr *)(skb->nh.raw + cb->srcrt);
+		put_cmsg(msg, SOL_IPV6, IPV6_RTHDR, (rthdr->hdrlen+1) << 3, rthdr);
+	}
+
+	if (np->rxopt.bits.dstopts && cb->dst1) {
+		u8 *hdr = skb->nh.raw + cb->dst1;
+
+		put_cmsg(msg, SOL_IPV6, IPV6_DSTOPTS, (hdr[1]+1)<<3, hdr);
+	}
+
+	return 0;
+}
+/*
+ * Parse the SOL_IPV6 control messages of msg and apply them to the
+ * outgoing flow (fl), the transmit options (opt) and *hlimit.
+ * Control messages of other levels are skipped.  The header pointers
+ * stored in opt refer directly into the cmsg buffer; nothing is
+ * copied.  Returns 0 or a negative errno.
+ */
+int datagram_send_ctl(struct msghdr *msg, struct flowi *fl,
+ struct ipv6_txoptions *opt,
+ int *hlimit)
+{
+ struct in6_pktinfo *src_info;
+ struct cmsghdr *cmsg;
+ struct ipv6_rt_hdr *rthdr;
+ struct ipv6_opt_hdr *hdr;
+ int len;
+ int err = 0;
+
+ for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) {
+ int addr_type;
+ struct net_device *dev = NULL;
+
+ if (!CMSG_OK(msg, cmsg)) {
+ err = -EINVAL;
+ goto exit_f;
+ }
+
+ if (cmsg->cmsg_level != SOL_IPV6)
+ continue;
+
+ switch (cmsg->cmsg_type) {
+ case IPV6_PKTINFO:
+ if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct in6_pktinfo))) {
+ err = -EINVAL;
+ goto exit_f;
+ }
+
+ src_info = (struct in6_pktinfo *)CMSG_DATA(cmsg);
+
+ if (src_info->ipi6_ifindex) {
+ if (fl->oif && src_info->ipi6_ifindex != fl->oif) /* must agree with an already chosen interface */
+ return -EINVAL;
+ fl->oif = src_info->ipi6_ifindex;
+ }
+
+ addr_type = ipv6_addr_type(&src_info->ipi6_addr);
+
+ if (addr_type == IPV6_ADDR_ANY) /* no source address requested */
+ break;
+
+ if (addr_type & IPV6_ADDR_LINKLOCAL) {
+ if (!src_info->ipi6_ifindex)
+ return -EINVAL;
+ else {
+ dev = dev_get_by_index(src_info->ipi6_ifindex);
+ if (!dev)
+ return -ENODEV;
+ }
+ }
+ if (!ipv6_chk_addr(&src_info->ipi6_addr, dev, 0)) { /* dev is NULL unless the address is link-local */
+ if (dev)
+ dev_put(dev);
+ err = -EINVAL;
+ goto exit_f;
+ }
+ if (dev)
+ dev_put(dev);
+
+ ipv6_addr_copy(&fl->fl6_src, &src_info->ipi6_addr);
+ break;
+
+ case IPV6_FLOWINFO:
+ if (cmsg->cmsg_len < CMSG_LEN(4)) {
+ err = -EINVAL;
+ goto exit_f;
+ }
+
+ if (fl->fl6_flowlabel&IPV6_FLOWINFO_MASK) {
+ if ((fl->fl6_flowlabel^*(u32 *)CMSG_DATA(cmsg))&~IPV6_FLOWINFO_MASK) {
+ err = -EINVAL;
+ goto exit_f;
+ }
+ }
+ fl->fl6_flowlabel = IPV6_FLOWINFO_MASK & *(u32 *)CMSG_DATA(cmsg);
+ break;
+
+ case IPV6_HOPOPTS:
+ if (opt->hopopt || cmsg->cmsg_len < CMSG_LEN(sizeof(struct ipv6_opt_hdr))) {
+ err = -EINVAL;
+ goto exit_f;
+ }
+
+ hdr = (struct ipv6_opt_hdr *)CMSG_DATA(cmsg);
+ len = ((hdr->hdrlen + 1) << 3); /* hdrlen counts 8-octet units, minus one */
+ if (cmsg->cmsg_len < CMSG_LEN(len)) {
+ err = -EINVAL;
+ goto exit_f;
+ }
+ if (!capable(CAP_NET_RAW)) {
+ err = -EPERM;
+ goto exit_f;
+ }
+ opt->opt_nflen += len;
+ opt->hopopt = hdr;
+ break;
+
+ case IPV6_DSTOPTS:
+ if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct ipv6_opt_hdr))) {
+ err = -EINVAL;
+ goto exit_f;
+ }
+
+ hdr = (struct ipv6_opt_hdr *)CMSG_DATA(cmsg);
+ len = ((hdr->hdrlen + 1) << 3);
+ if (cmsg->cmsg_len < CMSG_LEN(len)) {
+ err = -EINVAL;
+ goto exit_f;
+ }
+ if (!capable(CAP_NET_RAW)) {
+ err = -EPERM;
+ goto exit_f;
+ }
+ if (opt->dst1opt) {
+ err = -EINVAL;
+ goto exit_f;
+ }
+ opt->opt_flen += len;
+ opt->dst1opt = hdr;
+ break;
+
+ case IPV6_RTHDR:
+ if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct ipv6_rt_hdr))) {
+ err = -EINVAL;
+ goto exit_f;
+ }
+
+ rthdr = (struct ipv6_rt_hdr *)CMSG_DATA(cmsg);
+
+ /*
+ * TYPE 0
+ */
+ if (rthdr->type) {
+ err = -EINVAL;
+ goto exit_f;
+ }
+
+ len = ((rthdr->hdrlen + 1) << 3);
+
+ if (cmsg->cmsg_len < CMSG_LEN(len)) {
+ err = -EINVAL;
+ goto exit_f;
+ }
+
+ /* segments left must also match */
+ if ((rthdr->hdrlen >> 1) != rthdr->segments_left) {
+ err = -EINVAL;
+ goto exit_f;
+ }
+
+ opt->opt_nflen += len;
+ opt->srcrt = rthdr;
+
+ if (opt->dst1opt) { /* a destination header seen earlier is moved to dst0opt */
+ int dsthdrlen = ((opt->dst1opt->hdrlen+1)<<3);
+
+ opt->opt_nflen += dsthdrlen;
+ opt->dst0opt = opt->dst1opt;
+ opt->dst1opt = NULL;
+ opt->opt_flen -= dsthdrlen;
+ }
+
+ break;
+
+ case IPV6_HOPLIMIT:
+ if (cmsg->cmsg_len != CMSG_LEN(sizeof(int))) {
+ err = -EINVAL;
+ goto exit_f;
+ }
+
+ *hlimit = *(int *)CMSG_DATA(cmsg);
+ break;
+
+ default:
+ LIMIT_NETDEBUG(
+ printk(KERN_DEBUG "invalid cmsg type: %d\n", cmsg->cmsg_type));
+ err = -EINVAL;
+ break; /* NOTE(review): unlike the other error paths, parsing continues with err already set */
+ };
+ }
+
+exit_f:
+ return err;
+}
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
new file mode 100644
index 000000000000..be7095d6babe
--- /dev/null
+++ b/net/ipv6/esp6.c
@@ -0,0 +1,424 @@
+/*
+ * Copyright (C)2002 USAGI/WIDE Project
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Authors
+ *
+ * Mitsuru KANDA @USAGI : IPv6 Support
+ * Kazunori MIYAZAWA @USAGI :
+ * Kunihiro Ishiguro <kunihiro@ipinfusion.com>
+ *
+ * This file is derived from net/ipv4/esp.c
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <net/ip.h>
+#include <net/xfrm.h>
+#include <net/esp.h>
+#include <asm/scatterlist.h>
+#include <linux/crypto.h>
+#include <linux/pfkeyv2.h>
+#include <linux/random.h>
+#include <net/icmp.h>
+#include <net/ipv6.h>
+#include <linux/icmpv6.h>
+/*
+ * esp6_output - apply ESP encapsulation to an outgoing packet, in place.
+ * @x:   xfrm state; x->data is the struct esp_data stored there by
+ *       esp6_init_state()
+ * @skb: packet to transform
+ *
+ * Pads the payload, appends the pad-length and next-header trailer
+ * bytes, fills in the ESP header (SPI, sequence number, IV), encrypts
+ * the payload and, when an ICV length is configured, appends the ICV.
+ *
+ * Returns 0 on success.  Every failure path returns -ENOMEM; the value
+ * returned by a failing skb_cow_data() is not propagated.
+ * NOTE(review): on failure the skb has already been pulled by hdr_len
+ * (and, for the scatterlist failure, padded and given a new sequence
+ * number); presumably the caller drops it - confirm.
+ */
+static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
+{
+ int err;
+ int hdr_len;
+ struct ipv6hdr *top_iph;
+ struct ipv6_esp_hdr *esph;
+ struct crypto_tfm *tfm;
+ struct esp_data *esp;
+ struct sk_buff *trailer;
+ int blksize;
+ int clen; /* length of the data to encrypt, after padding */
+ int alen; /* number of ICV bytes appended to the packet */
+ int nfrags;
+
+ esp = x->data;
+ hdr_len = skb->h.raw - skb->data +
+ sizeof(*esph) + esp->conf.ivlen; /* everything from skb->data up to the end of the IV */
+
+ /* Strip IP+ESP header. */
+ __skb_pull(skb, hdr_len);
+
+ /* Now skb is pure payload to encrypt */
+ err = -ENOMEM; /* the only error code this function returns */
+
+ /* Round to block size */
+ clen = skb->len;
+
+ alen = esp->auth.icv_trunc_len;
+ tfm = esp->conf.tfm;
+ blksize = (crypto_tfm_alg_blocksize(tfm) + 3) & ~3; /* cipher block size rounded up to a multiple of 4 */
+ clen = (clen + 2 + blksize-1)&~(blksize-1); /* +2: pad-length and next-header bytes */
+ if (esp->conf.padlen)
+ clen = (clen + esp->conf.padlen-1)&~(esp->conf.padlen-1);
+
+ if ((nfrags = skb_cow_data(skb, clen-skb->len+alen, &trailer)) < 0) { /* asks for room for padding, trailer and ICV */
+ goto error;
+ }
+
+ /* Fill padding... */
+ do {
+ int i;
+ for (i=0; i<clen-skb->len - 2; i++)
+ *(u8*)(trailer->tail + i) = i+1; /* pad bytes are 1, 2, 3, ... */
+ } while (0);
+ *(u8*)(trailer->tail + clen-skb->len - 2) = (clen - skb->len)-2; /* pad-length byte */
+ pskb_put(skb, trailer, clen - skb->len);
+
+ top_iph = (struct ipv6hdr *)__skb_push(skb, hdr_len); /* undo the pull done above */
+ esph = (struct ipv6_esp_hdr *)skb->h.raw;
+ top_iph->payload_len = htons(skb->len + alen - sizeof(*top_iph)); /* already counts the ICV appended below */
+ *(u8*)(trailer->tail - 1) = *skb->nh.raw; /* last trailer byte (next header) takes the byte at skb->nh.raw ... */
+ *skb->nh.raw = IPPROTO_ESP; /* ... which is then replaced; NOTE(review): presumably nh.raw points at the previous header's nexthdr field - confirm against the caller */
+
+ esph->spi = x->id.spi;
+ esph->seq_no = htonl(++x->replay.oseq);
+
+ if (esp->conf.ivlen)
+ crypto_cipher_set_iv(tfm, esp->conf.ivec, crypto_tfm_alg_ivsize(tfm));
+
+ do {
+ struct scatterlist *sg = &esp->sgbuf[0];
+
+ if (unlikely(nfrags > ESP_NUM_FAST_SG)) { /* too many fragments for the built-in array */
+ sg = kmalloc(sizeof(struct scatterlist)*nfrags, GFP_ATOMIC);
+ if (!sg)
+ goto error;
+ }
+ skb_to_sgvec(skb, sg, esph->enc_data+esp->conf.ivlen-skb->data, clen); /* region starts right after the IV */
+ crypto_cipher_encrypt(tfm, sg, sg, clen); /* same list as source and destination */
+ if (unlikely(sg != &esp->sgbuf[0]))
+ kfree(sg);
+ } while (0);
+
+ if (esp->conf.ivlen) {
+ memcpy(esph->enc_data, esp->conf.ivec, crypto_tfm_alg_ivsize(tfm)); /* IV handed to the cipher above goes into the packet */
+ crypto_cipher_get_iv(tfm, esp->conf.ivec, crypto_tfm_alg_ivsize(tfm)); /* refresh the stored IV from the transform */
+ }
+
+ if (esp->auth.icv_full_len) {
+ esp->auth.icv(esp, skb, (u8*)esph-skb->data,
+ sizeof(struct ipv6_esp_hdr) + esp->conf.ivlen+clen, trailer->tail); /* ICV covers ESP header, IV and ciphertext */
+ pskb_put(skb, trailer, alen);
+ }
+
+ err = 0;
+
+error:
+ return err;
+}
+/*
+ * esp6_input - verify and decrypt a received ESP packet, in place.
+ * @x:     xfrm state; x->data is the struct esp_data of the SA
+ * @decap: not used by this function
+ * @skb:   packet; skb->data is read as the ESP header and skb->nh.raw
+ *         as the start of the headers in front of it
+ *
+ * Checks the ICV when one is configured, decrypts the payload, trims
+ * ICV, trailer and padding, pulls the ESP header and IV, and copies the
+ * saved preceding headers so that they sit directly before the payload.
+ *
+ * Returns the next-header byte taken from the ESP trailer on success,
+ * -EINVAL or -ENOMEM on failure.
+ */
+static int esp6_input(struct xfrm_state *x, struct xfrm_decap_state *decap, struct sk_buff *skb)
+{
+ struct ipv6hdr *iph;
+ struct ipv6_esp_hdr *esph;
+ struct esp_data *esp = x->data;
+ struct sk_buff *trailer;
+ int blksize = crypto_tfm_alg_blocksize(esp->conf.tfm);
+ int alen = esp->auth.icv_trunc_len;
+ int elen = skb->len - sizeof(struct ipv6_esp_hdr) - esp->conf.ivlen - alen; /* length of the encrypted part */
+
+ int hdr_len = skb->h.raw - skb->nh.raw; /* size of the headers in front of ESP */
+ int nfrags;
+ unsigned char *tmp_hdr = NULL;
+ int ret = 0;
+
+ if (!pskb_may_pull(skb, sizeof(struct ipv6_esp_hdr))) {
+ ret = -EINVAL;
+ goto out_nofree;
+ }
+
+ if (elen <= 0 || (elen & (blksize-1))) { /* must be a positive multiple of the block size */
+ ret = -EINVAL;
+ goto out_nofree;
+ }
+
+ tmp_hdr = kmalloc(hdr_len, GFP_ATOMIC);
+ if (!tmp_hdr) {
+ ret = -ENOMEM;
+ goto out_nofree;
+ }
+ memcpy(tmp_hdr, skb->nh.raw, hdr_len); /* saved now, copied back in front of the payload at the end */
+
+ /* If integrity check is required, do this. */
+ if (esp->auth.icv_full_len) {
+ u8 sum[esp->auth.icv_full_len];
+ u8 sum1[alen];
+
+ esp->auth.icv(esp, skb, 0, skb->len-alen, sum); /* computed over everything but the received ICV */
+
+ if (skb_copy_bits(skb, skb->len-alen, sum1, alen)) /* received ICV: the last alen bytes */
+ BUG();
+
+ if (unlikely(memcmp(sum, sum1, alen))) { /* only the truncated length is compared */
+ x->stats.integrity_failed++;
+ ret = -EINVAL;
+ goto out;
+ }
+ }
+
+ if ((nfrags = skb_cow_data(skb, 0, &trailer)) < 0) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ skb->ip_summed = CHECKSUM_NONE;
+
+ esph = (struct ipv6_esp_hdr*)skb->data;
+ iph = skb->nh.ipv6h; /* assigned but not read again in this function */
+
+ /* Get ivec. This can be wrong, check against another impls. */
+ if (esp->conf.ivlen)
+ crypto_cipher_set_iv(esp->conf.tfm, esph->enc_data, crypto_tfm_alg_ivsize(esp->conf.tfm));
+
+ {
+ u8 nexthdr[2]; /* [0] = pad length, [1] = next header */
+ struct scatterlist *sg = &esp->sgbuf[0];
+ u8 padlen;
+
+ if (unlikely(nfrags > ESP_NUM_FAST_SG)) { /* too many fragments for the built-in array */
+ sg = kmalloc(sizeof(struct scatterlist)*nfrags, GFP_ATOMIC);
+ if (!sg) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ }
+ skb_to_sgvec(skb, sg, sizeof(struct ipv6_esp_hdr) + esp->conf.ivlen, elen);
+ crypto_cipher_decrypt(esp->conf.tfm, sg, sg, elen); /* same list as source and destination */
+ if (unlikely(sg != &esp->sgbuf[0]))
+ kfree(sg);
+
+ if (skb_copy_bits(skb, skb->len-alen-2, nexthdr, 2)) /* the two trailer bytes just before the ICV */
+ BUG();
+
+ padlen = nexthdr[0];
+ if (padlen+2 >= elen) { /* padding plus trailer would leave no payload */
+ LIMIT_NETDEBUG(
+ printk(KERN_WARNING "ipsec esp packet is garbage padlen=%d, elen=%d\n", padlen+2, elen));
+ ret = -EINVAL;
+ goto out;
+ }
+ /* ... check padding bits here. Silly. :-) */
+
+ pskb_trim(skb, skb->len - alen - padlen - 2); /* drop ICV, trailer and padding */
+ skb->h.raw = skb_pull(skb, sizeof(struct ipv6_esp_hdr) + esp->conf.ivlen);
+ skb->nh.raw += sizeof(struct ipv6_esp_hdr) + esp->conf.ivlen; /* headers move forward by the amount just pulled */
+ memcpy(skb->nh.raw, tmp_hdr, hdr_len);
+ skb->nh.ipv6h->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
+ ret = nexthdr[1];
+ }
+
+out:
+ kfree(tmp_hdr);
+out_nofree:
+ return ret;
+}
+
+/*
+ * esp6_get_max_size - upper bound on the size of a packet after
+ * esp6_output() has processed it.
+ * @x:   xfrm state; x->data is the struct esp_data of the SA
+ * @mtu: size of the data before the transform
+ *
+ * Returns mtu grown by the two trailer bytes, the padding, the header
+ * length recorded in the state and the full ICV length.
+ */
+static u32 esp6_get_max_size(struct xfrm_state *x, int mtu)
+{
+	struct esp_data *esp = x->data;
+	/*
+	 * esp6_output() pads to the cipher block size rounded up to a
+	 * multiple of 4.  Use the same value here, otherwise the result
+	 * is too small for ciphers whose block size is below 4.
+	 */
+	u32 blksize = (crypto_tfm_alg_blocksize(esp->conf.tfm) + 3) & ~3;
+
+	if (x->props.mode) {
+		/* non-zero mode: round up exactly as esp6_output() does */
+		mtu = (mtu + 2 + blksize-1)&~(blksize-1);
+	} else {
+		/* The worst case. */
+		mtu += 2 + blksize;
+	}
+	if (esp->conf.padlen)
+		mtu = (mtu + esp->conf.padlen-1)&~(esp->conf.padlen-1);
+
+	return mtu + x->props.header_len + esp->auth.icv_full_len;
+}
+
+/*
+ * ICMPv6 error handler for ESP.  Only destination-unreachable and
+ * packet-too-big errors are looked at: when a state matches the
+ * destination address and SPI found in the quoted packet, the event is
+ * logged.  Nothing else is done with the state.
+ */
+static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+		     int type, int code, int offset, __u32 info)
+{
+	struct ipv6hdr *ip6h;
+	struct ipv6_esp_hdr *esp_hdr;
+	struct xfrm_state *sa;
+
+	if (!(type == ICMPV6_DEST_UNREACH || type == ICMPV6_PKT_TOOBIG))
+		return;
+
+	/* skb->data is read as an IPv6 header, the ESP header is at "offset" */
+	ip6h = (struct ipv6hdr *)skb->data;
+	esp_hdr = (struct ipv6_esp_hdr *)(skb->data + offset);
+
+	sa = xfrm_state_lookup((xfrm_address_t *)&ip6h->daddr, esp_hdr->spi,
+			       IPPROTO_ESP, AF_INET6);
+	if (sa == NULL)
+		return;
+
+	printk(KERN_DEBUG "pmtu discovery on SA ESP/%08x/"
+	       "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
+	       ntohl(esp_hdr->spi), NIP6(ip6h->daddr));
+	xfrm_state_put(sa);
+}
+
+/*
+ * Release everything hanging off x->data: the cipher transform and its
+ * IV buffer, the digest transform and its ICV work buffer, and finally
+ * the esp_data itself.  A state without private data is left alone.
+ */
+static void esp6_destroy(struct xfrm_state *x)
+{
+	struct esp_data *priv = x->data;
+
+	if (priv == NULL)
+		return;
+
+	if (priv->conf.tfm != NULL) {
+		crypto_free_tfm(priv->conf.tfm);
+		priv->conf.tfm = NULL;
+	}
+
+	/* kfree(NULL) does nothing, so the buffers need no guard */
+	kfree(priv->conf.ivec);
+	priv->conf.ivec = NULL;
+
+	if (priv->auth.tfm != NULL) {
+		crypto_free_tfm(priv->auth.tfm);
+		priv->auth.tfm = NULL;
+	}
+
+	kfree(priv->auth.work_icv);
+	priv->auth.work_icv = NULL;
+
+	kfree(priv);
+}
+/*
+ * esp6_init_state - allocate and set up the ESP private data of a state.
+ * @x:    xfrm state; x->ealg is required, x->aalg is optional and
+ *        x->encap must not be set
+ * @args: not used by this function
+ *
+ * On success stores the new struct esp_data in x->data, sets
+ * x->props.header_len and returns 0.  Returns -ENOMEM when the esp_data
+ * allocation fails and -EINVAL for every other failure (including the
+ * later buffer allocations), after releasing whatever was set up.
+ */
+static int esp6_init_state(struct xfrm_state *x, void *args)
+{
+ struct esp_data *esp = NULL;
+
+ /* null auth and encryption can have zero length keys */
+ if (x->aalg) {
+ if (x->aalg->alg_key_len > 512) /* presumably a length in bits, cf. the (len+7)/8 conversion below */
+ goto error;
+ }
+ if (x->ealg == NULL)
+ goto error;
+
+ if (x->encap)
+ goto error;
+
+ esp = kmalloc(sizeof(*esp), GFP_KERNEL);
+ if (esp == NULL)
+ return -ENOMEM;
+
+ memset(esp, 0, sizeof(*esp)); /* lets esp6_destroy() run on a partially set up esp */
+
+ if (x->aalg) {
+ struct xfrm_algo_desc *aalg_desc;
+
+ esp->auth.key = x->aalg->alg_key; /* pointer into x->aalg, not a copy */
+ esp->auth.key_len = (x->aalg->alg_key_len+7)/8;
+ esp->auth.tfm = crypto_alloc_tfm(x->aalg->alg_name, 0);
+ if (esp->auth.tfm == NULL)
+ goto error;
+ esp->auth.icv = esp_hmac_digest;
+
+ aalg_desc = xfrm_aalg_get_byname(x->aalg->alg_name, 0);
+ BUG_ON(!aalg_desc);
+
+ if (aalg_desc->uinfo.auth.icv_fullbits/8 !=
+ crypto_tfm_alg_digestsize(esp->auth.tfm)) { /* descriptor and transform must agree on the digest size */
+ printk(KERN_INFO "ESP: %s digestsize %u != %hu\n",
+ x->aalg->alg_name,
+ crypto_tfm_alg_digestsize(esp->auth.tfm),
+ aalg_desc->uinfo.auth.icv_fullbits/8);
+ goto error;
+ }
+
+ esp->auth.icv_full_len = aalg_desc->uinfo.auth.icv_fullbits/8;
+ esp->auth.icv_trunc_len = aalg_desc->uinfo.auth.icv_truncbits/8;
+
+ esp->auth.work_icv = kmalloc(esp->auth.icv_full_len, GFP_KERNEL);
+ if (!esp->auth.work_icv)
+ goto error;
+ }
+ esp->conf.key = x->ealg->alg_key; /* pointer into x->ealg, not a copy */
+ esp->conf.key_len = (x->ealg->alg_key_len+7)/8;
+ if (x->props.ealgo == SADB_EALG_NULL)
+ esp->conf.tfm = crypto_alloc_tfm(x->ealg->alg_name, CRYPTO_TFM_MODE_ECB);
+ else
+ esp->conf.tfm = crypto_alloc_tfm(x->ealg->alg_name, CRYPTO_TFM_MODE_CBC);
+ if (esp->conf.tfm == NULL)
+ goto error;
+ esp->conf.ivlen = crypto_tfm_alg_ivsize(esp->conf.tfm);
+ esp->conf.padlen = 0;
+ if (esp->conf.ivlen) {
+ esp->conf.ivec = kmalloc(esp->conf.ivlen, GFP_KERNEL);
+ if (unlikely(esp->conf.ivec == NULL))
+ goto error;
+ get_random_bytes(esp->conf.ivec, esp->conf.ivlen); /* random initial IV */
+ }
+ if (crypto_cipher_setkey(esp->conf.tfm, esp->conf.key, esp->conf.key_len))
+ goto error;
+ x->props.header_len = sizeof(struct ipv6_esp_hdr) + esp->conf.ivlen;
+ if (x->props.mode) /* non-zero mode (presumably tunnel mode - confirm) adds an IPv6 header */
+ x->props.header_len += sizeof(struct ipv6hdr);
+ x->data = esp;
+ return 0;
+
+error:
+ x->data = esp; /* may still be NULL here; esp6_destroy() returns early in that case */
+ esp6_destroy(x);
+ x->data = NULL;
+ return -EINVAL;
+}
+
+static struct xfrm_type esp6_type = /* ESP transform operations; registered for AF_INET6 by esp6_init() */
+{
+ .description = "ESP6",
+ .owner = THIS_MODULE,
+ .proto = IPPROTO_ESP,
+ .init_state = esp6_init_state,
+ .destructor = esp6_destroy,
+ .get_max_size = esp6_get_max_size,
+ .input = esp6_input,
+ .output = esp6_output
+};
+
+static struct inet6_protocol esp6_protocol = { /* registered for IPPROTO_ESP by esp6_init() */
+ .handler = xfrm6_rcv,
+ .err_handler = esp6_err, /* ICMPv6 errors for ESP packets */
+ .flags = INET6_PROTO_NOPOLICY,
+};
+
+/*
+ * Module init: register the ESP xfrm type for AF_INET6, then the
+ * IPPROTO_ESP protocol handler.  If the second step fails the first is
+ * undone.  Returns 0 on success and -EAGAIN on either failure.
+ */
+static int __init esp6_init(void)
+{
+	int rc;
+
+	rc = xfrm_register_type(&esp6_type, AF_INET6);
+	if (rc < 0) {
+		printk(KERN_INFO "ipv6 esp init: can't add xfrm type\n");
+		return -EAGAIN;
+	}
+
+	rc = inet6_add_protocol(&esp6_protocol, IPPROTO_ESP);
+	if (rc >= 0)
+		return 0;
+
+	printk(KERN_INFO "ipv6 esp init: can't add protocol\n");
+	xfrm_unregister_type(&esp6_type, AF_INET6);
+	return -EAGAIN;
+}
+
+/*
+ * Module exit: undo esp6_init() in reverse order.  A failure of either
+ * step is logged and otherwise ignored.
+ */
+static void __exit esp6_fini(void)
+{
+	int rc;
+
+	rc = inet6_del_protocol(&esp6_protocol, IPPROTO_ESP);
+	if (rc < 0)
+		printk(KERN_INFO "ipv6 esp close: can't remove protocol\n");
+
+	rc = xfrm_unregister_type(&esp6_type, AF_INET6);
+	if (rc < 0)
+		printk(KERN_INFO "ipv6 esp close: can't remove xfrm type\n");
+}
+
+module_init(esp6_init);
+module_exit(esp6_fini);
+
+MODULE_LICENSE("GPL");
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
new file mode 100644
index 000000000000..e0839eafc3a9
--- /dev/null
+++ b/net/ipv6/exthdrs.c
@@ -0,0 +1,575 @@
+/*
+ * Extension Header handling for IPv6
+ * Linux INET6 implementation
+ *
+ * Authors:
+ * Pedro Roque <roque@di.fc.ul.pt>
+ * Andi Kleen <ak@muc.de>
+ * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
+ *
+ * $Id: exthdrs.c,v 1.13 2001/06/19 15:58:56 davem Exp $
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+/* Changes:
+ * yoshfuji : ensure not to overrun while parsing
+ * tlv options.
+ * Mitsuru KANDA @USAGI and: Remove ipv6_parse_exthdrs().
+ * YOSHIFUJI Hideaki @USAGI Register inbound extension header
+ * handlers as inet6_protocol{}.
+ */
+
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/sched.h>
+#include <linux/net.h>
+#include <linux/netdevice.h>
+#include <linux/in6.h>
+#include <linux/icmpv6.h>
+
+#include <net/sock.h>
+#include <net/snmp.h>
+
+#include <net/ipv6.h>
+#include <net/protocol.h>
+#include <net/transp_v6.h>
+#include <net/rawv6.h>
+#include <net/ndisc.h>
+#include <net/ip6_route.h>
+#include <net/addrconf.h>
+
+#include <asm/uaccess.h>
+
+/*
+ * Parsing tlv encoded headers.
+ *
+ * Parsing function "func" returns 1 if parsing succeeded
+ * and 0 if it failed.
+ * It MUST NOT touch skb->h.
+ */
+
+struct tlvtype_proc { /* one entry of a TLV option handler table */
+ int type; /* option type handled by func; a negative type ends a table */
+ int (*func)(struct sk_buff *skb, int offset); /* offset: position of the option relative to skb->nh.raw */
+};
+
+/*********************
+ Generic functions
+ *********************/
+
+/*
+ * An unknown option is detected, decide what to do.
+ *
+ * The two top bits of the option type select the action.  Returns 1 if
+ * the option is to be ignored and parsing may go on, 0 if the packet
+ * has been disposed of (freed here, or handed to icmpv6_param_prob(),
+ * which is not followed by a kfree_skb() and so presumably consumes
+ * the skb).
+ */
+
+static int ip6_tlvopt_unknown(struct sk_buff *skb, int optoff)
+{
+	int action = (skb->nh.raw[optoff] & 0xC0) >> 6;
+
+	if (action == 0)	/* ignore */
+		return 1;
+
+	/*
+	 * 2: send ICMP PARM PROB regardless and drop packet.
+	 * 3: the same, but not for a multicast destination.  Actually,
+	 *    it is redundant check. icmp_send will recheck in any case.
+	 */
+	if (action == 2 ||
+	    (action == 3 && !ipv6_addr_is_multicast(&skb->nh.ipv6h->daddr))) {
+		icmpv6_param_prob(skb, ICMPV6_UNK_OPTION, optoff);
+		return 0;
+	}
+
+	/* 1, or 3 with a multicast destination: drop packet */
+	kfree_skb(skb);
+	return 0;
+}
+
+/*
+ * Parse tlv encoded option header (hop-by-hop or destination).
+ *
+ * procs: handler table, ended by an entry with a negative type
+ * skb:   packet; skb->h.raw points at the option header
+ *
+ * Returns 1 when every option was accepted and the option lengths add
+ * up exactly to the header length.  Returns 0 otherwise; the skb must
+ * then not be used any more: it is freed here, or was disposed of by
+ * the handler (or ip6_tlvopt_unknown()) that returned 0.
+ */
+
+static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff *skb)
+{
+ struct tlvtype_proc *curr;
+ int off = skb->h.raw - skb->nh.raw; /* option offsets are relative to skb->nh.raw */
+ int len = ((skb->h.raw[1]+1)<<3); /* header length in bytes: (second header byte + 1) * 8 */
+
+ if ((skb->h.raw + len) - skb->data > skb_headlen(skb)) /* the header must fit within skb_headlen() */
+ goto bad;
+
+ off += 2; /* skip the two fixed bytes at the start of the header */
+ len -= 2;
+
+ while (len > 0) {
+ int optlen = skb->nh.raw[off+1]+2; /* type byte + length byte + data; NOTE(review): read even when len == 1 - confirm this stays inside the buffer */
+
+ switch (skb->nh.raw[off]) {
+ case IPV6_TLV_PAD0:
+ optlen = 1; /* a lone type byte, no length field */
+ break;
+
+ case IPV6_TLV_PADN:
+ break; /* an overlong PadN is caught by the len == 0 test after the loop */
+
+ default: /* Other TLV code so scan list */
+ if (optlen > len)
+ goto bad;
+ for (curr=procs; curr->type >= 0; curr++) {
+ if (curr->type == skb->nh.raw[off]) {
+ /* type specific length/alignment
+ checks will be performed in the
+ func(). */
+ if (curr->func(skb, off) == 0)
+ return 0;
+ break;
+ }
+ }
+ if (curr->type < 0) { /* reached the table end: no handler for this type */
+ if (ip6_tlvopt_unknown(skb, off) == 0)
+ return 0;
+ }
+ break;
+ }
+ off += optlen;
+ len -= optlen;
+ }
+ if (len == 0) /* a negative len means the last option ran past the header */
+ return 1;
+bad:
+ kfree_skb(skb);
+ return 0;
+}
+
+/*****************************
+ Destination options header.
+ *****************************/
+
+static struct tlvtype_proc tlvprocdestopt_lst[] = { /* handler table passed to ip6_parse_tlv() by ipv6_destopt_rcv() */
+ /* No destination options are defined now */
+ {-1, NULL} /* table end marker (negative type) */
+};
+
+/*
+ * Input handler for the destination options header.
+ *
+ * Returns 1 after a successful parse, with skb->h.raw moved past the
+ * header and *nhoffp set to the header's offset from skb->nh.raw.
+ * Returns -1 when the header cannot be pulled (the skb is freed here)
+ * or when ip6_parse_tlv() rejects it.
+ */
+static int ipv6_destopt_rcv(struct sk_buff **skbp, unsigned int *nhoffp)
+{
+	struct sk_buff *skb = *skbp;
+	struct inet6_skb_parm *cb = IP6CB(skb);
+
+	/* first the minimal 8 bytes, so that the length byte can be read ... */
+	if (!pskb_may_pull(skb, (skb->h.raw-skb->data)+8))
+		goto drop;
+	/* ... then the whole header */
+	if (!pskb_may_pull(skb, (skb->h.raw-skb->data)+((skb->h.raw[1]+1)<<3)))
+		goto drop;
+
+	cb->dst1 = skb->h.raw - skb->nh.raw;
+
+	if (!ip6_parse_tlv(tlvprocdestopt_lst, skb)) {
+		IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
+		return -1;
+	}
+
+	skb->h.raw += ((skb->h.raw[1]+1)<<3);
+	*nhoffp = cb->dst1;
+	return 1;
+
+drop:
+	IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
+	kfree_skb(skb);
+	return -1;
+}
+
+static struct inet6_protocol destopt_protocol = { /* registered for IPPROTO_DSTOPTS by ipv6_destopt_init() */
+ .handler = ipv6_destopt_rcv,
+ .flags = INET6_PROTO_NOPOLICY,
+};
+
+/* Register the destination options handler for IPPROTO_DSTOPTS; a failure is only logged. */
+void __init ipv6_destopt_init(void)
+{
+	int rc = inet6_add_protocol(&destopt_protocol, IPPROTO_DSTOPTS);
+
+	if (rc < 0)
+		printk(KERN_ERR "ipv6_destopt_init: Could not register protocol\n");
+}
+
+/********************************
+ NONE header. No data in packet.
+ ********************************/
+
+/* Input handler registered for IPPROTO_NONE: frees the packet and returns 0. */
+static int ipv6_nodata_rcv(struct sk_buff **skbp, unsigned int *nhoffp)
+{
+	kfree_skb(*skbp);
+	return 0;
+}
+
+static struct inet6_protocol nodata_protocol = { /* registered for IPPROTO_NONE by ipv6_nodata_init() */
+ .handler = ipv6_nodata_rcv,
+ .flags = INET6_PROTO_NOPOLICY,
+};
+
+/* Register the no-next-header handler for IPPROTO_NONE; a failure is only logged. */
+void __init ipv6_nodata_init(void)
+{
+	int rc = inet6_add_protocol(&nodata_protocol, IPPROTO_NONE);
+
+	if (rc < 0)
+		printk(KERN_ERR "ipv6_nodata_init: Could not register protocol\n");
+}
+
+/********************************
+ Routing header.
+ ********************************/
+/*
+ * ipv6_rthdr_rcv - input handler for the routing extension header.
+ * skbp:   in/out packet pointer; replaced by a private copy when the
+ *         skb turns out to be cloned
+ * nhoffp: out; offset (from skb->nh.raw) of this header's nexthdr
+ *         field, set only when 1 is returned
+ *
+ * With segments_left == 0 the header is recorded in the skb control
+ * block and skipped, and 1 is returned.  Otherwise (type 0 only) the
+ * next address in the header is swapped with the IPv6 destination and
+ * the packet is routed again: it is handed to dst_input(), or, when the
+ * new route uses a loopback device, processed once more right here.
+ * Returns -1 in all of those cases and on every error.
+ */
+static int ipv6_rthdr_rcv(struct sk_buff **skbp, unsigned int *nhoffp)
+{
+ struct sk_buff *skb = *skbp;
+ struct inet6_skb_parm *opt = IP6CB(skb);
+ struct in6_addr *addr;
+ struct in6_addr daddr;
+ int n, i;
+
+ struct ipv6_rt_hdr *hdr;
+ struct rt0_hdr *rthdr;
+
+ if (!pskb_may_pull(skb, (skb->h.raw-skb->data)+8) ||
+ !pskb_may_pull(skb, (skb->h.raw-skb->data)+((skb->h.raw[1]+1)<<3))) { /* first 8 bytes, then the full header length */
+ IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
+ kfree_skb(skb);
+ return -1;
+ }
+
+ hdr = (struct ipv6_rt_hdr *) skb->h.raw;
+
+ if (ipv6_addr_is_multicast(&skb->nh.ipv6h->daddr) ||
+ skb->pkt_type != PACKET_HOST) {
+ IP6_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS);
+ kfree_skb(skb);
+ return -1;
+ }
+
+looped_back:
+ if (hdr->segments_left == 0) { /* nothing left to visit: skip the header and go on with the next one */
+ opt->srcrt = skb->h.raw - skb->nh.raw;
+ skb->h.raw += (hdr->hdrlen + 1) << 3;
+ opt->dst0 = opt->dst1; /* destination options seen so far preceded the routing header */
+ opt->dst1 = 0;
+ *nhoffp = (&hdr->nexthdr) - skb->nh.raw;
+ return 1;
+ }
+
+ if (hdr->type != IPV6_SRCRT_TYPE_0) {
+ IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
+ icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->type) - skb->nh.raw);
+ return -1;
+ }
+
+ if (hdr->hdrlen & 0x01) { /* odd length cannot hold whole 16-byte addresses */
+ IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
+ icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->hdrlen) - skb->nh.raw);
+ return -1;
+ }
+
+ /*
+ * This is the routing header forwarding algorithm from
+ * RFC 2460, page 16.
+ */
+
+ n = hdr->hdrlen >> 1; /* number of addresses: two 8-byte units each */
+
+ if (hdr->segments_left > n) {
+ IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
+ icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->segments_left) - skb->nh.raw);
+ return -1;
+ }
+
+ /* We are about to mangle packet header. Be careful!
+ Do not damage packets queued somewhere.
+ */
+ if (skb_cloned(skb)) {
+ struct sk_buff *skb2 = skb_copy(skb, GFP_ATOMIC);
+ kfree_skb(skb); /* the original is released whether or not the copy succeeded */
+ /* the copy is a forwarded packet */
+ if (skb2 == NULL) {
+ IP6_INC_STATS_BH(IPSTATS_MIB_OUTDISCARDS);
+ return -1;
+ }
+ *skbp = skb = skb2;
+ opt = IP6CB(skb2); /* re-derive everything that pointed into the old skb */
+ hdr = (struct ipv6_rt_hdr *) skb2->h.raw;
+ }
+
+ if (skb->ip_summed == CHECKSUM_HW)
+ skb->ip_summed = CHECKSUM_NONE;
+
+ i = n - --hdr->segments_left; /* 1-based index of the address to visit next */
+
+ rthdr = (struct rt0_hdr *) hdr;
+ addr = rthdr->addr;
+ addr += i - 1;
+
+ if (ipv6_addr_is_multicast(addr)) {
+ IP6_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS);
+ kfree_skb(skb);
+ return -1;
+ }
+
+ ipv6_addr_copy(&daddr, addr); /* swap the selected address with the IPv6 destination */
+ ipv6_addr_copy(addr, &skb->nh.ipv6h->daddr);
+ ipv6_addr_copy(&skb->nh.ipv6h->daddr, &daddr);
+
+ dst_release(xchg(&skb->dst, NULL)); /* drop the old route before looking up the new destination */
+ ip6_route_input(skb);
+ if (skb->dst->error) {
+ skb_push(skb, skb->data - skb->nh.raw); /* move skb->data back to skb->nh.raw */
+ dst_input(skb);
+ return -1;
+ }
+
+ if (skb->dst->dev->flags&IFF_LOOPBACK) { /* route is on a loopback device: process the header again here */
+ if (skb->nh.ipv6h->hop_limit <= 1) {
+ IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
+ icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
+ 0, skb->dev);
+ kfree_skb(skb);
+ return -1;
+ }
+ skb->nh.ipv6h->hop_limit--;
+ goto looped_back;
+ }
+
+ skb_push(skb, skb->data - skb->nh.raw); /* move skb->data back to skb->nh.raw */
+ dst_input(skb);
+ return -1;
+}
+
+static struct inet6_protocol rthdr_protocol = { /* registered for IPPROTO_ROUTING by ipv6_rthdr_init() */
+ .handler = ipv6_rthdr_rcv,
+ .flags = INET6_PROTO_NOPOLICY,
+};
+
+/* Register the routing header handler for IPPROTO_ROUTING; a failure is only logged. */
+void __init ipv6_rthdr_init(void)
+{
+	if (inet6_add_protocol(&rthdr_protocol, IPPROTO_ROUTING) < 0)
+		printk(KERN_ERR "ipv6_rthdr_init: Could not register protocol\n");
+}
+
+/*
+ This function inverts received rthdr.
+ NOTE: specs allow to make it automatically only if
+ packet authenticated.
+
+ I will not discuss it here (though, I am really pissed off at
+ this stupid requirement making rthdr idea useless)
+
+ Actually, it creates severe problems for us.
+ Embryonic requests has no associated sockets,
+ so that user have no control over it and
+ cannot not only to set reply options, but
+ even to know, that someone wants to connect
+ without success. :-(
+
+ For now we need to test the engine, so that I created
+ temporary (or permanent) backdoor.
+ If listening socket set IPV6_RTHDR to 2, then we invert header.
+ --ANK (980729)
+ */
+
+struct ipv6_txoptions *
+ipv6_invert_rthdr(struct sock *sk, struct ipv6_rt_hdr *hdr)
+{
+	/* Received rthdr:
+
+	   [ H1 -> H2 -> ... H_prev ] daddr=ME
+
+	   Inverted result:
+	   [ H_prev -> ... -> H1 ] daddr =sender
+
+	   Note, that IP output engine will rewrite this rthdr
+	   by rotating it left by one addr.
+
+	   Returns a ipv6_txoptions obtained from sock_kmalloc() whose
+	   srcrt is the inverted header stored right behind the struct,
+	   or NULL if the header is not a completely consumed type 0
+	   header of even length, or if the allocation fails.
+	 */
+
+	struct rt0_hdr *rcvd = (struct rt0_hdr *)hdr;
+	struct rt0_hdr *inv;
+	struct ipv6_txoptions *opt;
+	int hdrlen = ipv6_optlen(hdr);
+	int naddr, k;
+
+	if (hdr->segments_left)
+		return NULL;
+	if (hdr->type != IPV6_SRCRT_TYPE_0)
+		return NULL;
+	if (hdr->hdrlen & 0x01)
+		return NULL;
+
+	naddr = hdr->hdrlen >> 1;
+
+	opt = sock_kmalloc(sk, sizeof(*opt) + hdrlen, GFP_ATOMIC);
+	if (opt == NULL)
+		return NULL;
+
+	/* only the struct is cleared; the header area is filled in below */
+	memset(opt, 0, sizeof(*opt));
+	opt->tot_len = sizeof(*opt) + hdrlen;
+	opt->srcrt = (void*)(opt+1);
+	opt->opt_nflen = hdrlen;
+
+	/* fixed part of the header first ... */
+	memcpy(opt->srcrt, hdr, sizeof(*hdr));
+	inv = (struct rt0_hdr*)opt->srcrt;
+	/* Obsolete field, MBZ, when originated by us */
+	inv->bitmap = 0;
+	opt->srcrt->segments_left = naddr;
+
+	/* ... then the addresses, in reverse order */
+	for (k = 0; k < naddr; k++)
+		memcpy(&inv->addr[k], &rcvd->addr[naddr - 1 - k], 16);
+
+	return opt;
+}
+
+/**********************************
+ Hop-by-hop options.
+ **********************************/
+
+/*
+ * Router Alert as of RFC 2711.
+ *
+ * The option is accepted only if its length byte is 2; its offset is
+ * then stored in the skb control block and 1 is returned.  Otherwise
+ * the skb is freed and 0 is returned.
+ */
+
+static int ipv6_hop_ra(struct sk_buff *skb, int optoff)
+{
+	if (skb->nh.raw[optoff+1] != 2) {
+		LIMIT_NETDEBUG(
+			printk(KERN_DEBUG "ipv6_hop_ra: wrong RA length %d\n", skb->nh.raw[optoff+1]));
+		kfree_skb(skb);
+		return 0;
+	}
+
+	IP6CB(skb)->ra = optoff;
+	return 1;
+}
+
+/*
+ * Jumbo payload.
+ *
+ * Validates the option (length byte 4, offset of the form 4n+2, a
+ * length above IPV6_MAXPLEN, a zero payload_len in the IPv6 header,
+ * and no more than the skb actually holds) and trims the skb to the
+ * announced length.  Returns 1 if the option is accepted.  Returns 0
+ * after the skb was freed or passed to icmpv6_param_prob().
+ */
+
+static int ipv6_hop_jumbo(struct sk_buff *skb, int optoff)
+{
+	u32 jumbo_len;
+
+	if (!(skb->nh.raw[optoff+1] == 4 && (optoff&3) == 2)) {
+		LIMIT_NETDEBUG(
+			printk(KERN_DEBUG "ipv6_hop_jumbo: wrong jumbo opt length/alignment %d\n", skb->nh.raw[optoff+1]));
+		IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
+		kfree_skb(skb);
+		return 0;
+	}
+
+	jumbo_len = ntohl(*(u32*)(skb->nh.raw+optoff+2));
+
+	/* a length the normal payload_len field could carry is an error */
+	if (jumbo_len <= IPV6_MAXPLEN) {
+		IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
+		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff+2);
+		return 0;
+	}
+
+	/* payload_len must be zero when the jumbo option is used */
+	if (skb->nh.ipv6h->payload_len != 0) {
+		IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
+		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff);
+		return 0;
+	}
+
+	/* more announced than received */
+	if (jumbo_len > skb->len - sizeof(struct ipv6hdr)) {
+		IP6_INC_STATS_BH(IPSTATS_MIB_INTRUNCATEDPKTS);
+		kfree_skb(skb);
+		return 0;
+	}
+
+	/* more received than announced: cut off the excess */
+	if (jumbo_len + sizeof(struct ipv6hdr) < skb->len) {
+		__pskb_trim(skb, jumbo_len + sizeof(struct ipv6hdr));
+		if (skb->ip_summed == CHECKSUM_HW)
+			skb->ip_summed = CHECKSUM_NONE;
+	}
+
+	return 1;
+}
+
+static struct tlvtype_proc tlvprochopopt_lst[] = { /* hop-by-hop option handlers, used by ipv6_parse_hopopts() */
+ {
+ .type = IPV6_TLV_ROUTERALERT,
+ .func = ipv6_hop_ra,
+ },
+ {
+ .type = IPV6_TLV_JUMBO,
+ .func = ipv6_hop_jumbo,
+ },
+ { -1, } /* table end marker (negative type) */
+};
+
+/*
+ * Parse the hop-by-hop options header at skb->h.raw.  The offset
+ * sizeof(struct ipv6hdr) is recorded in the skb control block and is
+ * also the return value on success; -1 is returned when
+ * ip6_parse_tlv() rejects the header.  "nhoff" is not used.
+ */
+int ipv6_parse_hopopts(struct sk_buff *skb, int nhoff)
+{
+	int ret = -1;
+
+	IP6CB(skb)->hop = sizeof(struct ipv6hdr);
+	if (ip6_parse_tlv(tlvprochopopt_lst, skb))
+		ret = sizeof(struct ipv6hdr);
+
+	return ret;
+}
+
+/*
+ * Creating outbound headers.
+ *
+ * "build" functions work when skb is filled from head to tail (datagram)
+ * "push" functions work when headers are added from tail to head (tcp)
+ *
+ * In both cases we assume, that caller reserved enough room
+ * for headers.
+ */
+
+/*
+ * Push a copy of routing header "opt" in front of the data in skb,
+ * rotated left by one address: addresses 2..n move up by one slot, the
+ * address *addr_p points to goes into the last slot, and *addr_p is
+ * redirected to the first address of "opt".  *proto is chained into the
+ * new header and replaced by NEXTHDR_ROUTING.
+ * NOTE(review): assumes the header holds at least one address - TODO
+ * confirm that callers guarantee this.
+ */
+static void ipv6_push_rthdr(struct sk_buff *skb, u8 *proto,
+			    struct ipv6_rt_hdr *opt,
+			    struct in6_addr **addr_p)
+{
+	struct rt0_hdr *src = (struct rt0_hdr *) opt;
+	struct rt0_hdr *dst;
+	int naddr;
+
+	dst = (struct rt0_hdr *) skb_push(skb, (src->rt_hdr.hdrlen + 1) << 3);
+	/* fixed part of the header */
+	memcpy(dst, src, sizeof(struct rt0_hdr));
+
+	naddr = src->rt_hdr.hdrlen >> 1;
+	if (naddr > 1) {
+		memcpy(dst->addr, src->addr + 1,
+		       (naddr - 1) * sizeof(struct in6_addr));
+	}
+
+	ipv6_addr_copy(dst->addr + (naddr - 1), *addr_p);
+	*addr_p = src->addr;
+
+	dst->rt_hdr.nexthdr = *proto;
+	*proto = NEXTHDR_ROUTING;
+}
+
+/*
+ * Push a copy of option header "opt" in front of the data in skb.  The
+ * copy's nexthdr is set to *proto, and *proto then becomes "type".
+ */
+static void ipv6_push_exthdr(struct sk_buff *skb, u8 *proto, u8 type, struct ipv6_opt_hdr *opt)
+{
+	int optlen = ipv6_optlen(opt);
+	struct ipv6_opt_hdr *pushed;
+
+	pushed = (struct ipv6_opt_hdr *)skb_push(skb, optlen);
+	memcpy(pushed, opt, optlen);
+
+	pushed->nexthdr = *proto;
+	*proto = type;
+}
+
+/*
+ * Push those headers of "opt" that are present, in the order routing
+ * header, first destination options, hop-by-hop options.  Each push
+ * goes in front of the previous one, so hop-by-hop ends up outermost.
+ * *proto and *daddr are updated by the helpers.
+ */
+void ipv6_push_nfrag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt,
+			  u8 *proto,
+			  struct in6_addr **daddr)
+{
+	if (opt->srcrt != NULL) {
+		ipv6_push_rthdr(skb, proto, opt->srcrt, daddr);
+	}
+	if (opt->dst0opt != NULL) {
+		ipv6_push_exthdr(skb, proto, NEXTHDR_DEST, opt->dst0opt);
+	}
+	if (opt->hopopt != NULL) {
+		ipv6_push_exthdr(skb, proto, NEXTHDR_HOP, opt->hopopt);
+	}
+}
+
+/* Push the second destination options header of "opt", if there is one. */
+void ipv6_push_frag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt, u8 *proto)
+{
+	if (opt->dst1opt == NULL)
+		return;
+
+	ipv6_push_exthdr(skb, proto, NEXTHDR_DEST, opt->dst1opt);
+}
+
+/*
+ * Duplicate "opt" (opt->tot_len bytes) into memory obtained with
+ * sock_kmalloc().  The header pointers that are set get shifted by the
+ * distance between the two blocks, so they refer to the same offsets
+ * inside the copy.  Returns the copy, or NULL if the allocation fails.
+ */
+struct ipv6_txoptions *
+ipv6_dup_options(struct sock *sk, struct ipv6_txoptions *opt)
+{
+	struct ipv6_txoptions *copy;
+	long delta;
+
+	copy = sock_kmalloc(sk, opt->tot_len, GFP_ATOMIC);
+	if (copy == NULL)
+		return NULL;
+
+	delta = (char*)copy - (char*)opt;
+	memcpy(copy, opt, opt->tot_len);
+
+	if (copy->hopopt)
+		copy->hopopt = (void *)((char *)copy->hopopt + delta);
+	if (copy->dst0opt)
+		copy->dst0opt = (void *)((char *)copy->dst0opt + delta);
+	if (copy->dst1opt)
+		copy->dst1opt = (void *)((char *)copy->dst1opt + delta);
+	if (copy->srcrt)
+		copy->srcrt = (void *)((char *)copy->srcrt + delta);
+
+	return copy;
+}
diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c
new file mode 100644
index 000000000000..6dda815c013f
--- /dev/null
+++ b/net/ipv6/exthdrs_core.c
@@ -0,0 +1,109 @@
+/*
+ * IPv6 library code, needed by static components when full IPv6 support is
+ * not configured or static.
+ */
+#include <net/ipv6.h>
+
+/*
+ * find out if nexthdr is a well-known extension header or a protocol
+ */
+
+int ipv6_ext_hdr(u8 nexthdr)
+{
+	/*
+	 * Returns 1 for the extension header types listed below,
+	 * 0 for everything else.
+	 */
+	switch (nexthdr) {
+	case NEXTHDR_HOP:
+	case NEXTHDR_ROUTING:
+	case NEXTHDR_FRAGMENT:
+	case NEXTHDR_AUTH:
+	case NEXTHDR_NONE:
+	case NEXTHDR_DEST:
+		return 1;
+	default:
+		return 0;
+	}
+}
+
+/*
+ * Skip any extension headers. This is used by the ICMP module.
+ *
+ * Note that strictly speaking this conflicts with RFC 2460 4.0:
+ * ...The contents and semantics of each extension header determine whether
+ * or not to proceed to the next header. Therefore, extension headers must
+ * be processed strictly in the order they appear in the packet; a
+ * receiver must not, for example, scan through a packet looking for a
+ * particular kind of extension header and process that header prior to
+ * processing all preceding ones.
+ *
+ * We do exactly this. This is a protocol bug. We can't decide after a
+ * seeing an unknown discard-with-error flavour TLV option if it's a
+ * ICMP error message or not (errors should never be send in reply to
+ * ICMP error messages).
+ *
+ * But I see no other way to do this. This might need to be reexamined
+ * when Linux implements ESP (and maybe AUTH) headers.
+ * --AK
+ *
+ * This function parses the (probably truncated) exthdr set found in
+ * "skb" at offset "start", of length "len". "nexthdrp" initially points
+ * to some place where the type of the first header can be found.
+ *
+ * It skips all well-known exthdrs, and returns the offset of the start
+ * of the unparsable area, i.e. the first header with unknown type.
+ * If the result is not -1, *nexthdrp is updated with the type/protocol
+ * of this header.
+ *
+ * NOTES: - if the packet is terminated with NEXTHDR_NONE it returns -1.
+ * - it may return an offset pointing beyond the end of the packet,
+ * if the last recognized header is truncated in the middle.
+ * - if the packet is truncated, so that all parsed headers are skipped,
+ * it returns -1.
+ * - First fragment header is skipped, not-first ones
+ * are considered as unparsable.
+ * - ESP is unparsable for now and considered like
+ * normal payload protocol.
+ * - Note also special handling of AUTH header. Thanks to IPsec wizards.
+ *
+ * --ANK (980726)
+ */
+
+/*
+ * Skip the well-known extension headers that start at offset "start"
+ * of skb; "len" is the number of bytes the caller says are available
+ * from there.  Returns the offset of the first header that is not
+ * skipped, with *nexthdrp set to its type, or -1 if the chain ends in
+ * NEXTHDR_NONE or cannot be read.
+ */
+int ipv6_skip_exthdr(const struct sk_buff *skb, int start, u8 *nexthdrp, int len)
+{
+	u8 nexthdr = *nexthdrp;
+
+	while (ipv6_ext_hdr(nexthdr)) {
+		struct ipv6_opt_hdr _hdr, *hp;
+		int hdrlen;
+
+		if (len < (int)sizeof(struct ipv6_opt_hdr))
+			return -1;
+		if (nexthdr == NEXTHDR_NONE)
+			return -1;
+		hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr);
+		/*
+		 * "len" comes from the caller and need not match what the
+		 * skb really holds.  Treat a header that cannot be read
+		 * like any other truncation instead of calling BUG().
+		 */
+		if (hp == NULL)
+			return -1;
+		if (nexthdr == NEXTHDR_FRAGMENT) {
+			unsigned short _frag_off, *fp;
+			fp = skb_header_pointer(skb,
+						start+offsetof(struct frag_hdr,
+							       frag_off),
+						sizeof(_frag_off),
+						&_frag_off);
+			if (fp == NULL)
+				return -1;
+
+			/* a non-zero fragment offset: not the first fragment, stop here */
+			if (ntohs(*fp) & ~0x7)
+				break;
+			hdrlen = 8;
+		} else if (nexthdr == NEXTHDR_AUTH)
+			/* AUTH length is computed in 4-byte units, with 2 added */
+			hdrlen = (hp->hdrlen+2)<<2;
+		else
+			hdrlen = ipv6_optlen(hp);
+
+		nexthdr = hp->nexthdr;
+		len -= hdrlen;
+		start += hdrlen;
+	}
+
+	*nexthdrp = nexthdr;
+	return start;
+}
+
+EXPORT_SYMBOL(ipv6_ext_hdr);
+EXPORT_SYMBOL(ipv6_skip_exthdr);
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
new file mode 100644
index 000000000000..87b9082ceab2
--- /dev/null
+++ b/net/ipv6/icmp.c
@@ -0,0 +1,822 @@
+/*
+ * Internet Control Message Protocol (ICMPv6)
+ * Linux INET6 implementation
+ *
+ * Authors:
+ * Pedro Roque <roque@di.fc.ul.pt>
+ *
+ * $Id: icmp.c,v 1.38 2002/02/08 03:57:19 davem Exp $
+ *
+ * Based on net/ipv4/icmp.c
+ *
+ * RFC 1885
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+/*
+ * Changes:
+ *
+ * Andi Kleen : exception handling
+ * Andi Kleen add rate limits. never reply to a icmp.
+ * add more length checks and other fixes.
+ * yoshfuji : ensure to sent parameter problem for
+ * fragments.
+ * YOSHIFUJI Hideaki @USAGI: added sysctl for icmp rate limit.
+ * Randy Dunlap and
+ * YOSHIFUJI Hideaki @USAGI: Per-interface statistics support
+ * Kazunori MIYAZAWA @USAGI: change output process to use ip6_append_data
+ */
+
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/sockios.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <linux/init.h>
+
+#ifdef CONFIG_SYSCTL
+#include <linux/sysctl.h>
+#endif
+
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/icmpv6.h>
+
+#include <net/ip.h>
+#include <net/sock.h>
+
+#include <net/ipv6.h>
+#include <net/ip6_checksum.h>
+#include <net/protocol.h>
+#include <net/raw.h>
+#include <net/rawv6.h>
+#include <net/transp_v6.h>
+#include <net/ip6_route.h>
+#include <net/addrconf.h>
+#include <net/icmp.h>
+
+#include <asm/uaccess.h>
+#include <asm/system.h>
+
+DEFINE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics);
+
+/*
+ * The ICMP socket(s). This is the most convenient way to flow control
+ * our ICMP output as well as maintain a clean interface throughout
+ * all layers. All Socketless IP sends will soon be gone.
+ *
+ * On SMP we have one ICMP socket per-cpu.
+ */
+static DEFINE_PER_CPU(struct socket *, __icmpv6_socket) = NULL;
+#define icmpv6_socket __get_cpu_var(__icmpv6_socket)
+
+static int icmpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp);
+
+static struct inet6_protocol icmpv6_protocol = {
+ .handler = icmpv6_rcv, /* receive entry point; registered for IPPROTO_ICMPV6 in icmpv6_init() */
+ .flags = INET6_PROTO_FINAL,
+};
+
+/*
+ * Try to take this CPU's ICMPv6 socket lock with bottom halves disabled.
+ * Returns 0 with the lock held and BHs off, or 1 (BHs re-enabled, lock
+ * not held) when the lock is already taken.
+ */
+static __inline__ int icmpv6_xmit_lock(void)
+{
+	int busy;
+
+	local_bh_disable();
+
+	/* This can fail if the output path (f.e. SIT or
+	 * ip6ip6 tunnel) signals dst_link_failure() for an
+	 * outgoing ICMP6 packet.
+	 */
+	busy = unlikely(!spin_trylock(&icmpv6_socket->sk->sk_lock.slock));
+	if (busy)
+		local_bh_enable();
+
+	return busy;
+}
+
+static __inline__ void icmpv6_xmit_unlock(void)
+{
+ spin_unlock_bh(&icmpv6_socket->sk->sk_lock.slock); /* releases the same per-cpu socket lock that icmpv6_xmit_lock() takes */
+}
+
+/*
+ * Slightly more convenient version of icmpv6_send.
+ *
+ * Sends an ICMPV6_PARAMPROB error for skb with the given code, passing
+ * pos as the info value and skb->dev as the device, and then frees skb:
+ * the caller must not touch skb afterwards.
+ */
+void icmpv6_param_prob(struct sk_buff *skb, int code, int pos)
+{
+ icmpv6_send(skb, ICMPV6_PARAMPROB, code, pos, skb->dev);
+ kfree_skb(skb);
+}
+
+/*
+ * Figure out, may we reply to this packet with icmp error.
+ *
+ * We do not reply, if:
+ * - it was icmp error message.
+ * - it is truncated, so that it is known, that protocol is ICMPV6
+ * (i.e. in the middle of some exthdr)
+ *
+ * --ANK (980726)
+ */
+
+/*
+ * Returns 1 when no ICMP error may be sent in reply to skb: the packet
+ * is shorter than its IPv6 header, or its payload is an ICMPv6 message
+ * whose type is unreadable or is not informational.  Returns 0 otherwise,
+ * including when the extension header chain cannot be walked.
+ */
+static int is_ineligible(struct sk_buff *skb)
+{
+	struct ipv6hdr *ip6h = skb->nh.ipv6h;
+	int off = (u8*)(ip6h+1) - skb->data;
+	int remaining = skb->len - off;
+	__u8 proto = ip6h->nexthdr;
+	u8 type_buf, *type;
+
+	if (remaining < 0)
+		return 1;
+
+	off = ipv6_skip_exthdr(skb, off, &proto, remaining);
+	if (off < 0 || proto != IPPROTO_ICMPV6)
+		return 0;
+
+	type = skb_header_pointer(skb,
+				  off+offsetof(struct icmp6hdr, icmp6_type),
+				  sizeof(type_buf), &type_buf);
+	if (type == NULL)
+		return 1;
+
+	/* only informational ICMPv6 messages may be answered with an error */
+	return (*type & ICMPV6_INFOMSG_MASK) ? 0 : 1;
+}
+
+static int sysctl_icmpv6_time = 1*HZ;
+
+/*
+ * Check the ICMP output rate limit
+ *
+ * Returns non-zero when a message of the given type may be sent on the
+ * route selected by fl, 0 when it must be suppressed.
+ */
+static inline int icmpv6_xrlim_allow(struct sock *sk, int type,
+				     struct flowi *fl)
+{
+	struct dst_entry *dst;
+	int allowed = 0;
+
+	/*
+	 * Informational messages are not limited, and neither is
+	 * Packet Too Big: limiting it would break pmtu discovery.
+	 */
+	if ((type & ICMPV6_INFOMSG_MASK) || type == ICMPV6_PKT_TOOBIG)
+		return 1;
+
+	/*
+	 * Look up the output route.
+	 * XXX: perhaps the expire for routing entries cloned by
+	 * this lookup should be more aggressive (not longer than timeout).
+	 */
+	dst = ip6_route_output(sk, fl);
+	if (dst->error) {
+		IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
+	} else if (dst->dev != NULL && (dst->dev->flags & IFF_LOOPBACK) != 0) {
+		/* loopback traffic is never throttled */
+		allowed = 1;
+	} else {
+		struct rt6_info *rt = (struct rt6_info *)dst;
+		int interval = sysctl_icmpv6_time;
+
+		/* Give more bandwidth to wider prefixes. */
+		if (rt->rt6i_dst.plen < 128)
+			interval >>= ((128 - rt->rt6i_dst.plen)>>5);
+
+		allowed = xrlim_allow(dst, interval);
+	}
+	dst_release(dst);
+	return allowed;
+}
+
+/*
+ * an inline helper for the "simple" if statement below
+ * checks if parameter problem report is caused by an
+ * unrecognized IPv6 option that has the Option Type
+ * highest-order two bits set to 10
+ */
+
+static __inline__ int opt_unrec(struct sk_buff *skb, __u32 offset)
+{
+	u8 buf, *opt_type;
+
+	/* offset is relative to the network header; make it relative to skb->data */
+	offset += skb->nh.raw - skb->data;
+	opt_type = skb_header_pointer(skb, offset, sizeof(buf), &buf);
+
+	/* an unreadable option byte is treated as a match */
+	return opt_type == NULL || (*opt_type & 0xC0) == 0x80;
+}
+
+/*
+ * Finish the ICMPv6 message queued on sk: copy thdr into the header slot
+ * of the first queued skb, compute the checksum over every queued skb plus
+ * the pseudo-header built from fl and len, and push the frames out.
+ * Always returns 0; does nothing when the write queue is empty.
+ */
+static int icmpv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct icmp6hdr *thdr, int len)
+{
+	struct sk_buff *head = skb_peek(&sk->sk_write_queue);
+	struct icmp6hdr *icmp6h;
+	u32 sum;
+
+	if (head == NULL)
+		return 0;
+
+	icmp6h = (struct icmp6hdr*) head->h.raw;
+	memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
+	icmp6h->icmp6_cksum = 0;
+
+	if (skb_queue_len(&sk->sk_write_queue) == 1) {
+		/* single skb: fold the header into its running checksum */
+		head->csum = csum_partial((char *)icmp6h,
+					  sizeof(struct icmp6hdr), head->csum);
+		sum = head->csum;
+	} else {
+		struct sk_buff *frag;
+
+		/* several skbs: add up their partial sums, then the header */
+		sum = 0;
+		skb_queue_walk(&sk->sk_write_queue, frag) {
+			sum = csum_add(sum, frag->csum);
+		}
+		sum = csum_partial((char *)icmp6h,
+				   sizeof(struct icmp6hdr), sum);
+	}
+
+	icmp6h->icmp6_cksum = csum_ipv6_magic(&fl->fl6_src, &fl->fl6_dst,
+					      len, fl->proto, sum);
+	if (icmp6h->icmp6_cksum == 0)
+		icmp6h->icmp6_cksum = -1;
+	ip6_push_pending_frames(sk);
+	return 0;
+}
+
+struct icmpv6_msg { /* cookie handed to icmpv6_getfrag() through ip6_append_data() */
+ struct sk_buff *skb; /* packet the payload bytes are copied from */
+ int offset; /* offset in skb that icmpv6_getfrag() adds to its own offset argument */
+};
+
+/*
+ * Fragment callback given to ip6_append_data(): copy len bytes of the
+ * packet described by from (a struct icmpv6_msg) into to, and fold their
+ * checksum into skb->csum.  Always returns 0.
+ */
+static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
+{
+	struct icmpv6_msg *msg = from;
+	__u32 part;
+
+	part = skb_copy_and_csum_bits(msg->skb, msg->offset + offset,
+				      to, len, 0);
+	skb->csum = csum_block_add(skb->csum, part, odd);
+	return 0;
+}
+
+/*
+ *	Send an ICMP message in response to a packet in error
+ *
+ *	skb:	offending packet; skb->nh must point at its IPv6 header
+ *	type:	ICMPv6 type of the message to send
+ *	code:	ICMPv6 code of the message to send
+ *	info:	host byte order value for the 32-bit field following the
+ *		checksum (stored through icmp6_pointer)
+ *	dev:	not used by this function; skb->dev is consulted instead
+ *
+ *	The message is silently not sent when the offending packet fails
+ *	the eligibility checks below, when the per-cpu ICMPv6 socket is
+ *	busy, when the rate limiter refuses, or when routing fails.
+ *	skb is not consumed.
+ */
+void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
+		 struct net_device *dev)
+{
+	struct inet6_dev *idev = NULL;
+	struct ipv6hdr *hdr = skb->nh.ipv6h;
+	struct sock *sk = icmpv6_socket->sk;
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct in6_addr *saddr = NULL;
+	struct dst_entry *dst;
+	struct icmp6hdr tmp_hdr;
+	struct flowi fl;
+	struct icmpv6_msg msg;
+	int iif = 0;
+	int addr_type = 0;
+	int len;
+	int hlimit;
+	int err = 0;
+
+	/* the IPv6 header must lie inside the linear part of the skb */
+	if ((u8*)hdr < skb->head || (u8*)(hdr+1) > skb->tail)
+		return;
+
+	/*
+	 *	Make sure we respect the rules
+	 *	i.e. RFC 1885 2.4(e)
+	 *	Rule (e.1) is enforced by not using icmpv6_send
+	 *	in any code that processes icmp errors.
+	 */
+	addr_type = ipv6_addr_type(&hdr->daddr);
+
+	/* reply from the packet's destination if that address is ours */
+	if (ipv6_chk_addr(&hdr->daddr, skb->dev, 0))
+		saddr = &hdr->daddr;
+
+	/*
+	 *	Dest addr check
+	 */
+
+	if ((addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST)) {
+		/*
+		 * Not addressed to us as unicast: only Packet Too Big and
+		 * Parameter Problem for an unrecognized option of type 10
+		 * may be answered.
+		 */
+		if (type != ICMPV6_PKT_TOOBIG &&
+		    !(type == ICMPV6_PARAMPROB &&
+		      code == ICMPV6_UNK_OPTION &&
+		      (opt_unrec(skb, info))))
+			return;
+
+		saddr = NULL;
+	}
+
+	addr_type = ipv6_addr_type(&hdr->saddr);
+
+	/*
+	 *	Source addr check
+	 */
+
+	if (addr_type & IPV6_ADDR_LINKLOCAL)
+		iif = skb->dev->ifindex;
+
+	/*
+	 *	Must not send if we know that source is Anycast also.
+	 *	for now we don't know that.
+	 */
+	if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
+		LIMIT_NETDEBUG(
+			printk(KERN_DEBUG "icmpv6_send: addr_any/mcast source\n"));
+		return;
+	}
+
+	/*
+	 *	Never answer to a ICMP packet.
+	 */
+	if (is_ineligible(skb)) {
+		LIMIT_NETDEBUG(
+			printk(KERN_DEBUG "icmpv6_send: no reply to icmp error\n"));
+		return;
+	}
+
+	memset(&fl, 0, sizeof(fl));
+	fl.proto = IPPROTO_ICMPV6;
+	ipv6_addr_copy(&fl.fl6_dst, &hdr->saddr);
+	if (saddr)
+		ipv6_addr_copy(&fl.fl6_src, saddr);
+	fl.oif = iif;
+	fl.fl_icmp_type = type;
+	fl.fl_icmp_code = code;
+
+	if (icmpv6_xmit_lock())
+		return;
+
+	if (!icmpv6_xrlim_allow(sk, type, &fl))
+		goto out;
+
+	tmp_hdr.icmp6_type = type;
+	tmp_hdr.icmp6_code = code;
+	tmp_hdr.icmp6_cksum = 0;
+	tmp_hdr.icmp6_pointer = htonl(info);
+
+	if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst))
+		fl.oif = np->mcast_oif;
+
+	err = ip6_dst_lookup(sk, &dst, &fl);
+	if (err)
+		goto out;
+	if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
+		goto out_dst_release;
+
+	/* socket setting first, then the route metric, then the device */
+	if (ipv6_addr_is_multicast(&fl.fl6_dst))
+		hlimit = np->mcast_hops;
+	else
+		hlimit = np->hop_limit;
+	if (hlimit < 0)
+		hlimit = dst_metric(dst, RTAX_HOPLIMIT);
+	if (hlimit < 0)
+		hlimit = ipv6_get_hoplimit(dst->dev);
+
+	msg.skb = skb;
+	msg.offset = skb->nh.raw - skb->data;
+
+	len = skb->len - msg.offset;
+	/*
+	 * Test the sign before clamping: min_t() compares as unsigned int,
+	 * so a negative length would be seen as huge, replaced by the cap
+	 * and never caught by a check placed after it.
+	 */
+	if (len < 0) {
+		LIMIT_NETDEBUG(
+			printk(KERN_DEBUG "icmp: len problem\n"));
+		goto out_dst_release;
+	}
+	/* the whole error message must fit in the minimum IPv6 MTU */
+	len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) -sizeof(struct icmp6hdr));
+
+	idev = in6_dev_get(skb->dev);
+
+	err = ip6_append_data(sk, icmpv6_getfrag, &msg,
+			      len + sizeof(struct icmp6hdr),
+			      sizeof(struct icmp6hdr),
+			      hlimit, NULL, &fl, (struct rt6_info*)dst,
+			      MSG_DONTWAIT);
+	if (err) {
+		ip6_flush_pending_frames(sk);
+		goto out_put;
+	}
+	err = icmpv6_push_pending_frames(sk, &fl, &tmp_hdr, len + sizeof(struct icmp6hdr));
+
+	if (type >= ICMPV6_DEST_UNREACH && type <= ICMPV6_PARAMPROB)
+		ICMP6_INC_STATS_OFFSET_BH(idev, ICMP6_MIB_OUTDESTUNREACHS, type - ICMPV6_DEST_UNREACH);
+	ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS);
+
+out_put:
+	if (likely(idev != NULL))
+		in6_dev_put(idev);
+out_dst_release:
+	dst_release(dst);
+out:
+	icmpv6_xmit_unlock();
+}
+
+static void icmpv6_echo_reply(struct sk_buff *skb)
+{ /*
+ * Answer an Echo Request: send back to the request's source address
+ * an ICMPv6 message whose header is a copy of the request's header
+ * with the type changed to ICMPV6_ECHO_REPLY, followed by skb->len
+ * bytes copied from skb starting at offset 0.
+ */
+ struct sock *sk = icmpv6_socket->sk;
+ struct inet6_dev *idev;
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct in6_addr *saddr = NULL;
+ struct icmp6hdr *icmph = (struct icmp6hdr *) skb->h.raw;
+ struct icmp6hdr tmp_hdr;
+ struct flowi fl;
+ struct icmpv6_msg msg;
+ struct dst_entry *dst;
+ int err = 0;
+ int hlimit;
+
+ saddr = &skb->nh.ipv6h->daddr; /* reply from the address the request was sent to ... */
+
+ if (!ipv6_unicast_destination(skb))
+ saddr = NULL; /* ... unless that was not a unicast destination; then fl6_src stays zeroed */
+
+ memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
+ tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;
+
+ memset(&fl, 0, sizeof(fl));
+ fl.proto = IPPROTO_ICMPV6;
+ ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
+ if (saddr)
+ ipv6_addr_copy(&fl.fl6_src, saddr);
+ fl.oif = skb->dev->ifindex;
+ fl.fl_icmp_type = ICMPV6_ECHO_REPLY;
+
+ if (icmpv6_xmit_lock())
+ return; /* per-cpu socket lock is busy: the reply is dropped */
+
+ if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst))
+ fl.oif = np->mcast_oif;
+
+ err = ip6_dst_lookup(sk, &dst, &fl);
+ if (err)
+ goto out;
+ if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
+ goto out_dst_release;
+
+ if (ipv6_addr_is_multicast(&fl.fl6_dst))
+ hlimit = np->mcast_hops;
+ else
+ hlimit = np->hop_limit;
+ if (hlimit < 0) /* while still negative, fall back to the route metric, then the device */
+ hlimit = dst_metric(dst, RTAX_HOPLIMIT);
+ if (hlimit < 0)
+ hlimit = ipv6_get_hoplimit(dst->dev);
+
+ idev = in6_dev_get(skb->dev);
+
+ msg.skb = skb;
+ msg.offset = 0;
+
+ err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr),
+ sizeof(struct icmp6hdr), hlimit, NULL, &fl,
+ (struct rt6_info*)dst, MSG_DONTWAIT);
+
+ if (err) {
+ ip6_flush_pending_frames(sk);
+ goto out_put;
+ }
+ err = icmpv6_push_pending_frames(sk, &fl, &tmp_hdr, skb->len + sizeof(struct icmp6hdr));
+
+ ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTECHOREPLIES);
+ ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS);
+
+out_put:
+ if (likely(idev != NULL))
+ in6_dev_put(idev);
+out_dst_release:
+ dst_release(dst);
+out:
+ icmpv6_xmit_unlock();
+}
+
+static void icmpv6_notify(struct sk_buff *skb, int type, int code, u32 info)
+{ /*
+ * Hand a received ICMPv6 error to whoever sent the packet quoted in
+ * it.  skb->data is read as the quoted IPv6 header; its extension
+ * headers are skipped, then the err_handler registered for the
+ * resulting next-header value and rawv6_err() for every matching raw
+ * socket are called with the offset of the inner protocol header.
+ * Returns silently if the quoted packet is too short.
+ */
+ struct in6_addr *saddr, *daddr;
+ struct inet6_protocol *ipprot;
+ struct sock *sk;
+ int inner_offset;
+ int hash;
+ u8 nexthdr;
+
+ if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
+ return;
+
+ nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
+ if (ipv6_ext_hdr(nexthdr)) {
+ /* now skip over extension headers */
+ inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr, skb->len - sizeof(struct ipv6hdr));
+ if (inner_offset<0)
+ return;
+ } else {
+ inner_offset = sizeof(struct ipv6hdr);
+ }
+
+ /* Make sure the quoted headers plus 8 bytes of the inner protocol header are present. */
+ if (!pskb_may_pull(skb, inner_offset+8))
+ return;
+
+ saddr = &skb->nh.ipv6h->saddr;
+ daddr = &skb->nh.ipv6h->daddr;
+
+ /* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
+ Without this we will not able f.e. to make source routed
+ pmtu discovery.
+ Corresponding argument (opt) to notifiers is already added.
+ --ANK (980726)
+ */
+
+ hash = nexthdr & (MAX_INET_PROTOS - 1); /* index used for both inet6_protos[] and raw_v6_htable[] */
+
+ rcu_read_lock();
+ ipprot = rcu_dereference(inet6_protos[hash]);
+ if (ipprot && ipprot->err_handler)
+ ipprot->err_handler(skb, NULL, type, code, inner_offset, info);
+ rcu_read_unlock();
+
+ read_lock(&raw_v6_lock);
+ if ((sk = sk_head(&raw_v6_htable[hash])) != NULL) {
+ while((sk = __raw_v6_lookup(sk, nexthdr, daddr, saddr))) {
+ rawv6_err(sk, skb, NULL, type, code, inner_offset, info);
+ sk = sk_next(sk); /* resume the search after the socket just notified */
+ }
+ }
+ read_unlock(&raw_v6_lock);
+}
+
+/*
+ * Handle icmp messages
+ *
+ * Receive handler installed through icmpv6_protocol.  Verifies the
+ * ICMPv6 checksum, updates the input counters, dispatches on the
+ * message type and frees the skb.  Always returns 0; packets that
+ * fail the checksum or are too short are counted in
+ * ICMP6_MIB_INERRORS.
+ */
+
+static int icmpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
+{
+ struct sk_buff *skb = *pskb;
+ struct net_device *dev = skb->dev;
+ struct inet6_dev *idev = __in6_dev_get(dev);
+ struct in6_addr *saddr, *daddr;
+ struct ipv6hdr *orig_hdr;
+ struct icmp6hdr *hdr;
+ int type;
+
+ ICMP6_INC_STATS_BH(idev, ICMP6_MIB_INMSGS);
+
+ saddr = &skb->nh.ipv6h->saddr;
+ daddr = &skb->nh.ipv6h->daddr;
+
+ /* Perform checksum. */
+ if (skb->ip_summed == CHECKSUM_HW) { /* a sum is already in skb->csum: check it against the pseudo-header */
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ if (csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6,
+ skb->csum)) {
+ LIMIT_NETDEBUG(
+ printk(KERN_DEBUG "ICMPv6 hw checksum failed\n"));
+ skb->ip_summed = CHECKSUM_NONE; /* not trusted: redo the check in software below */
+ }
+ }
+ if (skb->ip_summed == CHECKSUM_NONE) {
+ if (csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6,
+ skb_checksum(skb, 0, skb->len, 0))) {
+ LIMIT_NETDEBUG(
+ printk(KERN_DEBUG "ICMPv6 checksum failed [%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x > %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x]\n",
+ NIP6(*saddr), NIP6(*daddr)));
+ goto discard_it;
+ }
+ }
+
+ if (!pskb_pull(skb, sizeof(struct icmp6hdr))) /* from here on skb->data is what follows the ICMPv6 header */
+ goto discard_it;
+
+ hdr = (struct icmp6hdr *) skb->h.raw;
+
+ type = hdr->icmp6_type;
+
+ if (type >= ICMPV6_DEST_UNREACH && type <= ICMPV6_PARAMPROB)
+ ICMP6_INC_STATS_OFFSET_BH(idev, ICMP6_MIB_INDESTUNREACHS, type - ICMPV6_DEST_UNREACH);
+ else if (type >= ICMPV6_ECHO_REQUEST && type <= NDISC_REDIRECT)
+ ICMP6_INC_STATS_OFFSET_BH(idev, ICMP6_MIB_INECHOS, type - ICMPV6_ECHO_REQUEST);
+
+ switch (type) {
+ case ICMPV6_ECHO_REQUEST:
+ icmpv6_echo_reply(skb);
+ break;
+
+ case ICMPV6_ECHO_REPLY:
+ /* we couldn't care less */
+ break;
+
+ case ICMPV6_PKT_TOOBIG:
+ /* BUGGG_FUTURE: if packet contains rthdr, we cannot update
+ standard destination cache. Seems, only "advanced"
+ destination cache will allow to solve this problem
+ --ANK (980726)
+ */
+ if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
+ goto discard_it;
+ hdr = (struct icmp6hdr *) skb->h.raw; /* re-read after pskb_may_pull(); NOTE(review): presumably because it may relocate the data -- confirm */
+ orig_hdr = (struct ipv6hdr *) (hdr + 1); /* IPv6 header of the packet quoted in the error */
+ rt6_pmtu_discovery(&orig_hdr->daddr, &orig_hdr->saddr, dev,
+ ntohl(hdr->icmp6_mtu));
+
+ /*
+ * Drop through to notify
+ */
+
+ case ICMPV6_DEST_UNREACH:
+ case ICMPV6_TIME_EXCEED:
+ case ICMPV6_PARAMPROB:
+ icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
+ break;
+
+ case NDISC_ROUTER_SOLICITATION:
+ case NDISC_ROUTER_ADVERTISEMENT:
+ case NDISC_NEIGHBOUR_SOLICITATION:
+ case NDISC_NEIGHBOUR_ADVERTISEMENT:
+ case NDISC_REDIRECT:
+ ndisc_rcv(skb);
+ break;
+
+ case ICMPV6_MGM_QUERY:
+ igmp6_event_query(skb);
+ break;
+
+ case ICMPV6_MGM_REPORT:
+ igmp6_event_report(skb);
+ break;
+
+ case ICMPV6_MGM_REDUCTION:
+ case ICMPV6_NI_QUERY:
+ case ICMPV6_NI_REPLY:
+ case ICMPV6_MLD2_REPORT:
+ case ICMPV6_DHAAD_REQUEST:
+ case ICMPV6_DHAAD_REPLY:
+ case ICMPV6_MOBILE_PREFIX_SOL:
+ case ICMPV6_MOBILE_PREFIX_ADV:
+ break; /* known types that need no processing here */
+
+ default:
+ LIMIT_NETDEBUG(
+ printk(KERN_DEBUG "icmpv6: msg of unknown type\n"));
+
+ /* informational */
+ if (type & ICMPV6_INFOMSG_MASK)
+ break;
+
+ /*
+ * error of unknown type.
+ * must pass to upper level
+ */
+
+ icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
+ };
+ kfree_skb(skb);
+ return 0;
+
+discard_it:
+ ICMP6_INC_STATS_BH(idev, ICMP6_MIB_INERRORS);
+ kfree_skb(skb);
+ return 0;
+}
+
+int __init icmpv6_init(struct net_proto_family *ops)
+{ /*
+ * Create one raw ICMPv6 control socket for every possible CPU and
+ * register icmpv6_protocol for IPPROTO_ICMPV6.  Returns 0 on success;
+ * on failure the sockets created so far are released and a negative
+ * errno is returned.  The ops argument is not used.
+ */
+ struct sock *sk;
+ int err, i, j;
+
+ for (i = 0; i < NR_CPUS; i++) {
+ if (!cpu_possible(i))
+ continue;
+
+ err = sock_create_kern(PF_INET6, SOCK_RAW, IPPROTO_ICMPV6,
+ &per_cpu(__icmpv6_socket, i));
+ if (err < 0) {
+ printk(KERN_ERR
+ "Failed to initialize the ICMP6 control socket "
+ "(err %d).\n",
+ err);
+ goto fail;
+ }
+
+ sk = per_cpu(__icmpv6_socket, i)->sk;
+ sk->sk_allocation = GFP_ATOMIC;
+
+ /* Enough space for 2 64K ICMP packets, including
+ * sk_buff struct overhead.
+ */
+ sk->sk_sndbuf =
+ (2 * ((64 * 1024) + sizeof(struct sk_buff)));
+
+ sk->sk_prot->unhash(sk);
+ }
+
+
+ if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0) {
+ printk(KERN_ERR "Failed to register ICMP6 protocol\n");
+ err = -EAGAIN;
+ goto fail;
+ }
+
+ return 0;
+
+ fail:
+ for (j = 0; j < i; j++) { /* i is the CPU that failed, or NR_CPUS if registration failed: undo everything before it */
+ if (!cpu_possible(j))
+ continue;
+ sock_release(per_cpu(__icmpv6_socket, j));
+ }
+
+ return err;
+}
+
+/*
+ * Release the ICMPv6 socket of every possible CPU, then unregister
+ * icmpv6_protocol.
+ */
+void icmpv6_cleanup(void)
+{
+	int cpu;
+
+	for (cpu = 0; cpu < NR_CPUS; cpu++) {
+		if (cpu_possible(cpu))
+			sock_release(per_cpu(__icmpv6_socket, cpu));
+	}
+
+	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
+}
+
+/*
+ * errno and "fatal" flag for each ICMPV6_DEST_UNREACH code;
+ * icmpv6_err_convert() indexes this table by code.
+ */
+static struct icmp6_err {
+	int err;
+	int fatal;
+} tab_unreach[] = {
+	{ .err = ENETUNREACH,	.fatal = 0 },	/* NOROUTE */
+	{ .err = EACCES,	.fatal = 1 },	/* ADM_PROHIBITED */
+	{ .err = EHOSTUNREACH,	.fatal = 0 },	/* Was NOT_NEIGHBOUR, now reserved */
+	{ .err = EHOSTUNREACH,	.fatal = 0 },	/* ADDR_UNREACH */
+	{ .err = ECONNREFUSED,	.fatal = 1 },	/* PORT_UNREACH */
+};
+
+/*
+ * Translate a received ICMPv6 error into an errno.
+ *
+ * type, code:	type and code of the ICMPv6 error
+ * err:		receives the (positive) errno; EPROTO for unknown types
+ *
+ * Returns non-zero when the error is to be treated as fatal.
+ */
+int icmpv6_err_convert(int type, int code, int *err)
+{
+	int fatal = 0;
+
+	*err = EPROTO;
+
+	switch (type) {
+	case ICMPV6_DEST_UNREACH:
+		fatal = 1;
+		/*
+		 * code is a signed int: bound it on both sides, a negative
+		 * value would index before the start of tab_unreach[].
+		 * Codes outside the table keep EPROTO and stay fatal.
+		 */
+		if (code >= 0 && code <= ICMPV6_PORT_UNREACH) {
+			*err  = tab_unreach[code].err;
+			fatal = tab_unreach[code].fatal;
+		}
+		break;
+
+	case ICMPV6_PKT_TOOBIG:
+		*err = EMSGSIZE;
+		break;
+
+	case ICMPV6_PARAMPROB:
+		*err = EPROTO;
+		fatal = 1;
+		break;
+
+	case ICMPV6_TIME_EXCEED:
+		*err = EHOSTUNREACH;
+		break;
+	};
+
+	return fatal;
+}
+
+#ifdef CONFIG_SYSCTL
+ctl_table ipv6_icmp_table[] = {
+ {
+ .ctl_name = NET_IPV6_ICMP_RATELIMIT,
+ .procname = "ratelimit",
+ .data = &sysctl_icmpv6_time, /* the interval icmpv6_xrlim_allow() starts from */
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ },
+ { .ctl_name = 0 }, /* NOTE(review): presumably the table terminator -- confirm against the sysctl core */
+};
+#endif
+
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
new file mode 100644
index 000000000000..405740b75abb
--- /dev/null
+++ b/net/ipv6/ip6_fib.c
@@ -0,0 +1,1225 @@
+/*
+ * Linux INET6 implementation
+ * Forwarding Information Database
+ *
+ * Authors:
+ * Pedro Roque <roque@di.fc.ul.pt>
+ *
+ * $Id: ip6_fib.c,v 1.25 2001/10/31 21:55:55 davem Exp $
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+/*
+ * Changes:
+ * Yuji SEKIYA @USAGI: Support default route on router node;
+ * remove ip6_null_entry from the top of
+ * routing table.
+ */
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/net.h>
+#include <linux/route.h>
+#include <linux/netdevice.h>
+#include <linux/in6.h>
+#include <linux/init.h>
+
+#ifdef CONFIG_PROC_FS
+#include <linux/proc_fs.h>
+#endif
+
+#include <net/ipv6.h>
+#include <net/ndisc.h>
+#include <net/addrconf.h>
+
+#include <net/ip6_fib.h>
+#include <net/ip6_route.h>
+
+#define RT6_DEBUG 2
+
+#if RT6_DEBUG >= 3
+#define RT6_TRACE(x...) printk(KERN_DEBUG x)
+#else
+#define RT6_TRACE(x...) do { ; } while (0)
+#endif
+
+struct rt6_statistics rt6_stats;
+
+static kmem_cache_t * fib6_node_kmem;
+
+enum fib_walk_state_t
+{
+#ifdef CONFIG_IPV6_SUBTREES
+ FWS_S, /* exists only with source subtrees, where it is the initial state (FWS_INIT) */
+#endif
+ FWS_L, /* initial state (FWS_INIT) when subtrees are not configured */
+ FWS_R,
+ FWS_C,
+ FWS_U /* NOTE(review): L/R/C/U presumably stand for left, right, current, up -- confirm against the tree walker, which is outside this chunk */
+};
+
+struct fib6_cleaner_t
+{
+ struct fib6_walker_t w; /* embedded walker */
+ int (*func)(struct rt6_info *, void *arg); /* callback taking a route and an opaque argument */
+ void *arg; /* NOTE(review): presumably passed to func as its second argument -- the users are outside this chunk */
+};
+
+DEFINE_RWLOCK(fib6_walker_lock);
+
+
+#ifdef CONFIG_IPV6_SUBTREES
+#define FWS_INIT FWS_S
+#define SUBTREE(fn) ((fn)->subtree)
+#else
+#define FWS_INIT FWS_L
+#define SUBTREE(fn) NULL
+#endif
+
+static void fib6_prune_clones(struct fib6_node *fn, struct rt6_info *rt);
+static struct fib6_node * fib6_repair_tree(struct fib6_node *fn);
+
+/*
+ * A routing update causes an increase of the serial number on the
+ * affected subtree. This allows for cached routes to be asynchronously
+ * tested when modifications are made to the destination cache as a
+ * result of redirects, path MTU changes, etc.
+ */
+
+static __u32 rt_sernum;
+
+static struct timer_list ip6_fib_timer = TIMER_INITIALIZER(fib6_run_gc, 0, 0);
+
+struct fib6_walker_t fib6_walker_list = {
+ .prev = &fib6_walker_list, /* head linked to itself: FOR_WALKERS() iterates zero times until a walker is added */
+ .next = &fib6_walker_list,
+};
+
+#define FOR_WALKERS(w) for ((w)=fib6_walker_list.next; (w) != &fib6_walker_list; (w)=(w)->next)
+
+/*
+ * Advance rt_sernum and return the new value.  Serial numbers stay
+ * strictly positive when read as signed 32-bit: on overflow the counter
+ * restarts at 1.
+ */
+static __inline__ u32 fib6_new_sernum(void)
+{
+	u32 next = ++rt_sernum;
+
+	if ((__s32)next > 0)
+		return next;
+
+	rt_sernum = 1;
+	return 1;
+}
+
+/*
+ * Auxiliary address test functions for the radix tree.
+ *
+ * These assume a 32bit processor (although it will work on
+ * 64bit processors)
+ */
+
+/*
+ *	test bit
+ *
+ *	token points to an address stored as big-endian 32-bit words;
+ *	fn_bit counts bits from the most significant bit of the first
+ *	word.  Returns non-zero when that bit is set.
+ */
+
+static __inline__ int addr_bit_set(void *token, int fn_bit)
+{
+	__u32 *addr = token;
+
+	/* 1U: shifting a signed 1 into bit 31 is undefined behaviour */
+	return htonl(1U << ((~fn_bit)&0x1F)) & addr[fn_bit>>5];
+}
+
+/*
+ *	find the first different bit between two addresses
+ *	length of address must be a multiple of 32bits
+ *
+ *	addrlen is in bytes.  Returns the index of the first differing
+ *	bit, counted from the most significant bit of the first word,
+ *	or addrlen * 8 when the two addresses are equal.
+ */
+
+static __inline__ int addr_diff(void *token1, void *token2, int addrlen)
+{
+	__u32 *a1 = token1;
+	__u32 *a2 = token2;
+	int i;
+
+	/* bytes -> 32-bit words */
+	addrlen >>= 2;
+
+	for (i = 0; i < addrlen; i++) {
+		__u32 xb;
+
+		xb = a1[i] ^ a2[i];
+
+		if (xb) {
+			int j = 31;
+
+			xb = ntohl(xb);
+
+			/* 1U: j starts at 31 and 1 << 31 is undefined for int */
+			while ((xb & (1U << j)) == 0)
+				j--;
+
+			return (i * 32 + 31 - j);
+		}
+	}
+
+	/*
+	 *	we should *never* get to this point since that
+	 *	would mean the addrs are equal
+	 *
+	 *	However, we do get to it 8) And exactly, when
+	 *	addresses are equal 8)
+	 *
+	 *	ip route add 1111::/128 via ...
+	 *	ip route add 1111::/64 via ...
+	 *	and we are here.
+	 *
+	 *	Ideally, this function should stop comparison
+	 *	at prefix length. It does not, but it is still OK,
+	 *	if returned value is greater than prefix length.
+	 *					--ANK (980803)
+	 */
+
+	return addrlen<<5;
+}
+
+/*
+ * Allocate a zero-filled fib6_node from fib6_node_kmem with SLAB_ATOMIC.
+ * Returns NULL when the allocation fails.
+ */
+static __inline__ struct fib6_node * node_alloc(void)
+{
+	struct fib6_node *node = kmem_cache_alloc(fib6_node_kmem, SLAB_ATOMIC);
+
+	if (node == NULL)
+		return NULL;
+
+	memset(node, 0, sizeof(struct fib6_node));
+	return node;
+}
+
+static __inline__ void node_free(struct fib6_node * fn)
+{
+ kmem_cache_free(fib6_node_kmem, fn); /* back to the cache node_alloc() allocates from */
+}
+
+static __inline__ void rt6_release(struct rt6_info *rt)
+{
+ if (atomic_dec_and_test(&rt->rt6i_ref)) /* drop one tree reference; the last one frees the route */
+ dst_free(&rt->u.dst);
+}
+
+
+/*
+ * Routing Table
+ *
+ * return the appropriate node for a routing tree "add" operation
+ * by either creating and inserting or by returning an existing
+ * node.
+ *
+ * root: tree (or subtree) to insert into
+ * addr: address of the prefix being added
+ * addrlen: length of addr in bytes
+ * plen: prefix length in bits
+ * offset: offset inside struct rt6_info of the rt6key this tree is
+ * keyed on (callers pass rt6i_dst or rt6i_src)
+ *
+ * Returns the node for addr/plen, or NULL if a node could not be
+ * allocated. Every node visited on the way down gets a new serial
+ * number.
+ */
+
+static struct fib6_node * fib6_add_1(struct fib6_node *root, void *addr,
+ int addrlen, int plen,
+ int offset)
+{
+ struct fib6_node *fn, *in, *ln;
+ struct fib6_node *pn = NULL;
+ struct rt6key *key;
+ int bit;
+ int dir = 0;
+ __u32 sernum = fib6_new_sernum();
+
+ RT6_TRACE("fib6_add_1\n");
+
+ /* insert node in tree */
+
+ fn = root;
+
+ do {
+ key = (struct rt6key *)((u8 *)fn->leaf + offset); /* key of this node's leaf route, selected by offset */
+
+ /*
+ * Prefix match
+ */
+ if (plen < fn->fn_bit ||
+ !ipv6_prefix_equal(&key->addr, addr, fn->fn_bit))
+ goto insert_above;
+
+ /*
+ * Exact match ?
+ */
+
+ if (plen == fn->fn_bit) {
+ /* clean up an intermediate node */
+ if ((fn->fn_flags & RTN_RTINFO) == 0) {
+ rt6_release(fn->leaf);
+ fn->leaf = NULL;
+ }
+
+ fn->fn_sernum = sernum;
+
+ return fn;
+ }
+
+ /*
+ * We have more bits to go
+ */
+
+ /* Try to walk down on tree. */
+ fn->fn_sernum = sernum;
+ dir = addr_bit_set(addr, fn->fn_bit); /* the address bit at position fn_bit picks the branch */
+ pn = fn;
+ fn = dir ? fn->right: fn->left;
+ } while (fn);
+
+ /*
+ * We walked to the bottom of tree.
+ * Create new leaf node without children.
+ */
+
+ ln = node_alloc();
+
+ if (ln == NULL)
+ return NULL;
+ ln->fn_bit = plen;
+
+ ln->parent = pn;
+ ln->fn_sernum = sernum;
+
+ if (dir)
+ pn->right = ln;
+ else
+ pn->left = ln;
+
+ return ln;
+
+
+insert_above:
+ /*
+ * split since we don't have a common prefix anymore or
+ * we have a less significant route.
+ * we've to insert an intermediate node on the list
+ * this new node will point to the one we need to create
+ * and the current
+ */
+
+ pn = fn->parent; /* dir still holds the branch of pn that leads to fn */
+
+ /* find 1st bit in difference between the 2 addrs.
+
+ See comment in addr_diff: bit may be an invalid value,
+ but if it is >= plen, the value is ignored in any case.
+ */
+
+ bit = addr_diff(addr, &key->addr, addrlen);
+
+ /*
+ * (intermediate)[in]
+ * / \
+ * (new leaf node)[ln] (old node)[fn]
+ */
+ if (plen > bit) {
+ in = node_alloc();
+ ln = node_alloc();
+
+ if (in == NULL || ln == NULL) {
+ if (in)
+ node_free(in);
+ if (ln)
+ node_free(ln);
+ return NULL;
+ }
+
+ /*
+ * new intermediate node.
+ * RTN_RTINFO will
+ * be off since that an address that chooses one of
+ * the branches would not match less specific routes
+ * in the other branch
+ */
+
+ in->fn_bit = bit;
+
+ in->parent = pn;
+ in->leaf = fn->leaf;
+ atomic_inc(&in->leaf->rt6i_ref); /* the intermediate node shares fn's leaf, so it takes its own reference */
+
+ in->fn_sernum = sernum;
+
+ /* update parent pointer */
+ if (dir)
+ pn->right = in;
+ else
+ pn->left = in;
+
+ ln->fn_bit = plen;
+
+ ln->parent = in;
+ fn->parent = in;
+
+ ln->fn_sernum = sernum;
+
+ if (addr_bit_set(addr, bit)) {
+ in->right = ln;
+ in->left = fn;
+ } else {
+ in->left = ln;
+ in->right = fn;
+ }
+ } else { /* plen <= bit */
+
+ /*
+ * (new leaf node)[ln]
+ * / \
+ * (old node)[fn] NULL
+ */
+
+ ln = node_alloc();
+
+ if (ln == NULL)
+ return NULL;
+
+ ln->fn_bit = plen;
+
+ ln->parent = pn;
+
+ ln->fn_sernum = sernum;
+
+ if (dir)
+ pn->right = ln;
+ else
+ pn->left = ln;
+
+ if (addr_bit_set(&key->addr, plen)) /* the old node's own bit at plen decides which child it becomes */
+ ln->right = fn;
+ else
+ ln->left = fn;
+
+ fn->parent = ln;
+ }
+ return ln;
+}
+
+/*
+ * Insert routing information in a node.
+ *
+ * Links rt into fn's list of routes, which is ordered by ascending
+ * rt6i_metric, takes a reference on rt and sends an RTM_NEWROUTE
+ * notification. Returns 0 on success, or -EEXIST when the node
+ * already holds a route with the same metric, device, idev and
+ * gateway.
+ */
+
+static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
+ struct nlmsghdr *nlh)
+{
+ struct rt6_info *iter = NULL;
+ struct rt6_info **ins;
+
+ ins = &fn->leaf; /* the link that will be made to point at rt */
+
+ if (fn->fn_flags&RTN_TL_ROOT &&
+ fn->leaf == &ip6_null_entry &&
+ !(rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ){ /* top-level root still holding ip6_null_entry: rt replaces it */
+ fn->leaf = rt;
+ rt->u.next = NULL;
+ goto out;
+ }
+
+ for (iter = fn->leaf; iter; iter=iter->u.next) {
+ /*
+ * Search for duplicates
+ */
+
+ if (iter->rt6i_metric == rt->rt6i_metric) {
+ /*
+ * Same priority level
+ */
+
+ if (iter->rt6i_dev == rt->rt6i_dev &&
+ iter->rt6i_idev == rt->rt6i_idev &&
+ ipv6_addr_equal(&iter->rt6i_gateway,
+ &rt->rt6i_gateway)) {
+ if (!(iter->rt6i_flags&RTF_EXPIRES))
+ return -EEXIST;
+ iter->rt6i_expires = rt->rt6i_expires; /* duplicate of an expiring route: refresh its expiry from rt */
+ if (!(rt->rt6i_flags&RTF_EXPIRES)) {
+ iter->rt6i_flags &= ~RTF_EXPIRES;
+ iter->rt6i_expires = 0;
+ }
+ return -EEXIST;
+ }
+ }
+
+ if (iter->rt6i_metric > rt->rt6i_metric)
+ break; /* rt goes in front of the first route with a larger metric */
+
+ ins = &iter->u.next;
+ }
+
+ /*
+ * insert node
+ */
+
+out:
+ rt->u.next = iter;
+ *ins = rt;
+ rt->rt6i_node = fn;
+ atomic_inc(&rt->rt6i_ref);
+ inet6_rt_notify(RTM_NEWROUTE, rt, nlh);
+ rt6_stats.fib_rt_entries++;
+
+ if ((fn->fn_flags & RTN_RTINFO) == 0) { /* first route on this node: it now carries route information */
+ rt6_stats.fib_route_nodes++;
+ fn->fn_flags |= RTN_RTINFO;
+ }
+
+ return 0;
+}
+
+/*
+ * Arm the fib garbage-collection timer for rt, but only when the timer
+ * is idle (expires == 0) and rt is an expiring or cached route.
+ */
+static __inline__ void fib6_start_gc(struct rt6_info *rt)
+{
+	if (ip6_fib_timer.expires != 0)
+		return;
+	if (!(rt->rt6i_flags & (RTF_EXPIRES|RTF_CACHE)))
+		return;
+
+	mod_timer(&ip6_fib_timer, jiffies + ip6_rt_gc_interval);
+}
+
+/*
+ * Arm the fib garbage-collection timer if it is idle (expires == 0),
+ * whatever kind of routes are present.
+ */
+void fib6_force_start_gc(void)
+{
+	if (ip6_fib_timer.expires != 0)
+		return;
+
+	mod_timer(&ip6_fib_timer, jiffies + ip6_rt_gc_interval);
+}
+
+/*
+ * Add routing information to the routing tree.
+ * <destination addr>/<source addr>
+ * with source addr info in sub-trees
+ *
+ * Finds or creates the node for rt's destination prefix (and, with
+ * CONFIG_IPV6_SUBTREES and a non-zero source prefix length, the node
+ * for its source prefix in that node's subtree) and links rt into it.
+ * Returns 0 on success. On any failure rt is released with
+ * dst_free() and a negative errno is returned. The _rtattr argument
+ * is not used.
+ */
+
+int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
+{
+ struct fib6_node *fn;
+ int err = -ENOMEM;
+
+ fn = fib6_add_1(root, &rt->rt6i_dst.addr, sizeof(struct in6_addr),
+ rt->rt6i_dst.plen, offsetof(struct rt6_info, rt6i_dst));
+
+ if (fn == NULL)
+ goto out;
+
+#ifdef CONFIG_IPV6_SUBTREES
+ if (rt->rt6i_src.plen) {
+ struct fib6_node *sn;
+
+ if (fn->subtree == NULL) {
+ struct fib6_node *sfn;
+
+ /*
+ * Create subtree.
+ *
+ * fn[main tree]
+ * |
+ * sfn[subtree root]
+ * \
+ * sn[new leaf node]
+ */
+
+ /* Create subtree root node */
+ sfn = node_alloc();
+ if (sfn == NULL)
+ goto st_failure;
+
+ sfn->leaf = &ip6_null_entry;
+ atomic_inc(&ip6_null_entry.rt6i_ref);
+ sfn->fn_flags = RTN_ROOT;
+ sfn->fn_sernum = fib6_new_sernum();
+
+ /* Now add the first leaf node to new subtree */
+
+ sn = fib6_add_1(sfn, &rt->rt6i_src.addr,
+ sizeof(struct in6_addr), rt->rt6i_src.plen,
+ offsetof(struct rt6_info, rt6i_src));
+
+ if (sn == NULL) {
+ /* If it is failed, discard just allocated
+ root, and then (in st_failure) stale node
+ in main tree.
+ */
+ node_free(sfn);
+ goto st_failure;
+ }
+
+ /* Now link new subtree to main tree */
+ sfn->parent = fn;
+ fn->subtree = sfn;
+ if (fn->leaf == NULL) { /* give the main-tree node a leaf to read its key from */
+ fn->leaf = rt;
+ atomic_inc(&rt->rt6i_ref);
+ }
+ } else {
+ sn = fib6_add_1(fn->subtree, &rt->rt6i_src.addr,
+ sizeof(struct in6_addr), rt->rt6i_src.plen,
+ offsetof(struct rt6_info, rt6i_src));
+
+ if (sn == NULL)
+ goto st_failure;
+ }
+
+ fn = sn; /* the route is attached to the subtree node */
+ }
+#endif
+
+ err = fib6_add_rt2node(fn, rt, nlh);
+
+ if (err == 0) {
+ fib6_start_gc(rt);
+ if (!(rt->rt6i_flags&RTF_CACHE))
+ fib6_prune_clones(fn, rt);
+ }
+
+out:
+ if (err)
+ dst_free(&rt->u.dst);
+ return err;
+
+#ifdef CONFIG_IPV6_SUBTREES
+ /* Subtree creation failed, probably main tree node
+ is orphan. If it is, shoot it.
+ */
+st_failure:
+ if (fn && !(fn->fn_flags & (RTN_RTINFO|RTN_ROOT)))
+ fib6_repair_tree(fn);
+ dst_free(&rt->u.dst);
+ return err; /* still -ENOMEM here: err has not been reassigned on this path */
+#endif
+}
+
+/*
+ * Routing tree lookup
+ *
+ */
+
+struct lookup_args { /* fib6_lookup() passes an array of two: [0] for the destination, [1] for the source subtree */
+ int offset; /* key offset on rt6_info */
+ struct in6_addr *addr; /* search key */
+};
+
+static struct fib6_node * fib6_lookup_1(struct fib6_node *root,
+ struct lookup_args *args)
+{ /*
+ * Look up args->addr in the tree below root: descend as far as the
+ * address bits lead, then walk back up through the parents and
+ * return the first node whose leaf key matches the address over the
+ * key's prefix length. Returns NULL if a node flagged RTN_ROOT is
+ * reached without a match.
+ */
+ struct fib6_node *fn;
+ int dir;
+
+ /*
+ * Descend on a tree
+ */
+
+ fn = root;
+
+ for (;;) {
+ struct fib6_node *next;
+
+ dir = addr_bit_set(args->addr, fn->fn_bit);
+
+ next = dir ? fn->right : fn->left;
+
+ if (next) {
+ fn = next;
+ continue;
+ }
+
+ break;
+ }
+
+ while ((fn->fn_flags & RTN_ROOT) == 0) { /* backtrack from the deepest node towards the root */
+#ifdef CONFIG_IPV6_SUBTREES
+ if (fn->subtree) {
+ struct fib6_node *st;
+ struct lookup_args *narg;
+
+ narg = args + 1; /* next array element: the key for the subtree */
+
+ if (narg->addr) {
+ st = fib6_lookup_1(fn->subtree, narg);
+
+ if (st && !(st->fn_flags & RTN_ROOT))
+ return st;
+ }
+ }
+#endif
+
+ if (fn->fn_flags & RTN_RTINFO) { /* only nodes that carry routes can match */
+ struct rt6key *key;
+
+ key = (struct rt6key *) ((u8 *) fn->leaf +
+ args->offset);
+
+ if (ipv6_prefix_equal(&key->addr, args->addr, key->plen))
+ return fn;
+ }
+
+ fn = fn->parent;
+ }
+
+ return NULL;
+}
+
+/*
+ * Look up daddr (and, with CONFIG_IPV6_SUBTREES, saddr in source
+ * subtrees) in the tree below root.  Never returns NULL: when nothing
+ * matches, or the match is flagged RTN_TL_ROOT, root itself is returned.
+ */
+struct fib6_node * fib6_lookup(struct fib6_node *root, struct in6_addr *daddr,
+			       struct in6_addr *saddr)
+{
+	struct lookup_args args[2];
+	struct fib6_node *hit;
+
+	args[0].offset = offsetof(struct rt6_info, rt6i_dst);
+	args[0].addr = daddr;
+
+#ifdef CONFIG_IPV6_SUBTREES
+	args[1].offset = offsetof(struct rt6_info, rt6i_src);
+	args[1].addr = saddr;
+#endif
+
+	hit = fib6_lookup_1(root, args);
+	if (hit != NULL && !(hit->fn_flags & RTN_TL_ROOT))
+		return hit;
+
+	return root;
+}
+
+/*
+ * Get node with specified destination prefix (and source prefix,
+ * if subtrees are used)
+ */
+
+
+/*
+ * Exact-prefix search: return the node below "root" whose bit position
+ * equals plen and whose key (found at "offset" inside the node's leaf)
+ * matches addr, or NULL when the tree holds no such node.
+ */
+static struct fib6_node * fib6_locate_1(struct fib6_node *root,
+					struct in6_addr *addr,
+					int plen, int offset)
+{
+	struct fib6_node *fn = root;
+
+	while (fn != NULL) {
+		struct rt6key *key = (struct rt6key *)((u8 *)fn->leaf + offset);
+
+		/* The node must lie on the path of the requested prefix. */
+		if (fn->fn_bit > plen)
+			return NULL;
+		if (!ipv6_prefix_equal(&key->addr, addr, fn->fn_bit))
+			return NULL;
+
+		/* Same prefix length: this is the node we want. */
+		if (fn->fn_bit == plen)
+			return fn;
+
+		/* More bits to go: pick the child by the next address bit. */
+		fn = addr_bit_set(addr, fn->fn_bit) ? fn->right : fn->left;
+	}
+
+	return NULL;
+}
+
+/*
+ * Locate the node holding exactly the prefix daddr/dst_len and, when
+ * subtrees are configured and src_len is non-zero, the node for
+ * saddr/src_len inside that node's source subtree.
+ *
+ * Returns the node only if it carries route info (RTN_RTINFO),
+ * otherwise NULL.
+ */
+struct fib6_node * fib6_locate(struct fib6_node *root,
+			       struct in6_addr *daddr, int dst_len,
+			       struct in6_addr *saddr, int src_len)
+{
+	struct fib6_node *fn;
+
+	fn = fib6_locate_1(root, daddr, dst_len,
+			   offsetof(struct rt6_info, rt6i_dst));
+
+#ifdef CONFIG_IPV6_SUBTREES
+	if (src_len) {
+		BUG_TRAP(saddr!=NULL);
+		/*
+		 * Step into the source subtree only when the destination
+		 * node was found; a failed destination lookup must stay
+		 * NULL instead of being dereferenced.
+		 */
+		if (fn)
+			fn = fn->subtree;
+		if (fn)
+			fn = fib6_locate_1(fn, saddr, src_len,
+					   offsetof(struct rt6_info, rt6i_src));
+	}
+#endif
+
+	if (fn && fn->fn_flags&RTN_RTINFO)
+		return fn;
+
+	return NULL;
+}
+
+
+/*
+ * Deletion
+ *
+ */
+
+/*
+ * Pick a route that an intermediate node can borrow as its leaf.
+ * A node flagged RTN_ROOT gets ip6_null_entry.  Otherwise the leaf of
+ * the left child is preferred, then the leaf of the right child; if the
+ * node has no children the search moves on to SUBTREE(fn).
+ * Returns NULL when nothing is found.
+ */
+static struct rt6_info * fib6_find_prefix(struct fib6_node *fn)
+{
+	struct fib6_node *cur;
+
+	if (fn->fn_flags & RTN_ROOT)
+		return &ip6_null_entry;
+
+	for (cur = fn; cur != NULL; cur = SUBTREE(cur)) {
+		struct fib6_node *kid = cur->left ? cur->left : cur->right;
+
+		if (kid != NULL)
+			return kid->leaf;
+	}
+
+	return NULL;
+}
+
+/*
+ * Called to trim the tree of intermediate nodes when possible. "fn"
+ * is the node we want to try and remove.
+ *
+ * A node is kept, and given a borrowed leaf from fib6_find_prefix(),
+ * when it has two children, has a subtree, or (with
+ * CONFIG_IPV6_SUBTREES) is a subtree root that still has a child; in
+ * that case fn->parent is returned.
+ *
+ * Otherwise the node is unlinked from its parent (its only child, if
+ * any, takes its place), every registered walker that points at it is
+ * redirected, and the node is freed.  If the parent carries route info
+ * or a subtree it is returned; if not, the parent's borrowed leaf is
+ * released and the loop repeats with the parent as the next candidate.
+ */
+
+static struct fib6_node * fib6_repair_tree(struct fib6_node *fn)
+{
+ int children; /* bit 0: right child present, bit 1: left child present */
+ int nstate; /* walker state to use when a walker is moved up to pn */
+ struct fib6_node *child, *pn;
+ struct fib6_walker_t *w;
+ int iter = 0; /* only used for tracing */
+
+ for (;;) {
+ RT6_TRACE("fixing tree: plen=%d iter=%d\n", fn->fn_bit, iter);
+ iter++;
+
+ BUG_TRAP(!(fn->fn_flags&RTN_RTINFO));
+ BUG_TRAP(!(fn->fn_flags&RTN_TL_ROOT));
+ BUG_TRAP(fn->leaf==NULL);
+
+ children = 0;
+ child = NULL;
+ if (fn->right) child = fn->right, children |= 1;
+ if (fn->left) child = fn->left, children |= 2; /* left wins when both exist */
+
+ if (children == 3 || SUBTREE(fn)
+#ifdef CONFIG_IPV6_SUBTREES
+ /* Subtree root (i.e. fn) may have one child */
+ || (children && fn->fn_flags&RTN_ROOT)
+#endif
+ ) {
+ fn->leaf = fib6_find_prefix(fn); /* node must stay: borrow a leaf */
+#if RT6_DEBUG >= 2
+ if (fn->leaf==NULL) {
+ BUG_TRAP(fn->leaf);
+ fn->leaf = &ip6_null_entry;
+ }
+#endif
+ atomic_inc(&fn->leaf->rt6i_ref); /* the node now holds a reference */
+ return fn->parent;
+ }
+
+ pn = fn->parent;
+#ifdef CONFIG_IPV6_SUBTREES
+ if (SUBTREE(pn) == fn) {
+ BUG_TRAP(fn->fn_flags&RTN_ROOT);
+ SUBTREE(pn) = NULL; /* fn was the parent's subtree root */
+ nstate = FWS_L;
+ } else {
+ BUG_TRAP(!(fn->fn_flags&RTN_ROOT));
+#endif
+ if (pn->right == fn) pn->right = child; /* splice the only child (or NULL) in */
+ else if (pn->left == fn) pn->left = child;
+#if RT6_DEBUG >= 2
+ else BUG_TRAP(0);
+#endif
+ if (child)
+ child->parent = pn;
+ nstate = FWS_R;
+#ifdef CONFIG_IPV6_SUBTREES
+ }
+#endif
+
+ read_lock(&fib6_walker_lock);
+ FOR_WALKERS(w) {
+ if (child == NULL) { /* fn vanished without a replacement */
+ if (w->root == fn) {
+ w->root = w->node = NULL;
+ RT6_TRACE("W %p adjusted by delroot 1\n", w);
+ } else if (w->node == fn) {
+ RT6_TRACE("W %p adjusted by delnode 1, s=%d/%d\n", w, w->state, nstate);
+ w->node = pn;
+ w->state = nstate;
+ }
+ } else { /* child took fn's place */
+ if (w->root == fn) {
+ w->root = child;
+ RT6_TRACE("W %p adjusted by delroot 2\n", w);
+ }
+ if (w->node == fn) {
+ w->node = child;
+ if (children&2) { /* the surviving child was the left one */
+ RT6_TRACE("W %p adjusted by delnode 2, s=%d\n", w, w->state);
+ w->state = w->state>=FWS_R ? FWS_U : FWS_INIT;
+ } else { /* the surviving child was the right one */
+ RT6_TRACE("W %p adjusted by delnode 2, s=%d\n", w, w->state);
+ w->state = w->state>=FWS_C ? FWS_U : FWS_INIT;
+ }
+ }
+ }
+ }
+ read_unlock(&fib6_walker_lock);
+
+ node_free(fn);
+ if (pn->fn_flags&RTN_RTINFO || SUBTREE(pn))
+ return pn; /* parent is still needed: stop here */
+
+ rt6_release(pn->leaf); /* drop the leaf the parent had borrowed */
+ pn->leaf = NULL;
+ fn = pn; /* and try to remove the parent as well */
+ }
+}
+/*
+ * Unlink the route *rtp from the leaf list of node fn.
+ *
+ * Updates the route statistics, moves any walker currently positioned
+ * on the route to the next one, and calls fib6_repair_tree() when fn
+ * is left without routes.  If the route is still referenced, ancestor
+ * nodes that borrowed it as a dummy leaf are given a replacement.
+ * Finally an RTM_DELROUTE notification is sent and the tree's
+ * reference to the route is released.  _rtattr is not used here.
+ */
+static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
+ struct nlmsghdr *nlh, void *_rtattr)
+{
+ struct fib6_walker_t *w;
+ struct rt6_info *rt = *rtp;
+
+ RT6_TRACE("fib6_del_route\n");
+
+ /* Unlink it */
+ *rtp = rt->u.next;
+ rt->rt6i_node = NULL;
+ rt6_stats.fib_rt_entries--;
+ rt6_stats.fib_discarded_routes++;
+
+ /* Adjust walkers */
+ read_lock(&fib6_walker_lock);
+ FOR_WALKERS(w) {
+ if (w->state == FWS_C && w->leaf == rt) {
+ RT6_TRACE("walker %p adjusted by delroute\n", w);
+ w->leaf = rt->u.next;
+ if (w->leaf == NULL)
+ w->state = FWS_U; /* no routes left on this node for the walker */
+ }
+ }
+ read_unlock(&fib6_walker_lock);
+
+ rt->u.next = NULL;
+
+ if (fn->leaf == NULL && fn->fn_flags&RTN_TL_ROOT)
+ fn->leaf = &ip6_null_entry; /* a top-level root never has a NULL leaf */
+
+ /* If it was last route, expunge its radix tree node */
+ if (fn->leaf == NULL) {
+ fn->fn_flags &= ~RTN_RTINFO;
+ rt6_stats.fib_route_nodes--;
+ fn = fib6_repair_tree(fn); /* continue below from the node it returns */
+ }
+
+ if (atomic_read(&rt->rt6i_ref) != 1) {
+ /* This route is used as dummy address holder in some split
+ * nodes. It is not leaked, but it still holds other resources,
+ * which must be released in time. So, scan ascendant nodes
+ * and replace dummy references to this route with references
+ * to still alive ones.
+ */
+ while (fn) {
+ if (!(fn->fn_flags&RTN_RTINFO) && fn->leaf == rt) {
+ fn->leaf = fib6_find_prefix(fn);
+ atomic_inc(&fn->leaf->rt6i_ref);
+ rt6_release(rt);
+ }
+ fn = fn->parent;
+ }
+ /* No more references are possible at this point. */
+ if (atomic_read(&rt->rt6i_ref) != 1) BUG();
+ }
+
+ inet6_rt_notify(RTM_DELROUTE, rt, nlh);
+ rt6_release(rt);
+}
+
+/*
+ * Remove route rt from the tree node it is attached to.
+ *
+ * Returns 0 on success and -ENOENT when the route is not attached to a
+ * node, is ip6_null_entry, or cannot be found on its node's leaf list.
+ * Unless rt is itself a cached clone (RTF_CACHE), the clones below its
+ * node are pruned first.
+ */
+int fib6_del(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
+{
+	struct fib6_node *fn = rt->rt6i_node;
+	struct rt6_info **rtp;
+
+#if RT6_DEBUG >= 2
+	if (rt->u.dst.obsolete>0) {
+		BUG_TRAP(fn==NULL);
+		return -ENOENT;
+	}
+#endif
+	if (fn == NULL || rt == &ip6_null_entry)
+		return -ENOENT;
+
+	BUG_TRAP(fn->fn_flags&RTN_RTINFO);
+
+	if (!(rt->rt6i_flags&RTF_CACHE))
+		fib6_prune_clones(fn, rt);
+
+	/* Find the link that points at rt on the node's leaf list. */
+	rtp = &fn->leaf;
+	while (*rtp != NULL && *rtp != rt)
+		rtp = &(*rtp)->u.next;
+
+	if (*rtp == NULL)
+		return -ENOENT;
+
+	fib6_del_route(fn, rtp, nlh, _rtattr);
+	return 0;
+}
+
+/*
+ * Tree traversal function.
+ *
+ * Certainly, it is not interrupt safe.
+ * However, it is internally re-entrant with respect to itself and fib6_add/fib6_del.
+ * It means, that we can modify tree during walking
+ * and use this function for garbage collection, clone pruning,
+ * cleaning tree when a device goes down etc. etc.
+ *
+ * It guarantees that every node will be traversed,
+ * and that it will be traversed only once.
+ *
+ * Callback function w->func may return:
+ * 0 -> continue walking.
+ * positive value -> walking is suspended (used by tree dumps,
+ * and probably by gc, if it will be split to several slices)
+ * negative value -> terminate walking.
+ *
+ * The function itself returns:
+ * 0 -> walk is complete.
+ * >0 -> walk is incomplete (i.e. suspended)
+ * <0 -> walk is terminated by an error.
+ */
+
+int fib6_walk_continue(struct fib6_walker_t *w)
+{
+ struct fib6_node *fn, *pn;
+
+ for (;;) {
+ fn = w->node;
+ if (fn == NULL)
+ return 0; /* no current node: the walk is over */
+
+ if (w->prune && fn != w->root &&
+ fn->fn_flags&RTN_RTINFO && w->state < FWS_C) { /* pruning walk: do not descend below a non-root node with routes */
+ w->state = FWS_C;
+ w->leaf = fn->leaf;
+ }
+ switch (w->state) {
+#ifdef CONFIG_IPV6_SUBTREES
+ case FWS_S:
+ if (SUBTREE(fn)) {
+ w->node = SUBTREE(fn);
+ continue;
+ }
+ w->state = FWS_L; /* fall through */
+#endif
+ case FWS_L:
+ if (fn->left) {
+ w->node = fn->left;
+ w->state = FWS_INIT;
+ continue;
+ }
+ w->state = FWS_R; /* fall through */
+ case FWS_R:
+ if (fn->right) {
+ w->node = fn->right;
+ w->state = FWS_INIT;
+ continue;
+ }
+ w->state = FWS_C;
+ w->leaf = fn->leaf; /* fall through */
+ case FWS_C:
+ if (w->leaf && fn->fn_flags&RTN_RTINFO) {
+ int err = w->func(w);
+ if (err)
+ return err; /* callback asked to suspend (>0) or abort (<0) */
+ continue; /* re-evaluate: the callback may have changed w->leaf or w->state */
+ }
+ w->state = FWS_U; /* fall through */
+ case FWS_U:
+ if (fn == w->root)
+ return 0; /* back at the starting node: walk complete */
+ pn = fn->parent;
+ w->node = pn;
+#ifdef CONFIG_IPV6_SUBTREES
+ if (SUBTREE(pn) == fn) {
+ BUG_TRAP(fn->fn_flags&RTN_ROOT);
+ w->state = FWS_L; /* subtree done: go on with the parent's left child */
+ continue;
+ }
+#endif
+ if (pn->left == fn) {
+ w->state = FWS_R; /* came up from the left: right child is next */
+ continue;
+ }
+ if (pn->right == fn) {
+ w->state = FWS_C; /* came up from the right: parent's own routes are next */
+ w->leaf = w->node->leaf;
+ continue;
+ }
+#if RT6_DEBUG >= 2
+ BUG_TRAP(0);
+#endif
+ }
+ }
+}
+
+/*
+ * Start a walk at w->root.  The walker is linked into the walker list
+ * for the duration of the walk.  It is unlinked again when the walk
+ * completes (0) or fails (<0); when the walk is suspended (>0) it
+ * stays linked.  Returns the result of fib6_walk_continue().
+ */
+int fib6_walk(struct fib6_walker_t *w)
+{
+	int res;
+
+	w->node = w->root;
+	w->state = FWS_INIT;
+
+	fib6_walker_link(w);
+	res = fib6_walk_continue(w);
+	if (res > 0)
+		return res;
+
+	fib6_walker_unlink(w);
+	return res;
+}
+
+/*
+ * Walker callback used by fib6_clean_tree(): run the cleaner's function
+ * on each route of the current node, starting at w->leaf.  A negative
+ * result from the function requests deletion of that route.  After a
+ * successful deletion the callback returns at once with w->leaf left
+ * as fib6_del() adjusted it.  Always returns 0.
+ */
+static int fib6_clean_node(struct fib6_walker_t *w)
+{
+	struct fib6_cleaner_t *c = (struct fib6_cleaner_t*)w;
+	struct rt6_info *rt;
+	int res;
+
+	for (rt = w->leaf; rt != NULL; rt = rt->u.next) {
+		res = c->func(rt, c->arg);
+		if (res >= 0) {
+			BUG_TRAP(res==0);
+			continue;
+		}
+
+		/* Deletion requested: park the walker on this route first. */
+		w->leaf = rt;
+		res = fib6_del(rt, NULL, NULL);
+		if (res == 0)
+			return 0;
+#if RT6_DEBUG >= 2
+		printk(KERN_DEBUG "fib6_clean_node: del failed: rt=%p@%p err=%d\n", rt, rt->rt6i_node, res);
+#endif
+	}
+
+	w->leaf = rt;
+	return 0;
+}
+
+/*
+ * Convenient frontend to tree walker.
+ *
+ * func is called on each route.
+ * It may return -1 -> delete this route.
+ * 0 -> continue walking
+ *
+ * prune==1 -> only immediate children of node (certainly,
+ * ignoring pure split nodes) will be scanned.
+ */
+
+void fib6_clean_tree(struct fib6_node *root,
+		     int (*func)(struct rt6_info *, void *arg),
+		     int prune, void *arg)
+{
+	struct fib6_cleaner_t cleaner;
+
+	/* Per-route callback and its opaque argument. */
+	cleaner.func = func;
+	cleaner.arg = arg;
+
+	/* Walker part: visit the nodes below root through fib6_clean_node. */
+	cleaner.w.root = root;
+	cleaner.w.prune = prune;
+	cleaner.w.func = fib6_clean_node;
+
+	fib6_walk(&cleaner.w);
+}
+
+/*
+ * Cleaner callback: request deletion (-1) of routes flagged RTF_CACHE
+ * and keep (0) everything else.  arg is not used.
+ */
+static int fib6_prune_clone(struct rt6_info *rt, void *arg)
+{
+	if (!(rt->rt6i_flags & RTF_CACHE))
+		return 0;
+
+	RT6_TRACE("pruning clone %p\n", rt);
+	return -1;
+}
+/*
+ * Run a pruning walk (prune == 1) from fn that deletes every route
+ * fib6_prune_clone() selects, i.e. those flagged RTF_CACHE.
+ * NOTE(review): rt is handed over as the callback argument, but
+ * fib6_prune_clone() does not read it.
+ */
+static void fib6_prune_clones(struct fib6_node *fn, struct rt6_info *rt)
+{
+ fib6_clean_tree(fn, fib6_prune_clone, 1, rt);
+}
+
+/*
+ * Garbage collection
+ */
+
+static struct fib6_gc_args
+{
+ int timeout; /* added to a clone's dst.lastuse in fib6_age() to decide whether it has aged out */
+ int more; /* count of routes left that may still need collecting; non-zero re-arms the gc timer */
+} gc_args;
+
+/*
+ * Cleaner callback used by the garbage collector.
+ *
+ * Routes with an expiry time are deleted once it has passed, even if
+ * they are in use.  Cached clones are deleted when they are unused and
+ * have been idle for gc_args.timeout, or when they go through a
+ * gateway whose neighbour entry is not flagged as a router.
+ *
+ * Returns -1 to request deletion and 0 to keep the route.  Routes that
+ * are kept but remain candidates are counted in gc_args.more.
+ * arg is not used.
+ */
+static int fib6_age(struct rt6_info *rt, void *arg)
+{
+	unsigned long now = jiffies;
+
+	if ((rt->rt6i_flags & RTF_EXPIRES) && rt->rt6i_expires) {
+		if (time_after(now, rt->rt6i_expires)) {
+			RT6_TRACE("expiring %p\n", rt);
+			rt6_reset_dflt_pointer(rt);
+			return -1;
+		}
+		gc_args.more++;
+		return 0;
+	}
+
+	if (!(rt->rt6i_flags & RTF_CACHE))
+		return 0;
+
+	if (atomic_read(&rt->u.dst.__refcnt) == 0 &&
+	    time_after_eq(now, rt->u.dst.lastuse + gc_args.timeout)) {
+		RT6_TRACE("aging clone %p\n", rt);
+		return -1;
+	}
+
+	if ((rt->rt6i_flags & RTF_GATEWAY) &&
+	    !(rt->rt6i_nexthop->flags & NTF_ROUTER)) {
+		RT6_TRACE("purging route %p via non-router but gateway\n",
+			  rt);
+		return -1;
+	}
+
+	gc_args.more++;
+	return 0;
+}
+
+static DEFINE_SPINLOCK(fib6_gc_lock); /* serializes runs of fib6_run_gc() and protects gc_args */
+/*
+ * Run one garbage collection pass over ip6_routing_table.
+ *
+ * dummy == ~0UL: the gc lock is only tried; if it is busy the timer is
+ * re-armed one second (HZ) ahead and the function returns.  The
+ * timeout used is ip6_rt_gc_interval.
+ * Any other value: the gc lock is waited for; a non-zero dummy is used
+ * as the clone timeout, zero selects ip6_rt_gc_interval.
+ *
+ * After the pass the timer is re-armed if any candidate routes remain
+ * (gc_args.more != 0) and stopped otherwise.
+ */
+void fib6_run_gc(unsigned long dummy)
+{
+ if (dummy != ~0UL) {
+ spin_lock_bh(&fib6_gc_lock);
+ gc_args.timeout = dummy ? (int)dummy : ip6_rt_gc_interval;
+ } else {
+ local_bh_disable();
+ if (!spin_trylock(&fib6_gc_lock)) {
+ mod_timer(&ip6_fib_timer, jiffies + HZ); /* somebody else is collecting: retry later */
+ local_bh_enable();
+ return;
+ }
+ gc_args.timeout = ip6_rt_gc_interval;
+ }
+ gc_args.more = 0;
+
+
+ write_lock_bh(&rt6_lock);
+ ndisc_dst_gc(&gc_args.more);
+ fib6_clean_tree(&ip6_routing_table, fib6_age, 0, NULL);
+ write_unlock_bh(&rt6_lock);
+
+ if (gc_args.more)
+ mod_timer(&ip6_fib_timer, jiffies + ip6_rt_gc_interval);
+ else {
+ del_timer(&ip6_fib_timer);
+ ip6_fib_timer.expires = 0;
+ }
+ spin_unlock_bh(&fib6_gc_lock); /* pairs with both lock paths above (bh disabled in each) */
+}
+/*
+ * Create the slab cache from which struct fib6_node objects are
+ * allocated.  Panics if the cache cannot be created.
+ */
+void __init fib6_init(void)
+{
+ fib6_node_kmem = kmem_cache_create("fib6_nodes",
+ sizeof(struct fib6_node),
+ 0, SLAB_HWCACHE_ALIGN,
+ NULL, NULL);
+ if (!fib6_node_kmem)
+ panic("cannot create fib6_nodes cache");
+}
+/*
+ * Stop the garbage collection timer and destroy the fib6_node slab cache.
+ */
+void fib6_gc_cleanup(void)
+{
+ del_timer(&ip6_fib_timer);
+ kmem_cache_destroy(fib6_node_kmem);
+}
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
new file mode 100644
index 000000000000..a93f6dc51979
--- /dev/null
+++ b/net/ipv6/ip6_flowlabel.c
@@ -0,0 +1,706 @@
+/*
+ * ip6_flowlabel.c IPv6 flowlabel manager.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ */
+
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/net.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <linux/in6.h>
+#include <linux/route.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+
+#include <net/sock.h>
+
+#include <net/ipv6.h>
+#include <net/ndisc.h>
+#include <net/protocol.h>
+#include <net/ip6_route.h>
+#include <net/addrconf.h>
+#include <net/rawv6.h>
+#include <net/icmp.h>
+#include <net/transp_v6.h>
+
+#include <asm/uaccess.h>
+
+#define FL_MIN_LINGER 6 /* Minimal linger, in seconds. It is set to the
+ 6 sec specified in the old IPv6 RFC. Well, it was a reasonable value.
+ */
+#define FL_MAX_LINGER 60 /* Maximal linger timeout, in seconds */
+
+/* FL hash table */
+
+#define FL_MAX_PER_SOCK 32 /* per-socket label count checked in mem_check() */
+#define FL_MAX_SIZE 4096 /* upper bound on fl_size used in mem_check() */
+#define FL_HASH_MASK 255 /* the table has FL_HASH_MASK+1 buckets */
+#define FL_HASH(l) (ntohl(l)&FL_HASH_MASK)
+
+static atomic_t fl_size = ATOMIC_INIT(0); /* number of labels currently in fl_ht */
+static struct ip6_flowlabel *fl_ht[FL_HASH_MASK+1];
+
+static void ip6_fl_gc(unsigned long dummy);
+static struct timer_list ip6_fl_gc_timer = TIMER_INITIALIZER(ip6_fl_gc, 0, 0);
+
+/* FL hash table lock: it protects only against GC */
+
+static DEFINE_RWLOCK(ip6_fl_lock);
+
+/* Big socket lock: taken in ipv6_flowlabel_opt() around the per-socket
+ * flow label lists (np->ipv6_fl_list) */
+
+static DEFINE_RWLOCK(ip6_sk_fl_lock);
+
+
+/*
+ * Find the flow label "label" in the hash table.  No locking and no
+ * reference counting is done here.  Returns NULL if it is not present.
+ */
+static __inline__ struct ip6_flowlabel * __fl_lookup(u32 label)
+{
+	struct ip6_flowlabel *fl = fl_ht[FL_HASH(label)];
+
+	while (fl != NULL && fl->label != label)
+		fl = fl->next;
+
+	return fl;
+}
+
+/*
+ * Locked lookup: find "label" in the hash table under ip6_fl_lock and
+ * take a user reference on it.  Returns NULL if it is not present.
+ */
+static struct ip6_flowlabel * fl_lookup(u32 label)
+{
+	struct ip6_flowlabel *found;
+
+	read_lock_bh(&ip6_fl_lock);
+	found = __fl_lookup(label);
+	if (found != NULL)
+		atomic_inc(&found->users);
+	read_unlock_bh(&ip6_fl_lock);
+
+	return found;
+}
+
+
+/*
+ * Free a flow label together with its attached options.
+ * A NULL label is accepted and ignored.
+ */
+static void fl_free(struct ip6_flowlabel *fl)
+{
+	if (fl == NULL)
+		return;
+
+	kfree(fl->opt);
+	kfree(fl);
+}
+/*
+ * Drop one user reference on a flow label.
+ *
+ * When the last user goes away the expiry time is pushed out to at
+ * least lastuse + linger, the options of an exclusive label are freed,
+ * and the gc timer is (re)armed if it is idle or set to fire later
+ * than the new expiry.  The label itself is not freed here; that is
+ * left to ip6_fl_gc().
+ */
+static void fl_release(struct ip6_flowlabel *fl)
+{
+ write_lock_bh(&ip6_fl_lock);
+
+ fl->lastuse = jiffies;
+ if (atomic_dec_and_test(&fl->users)) {
+ unsigned long ttd = fl->lastuse + fl->linger; /* earliest time the label may die */
+ if (time_after(ttd, fl->expires))
+ fl->expires = ttd;
+ ttd = fl->expires;
+ if (fl->opt && fl->share == IPV6_FL_S_EXCL) {
+ struct ipv6_txoptions *opt = fl->opt;
+ fl->opt = NULL;
+ kfree(opt);
+ }
+ if (!timer_pending(&ip6_fl_gc_timer) ||
+ time_after(ip6_fl_gc_timer.expires, ttd))
+ mod_timer(&ip6_fl_gc_timer, ttd);
+ }
+
+ write_unlock_bh(&ip6_fl_lock);
+}
+/*
+ * Flow label garbage collector, the handler of ip6_fl_gc_timer.
+ *
+ * Scans every hash bucket and frees labels that have no users and
+ * whose expiry time has been reached.  The timer is re-armed for the
+ * earliest expiry among the remaining unused labels.
+ * NOTE(review): the fallback "now + FL_MAX_LINGER" adds a value that
+ * is used as seconds elsewhere to jiffies without scaling by HZ;
+ * confirm whether that is intended.
+ */
+static void ip6_fl_gc(unsigned long dummy)
+{
+ int i;
+ unsigned long now = jiffies;
+ unsigned long sched = 0; /* next time to run; 0 means nothing scheduled yet */
+
+ write_lock(&ip6_fl_lock);
+
+ for (i=0; i<=FL_HASH_MASK; i++) {
+ struct ip6_flowlabel *fl, **flp;
+ flp = &fl_ht[i];
+ while ((fl=*flp) != NULL) {
+ if (atomic_read(&fl->users) == 0) {
+ unsigned long ttd = fl->lastuse + fl->linger;
+ if (time_after(ttd, fl->expires))
+ fl->expires = ttd;
+ ttd = fl->expires;
+ if (time_after_eq(now, ttd)) {
+ *flp = fl->next; /* unlink; flp stays put for the next entry */
+ fl_free(fl);
+ atomic_dec(&fl_size);
+ continue;
+ }
+ if (!sched || time_before(ttd, sched))
+ sched = ttd;
+ }
+ flp = &fl->next;
+ }
+ }
+ if (!sched && atomic_read(&fl_size))
+ sched = now + FL_MAX_LINGER; /* labels remain, but all of them are in use */
+ if (sched) {
+ ip6_fl_gc_timer.expires = sched;
+ add_timer(&ip6_fl_gc_timer);
+ }
+ write_unlock(&ip6_fl_lock);
+}
+
+/*
+ * Insert fl into the hash table under "label".  A zero label means
+ * "pick one": random labels are drawn until a non-zero one that is not
+ * yet in the table is found.  Always returns 0.
+ */
+static int fl_intern(struct ip6_flowlabel *fl, __u32 label)
+{
+	struct ip6_flowlabel **bucket;
+
+	fl->label = label & IPV6_FLOWLABEL_MASK;
+
+	write_lock_bh(&ip6_fl_lock);
+	if (label == 0) {
+		do {
+			fl->label = htonl(net_random())&IPV6_FLOWLABEL_MASK;
+		} while (fl->label == 0 || __fl_lookup(fl->label) != NULL);
+	}
+
+	fl->lastuse = jiffies;
+
+	/* Push onto the head of the bucket chain. */
+	bucket = &fl_ht[FL_HASH(fl->label)];
+	fl->next = *bucket;
+	*bucket = fl;
+	atomic_inc(&fl_size);
+	write_unlock_bh(&ip6_fl_lock);
+
+	return 0;
+}
+
+
+
+/* Socket flowlabel lists */
+
+/*
+ * Search the socket's own flow label list for "label" (masked with
+ * IPV6_FLOWLABEL_MASK).  On a hit the label's lastuse stamp is
+ * refreshed and a user reference is taken.  Returns NULL if the socket
+ * does not hold the label.
+ */
+struct ip6_flowlabel * fl6_sock_lookup(struct sock *sk, u32 label)
+{
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct ipv6_fl_socklist *entry;
+	u32 wanted = label & IPV6_FLOWLABEL_MASK;
+
+	for (entry = np->ipv6_fl_list; entry != NULL; entry = entry->next) {
+		struct ip6_flowlabel *fl = entry->fl;
+
+		if (fl->label != wanted)
+			continue;
+
+		fl->lastuse = jiffies;
+		atomic_inc(&fl->users);
+		return fl;
+	}
+
+	return NULL;
+}
+
+/*
+ * Empty the socket's flow label list: each entry is unlinked, its
+ * label released and the list element freed.
+ */
+void fl6_free_socklist(struct sock *sk)
+{
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct ipv6_fl_socklist *head;
+
+	for (head = np->ipv6_fl_list; head != NULL; head = np->ipv6_fl_list) {
+		np->ipv6_fl_list = head->next;
+		fl_release(head->fl);
+		kfree(head);
+	}
+}
+
+/* Service routines */
+
+
+/*
+ It is the only difficult place. flowlabel enforces equal headers
+ before and including routing header, however user may supply options
+ following rthdr.
+ */
+
+/*
+ * Combine the options bound to a flow label with options supplied by
+ * the user.  The headers up to and including the routing header come
+ * from the flow label; the headers that follow it (dst1opt, auth) come
+ * from fopt.
+ *
+ * Returns fl->opt when fopt contributes nothing, fopt when the flow
+ * label has no options and fopt has no leading part, and otherwise
+ * opt_space filled with the merged set.
+ */
+struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions * opt_space,
+					 struct ip6_flowlabel * fl,
+					 struct ipv6_txoptions * fopt)
+{
+	struct ipv6_txoptions *fl_opt = fl->opt;
+
+	if (fopt == NULL || fopt->opt_flen == 0)
+		return fl_opt;
+
+	if (fl_opt == NULL) {
+		if (fopt->opt_nflen == 0)
+			return fopt;
+
+		opt_space->hopopt = NULL;
+		opt_space->dst0opt = NULL;
+		opt_space->srcrt = NULL;
+		opt_space->opt_nflen = 0;
+	} else {
+		opt_space->hopopt = fl_opt->hopopt;
+		opt_space->dst0opt = fl_opt->dst0opt;
+		opt_space->srcrt = fl_opt->srcrt;
+		opt_space->opt_nflen = fl_opt->opt_nflen;
+	}
+
+	/* The trailing part always comes from the user's options. */
+	opt_space->dst1opt = fopt->dst1opt;
+	opt_space->auth = fopt->auth;
+	opt_space->opt_flen = fopt->opt_flen;
+
+	return opt_space;
+}
+
+/*
+ * Validate a linger/expiry value given in seconds and convert it to
+ * jiffies.  Values below FL_MIN_LINGER are raised to it.  Values above
+ * FL_MAX_LINGER are only accepted with CAP_NET_ADMIN; without it 0 is
+ * returned to signal rejection.
+ */
+static unsigned long check_linger(unsigned long ttl)
+{
+	if (ttl < FL_MIN_LINGER)
+		return FL_MIN_LINGER*HZ;
+
+	if (ttl <= FL_MAX_LINGER || capable(CAP_NET_ADMIN))
+		return ttl*HZ;
+
+	return 0;
+}
+
+/*
+ * Refresh a flow label's lifetime.  linger and expires are given in
+ * seconds and checked by check_linger(); -EPERM is returned if either
+ * is rejected.  The label's linger and expiry time are only ever
+ * extended, never shortened.  Returns 0 on success.
+ */
+static int fl6_renew(struct ip6_flowlabel *fl, unsigned long linger, unsigned long expires)
+{
+	unsigned long lin;
+	unsigned long exp;
+
+	lin = check_linger(linger);
+	if (lin == 0)
+		return -EPERM;
+
+	exp = check_linger(expires);
+	if (exp == 0)
+		return -EPERM;
+
+	fl->lastuse = jiffies;
+
+	if (time_before(fl->linger, lin))
+		fl->linger = lin;
+
+	/* The label must live at least as long as its linger time. */
+	if (time_before(exp, fl->linger))
+		exp = fl->linger;
+
+	if (time_before(fl->expires, fl->lastuse + exp))
+		fl->expires = fl->lastuse + exp;
+
+	return 0;
+}
+
+/*
+ * Build a new, not yet hashed flow label from the user's request.
+ *
+ * freq is the already copied request header; optval/optlen describe
+ * the whole user buffer, so any bytes beyond the (CMSG aligned) header
+ * are treated as ancillary data describing the options to bind to the
+ * label.  Only options preceding and including the routing header are
+ * accepted (opt_flen must stay 0).
+ *
+ * Returns the new label with one user reference, or NULL with the
+ * error code stored in *err_p:
+ *   -ENOMEM  allocation failure
+ *   -EFAULT  the option data could not be copied from user space
+ *   -EPERM   linger/expires rejected by fl6_renew()
+ *   -EINVAL  bad options, bad share mode, or a v4-mapped/unspecified
+ *            destination address
+ *   or whatever datagram_send_ctl() reported.
+ */
+static struct ip6_flowlabel *
+fl_create(struct in6_flowlabel_req *freq, char __user *optval, int optlen, int *err_p)
+{
+	struct ip6_flowlabel *fl;
+	int olen;
+	int addr_type;
+	int err;
+
+	err = -ENOMEM;
+	fl = kmalloc(sizeof(*fl), GFP_KERNEL);
+	if (fl == NULL)
+		goto done;
+	memset(fl, 0, sizeof(*fl));
+
+	olen = optlen - CMSG_ALIGN(sizeof(*freq));
+	if (olen > 0) {
+		struct msghdr msg;
+		struct flowi flowi;
+		int junk;
+
+		err = -ENOMEM;
+		fl->opt = kmalloc(sizeof(*fl->opt) + olen, GFP_KERNEL);
+		if (fl->opt == NULL)
+			goto done;
+
+		memset(fl->opt, 0, sizeof(*fl->opt));
+		fl->opt->tot_len = sizeof(*fl->opt) + olen;
+		err = -EFAULT;
+		/* The raw control data is stored right behind the options struct. */
+		if (copy_from_user(fl->opt+1, optval+CMSG_ALIGN(sizeof(*freq)), olen))
+			goto done;
+
+		msg.msg_controllen = olen;
+		msg.msg_control = (void*)(fl->opt+1);
+		flowi.oif = 0;
+
+		err = datagram_send_ctl(&msg, &flowi, fl->opt, &junk);
+		if (err)
+			goto done;
+		err = -EINVAL;
+		if (fl->opt->opt_flen)
+			goto done;
+		if (fl->opt->opt_nflen == 0) {
+			/* Nothing useful was supplied: drop the empty options. */
+			kfree(fl->opt);
+			fl->opt = NULL;
+		}
+	}
+
+	fl->expires = jiffies;
+	err = fl6_renew(fl, freq->flr_linger, freq->flr_expires);
+	if (err)
+		goto done;
+	fl->share = freq->flr_share;
+	addr_type = ipv6_addr_type(&freq->flr_dst);
+	if ((addr_type&IPV6_ADDR_MAPPED)
+	    || addr_type == IPV6_ADDR_ANY) {
+		/*
+		 * err is 0 here after the successful fl6_renew(); without an
+		 * explicit code the caller would report success although no
+		 * label was created.
+		 */
+		err = -EINVAL;
+		goto done;
+	}
+	ipv6_addr_copy(&fl->dst, &freq->flr_dst);
+	atomic_set(&fl->users, 1);
+	switch (fl->share) {
+	case IPV6_FL_S_EXCL:
+	case IPV6_FL_S_ANY:
+		break;
+	case IPV6_FL_S_PROCESS:
+		fl->owner = current->pid;
+		break;
+	case IPV6_FL_S_USER:
+		fl->owner = current->euid;
+		break;
+	default:
+		err = -EINVAL;
+		goto done;
+	}
+	return fl;
+
+done:
+	fl_free(fl);
+	*err_p = err;
+	return NULL;
+}
+
+/*
+ * Decide whether the socket may allocate one more flow label.
+ *
+ * While more than FL_MAX_SIZE - FL_MAX_PER_SOCK slots are free
+ * everything is allowed.  A full table always fails.  In between, a
+ * caller without CAP_NET_ADMIN is refused when the socket already
+ * holds FL_MAX_PER_SOCK labels, when it holds any label and less than
+ * half of the table is free, or when less than a quarter is free.
+ *
+ * Returns 0 if allowed, -ENOBUFS otherwise.
+ */
+static int mem_check(struct sock *sk)
+{
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct ipv6_fl_socklist *sfl;
+	int room = FL_MAX_SIZE - atomic_read(&fl_size);
+	int count = 0;
+
+	if (room > FL_MAX_SIZE - FL_MAX_PER_SOCK)
+		return 0;
+
+	sfl = np->ipv6_fl_list;
+	while (sfl != NULL) {
+		count++;
+		sfl = sfl->next;
+	}
+
+	if (room <= 0)
+		return -ENOBUFS;
+
+	if (count >= FL_MAX_PER_SOCK ||
+	    (count > 0 && room < FL_MAX_SIZE/2) ||
+	    room < FL_MAX_SIZE/4) {
+		if (!capable(CAP_NET_ADMIN))
+			return -ENOBUFS;
+	}
+
+	return 0;
+}
+
+/*
+ * Compare two extension headers.  Returns 0 when both are the same
+ * pointer (including both NULL) or have equal length and equal content
+ * after the leading struct ipv6_opt_hdr; non-zero otherwise.
+ */
+static int ipv6_hdr_cmp(struct ipv6_opt_hdr *h1, struct ipv6_opt_hdr *h2)
+{
+	if (h1 == h2)
+		return 0;
+
+	if (h1 == NULL || h2 == NULL || h1->hdrlen != h2->hdrlen)
+		return 1;
+
+	/* hdrlen counts 8-byte units beyond the first one. */
+	return memcmp(h1+1, h2+1, ((h1->hdrlen+1)<<3) - sizeof(*h1));
+}
+
+/*
+ * Compare the leading part (hop-by-hop, first destination options,
+ * routing header) of two option sets.  Returns 0 if they are
+ * equivalent, 1 if they differ.
+ */
+static int ipv6_opt_cmp(struct ipv6_txoptions *o1, struct ipv6_txoptions *o2)
+{
+	if (o1 == o2)
+		return 0;
+
+	if (o1 == NULL || o2 == NULL)
+		return 1;
+
+	if (o1->opt_nflen != o2->opt_nflen)
+		return 1;
+
+	return ipv6_hdr_cmp(o1->hopopt, o2->hopopt) ||
+	       ipv6_hdr_cmp(o1->dst0opt, o2->dst0opt) ||
+	       ipv6_hdr_cmp((struct ipv6_opt_hdr *)o1->srcrt, (struct ipv6_opt_hdr *)o2->srcrt);
+}
+
+/*
+ * Handle a flow label request passed in through a socket option.
+ *
+ * optval points to a struct in6_flowlabel_req, optionally followed by
+ * ancillary data describing options to bind to the label.  The action
+ * is selected by flr_action:
+ *
+ *   IPV6_FL_A_PUT    drop the label from this socket's list
+ *   IPV6_FL_A_RENEW  extend the lifetime of a label held by this
+ *                    socket (or, for CAP_NET_ADMIN callers using
+ *                    IPV6_FL_S_NONE, of any label)
+ *   IPV6_FL_A_GET    attach an existing compatible label to the socket
+ *                    or, with IPV6_FL_F_CREATE, create a new one.  When
+ *                    no label was requested the chosen one is written
+ *                    back to the user's flr_label field.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
+{
+	int err;
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct in6_flowlabel_req freq;
+	struct ipv6_fl_socklist *sfl1=NULL;
+	struct ipv6_fl_socklist *sfl, **sflp;
+	struct ip6_flowlabel *fl;
+
+	if (optlen < sizeof(freq))
+		return -EINVAL;
+
+	if (copy_from_user(&freq, optval, sizeof(freq)))
+		return -EFAULT;
+
+	switch (freq.flr_action) {
+	case IPV6_FL_A_PUT:
+		write_lock_bh(&ip6_sk_fl_lock);
+		for (sflp = &np->ipv6_fl_list; (sfl=*sflp)!=NULL; sflp = &sfl->next) {
+			if (sfl->fl->label == freq.flr_label) {
+				/* Stop using the label for outgoing traffic, too. */
+				if (freq.flr_label == (np->flow_label&IPV6_FLOWLABEL_MASK))
+					np->flow_label &= ~IPV6_FLOWLABEL_MASK;
+				*sflp = sfl->next;
+				write_unlock_bh(&ip6_sk_fl_lock);
+				fl_release(sfl->fl);
+				kfree(sfl);
+				return 0;
+			}
+		}
+		write_unlock_bh(&ip6_sk_fl_lock);
+		return -ESRCH;
+
+	case IPV6_FL_A_RENEW:
+		read_lock_bh(&ip6_sk_fl_lock);
+		for (sfl = np->ipv6_fl_list; sfl; sfl = sfl->next) {
+			if (sfl->fl->label == freq.flr_label) {
+				err = fl6_renew(sfl->fl, freq.flr_linger, freq.flr_expires);
+				read_unlock_bh(&ip6_sk_fl_lock);
+				return err;
+			}
+		}
+		read_unlock_bh(&ip6_sk_fl_lock);
+
+		if (freq.flr_share == IPV6_FL_S_NONE && capable(CAP_NET_ADMIN)) {
+			fl = fl_lookup(freq.flr_label);
+			if (fl) {
+				err = fl6_renew(fl, freq.flr_linger, freq.flr_expires);
+				fl_release(fl);
+				return err;
+			}
+		}
+		return -ESRCH;
+
+	case IPV6_FL_A_GET:
+		if (freq.flr_label & ~IPV6_FLOWLABEL_MASK)
+			return -EINVAL;
+
+		fl = fl_create(&freq, optval, optlen, &err);
+		if (fl == NULL)
+			return err;
+		/* Allocation failure is checked later, where the entry is needed. */
+		sfl1 = kmalloc(sizeof(*sfl1), GFP_KERNEL);
+
+		if (freq.flr_label) {
+			struct ip6_flowlabel *fl1 = NULL;
+
+			err = -EEXIST;
+			read_lock_bh(&ip6_sk_fl_lock);
+			for (sfl = np->ipv6_fl_list; sfl; sfl = sfl->next) {
+				if (sfl->fl->label == freq.flr_label) {
+					if (freq.flr_flags&IPV6_FL_F_EXCL) {
+						read_unlock_bh(&ip6_sk_fl_lock);
+						goto done;
+					}
+					fl1 = sfl->fl;
+					/*
+					 * Take the reference on the label that was
+					 * found (fl1), not on the freshly created
+					 * template: fl1 is what gets released or
+					 * attached below.
+					 */
+					atomic_inc(&fl1->users);
+					break;
+				}
+			}
+			read_unlock_bh(&ip6_sk_fl_lock);
+
+			if (fl1 == NULL)
+				fl1 = fl_lookup(freq.flr_label);
+			if (fl1) {
+				err = -EEXIST;
+				if (freq.flr_flags&IPV6_FL_F_EXCL)
+					goto release;
+				err = -EPERM;
+				if (fl1->share == IPV6_FL_S_EXCL ||
+				    fl1->share != fl->share ||
+				    fl1->owner != fl->owner)
+					goto release;
+
+				err = -EINVAL;
+				if (!ipv6_addr_equal(&fl1->dst, &fl->dst) ||
+				    ipv6_opt_cmp(fl1->opt, fl->opt))
+					goto release;
+
+				err = -ENOMEM;
+				if (sfl1 == NULL)
+					goto release;
+				if (fl->linger > fl1->linger)
+					fl1->linger = fl->linger;
+				if ((long)(fl->expires - fl1->expires) > 0)
+					fl1->expires = fl->expires;
+				write_lock_bh(&ip6_sk_fl_lock);
+				sfl1->fl = fl1;
+				sfl1->next = np->ipv6_fl_list;
+				np->ipv6_fl_list = sfl1;
+				write_unlock_bh(&ip6_sk_fl_lock);
+				/* The template is no longer needed; fl1 is used instead. */
+				fl_free(fl);
+				return 0;
+
+release:
+				fl_release(fl1);
+				goto done;
+			}
+		}
+		err = -ENOENT;
+		if (!(freq.flr_flags&IPV6_FL_F_CREATE))
+			goto done;
+
+		err = -ENOMEM;
+		if (sfl1 == NULL || (err = mem_check(sk)) != 0)
+			goto done;
+
+		err = fl_intern(fl, freq.flr_label);
+		if (err)
+			goto done;
+
+		/* Do not check for fault */
+		if (!freq.flr_label)
+			copy_to_user(&((struct in6_flowlabel_req __user *) optval)->flr_label,
+				     &fl->label, sizeof(fl->label));
+
+		/*
+		 * Publish the new entry under the socket list lock, like the
+		 * insertion of an existing label above.
+		 */
+		sfl1->fl = fl;
+		write_lock_bh(&ip6_sk_fl_lock);
+		sfl1->next = np->ipv6_fl_list;
+		np->ipv6_fl_list = sfl1;
+		write_unlock_bh(&ip6_sk_fl_lock);
+		return 0;
+
+	default:
+		return -EINVAL;
+	}
+
+done:
+	fl_free(fl);
+	kfree(sfl1);
+	return err;
+}
+
+#ifdef CONFIG_PROC_FS
+
+struct ip6fl_iter_state {
+ int bucket; /* index of the fl_ht bucket the iterator is currently in */
+};
+
+#define ip6fl_seq_private(seq) ((struct ip6fl_iter_state *)(seq)->private) /* iterator state stored in seq->private */
+
+/*
+ * Position the iterator on the first non-empty hash bucket and return
+ * the head of its chain, or NULL if the whole table is empty.
+ */
+static struct ip6_flowlabel *ip6fl_get_first(struct seq_file *seq)
+{
+	struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
+
+	for (state->bucket = 0; state->bucket <= FL_HASH_MASK; ++state->bucket) {
+		struct ip6_flowlabel *head = fl_ht[state->bucket];
+
+		if (head != NULL)
+			return head;
+	}
+
+	return NULL;
+}
+
+/*
+ * Advance the iterator: return the entry following fl in its chain or,
+ * at the end of the chain, the head of the next non-empty bucket.
+ * Returns NULL once the last bucket has been passed.
+ */
+static struct ip6_flowlabel *ip6fl_get_next(struct seq_file *seq, struct ip6_flowlabel *fl)
+{
+	struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
+
+	fl = fl->next;
+	while (!fl) {
+		/* Stop at the end of the table instead of spinning forever. */
+		if (++state->bucket > FL_HASH_MASK)
+			break;
+		fl = fl_ht[state->bucket];
+	}
+	return fl;
+}
+
+/*
+ * Return the entry at position pos (counted from 0) in iteration
+ * order, or NULL if the table holds fewer entries.
+ */
+static struct ip6_flowlabel *ip6fl_get_idx(struct seq_file *seq, loff_t pos)
+{
+	struct ip6_flowlabel *fl;
+
+	for (fl = ip6fl_get_first(seq); fl != NULL && pos != 0; --pos)
+		fl = ip6fl_get_next(seq, fl);
+
+	return fl;
+}
+
+/*
+ * seq_file start hook: take the hash table lock (dropped again in
+ * ip6fl_seq_stop) and return the element for *pos.  Position 0 is the
+ * header line, represented by SEQ_START_TOKEN.
+ */
+static void *ip6fl_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	read_lock_bh(&ip6_fl_lock);
+
+	if (*pos == 0)
+		return SEQ_START_TOKEN;
+
+	return ip6fl_get_idx(seq, *pos - 1);
+}
+
+/*
+ * seq_file next hook: step from the header token to the first entry,
+ * or from an entry to its successor, and advance *pos.
+ */
+static void *ip6fl_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct ip6_flowlabel *fl;
+
+	fl = (v == SEQ_START_TOKEN) ? ip6fl_get_first(seq)
+				    : ip6fl_get_next(seq, v);
+	++*pos;
+
+	return fl;
+}
+/*
+ * seq_file stop hook: drop the hash table lock taken in ip6fl_seq_start().
+ */
+static void ip6fl_seq_stop(struct seq_file *seq, void *v)
+{
+ read_unlock_bh(&ip6_fl_lock);
+}
+
+/*
+ * Print one line per flow label, starting with fl and continuing along
+ * the ->next chain until its end.  Columns: label, share mode, owner,
+ * users, linger (s), time to expiry (s), destination, option length.
+ */
+static void ip6fl_fl_seq_show(struct seq_file *seq, struct ip6_flowlabel *fl)
+{
+	for (; fl != NULL; fl = fl->next) {
+		seq_printf(seq,
+			   "%05X %-1d %-6d %-6d %-6ld %-8ld "
+			   "%02x%02x%02x%02x%02x%02x%02x%02x "
+			   "%-4d\n",
+			   (unsigned)ntohl(fl->label),
+			   fl->share,
+			   (unsigned)fl->owner,
+			   atomic_read(&fl->users),
+			   fl->linger/HZ,
+			   (long)(fl->expires - jiffies)/HZ,
+			   NIP6(fl->dst),
+			   fl->opt ? fl->opt->opt_nflen : 0);
+	}
+}
+
+/*
+ * seq_file show hook: emit the column header for the start token and
+ * the flow label lines for everything else.  Always returns 0.
+ */
+static int ip6fl_seq_show(struct seq_file *seq, void *v)
+{
+	if (v != SEQ_START_TOKEN) {
+		ip6fl_fl_seq_show(seq, v);
+		return 0;
+	}
+
+	seq_puts(seq, "Label S Owner Users Linger Expires "
+		      "Dst Opt\n");
+	return 0;
+}
+
+static struct seq_operations ip6fl_seq_ops = { /* iterator passed to seq_open() in ip6fl_seq_open() */
+ .start = ip6fl_seq_start, /* takes ip6_fl_lock */
+ .next = ip6fl_seq_next,
+ .stop = ip6fl_seq_stop, /* releases ip6_fl_lock */
+ .show = ip6fl_seq_show,
+};
+
+/*
+ * Open hook of the proc file: allocate the zeroed iterator state and
+ * attach it to the seq_file.  Returns 0 on success, -ENOMEM if the
+ * state cannot be allocated, or the error reported by seq_open().
+ */
+static int ip6fl_seq_open(struct inode *inode, struct file *file)
+{
+	struct ip6fl_iter_state *s;
+	struct seq_file *seq;
+	int rc;
+
+	s = kmalloc(sizeof(*s), GFP_KERNEL);
+	if (s == NULL)
+		return -ENOMEM;
+
+	rc = seq_open(file, &ip6fl_seq_ops);
+	if (rc) {
+		kfree(s);
+		return rc;
+	}
+
+	seq = file->private_data;
+	seq->private = s;
+	memset(s, 0, sizeof(*s));
+
+	return rc;
+}
+
+static struct file_operations ip6fl_seq_fops = { /* registered for the "ip6_flowlabel" proc entry in ip6_flowlabel_init() */
+ .owner = THIS_MODULE,
+ .open = ip6fl_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release_private, /* presumably also frees the state allocated in ip6fl_seq_open(); verify */
+};
+#endif
+
+/*
+ * Register the read-only "ip6_flowlabel" proc entry (only with CONFIG_PROC_FS).
+ */
+void ip6_flowlabel_init(void)
+{
+#ifdef CONFIG_PROC_FS
+ proc_net_fops_create("ip6_flowlabel", S_IRUGO, &ip6fl_seq_fops);
+#endif
+}
+/*
+ * Stop the flow label gc timer and remove the "ip6_flowlabel" proc entry.
+ */
+void ip6_flowlabel_cleanup(void)
+{
+ del_timer(&ip6_fl_gc_timer);
+#ifdef CONFIG_PROC_FS
+ proc_net_remove("ip6_flowlabel");
+#endif
+}
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
new file mode 100644
index 000000000000..866f10726c58
--- /dev/null
+++ b/net/ipv6/ip6_input.c
@@ -0,0 +1,269 @@
+/*
+ * IPv6 input
+ * Linux INET6 implementation
+ *
+ * Authors:
+ * Pedro Roque <roque@di.fc.ul.pt>
+ * Ian P. Morris <I.P.Morris@soton.ac.uk>
+ *
+ * $Id: ip6_input.c,v 1.19 2000/12/13 18:31:50 davem Exp $
+ *
+ * Based in linux/net/ipv4/ip_input.c
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+/* Changes
+ *
+ * Mitsuru KANDA @USAGI and
+ * YOSHIFUJI Hideaki @USAGI: Remove ipv6_parse_exthdrs().
+ */
+
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/sched.h>
+#include <linux/net.h>
+#include <linux/netdevice.h>
+#include <linux/in6.h>
+#include <linux/icmpv6.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv6.h>
+
+#include <net/sock.h>
+#include <net/snmp.h>
+
+#include <net/ipv6.h>
+#include <net/protocol.h>
+#include <net/transp_v6.h>
+#include <net/rawv6.h>
+#include <net/ndisc.h>
+#include <net/ip6_route.h>
+#include <net/addrconf.h>
+#include <net/xfrm.h>
+
+
+/* Continuation of ipv6_rcv() after the PRE_ROUTING hook: route the packet if needed and pass it on. */
+static inline int ip6_rcv_finish( struct sk_buff *skb)
+{
+ if (skb->dst == NULL)
+ ip6_route_input(skb); /* no route attached yet: look one up */
+
+ return dst_input(skb);
+}
+/*
+ * Entry point for received IPv6 packets.
+ *
+ * Packets addressed to other hosts are dropped.  The skb is unshared,
+ * the incoming interface index is recorded in the skb control block,
+ * and the fixed header is validated (minimum length, version, payload
+ * length against the amount of data received).  The skb is trimmed to
+ * the declared payload length and a leading hop-by-hop options header
+ * is parsed.  Packets that pass are handed to the NF_IP6_PRE_ROUTING
+ * hook with ip6_rcv_finish() as continuation.  pt is not used here.
+ *
+ * Every path other than the hook itself returns 0.
+ */
+int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt)
+{
+ struct ipv6hdr *hdr;
+ u32 pkt_len;
+
+ if (skb->pkt_type == PACKET_OTHERHOST)
+ goto drop;
+
+ IP6_INC_STATS_BH(IPSTATS_MIB_INRECEIVES);
+
+ if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) {
+ IP6_INC_STATS_BH(IPSTATS_MIB_INDISCARDS);
+ goto out; /* skb is NULL here: there is nothing to free */
+ }
+
+ /*
+ * Store incoming device index. When the packet will
+ * be queued, we cannot refer to skb->dev anymore.
+ *
+ * BTW, when we send a packet for our own local address on a
+ * non-loopback interface (e.g. ethX), it is being delivered
+ * via the loopback interface (lo) here; skb->dev = &loopback_dev.
+ * It, however, should be considered as if it is being
+ * arrived via the sending interface (ethX), because of the
+ * nature of scoping architecture. --yoshfuji
+ */
+ IP6CB(skb)->iif = skb->dst ? ((struct rt6_info *)skb->dst)->rt6i_idev->dev->ifindex : dev->ifindex;
+
+ if (skb->len < sizeof(struct ipv6hdr))
+ goto err;
+
+ if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) {
+ IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
+ goto drop;
+ }
+
+ hdr = skb->nh.ipv6h;
+
+ if (hdr->version != 6)
+ goto err;
+
+ pkt_len = ntohs(hdr->payload_len);
+
+ /* pkt_len may be zero if Jumbo payload option is present */
+ if (pkt_len || hdr->nexthdr != NEXTHDR_HOP) {
+ if (pkt_len + sizeof(struct ipv6hdr) > skb->len)
+ goto truncated;
+ if (pskb_trim_rcsum(skb, pkt_len + sizeof(struct ipv6hdr))) {
+ IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
+ goto drop;
+ }
+ hdr = skb->nh.ipv6h; /* reload the header pointer after the trim */
+ }
+
+ if (hdr->nexthdr == NEXTHDR_HOP) {
+ skb->h.raw = (u8*)(hdr+1);
+ if (ipv6_parse_hopopts(skb, offsetof(struct ipv6hdr, nexthdr)) < 0) {
+ IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
+ return 0; /* NOTE(review): skb is not freed here; presumably ipv6_parse_hopopts() consumed it on failure - verify */
+ }
+ hdr = skb->nh.ipv6h;
+ }
+
+ return NF_HOOK(PF_INET6,NF_IP6_PRE_ROUTING, skb, dev, NULL, ip6_rcv_finish);
+truncated:
+ IP6_INC_STATS_BH(IPSTATS_MIB_INTRUNCATEDPKTS); /* falls through: also counted as a header error */
+err:
+ IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS);
+drop:
+ kfree_skb(skb);
+out:
+ return 0;
+}
+
+/*
+ * Deliver the packet to the host
+ */
+
+/*
+ * Continuation of ip6_input() after the LOCAL_IN hook.
+ *
+ * Pulls the headers processed so far, gives raw sockets bound to the
+ * next-header value their copy, and calls the registered upper-layer
+ * protocol handler.  A handler returning a positive value has consumed
+ * an extension header and the loop is restarted for the header that
+ * follows.  Without a handler and without raw sockets an ICMPv6
+ * parameter problem (unknown next header) is generated.  Always
+ * returns 0.
+ */
+static inline int ip6_input_finish(struct sk_buff *skb)
+{
+ struct inet6_protocol *ipprot;
+ struct sock *raw_sk;
+ unsigned int nhoff; /* offset, from skb->nh.raw, of the byte holding the next-header value */
+ int nexthdr;
+ u8 hash;
+
+ skb->h.raw = skb->nh.raw + sizeof(struct ipv6hdr);
+
+ /*
+ * Parse extension headers
+ */
+
+ nexthdr = skb->nh.ipv6h->nexthdr;
+ nhoff = offsetof(struct ipv6hdr, nexthdr);
+
+ /* Skip hop-by-hop options, they are already parsed. */
+ if (nexthdr == NEXTHDR_HOP) {
+ nhoff = sizeof(struct ipv6hdr);
+ nexthdr = skb->h.raw[0];
+ skb->h.raw += (skb->h.raw[1]+1)<<3; /* header length is given in 8-byte units, not counting the first */
+ }
+
+ rcu_read_lock();
+resubmit:
+ if (!pskb_pull(skb, skb->h.raw - skb->data))
+ goto discard;
+ nexthdr = skb->nh.raw[nhoff];
+
+ raw_sk = sk_head(&raw_v6_htable[nexthdr & (MAX_INET_PROTOS - 1)]);
+ if (raw_sk)
+ ipv6_raw_deliver(skb, nexthdr);
+
+ hash = nexthdr & (MAX_INET_PROTOS - 1);
+ if ((ipprot = rcu_dereference(inet6_protos[hash])) != NULL) {
+ int ret;
+
+ if (ipprot->flags & INET6_PROTO_FINAL) {
+ struct ipv6hdr *hdr;
+
+ skb_postpull_rcsum(skb, skb->nh.raw,
+ skb->h.raw - skb->nh.raw);
+ hdr = skb->nh.ipv6h;
+ if (ipv6_addr_is_multicast(&hdr->daddr) &&
+ !ipv6_chk_mcast_addr(skb->dev, &hdr->daddr,
+ &hdr->saddr) &&
+ !ipv6_is_mld(skb, nexthdr))
+ goto discard; /* multicast that fails the group check and is not MLD */
+ }
+ if (!(ipprot->flags & INET6_PROTO_NOPOLICY) &&
+ !xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
+ goto discard;
+
+ ret = ipprot->handler(&skb, &nhoff);
+ if (ret > 0)
+ goto resubmit; /* extension header consumed: process the next one */
+ else if (ret == 0)
+ IP6_INC_STATS_BH(IPSTATS_MIB_INDELIVERS);
+ } else {
+ if (!raw_sk) {
+ if (xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
+ IP6_INC_STATS_BH(IPSTATS_MIB_INUNKNOWNPROTOS);
+ icmpv6_param_prob(skb, ICMPV6_UNK_NEXTHDR, nhoff);
+ } /* NOTE(review): when the policy check fails the skb is neither freed nor passed on here - verify */
+ } else {
+ IP6_INC_STATS_BH(IPSTATS_MIB_INDELIVERS);
+ kfree_skb(skb);
+ }
+ }
+ rcu_read_unlock();
+ return 0;
+
+discard:
+ IP6_INC_STATS_BH(IPSTATS_MIB_INDISCARDS);
+ rcu_read_unlock();
+ kfree_skb(skb);
+ return 0;
+}
+
+/* Local delivery: run the NF_IP6_LOCAL_IN hook, then ip6_input_finish(). */
+int ip6_input(struct sk_buff *skb)
+{
+ return NF_HOOK(PF_INET6,NF_IP6_LOCAL_IN, skb, skb->dev, NULL, ip6_input_finish);
+}
+/*
+ * Input handler for multicast packets.
+ *
+ * The packet is delivered locally when the receiving device has
+ * neither IFF_PROMISC nor IFF_ALLMULTI set, or when
+ * ipv6_chk_mcast_addr() accepts the destination group on that device;
+ * otherwise it is freed.  Always returns 0.
+ */
+int ip6_mc_input(struct sk_buff *skb)
+{
+ struct ipv6hdr *hdr;
+ int deliver;
+
+ IP6_INC_STATS_BH(IPSTATS_MIB_INMCASTPKTS);
+
+ hdr = skb->nh.ipv6h;
+ deliver = likely(!(skb->dev->flags & (IFF_PROMISC|IFF_ALLMULTI))) ||
+ ipv6_chk_mcast_addr(skb->dev, &hdr->daddr, NULL);
+
+ /*
+ * IPv6 multicast router mode isn't currently supported.
+ */
+#if 0
+ if (ipv6_config.multicast_route) {
+ int addr_type;
+
+ addr_type = ipv6_addr_type(&hdr->daddr);
+
+ if (!(addr_type & (IPV6_ADDR_LOOPBACK | IPV6_ADDR_LINKLOCAL))) {
+ struct sk_buff *skb2;
+ struct dst_entry *dst;
+
+ dst = skb->dst;
+
+ if (deliver) {
+ skb2 = skb_clone(skb, GFP_ATOMIC);
+ dst_output(skb2);
+ } else {
+ dst_output(skb);
+ return 0;
+ }
+ }
+ }
+#endif
+
+ if (likely(deliver)) {
+ ip6_input(skb);
+ return 0;
+ }
+ /* discard */
+ kfree_skb(skb);
+
+ return 0;
+}
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
new file mode 100644
index 000000000000..49208ba75094
--- /dev/null
+++ b/net/ipv6/ip6_output.c
@@ -0,0 +1,1197 @@
+/*
+ * IPv6 output functions
+ * Linux INET6 implementation
+ *
+ * Authors:
+ * Pedro Roque <roque@di.fc.ul.pt>
+ *
+ * $Id: ip6_output.c,v 1.34 2002/02/01 22:01:04 davem Exp $
+ *
+ * Based on linux/net/ipv4/ip_output.c
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ * A.N.Kuznetsov : arithmetics in fragmentation.
+ * extension headers are implemented.
+ * route changes now work.
+ * ip6_forward does not confuse sniffers.
+ * etc.
+ *
+ * H. von Brand : Added missing #include <linux/string.h>
+ * Imran Patel : frag id should be in NBO
+ * Kazunori MIYAZAWA @USAGI
+ * : add ip6_append_data and related functions
+ * for datagram xmit
+ */
+
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/socket.h>
+#include <linux/net.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <linux/in6.h>
+#include <linux/tcp.h>
+#include <linux/route.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv6.h>
+
+#include <net/sock.h>
+#include <net/snmp.h>
+
+#include <net/ipv6.h>
+#include <net/ndisc.h>
+#include <net/protocol.h>
+#include <net/ip6_route.h>
+#include <net/addrconf.h>
+#include <net/rawv6.h>
+#include <net/icmp.h>
+#include <net/xfrm.h>
+#include <net/checksum.h>
+
+static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
+
+/*
+ *	Store the next fragment identification, in network byte order,
+ *	into fhdr->identification.
+ *
+ *	The value comes from a counter shared by all callers and guarded
+ *	by a spinlock. The counter starts at 1 and skips 0 when it wraps,
+ *	so 0 is never handed out. The skb argument is not used.
+ */
+static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *fhdr)
+{
+	static u32 ipv6_fragmentation_id = 1;
+	static DEFINE_SPINLOCK(ip6_id_lock);
+	u32 id;
+
+	spin_lock_bh(&ip6_id_lock);
+	id = ipv6_fragmentation_id++;
+	if (ipv6_fragmentation_id == 0)
+		ipv6_fragmentation_id = 1;
+	spin_unlock_bh(&ip6_id_lock);
+
+	fhdr->identification = htonl(id);
+}
+
+/*
+ *	Final output step.
+ *
+ *	If the route has a cached hardware header, copy it in front of the
+ *	packet data under hh_lock and call hh_output(). Without one, fall
+ *	back to the neighbour's output routine. With neither, count
+ *	OutNoRoutes, free the skb and return -EINVAL.
+ */
+static inline int ip6_output_finish(struct sk_buff *skb)
+{
+	struct dst_entry *dst = skb->dst;
+	struct hh_cache *hh = dst->hh;
+	int hh_alen;
+
+	if (!hh) {
+		if (dst->neighbour)
+			return dst->neighbour->output(skb);
+
+		IP6_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
+		kfree_skb(skb);
+		return -EINVAL;
+	}
+
+	read_lock_bh(&hh->hh_lock);
+	hh_alen = HH_DATA_ALIGN(hh->hh_len);
+	memcpy(skb->data - hh_alen, hh->hh_data, hh_alen);
+	read_unlock_bh(&hh->hh_lock);
+
+	skb_push(skb, hh->hh_len);
+	return hh->hh_output(skb);
+}
+
+/*
+ *	dev_loopback_xmit for use with netfilter.
+ *
+ *	Marks the current data start as the MAC header, pulls the data
+ *	pointer up to the network header, tags the skb as a loopback packet
+ *	whose checksum needs no verification, and feeds it to netif_rx().
+ *	Always returns 0.
+ */
+static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
+{
+	unsigned int nh_off = newskb->nh.raw - newskb->data;
+
+	newskb->mac.raw = newskb->data;
+	__skb_pull(newskb, nh_off);
+	newskb->ip_summed = CHECKSUM_UNNECESSARY;
+	newskb->pkt_type = PACKET_LOOPBACK;
+	BUG_TRAP(newskb->dst);
+
+	netif_rx(newskb);
+	return 0;
+}
+
+
+/*
+ *	Second stage of output: set protocol and device on the skb, loop a
+ *	copy of a multicast packet back to the local stack when required,
+ *	and pass the packet to the NF_IP6_POST_ROUTING hook with
+ *	ip6_output_finish() as the continuation.
+ *
+ *	Returns the NF_HOOK result, or 0 when a looped-back multicast
+ *	packet with hop limit 0 is discarded here.
+ */
+static int ip6_output2(struct sk_buff *skb)
+{
+	struct net_device *dev = skb->dst->dev;
+	struct ipv6hdr *hdr = skb->nh.ipv6h;
+
+	skb->protocol = htons(ETH_P_IPV6);
+	skb->dev = dev;
+
+	if (ipv6_addr_is_multicast(&hdr->daddr)) {
+		struct ipv6_pinfo *np = NULL;
+		int loop_back;
+
+		if (skb->sk)
+			np = inet6_sk(skb->sk);
+
+		/*
+		 * Loop back only on a non-loopback device, when the socket
+		 * (if any) has mc_loop set, and when ipv6_chk_mcast_addr()
+		 * accepts the group on this device.
+		 */
+		loop_back = !(dev->flags & IFF_LOOPBACK) &&
+			    (np == NULL || np->mc_loop) &&
+			    ipv6_chk_mcast_addr(dev, &hdr->daddr, &hdr->saddr);
+
+		if (loop_back) {
+			struct sk_buff *copy = skb_clone(skb, GFP_ATOMIC);
+
+			/* Do not check for IFF_ALLMULTI; multicast routing
+			   is not supported in any case.
+			 */
+			if (copy != NULL)
+				NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, copy, NULL,
+					copy->dev,
+					ip6_dev_loopback_xmit);
+
+			if (hdr->hop_limit == 0) {
+				IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
+				kfree_skb(skb);
+				return 0;
+			}
+		}
+
+		IP6_INC_STATS(IPSTATS_MIB_OUTMCASTPKTS);
+	}
+
+	return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb, NULL, skb->dev,
+		       ip6_output_finish);
+}
+
+/*
+ *	Output entry point: fragment through ip6_fragment() when the packet
+ *	exceeds the route MTU or the route has dst_allfrag() set, otherwise
+ *	send it directly with ip6_output2().
+ */
+int ip6_output(struct sk_buff *skb)
+{
+	struct dst_entry *dst = skb->dst;
+	int must_fragment = skb->len > dst_mtu(dst) || dst_allfrag(dst);
+
+	if (!must_fragment)
+		return ip6_output2(skb);
+
+	return ip6_fragment(skb, ip6_output2);
+}
+
+#ifdef CONFIG_NETFILTER
+/*
+ *	Look up a new route for skb from the addresses and next-header
+ *	value in its IPv6 header and, if the skb has a socket, the socket's
+ *	bound device.
+ *
+ *	On success the old route is released, the new one is attached and
+ *	0 is returned. If the lookup result carries an error, OutNoRoutes
+ *	is counted, the result is released and -EINVAL is returned; the
+ *	skb keeps its old route and is not freed.
+ */
+int ip6_route_me_harder(struct sk_buff *skb)
+{
+	struct ipv6hdr *iph = skb->nh.ipv6h;
+	struct sock *sk = skb->sk;
+	struct dst_entry *dst;
+	struct flowi fl = {
+		.oif = sk ? sk->sk_bound_dev_if : 0,
+		.nl_u =
+		{ .ip6_u =
+		  { .daddr = iph->daddr,
+		    .saddr = iph->saddr, } },
+		.proto = iph->nexthdr,
+	};
+
+	dst = ip6_route_output(sk, &fl);
+
+	if (!dst->error) {
+		/* Drop old route. */
+		dst_release(skb->dst);
+		skb->dst = dst;
+		return 0;
+	}
+
+	IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
+	LIMIT_NETDEBUG(
+		printk(KERN_DEBUG "ip6_route_me_harder: No more route.\n"));
+	dst_release(dst);
+	return -EINVAL;
+}
+#endif
+
+/*
+ *	Continuation for the LOCAL_OUT hook in ip6_xmit().
+ *
+ *	With netfilter configured, an skb flagged NFC_ALTERED gets a fresh
+ *	route from ip6_route_me_harder(); if that fails the skb is freed
+ *	and -EINVAL returned. In every other case the packet goes to
+ *	dst_output().
+ */
+static inline int ip6_maybe_reroute(struct sk_buff *skb)
+{
+#ifdef CONFIG_NETFILTER
+	if ((skb->nfcache & NFC_ALTERED) && ip6_route_me_harder(skb) != 0) {
+		kfree_skb(skb);
+		return -EINVAL;
+	}
+#endif /* CONFIG_NETFILTER */
+	return dst_output(skb);
+}
+
+/*
+ * xmit an sk_buff (used by TCP)
+ *
+ * Pushes the extension headers described by opt (if any) and then the
+ * IPv6 header in front of the data already in skb, and passes the
+ * packet to the NF_IP6_LOCAL_OUT hook with ip6_maybe_reroute() as the
+ * continuation.
+ *
+ * sk:       sending socket; may be NULL, in which case no per-socket
+ *           hop limit is consulted
+ * skb:      packet to send; skb->dst is dereferenced, so it must be set
+ * fl:       flow supplying destination, source, flow label and protocol
+ * opt:      extension headers to push, or NULL
+ * ipfragok: when non-zero the MTU check at the end is skipped
+ *
+ * Returns the NF_HOOK result on the send path, -ENOBUFS when the extra
+ * headroom could not be allocated, or -EMSGSIZE when the packet is
+ * larger than the route MTU and ipfragok is zero. The skb is freed here
+ * on both error paths.
+ */
+
+int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
+ struct ipv6_txoptions *opt, int ipfragok)
+{
+ struct ipv6_pinfo *np = sk ? inet6_sk(sk) : NULL;
+ struct in6_addr *first_hop = &fl->fl6_dst;
+ struct dst_entry *dst = skb->dst;
+ struct ipv6hdr *hdr;
+ u8 proto = fl->proto;
+ int seg_len = skb->len;
+ int hlimit;
+ u32 mtu;
+
+ if (opt) {
+ int head_room;
+
+ /* First: exthdrs may take lots of space (~8K for now)
+ MAX_HEADER is not enough.
+ */
+ head_room = opt->opt_nflen + opt->opt_flen;
+ seg_len += head_room; /* payload length counts the extension headers too */
+ head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
+
+ if (skb_headroom(skb) < head_room) {
+ struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
+ kfree_skb(skb); /* the original is released whether or not the copy succeeded */
+ skb = skb2;
+ if (skb == NULL) {
+ IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
+ return -ENOBUFS;
+ }
+ if (sk)
+ skb_set_owner_w(skb, sk); /* charge the replacement skb to the socket */
+ }
+ if (opt->opt_flen)
+ ipv6_push_frag_opts(skb, opt, &proto);
+ if (opt->opt_nflen)
+ ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop); /* may redirect first_hop */
+ }
+
+ hdr = skb->nh.ipv6h = (struct ipv6hdr*)skb_push(skb, sizeof(struct ipv6hdr));
+
+ /*
+ * Fill in the IPv6 header
+ */
+
+ *(u32*)hdr = htonl(0x60000000) | fl->fl6_flowlabel; /* first word: 6 in the top
+   nibble, OR-ed with the flow's fl6_flowlabel bits */
+ hlimit = -1; /* negative means "not decided yet" */
+ if (np)
+ hlimit = np->hop_limit; /* 1st choice: the socket's value */
+ if (hlimit < 0)
+ hlimit = dst_metric(dst, RTAX_HOPLIMIT); /* 2nd choice: the route metric */
+ if (hlimit < 0)
+ hlimit = ipv6_get_hoplimit(dst->dev); /* last resort: per-device value */
+
+ hdr->payload_len = htons(seg_len);
+ hdr->nexthdr = proto;
+ hdr->hop_limit = hlimit;
+
+ ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
+ ipv6_addr_copy(&hdr->daddr, first_hop);
+
+ mtu = dst_mtu(dst);
+ if ((skb->len <= mtu) || ipfragok) {
+ IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
+ return NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, ip6_maybe_reroute);
+ }
+
+ /* Too big and ipfragok is zero: report it with an ICMPv6 error and drop. */
+ if (net_ratelimit())
+ printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
+ skb->dev = dst->dev;
+ icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
+ IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
+ kfree_skb(skb);
+ return -EMSGSIZE;
+}
+
+/*
+ *	To avoid extra problems ND packets are sent through this
+ *	routine. It's code duplication but I really want to avoid
+ *	extra checks since ipv6_build_header is used by TCP (which
+ *	is for us performance critical)
+ *
+ *	Sets the protocol and device on skb, appends an IPv6 header at
+ *	the tail of skb with skb_put() and makes skb->nh point at it.
+ *
+ *	sk:     socket whose hop_limit setting is copied into the header;
+ *	        it is dereferenced unconditionally, so it must not be NULL
+ *	skb:    buffer receiving the header
+ *	dev:    device stored in skb->dev
+ *	saddr:  source address copied into the header
+ *	daddr:  destination address copied into the header
+ *	proto:  value for the next-header field
+ *	len:    payload length stored in the header (header not included)
+ *
+ *	Always returns 0.
+ */
+
+int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
+	       struct in6_addr *saddr, struct in6_addr *daddr,
+	       int proto, int len)
+{
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct ipv6hdr *hdr;
+
+	skb->protocol = htons(ETH_P_IPV6);
+	skb->dev = dev;
+
+	hdr = (struct ipv6hdr *) skb_put(skb, sizeof(struct ipv6hdr));
+	skb->nh.ipv6h = hdr;
+
+	/* first word: 6 in the top nibble, everything else zero */
+	*(u32*)hdr = htonl(0x60000000);
+
+	hdr->payload_len = htons(len);
+	hdr->nexthdr = proto;
+	hdr->hop_limit = np->hop_limit;
+
+	ipv6_addr_copy(&hdr->saddr, saddr);
+	ipv6_addr_copy(&hdr->daddr, daddr);
+
+	return 0;
+}
+
+/*
+ *	Hand skb to every socket on ip6_ra_chain whose selector equals sel.
+ *
+ *	The chain is walked under ip6_ra_lock held for reading. Every
+ *	matching socket except the last one gets a clone (skipped silently
+ *	if cloning fails); the last one gets the original skb.
+ *
+ *	Returns 1 when at least one socket matched, which means skb has
+ *	been passed to rawv6_rcv(), and 0 when nothing matched and skb is
+ *	untouched.
+ */
+static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
+{
+	struct ip6_ra_chain *ra;
+	struct sock *last = NULL;
+
+	read_lock(&ip6_ra_lock);
+
+	for (ra = ip6_ra_chain; ra; ra = ra->next) {
+		struct sock *sk = ra->sk;
+
+		if (!sk || ra->sel != sel)
+			continue;
+
+		if (last) {
+			struct sk_buff *copy = skb_clone(skb, GFP_ATOMIC);
+
+			if (copy)
+				rawv6_rcv(last, copy);
+		}
+		last = sk;
+	}
+
+	if (last)
+		rawv6_rcv(last, skb);
+
+	read_unlock(&ip6_ra_lock);
+	return last != NULL;
+}
+/*
+ * Continuation handed to the NF_IP6_FORWARD hook by ip6_forward():
+ * passes the packet to dst_output() and returns its result.
+ */
+static inline int ip6_forward_finish(struct sk_buff *skb)
+{
+ return dst_output(skb);
+}
+/*
+ * Forward a received packet along skb->dst.
+ *
+ * The packet is freed without being forwarded when forwarding is
+ * disabled, when the xfrm policy or xfrm route check fails, when its
+ * hop limit is used up, when it is larger than the outgoing MTU, when
+ * skb_cow() fails, or when the redirect branch is not taken and the
+ * source address is multicast, loopback or link-local.
+ * A packet with a Router Alert option is first offered to
+ * ip6_call_ra_chain(); if that takes it, nothing else is done here.
+ * Otherwise the hop limit is decremented and the packet goes through
+ * the NF_IP6_FORWARD hook to ip6_forward_finish().
+ *
+ * Returns 0 when the RA chain took the packet, the NF_HOOK result when
+ * the packet was forwarded, -ETIMEDOUT on hop limit expiry, -EMSGSIZE
+ * when the packet is too big, and -EINVAL for the remaining drops.
+ */
+int ip6_forward(struct sk_buff *skb)
+{
+ struct dst_entry *dst = skb->dst;
+ struct ipv6hdr *hdr = skb->nh.ipv6h;
+ struct inet6_skb_parm *opt = IP6CB(skb);
+
+ if (ipv6_devconf.forwarding == 0)
+ goto error;
+
+ if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
+ IP6_INC_STATS(IPSTATS_MIB_INDISCARDS);
+ goto drop;
+ }
+
+ skb->ip_summed = CHECKSUM_NONE;
+
+ /*
+ * We DO NOT make any processing on
+ * RA packets, pushing them to user level AS IS
+ * without any WARRANTY that application will be able
+ * to interpret them. The reason is that we
+ * cannot make anything clever here.
+ *
+ * We are not end-node, so that if packet contains
+ * AH/ESP, we cannot make anything.
+ * Defragmentation also would be mistake, RA packets
+ * cannot be fragmented, because there is no warranty
+ * that different fragments will go along one path. --ANK
+ */
+ if (opt->ra) {
+ u8 *ptr = skb->nh.raw + opt->ra; /* opt->ra is an offset from the network header */
+ if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3])) /* selector: bytes 2 and 3,
+   most significant byte first */
+ return 0;
+ }
+
+ /*
+ * check and decrement ttl
+ */
+ if (hdr->hop_limit <= 1) {
+ /* Force OUTPUT device used as source address */
+ skb->dev = dst->dev;
+ icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
+ 0, skb->dev);
+
+ kfree_skb(skb);
+ return -ETIMEDOUT;
+ }
+
+ if (!xfrm6_route_forward(skb)) {
+ IP6_INC_STATS(IPSTATS_MIB_INDISCARDS);
+ goto drop;
+ }
+ dst = skb->dst; /* reloaded after xfrm6_route_forward() */
+
+ /* IPv6 specs say nothing about it, but it is clear that we cannot
+ send redirects to source routed frames.
+ */
+ if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0) {
+ struct in6_addr *target = NULL;
+ struct rt6_info *rt;
+ struct neighbour *n = dst->neighbour;
+
+ /*
+ * incoming and outgoing devices are the same
+ * send a redirect.
+ */
+
+ rt = (struct rt6_info *) dst;
+ if ((rt->rt6i_flags & RTF_GATEWAY))
+ target = (struct in6_addr*)&n->primary_key; /* gateway route: point at the neighbour's key */
+ else
+ target = &hdr->daddr; /* otherwise: the packet's own destination */
+
+ /* Limit redirects both by destination (here)
+ and by source (inside ndisc_send_redirect)
+ */
+ if (xrlim_allow(dst, 1*HZ))
+ ndisc_send_redirect(skb, n, target);
+ } else if (ipv6_addr_type(&hdr->saddr)&(IPV6_ADDR_MULTICAST|IPV6_ADDR_LOOPBACK
+ |IPV6_ADDR_LINKLOCAL)) {
+ /* This check is security critical. */
+ goto error;
+ }
+
+ if (skb->len > dst_mtu(dst)) {
+ /* Again, force OUTPUT device used as source address */
+ skb->dev = dst->dev;
+ icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst), skb->dev);
+ IP6_INC_STATS_BH(IPSTATS_MIB_INTOOBIGERRORS);
+ IP6_INC_STATS_BH(IPSTATS_MIB_FRAGFAILS);
+ kfree_skb(skb);
+ return -EMSGSIZE;
+ }
+
+ if (skb_cow(skb, dst->dev->hard_header_len)) {
+ IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
+ goto drop;
+ }
+
+ hdr = skb->nh.ipv6h; /* reloaded after skb_cow() */
+
+ /* Mangling hops number delayed to point after skb COW */
+
+ hdr->hop_limit--;
+
+ IP6_INC_STATS_BH(IPSTATS_MIB_OUTFORWDATAGRAMS);
+ return NF_HOOK(PF_INET6,NF_IP6_FORWARD, skb, skb->dev, dst->dev, ip6_forward_finish);
+
+error:
+ IP6_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS);
+drop:
+ kfree_skb(skb);
+ return -EINVAL;
+}
+/*
+ * Copy per-packet metadata from one skb to another.
+ *
+ * Copies packet type, priority, protocol, security and device. The
+ * route held by "to" is released and replaced by a new reference on
+ * the route of "from". Depending on the kernel configuration the
+ * traffic-control index, netfilter mark, conntrack association (with a
+ * reference taken), bridge-netfilter state (old reference dropped, new
+ * one taken) and netfilter debug word are copied as well.
+ */
+static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
+{
+ to->pkt_type = from->pkt_type;
+ to->priority = from->priority;
+ to->protocol = from->protocol;
+ to->security = from->security;
+ dst_release(to->dst); /* drop whatever route "to" held before */
+ to->dst = dst_clone(from->dst);
+ to->dev = from->dev;
+
+#ifdef CONFIG_NET_SCHED
+ to->tc_index = from->tc_index;
+#endif
+#ifdef CONFIG_NETFILTER
+ to->nfmark = from->nfmark;
+ /* Connection association is same as pre-frag packet */
+ to->nfct = from->nfct;
+ nf_conntrack_get(to->nfct);
+ to->nfctinfo = from->nfctinfo;
+#ifdef CONFIG_BRIDGE_NETFILTER
+ nf_bridge_put(to->nf_bridge);
+ to->nf_bridge = from->nf_bridge;
+ nf_bridge_get(to->nf_bridge);
+#endif
+#ifdef CONFIG_NETFILTER_DEBUG
+ to->nf_debug = from->nf_debug;
+#endif
+#endif
+}
+
+/*
+ *	Find the length of the leading part of the packet that ends just
+ *	before the first header which is not a hop-by-hop, routing or
+ *	destination options header. A destination options header that
+ *	comes after a routing header also ends the walk.
+ *
+ *	Returns that offset, counted from the network header. On return
+ *	*nexthdr points at the next-header byte that names the header
+ *	found at the returned offset.
+ */
+int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
+{
+	u16 offset = sizeof(struct ipv6hdr);
+	struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr*)(skb->nh.ipv6h + 1);
+	unsigned int packet_len = skb->tail - skb->nh.raw;
+	int seen_rthdr = 0;
+
+	*nexthdr = &skb->nh.ipv6h->nexthdr;
+
+	while (offset + 1 <= packet_len) {
+		u8 type = **nexthdr;
+
+		if (type != NEXTHDR_HOP &&
+		    type != NEXTHDR_ROUTING &&
+		    type != NEXTHDR_DEST)
+			break;
+
+		if (type == NEXTHDR_ROUTING)
+			seen_rthdr = 1;
+		else if (type == NEXTHDR_DEST && seen_rthdr)
+			break;
+
+		/* step over this extension header */
+		offset += ipv6_optlen(exthdr);
+		*nexthdr = &exthdr->nexthdr;
+		exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
+	}
+
+	return offset;
+}
+
+/*
+ *	Split skb into fragments that fit the route MTU and send each one
+ *	with output().
+ *
+ *	Two strategies are used:
+ *	 - fast path: if skb already carries a frag_list whose members have
+ *	   usable geometry (lengths that are multiples of 8 except for the
+ *	   last, enough headroom for the copied headers plus a fragment
+ *	   header, a socket owner, not shared), every member is turned into
+ *	   a fragment in place;
+ *	 - slow path: otherwise a new skb is allocated per fragment and the
+ *	   data is copied into it.
+ *
+ *	skb:    packet to fragment; on every path it is either handed to
+ *	        output() or freed here
+ *	output: routine that transmits one finished fragment; a non-zero
+ *	        return stops the operation
+ *
+ *	Returns 0 on success, -ENOMEM on allocation failure, or the error
+ *	returned by output(). FragFails is counted once per failed call.
+ */
+static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
+{
+	struct net_device *dev;
+	struct sk_buff *frag;
+	struct rt6_info *rt = (struct rt6_info*)skb->dst;
+	struct ipv6hdr *tmp_hdr;
+	struct frag_hdr *fh;
+	unsigned int mtu, hlen, left, len;
+	u32 frag_id = 0;	/* 0 = no identification chosen yet */
+	int ptr, offset = 0, err = 0;
+	u8 *prevhdr, nexthdr = 0;
+
+	dev = rt->u.dst.dev;
+	hlen = ip6_find_1stfragopt(skb, &prevhdr);
+	nexthdr = *prevhdr;
+
+	/* room for fragmentable data in one fragment */
+	mtu = dst_mtu(&rt->u.dst) - hlen - sizeof(struct frag_hdr);
+
+	if (skb_shinfo(skb)->frag_list) {
+		int first_len = skb_pagelen(skb);
+
+		if (first_len - hlen > mtu ||
+		    ((first_len - hlen) & 7) ||
+		    skb_cloned(skb))
+			goto slow_path;
+
+		for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
+			/*
+			 * Correct geometry. Both the copied headers (hlen)
+			 * and the fragment header are pushed in front of
+			 * each member below, so both must fit the headroom.
+			 */
+			if (frag->len > mtu ||
+			    ((frag->len & 7) && frag->next) ||
+			    skb_headroom(frag) < hlen + sizeof(struct frag_hdr))
+				goto slow_path;
+
+			/* Correct socket ownership. */
+			if (frag->sk == NULL)
+				goto slow_path;
+
+			/* Partially cloned skb? */
+			if (skb_shared(frag))
+				goto slow_path;
+		}
+
+		err = 0;
+		offset = 0;
+
+		/*
+		 * BUILD HEADER
+		 *
+		 * Allocate before the frag_list is detached: on failure the
+		 * list is still attached to skb, so freeing skb at "fail"
+		 * releases the whole chain instead of leaking it.
+		 */
+		tmp_hdr = kmalloc(hlen, GFP_ATOMIC);
+		if (!tmp_hdr) {
+			err = -ENOMEM;
+			goto fail;
+		}
+
+		frag = skb_shinfo(skb)->frag_list;
+		skb_shinfo(skb)->frag_list = NULL;
+
+		*prevhdr = NEXTHDR_FRAGMENT;
+		memcpy(tmp_hdr, skb->nh.raw, hlen);
+		__skb_pull(skb, hlen);
+		fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
+		skb->nh.raw = __skb_push(skb, hlen);
+		memcpy(skb->nh.raw, tmp_hdr, hlen);
+
+		ipv6_select_ident(skb, fh);
+		fh->nexthdr = nexthdr;
+		fh->reserved = 0;
+		fh->frag_off = htons(IP6_MF);
+		frag_id = fh->identification;
+
+		first_len = skb_pagelen(skb);
+		skb->data_len = first_len - skb_headlen(skb);
+		skb->len = first_len;
+		skb->nh.ipv6h->payload_len = htons(first_len - sizeof(struct ipv6hdr));
+
+
+		for (;;) {
+			/* Prepare header of the next frame,
+			 * before previous one went down. */
+			if (frag) {
+				frag->ip_summed = CHECKSUM_NONE;
+				frag->h.raw = frag->data;
+				fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
+				frag->nh.raw = __skb_push(frag, hlen);
+				memcpy(frag->nh.raw, tmp_hdr, hlen);
+				offset += skb->len - hlen - sizeof(struct frag_hdr);
+				fh->nexthdr = nexthdr;
+				fh->reserved = 0;
+				fh->frag_off = htons(offset);
+				if (frag->next != NULL)
+					fh->frag_off |= htons(IP6_MF);
+				fh->identification = frag_id;
+				frag->nh.ipv6h->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
+				ip6_copy_metadata(frag, skb);
+			}
+
+			err = output(skb);
+			if (err || !frag)
+				break;
+
+			skb = frag;
+			frag = skb->next;
+			skb->next = NULL;
+		}
+
+		kfree(tmp_hdr);
+
+		if (err == 0) {
+			IP6_INC_STATS(IPSTATS_MIB_FRAGOKS);
+			return 0;
+		}
+
+		/* output() failed: release the fragments not yet sent */
+		while (frag) {
+			skb = frag->next;
+			kfree_skb(frag);
+			frag = skb;
+		}
+
+		IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
+		return err;
+	}
+
+slow_path:
+	left = skb->len - hlen;		/* Fragmentable bytes still to send */
+	ptr = hlen;			/* Where to start from */
+
+	/*
+	 *	Fragment the datagram.
+	 */
+
+	*prevhdr = NEXTHDR_FRAGMENT;
+
+	/*
+	 *	Keep copying data until we run out.
+	 */
+	while (left > 0) {
+		len = left;
+		/* IF: it doesn't fit, use 'mtu' - the data space left */
+		if (len > mtu)
+			len = mtu;
+		/* IF: we are not sending up to and including the packet end
+		   then align the next start on an eight byte boundary */
+		if (len < left) {
+			len &= ~7;
+		}
+		/*
+		 *	Allocate buffer.
+		 */
+
+		frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
+				 LL_RESERVED_SPACE(dev), GFP_ATOMIC);
+		if (frag == NULL) {
+			NETDEBUG(printk(KERN_INFO "IPv6: frag: no memory for new fragment!\n"));
+			/* FragFails is counted once, at "fail" */
+			err = -ENOMEM;
+			goto fail;
+		}
+
+		/*
+		 *	Set up data on packet
+		 */
+
+		ip6_copy_metadata(frag, skb);
+		skb_reserve(frag, LL_RESERVED_SPACE(dev));
+		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
+		frag->nh.raw = frag->data;
+		fh = (struct frag_hdr*)(frag->data + hlen);
+		frag->h.raw = frag->data + hlen + sizeof(struct frag_hdr);
+
+		/*
+		 *	Charge the memory for the fragment to any owner
+		 *	it might possess
+		 */
+		if (skb->sk)
+			skb_set_owner_w(frag, skb->sk);
+
+		/*
+		 *	Copy the packet header into the new buffer.
+		 */
+		memcpy(frag->nh.raw, skb->data, hlen);
+
+		/*
+		 *	Build fragment header.
+		 *
+		 *	The identification is selected for the first fragment
+		 *	and reused for the rest. ipv6_select_ident() never
+		 *	hands out 0, so 0 reliably means "not selected yet".
+		 */
+		fh->nexthdr = nexthdr;
+		fh->reserved = 0;
+		if (!frag_id) {
+			ipv6_select_ident(skb, fh);
+			frag_id = fh->identification;
+		} else
+			fh->identification = frag_id;
+
+		/*
+		 *	Copy a block of the IP datagram.
+		 */
+		if (skb_copy_bits(skb, ptr, frag->h.raw, len))
+			BUG();
+		left -= len;
+
+		fh->frag_off = htons(offset);
+		if (left > 0)
+			fh->frag_off |= htons(IP6_MF);
+		frag->nh.ipv6h->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
+
+		ptr += len;
+		offset += len;
+
+		/*
+		 *	Put this fragment into the sending queue.
+		 */
+
+		IP6_INC_STATS(IPSTATS_MIB_FRAGCREATES);
+
+		err = output(frag);
+		if (err)
+			goto fail;
+	}
+	kfree_skb(skb);
+	IP6_INC_STATS(IPSTATS_MIB_FRAGOKS);
+	return err;
+
+fail:
+	kfree_skb(skb);
+	IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
+	return err;
+}
+/*
+ * Find the route for flow fl and, if the flow has no source address
+ * yet, pick one with ipv6_get_saddr().
+ *
+ * When sk is given, the route cached in the socket is tried first and
+ * kept only if it still matches the flow (rules in the comment below);
+ * otherwise ip6_route_output() is asked.
+ *
+ * Returns 0 with the route stored in *dst. On failure returns the
+ * route's error or the ipv6_get_saddr() error, after releasing the
+ * route and setting *dst to NULL.
+ */
+int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
+{
+ int err = 0;
+
+ *dst = NULL;
+ if (sk) {
+ struct ipv6_pinfo *np = inet6_sk(sk);
+
+ *dst = sk_dst_check(sk, np->dst_cookie);
+ if (*dst) {
+ struct rt6_info *rt = (struct rt6_info*)*dst;
+
+ /* Yes, checking route validity in not connected
+ case is not very simple. Take into account,
+ that we do not support routing by source, TOS,
+ and MSG_DONTROUTE --ANK (980726)
+
+ 1. If route was host route, check that
+ cached destination is current.
+ If it is network route, we still may
+ check its validity using saved pointer
+ to the last used address: daddr_cache.
+ We do not want to save whole address now,
+ (because main consumer of this service
+ is tcp, which has not this problem),
+ so that the last trick works only on connected
+ sockets.
+ 2. oif also should be the same.
+ */
+
+ if (((rt->rt6i_dst.plen != 128 ||
+ !ipv6_addr_equal(&fl->fl6_dst, &rt->rt6i_dst.addr))
+ && (np->daddr_cache == NULL ||
+ !ipv6_addr_equal(&fl->fl6_dst, np->daddr_cache)))
+ || (fl->oif && fl->oif != (*dst)->dev->ifindex)) {
+ dst_release(*dst); /* cached route does not fit this flow: forget it */
+ *dst = NULL;
+ }
+ }
+ }
+
+ if (*dst == NULL)
+ *dst = ip6_route_output(sk, fl);
+
+ if ((err = (*dst)->error))
+ goto out_err_release;
+
+ if (ipv6_addr_any(&fl->fl6_src)) {
+ err = ipv6_get_saddr(*dst, &fl->fl6_dst, &fl->fl6_src); /* fills fl->fl6_src */
+
+ if (err) {
+#if IP6_DEBUG >= 2
+ printk(KERN_DEBUG "ip6_dst_lookup: "
+ "no available source address\n");
+#endif
+ goto out_err_release;
+ }
+ }
+
+ return 0;
+
+out_err_release:
+ dst_release(*dst);
+ *dst = NULL;
+ return err;
+}
+/*
+ * Append length bytes, fetched through getfrag(), to the socket's
+ * pending write queue, starting new skbs as needed so that the queued
+ * data can later be sent as one packet or as fragments.
+ *
+ * On the first call for an empty queue the cork state is recorded in
+ * the socket: a copy of opt, a reference on rt, the flow, the hop limit
+ * and the fragment size taken from the route MTU. On later calls rt, fl
+ * and opt are taken from that cork state and the arguments of the same
+ * name are ignored, as is transhdrlen.
+ *
+ * getfrag(from, to, offset, len, odd, skb) copies len bytes of user
+ * data starting at offset into "to"; a negative return makes this
+ * function fail with -EFAULT.
+ *
+ * Returns 0 on success (immediately so when MSG_PROBE is set) or a
+ * negative errno. Errors that go through the "error" label subtract the
+ * bytes not appended from cork.length and count OutDiscards; the early
+ * returns for option setup and for an oversized message do neither.
+ */
+int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb),
+ void *from, int length, int transhdrlen,
+ int hlimit, struct ipv6_txoptions *opt, struct flowi *fl, struct rt6_info *rt,
+ unsigned int flags)
+{
+ struct inet_sock *inet = inet_sk(sk);
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct sk_buff *skb;
+ unsigned int maxfraglen, fragheaderlen;
+ int exthdrlen;
+ int hh_len;
+ int mtu;
+ int copy;
+ int err;
+ int offset = 0;
+ int csummode = CHECKSUM_NONE;
+
+ if (flags&MSG_PROBE)
+ return 0;
+ if (skb_queue_empty(&sk->sk_write_queue)) {
+ /*
+ * setup for corking
+ */
+ if (opt) {
+ if (np->cork.opt == NULL) {
+ np->cork.opt = kmalloc(opt->tot_len,
+ sk->sk_allocation);
+ if (unlikely(np->cork.opt == NULL))
+ return -ENOBUFS;
+ } else if (np->cork.opt->tot_len < opt->tot_len) {
+ printk(KERN_DEBUG "ip6_append_data: invalid option length\n");
+ return -EINVAL;
+ }
+ memcpy(np->cork.opt, opt, opt->tot_len);
+ inet->cork.flags |= IPCORK_OPT;
+ /* need source address above miyazawa*/
+ }
+ dst_hold(&rt->u.dst); /* reference kept in np->cork.rt */
+ np->cork.rt = rt;
+ inet->cork.fl = *fl;
+ np->cork.hop_limit = hlimit;
+ inet->cork.fragsize = mtu = dst_mtu(rt->u.dst.path);
+ if (dst_allfrag(rt->u.dst.path))
+ inet->cork.flags |= IPCORK_ALLFRAG;
+ inet->cork.length = 0;
+ sk->sk_sndmsg_page = NULL;
+ sk->sk_sndmsg_off = 0;
+ exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0);
+ length += exthdrlen;
+ transhdrlen += exthdrlen;
+ } else {
+ rt = np->cork.rt;
+ fl = &inet->cork.fl;
+ if (inet->cork.flags & IPCORK_OPT)
+ opt = np->cork.opt;
+ transhdrlen = 0;
+ exthdrlen = 0;
+ mtu = inet->cork.fragsize;
+ }
+
+ hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
+
+ fragheaderlen = sizeof(struct ipv6hdr) + (opt ? opt->opt_nflen : 0);
+ maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr); /* largest
+   skb length that leaves room for a fragment header and keeps the part
+   after fragheaderlen a multiple of 8 */
+
+ if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
+ if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
+ ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen);
+ return -EMSGSIZE;
+ }
+ }
+
+ /*
+ * Let's try using as much space as possible.
+ * Use MTU if total length of the message fits into the MTU.
+ * Otherwise, we need to reserve fragment header and
+ * fragment alignment (= 8-15 octets, in total).
+ *
+ * Note that we may need to "move" the data from the tail
+ * of the buffer to the new fragment when we split
+ * the message.
+ *
+ * FIXME: It may be fragmented into multiple chunks
+ * at once if non-fragmentable extension headers
+ * are too large.
+ * --yoshfuji
+ */
+
+ inet->cork.length += length;
+
+ if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
+ goto alloc_new_skb;
+
+ while (length > 0) {
+ /* Check if the remaining data fits into current packet. */
+ copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
+ if (copy < length)
+ copy = maxfraglen - skb->len;
+
+ if (copy <= 0) {
+ char *data;
+ unsigned int datalen;
+ unsigned int fraglen;
+ unsigned int fraggap;
+ unsigned int alloclen;
+ struct sk_buff *skb_prev;
+alloc_new_skb:
+ skb_prev = skb;
+
+ /* There's no room in the current skb */
+ if (skb_prev)
+ fraggap = skb_prev->len - maxfraglen; /* bytes of the previous skb beyond
+   maxfraglen; they are moved into the new skb below */
+ else
+ fraggap = 0;
+
+ /*
+ * If remaining data exceeds the mtu,
+ * we know we need more fragment(s).
+ */
+ datalen = length + fraggap;
+ if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
+ datalen = maxfraglen - fragheaderlen;
+
+ fraglen = datalen + fragheaderlen;
+ if ((flags & MSG_MORE) &&
+ !(rt->u.dst.dev->features&NETIF_F_SG))
+ alloclen = mtu;
+ else
+ alloclen = datalen + fragheaderlen;
+
+ /*
+ * The last fragment gets additional space at tail.
+ * Note: we overallocate on fragments with MSG_MORE
+ * because we have no idea if we're the last one.
+ */
+ if (datalen == length + fraggap)
+ alloclen += rt->u.dst.trailer_len;
+
+ /*
+ * We just reserve space for fragment header.
+ * Note: this may be overallocation if the message
+ * (without MSG_MORE) fits into the MTU.
+ */
+ alloclen += sizeof(struct frag_hdr);
+
+ if (transhdrlen) {
+ skb = sock_alloc_send_skb(sk,
+ alloclen + hh_len,
+ (flags & MSG_DONTWAIT), &err);
+ } else {
+ skb = NULL;
+ if (atomic_read(&sk->sk_wmem_alloc) <=
+ 2 * sk->sk_sndbuf)
+ skb = sock_wmalloc(sk,
+ alloclen + hh_len, 1,
+ sk->sk_allocation);
+ if (unlikely(skb == NULL))
+ err = -ENOBUFS;
+ }
+ if (skb == NULL)
+ goto error;
+ /*
+ * Fill in the control structures
+ */
+ skb->ip_summed = csummode;
+ skb->csum = 0;
+ /* reserve for fragmentation */
+ skb_reserve(skb, hh_len+sizeof(struct frag_hdr));
+
+ /*
+ * Find where to start putting bytes
+ */
+ data = skb_put(skb, fraglen);
+ skb->nh.raw = data + exthdrlen;
+ data += fragheaderlen;
+ skb->h.raw = data + exthdrlen;
+
+ if (fraggap) {
+ skb->csum = skb_copy_and_csum_bits(
+ skb_prev, maxfraglen,
+ data + transhdrlen, fraggap, 0);
+ skb_prev->csum = csum_sub(skb_prev->csum,
+ skb->csum);
+ data += fraggap;
+ skb_trim(skb_prev, maxfraglen);
+ }
+ copy = datalen - transhdrlen - fraggap;
+ if (copy < 0) {
+ err = -EINVAL;
+ kfree_skb(skb);
+ goto error;
+ } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
+ err = -EFAULT;
+ kfree_skb(skb);
+ goto error;
+ }
+
+ offset += copy;
+ length -= datalen - fraggap;
+ transhdrlen = 0; /* header lengths only apply to the first skb */
+ exthdrlen = 0;
+ csummode = CHECKSUM_NONE;
+
+ /*
+ * Put the packet on the pending queue
+ */
+ __skb_queue_tail(&sk->sk_write_queue, skb);
+ continue;
+ }
+
+ if (copy > length)
+ copy = length;
+
+ if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
+ unsigned int off;
+
+ off = skb->len;
+ if (getfrag(from, skb_put(skb, copy),
+ offset, copy, off, skb) < 0) {
+ __skb_trim(skb, off); /* undo the skb_put() */
+ err = -EFAULT;
+ goto error;
+ }
+ } else {
+ int i = skb_shinfo(skb)->nr_frags;
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
+ struct page *page = sk->sk_sndmsg_page;
+ int off = sk->sk_sndmsg_off;
+ unsigned int left;
+
+ if (page && (left = PAGE_SIZE - off) > 0) {
+ if (copy >= left)
+ copy = left;
+ if (page != frag->page) {
+ if (i == MAX_SKB_FRAGS) {
+ err = -EMSGSIZE;
+ goto error;
+ }
+ get_page(page);
+ skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
+ frag = &skb_shinfo(skb)->frags[i];
+ }
+ } else if(i < MAX_SKB_FRAGS) {
+ if (copy > PAGE_SIZE)
+ copy = PAGE_SIZE;
+ page = alloc_pages(sk->sk_allocation, 0);
+ if (page == NULL) {
+ err = -ENOMEM;
+ goto error;
+ }
+ sk->sk_sndmsg_page = page;
+ sk->sk_sndmsg_off = 0;
+
+ skb_fill_page_desc(skb, i, page, 0, 0);
+ frag = &skb_shinfo(skb)->frags[i];
+ skb->truesize += PAGE_SIZE;
+ atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc);
+ } else {
+ err = -EMSGSIZE;
+ goto error;
+ }
+ if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
+ err = -EFAULT;
+ goto error;
+ }
+ sk->sk_sndmsg_off += copy;
+ frag->size += copy;
+ skb->len += copy;
+ skb->data_len += copy;
+ }
+ offset += copy;
+ length -= copy;
+ }
+ return 0;
+error:
+ inet->cork.length -= length; /* length now holds the bytes that were not appended */
+ IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
+ return err;
+}
+/*
+ * Send everything queued on the socket's write queue as one packet.
+ *
+ * The first queued skb becomes the head; every further skb is chained
+ * on the head's frag_list and its length added to the head. Extension
+ * headers from the corked options and then the IPv6 header are pushed
+ * onto the head, the corked route is attached, and the packet goes
+ * through the NF_IP6_LOCAL_OUT hook to dst_output().
+ *
+ * The cork state (options copy, route reference, flow, IPCORK_OPT and
+ * IPCORK_ALLFRAG flags) is released on every path, including an empty
+ * queue.
+ *
+ * Returns 0 on success or when the queue was empty. A negative hook
+ * result is returned as is; a positive one is translated with
+ * net_xmit_errno() if inet->recverr is set and otherwise reported as 0.
+ */
+int ip6_push_pending_frames(struct sock *sk)
+{
+ struct sk_buff *skb, *tmp_skb;
+ struct sk_buff **tail_skb;
+ struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
+ struct inet_sock *inet = inet_sk(sk);
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct ipv6hdr *hdr;
+ struct ipv6_txoptions *opt = np->cork.opt;
+ struct rt6_info *rt = np->cork.rt;
+ struct flowi *fl = &inet->cork.fl;
+ unsigned char proto = fl->proto;
+ int err = 0;
+
+ if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
+ goto out;
+ tail_skb = &(skb_shinfo(skb)->frag_list);
+
+ /* move skb->data to ip header from ext header */
+ if (skb->data < skb->nh.raw)
+ __skb_pull(skb, skb->nh.raw - skb->data);
+ while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
+ __skb_pull(tmp_skb, skb->h.raw - skb->nh.raw); /* pull by the head skb's nh-to-h distance */
+ *tail_skb = tmp_skb;
+ tail_skb = &(tmp_skb->next);
+ skb->len += tmp_skb->len;
+ skb->data_len += tmp_skb->len;
+#if 0 /* Logically correct, but useless work, ip_fragment() will have to undo */
+ skb->truesize += tmp_skb->truesize;
+ __sock_put(tmp_skb->sk);
+ tmp_skb->destructor = NULL;
+ tmp_skb->sk = NULL;
+#endif
+ }
+
+ ipv6_addr_copy(final_dst, &fl->fl6_dst);
+ __skb_pull(skb, skb->h.raw - skb->nh.raw);
+ if (opt && opt->opt_flen)
+ ipv6_push_frag_opts(skb, opt, &proto);
+ if (opt && opt->opt_nflen)
+ ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst); /* may redirect final_dst */
+
+ skb->nh.ipv6h = hdr = (struct ipv6hdr*) skb_push(skb, sizeof(struct ipv6hdr));
+
+ *(u32*)hdr = fl->fl6_flowlabel | htonl(0x60000000);
+
+ if (skb->len <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN)
+ hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
+ else
+ hdr->payload_len = 0; /* payload beyond IPV6_MAXPLEN: the length field is left zero */
+ hdr->hop_limit = np->cork.hop_limit;
+ hdr->nexthdr = proto;
+ ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
+ ipv6_addr_copy(&hdr->daddr, final_dst);
+
+ skb->dst = dst_clone(&rt->u.dst);
+ IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
+ err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dst->dev, dst_output);
+ if (err) {
+ if (err > 0)
+ err = inet->recverr ? net_xmit_errno(err) : 0;
+ if (err)
+ goto error;
+ }
+
+out:
+ inet->cork.flags &= ~IPCORK_OPT;
+ if (np->cork.opt) {
+ kfree(np->cork.opt);
+ np->cork.opt = NULL;
+ }
+ if (np->cork.rt) {
+ dst_release(&np->cork.rt->u.dst);
+ np->cork.rt = NULL;
+ inet->cork.flags &= ~IPCORK_ALLFRAG;
+ }
+ memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
+ return err;
+error:
+ goto out; /* no extra error handling: falls into the common cleanup */
+}
+
+/*
+ *	Throw away everything queued on the socket's write queue and
+ *	release the cork state.
+ *
+ *	Each queued skb is counted in OutDiscards and freed, taken from
+ *	the tail of the queue. Afterwards the corked options copy and the
+ *	corked route reference are dropped, the IPCORK_OPT flag is
+ *	cleared (IPCORK_ALLFRAG too if a route was held) and the corked
+ *	flow is zeroed.
+ */
+void ip6_flush_pending_frames(struct sock *sk)
+{
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct inet_sock *inet = inet_sk(sk);
+	struct sk_buff *skb;
+
+	for (;;) {
+		skb = __skb_dequeue_tail(&sk->sk_write_queue);
+		if (skb == NULL)
+			break;
+
+		IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
+		kfree_skb(skb);
+	}
+
+	inet->cork.flags &= ~IPCORK_OPT;
+
+	if (np->cork.opt != NULL) {
+		kfree(np->cork.opt);
+		np->cork.opt = NULL;
+	}
+
+	if (np->cork.rt != NULL) {
+		dst_release(&np->cork.rt->u.dst);
+		np->cork.rt = NULL;
+		inet->cork.flags &= ~IPCORK_ALLFRAG;
+	}
+
+	memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
+}
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
new file mode 100644
index 000000000000..3b1c9fa184ae
--- /dev/null
+++ b/net/ipv6/ip6_tunnel.c
@@ -0,0 +1,1163 @@
+/*
+ * IPv6 over IPv6 tunnel device
+ * Linux INET6 implementation
+ *
+ * Authors:
+ * Ville Nuorvala <vnuorval@tcs.hut.fi>
+ *
+ * $Id$
+ *
+ * Based on:
+ * linux/net/ipv6/sit.c
+ *
+ * RFC 2473
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/sockios.h>
+#include <linux/if.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/if_tunnel.h>
+#include <linux/net.h>
+#include <linux/in6.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <linux/icmpv6.h>
+#include <linux/init.h>
+#include <linux/route.h>
+#include <linux/rtnetlink.h>
+#include <linux/netfilter_ipv6.h>
+
+#include <asm/uaccess.h>
+#include <asm/atomic.h>
+
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/protocol.h>
+#include <net/ip6_route.h>
+#include <net/addrconf.h>
+#include <net/ip6_tunnel.h>
+#include <net/xfrm.h>
+#include <net/dsfield.h>
+#include <net/inet_ecn.h>
+
+MODULE_AUTHOR("Ville Nuorvala");
+MODULE_DESCRIPTION("IPv6-in-IPv6 tunnel");
+MODULE_LICENSE("GPL");
+
+#define IPV6_TLV_TEL_DST_SIZE 8 /* NOTE(review): presumably the byte size of the destination-options header built in create_tel(), which uses a literal 8 - confirm */
+
+#ifdef IP6_TNL_DEBUG
+#define IP6_TNL_TRACE(x...) printk(KERN_DEBUG "%s:" x "\n", __FUNCTION__) /* debug trace prefixed with the calling function's name */
+#else
+#define IP6_TNL_TRACE(x...) do {;} while(0) /* compiled out when IP6_TNL_DEBUG is not defined */
+#endif
+
+#define IPV6_TCLASS_MASK (IPV6_FLOWINFO_MASK & ~IPV6_FLOWLABEL_MASK) /* flowinfo bits that are not part of the flow label */
+
+#define HASH_SIZE 32 /* number of buckets in tnls_r_l; HASH() masks with HASH_SIZE - 1 */
+
+#define HASH(addr) (((addr)->s6_addr32[0] ^ (addr)->s6_addr32[1] ^ \
+ (addr)->s6_addr32[2] ^ (addr)->s6_addr32[3]) & \
+ (HASH_SIZE - 1)) /* XOR of the four 32-bit address words, reduced to a bucket index */
+
+static int ip6ip6_fb_tnl_dev_init(struct net_device *dev);
+static int ip6ip6_tnl_dev_init(struct net_device *dev);
+static void ip6ip6_tnl_dev_setup(struct net_device *dev);
+
+/* the IPv6 tunnel fallback device */
+static struct net_device *ip6ip6_fb_tnl_dev;
+
+
+/* lists for storing tunnels in use */
+static struct ip6_tnl *tnls_r_l[HASH_SIZE]; /* tunnels hashed by HASH(remote) ^ HASH(local) */
+static struct ip6_tnl *tnls_wc[1]; /* single slot; set to the fallback tunnel by ip6ip6_fb_tnl_dev_init() */
+static struct ip6_tnl **tnls[2] = { tnls_wc, tnls_r_l }; /* indexed by the prio value computed in ip6ip6_bucket() */
+
+/* lock for the tunnel lists */
+static DEFINE_RWLOCK(ip6ip6_lock);
+
+/*
+ * Hand back the tunnel's cached route, or NULL when there is none.
+ * An entry flagged obsolete is re-validated through its ops->check()
+ * with the stored cookie; if that fails the cache slot is cleared, the
+ * entry is released and NULL is returned.
+ */
+static inline struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t)
+{
+	struct dst_entry *cached = t->dst_cache;
+
+	if (cached == NULL || !cached->obsolete)
+		return cached;
+
+	if (cached->ops->check(cached, t->dst_cookie) != NULL)
+		return cached;
+
+	t->dst_cache = NULL;
+	dst_release(cached);
+	return NULL;
+}
+
+/* Release whatever route the tunnel has cached and empty the cache slot. */
+static inline void ip6_tnl_dst_reset(struct ip6_tnl *t)
+{
+	struct dst_entry *cached = t->dst_cache;
+
+	dst_release(cached);
+	t->dst_cache = NULL;
+}
+
+/*
+ * Install @dst as the tunnel's cached route, releasing the previous one.
+ * The cookie used for later validation is the serial number of the
+ * route's fib node, or 0 when the route has no node.
+ */
+static inline void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst)
+{
+	struct rt6_info *route = (struct rt6_info *) dst;
+
+	if (route->rt6i_node)
+		t->dst_cookie = route->rt6i_node->fn_sernum;
+	else
+		t->dst_cookie = 0;
+
+	dst_release(t->dst_cache);
+	t->dst_cache = dst;
+}
+
+/**
+ * ip6ip6_tnl_lookup - find the tunnel serving a pair of end-point addresses
+ * @remote: the address of the tunnel exit-point
+ * @local: the address of the tunnel entry-point
+ *
+ * Description:
+ * Walks the hash bucket selected by both addresses and picks the first
+ * tunnel whose local and remote addresses match and whose device is up.
+ * Only if none matches is the wildcard slot considered.
+ *
+ * Return:
+ * matching tunnel if one exists,
+ * else the tunnel in the wildcard slot if its device is up,
+ * else %NULL
+ **/
+
+static struct ip6_tnl *
+ip6ip6_tnl_lookup(struct in6_addr *remote, struct in6_addr *local)
+{
+	struct ip6_tnl *cand = tnls_r_l[HASH(remote) ^ HASH(local)];
+	struct ip6_tnl *fallback;
+
+	while (cand != NULL) {
+		if (ipv6_addr_equal(local, &cand->parms.laddr) &&
+		    ipv6_addr_equal(remote, &cand->parms.raddr) &&
+		    (cand->dev->flags & IFF_UP))
+			return cand;
+		cand = cand->next;
+	}
+
+	fallback = tnls_wc[0];
+	if (fallback != NULL && (fallback->dev->flags & IFF_UP))
+		return fallback;
+
+	return NULL;
+}
+
+/**
+ * ip6ip6_bucket - get head of list matching given tunnel parameters
+ * @p: parameters containing tunnel end-points
+ *
+ * Description:
+ * When both laddr and raddr in @p are the unspecified address the
+ * single wildcard slot is selected; otherwise the hashed table is
+ * indexed with HASH(raddr) ^ HASH(laddr).
+ *
+ * Return: pointer to the head pointer of the selected tunnel list
+ **/
+
+static struct ip6_tnl **
+ip6ip6_bucket(struct ip6_tnl_parm *p)
+{
+	/* tnls[0] is the wildcard table, tnls[1] the hashed one */
+	if (ipv6_addr_any(&p->raddr) && ipv6_addr_any(&p->laddr))
+		return &tnls[0][0];
+
+	return &tnls[1][HASH(&p->raddr) ^ HASH(&p->laddr)];
+}
+
+/**
+ * ip6ip6_tnl_link - add tunnel to hash table
+ * @t: tunnel to be added
+ *
+ * Description:
+ * The tunnel is pushed onto the front of the list selected by
+ * ip6ip6_bucket() for its parameters. Only the store that publishes
+ * the new head is done under the write side of ip6ip6_lock.
+ **/
+
+static void
+ip6ip6_tnl_link(struct ip6_tnl *t)
+{
+ struct ip6_tnl **tp = ip6ip6_bucket(&t->parms);
+
+ t->next = *tp; /* read of the current head happens before the lock is taken */
+ write_lock_bh(&ip6ip6_lock);
+ *tp = t; /* publish the tunnel as the new list head */
+ write_unlock_bh(&ip6ip6_lock);
+}
+
+/**
+ * ip6ip6_tnl_unlink - remove tunnel from hash table
+ * @t: tunnel to be removed
+ *
+ * Description:
+ * Searches the list chosen by ip6ip6_bucket() for @t and, if present,
+ * splices it out while holding the write side of ip6ip6_lock. Nothing
+ * happens when @t is not on that list.
+ **/
+
+static void
+ip6ip6_tnl_unlink(struct ip6_tnl *t)
+{
+	struct ip6_tnl **link = ip6ip6_bucket(&t->parms);
+
+	/* advance to the pointer that refers to @t, or to the list end */
+	while (*link != NULL && *link != t)
+		link = &(*link)->next;
+
+	if (*link == NULL)
+		return;
+
+	write_lock_bh(&ip6ip6_lock);
+	*link = t->next;
+	write_unlock_bh(&ip6ip6_lock);
+}
+
+/**
+ * ip6_tnl_create() - create a new tunnel
+ * @p: tunnel parameters
+ * @pt: pointer to new tunnel
+ *
+ * Description:
+ * Create tunnel matching given parameters.
+ * If @p carries no name, the first unused "ip6tnl%d" name with an
+ * index from 1 up to (but excluding) IP6_TNL_MAX is chosen. The new
+ * device is registered, an extra device reference is taken and the
+ * tunnel is linked into the tunnel lists. @pt is written on success
+ * only.
+ *
+ * Return:
+ * 0 on success,
+ * %-ENOBUFS if no free automatic name was found,
+ * %-ENOMEM if the device could not be allocated,
+ * or the negative error returned by register_netdevice()
+ **/
+
+static int
+ip6_tnl_create(struct ip6_tnl_parm *p, struct ip6_tnl **pt)
+{
+ struct net_device *dev;
+ struct ip6_tnl *t;
+ char name[IFNAMSIZ];
+ int err;
+
+ if (p->name[0]) {
+ strlcpy(name, p->name, IFNAMSIZ);
+ } else {
+ int i;
+ for (i = 1; i < IP6_TNL_MAX; i++) {
+ sprintf(name, "ip6tnl%d", i);
+ if (__dev_get_by_name(name) == NULL)
+ break;
+ }
+ if (i == IP6_TNL_MAX) /* every candidate name is already taken */
+ return -ENOBUFS;
+ }
+ dev = alloc_netdev(sizeof (*t), name, ip6ip6_tnl_dev_setup);
+ if (dev == NULL)
+ return -ENOMEM;
+
+ t = dev->priv;
+ dev->init = ip6ip6_tnl_dev_init;
+ t->parms = *p; /* parameters must be in place before dev->init runs ip6ip6_tnl_link_config() */
+
+ if ((err = register_netdevice(dev)) < 0) {
+ free_netdev(dev);
+ return err;
+ }
+ dev_hold(dev); /* dropped again by dev_put() in ip6ip6_tnl_dev_uninit() */
+
+ ip6ip6_tnl_link(t);
+ *pt = t;
+ return 0;
+}
+
+/**
+ * ip6ip6_tnl_locate - find or create tunnel matching given parameters
+ * @p: tunnel parameters
+ * @pt: where the located or newly created tunnel is stored
+ * @create: != 0 if allowed to create new tunnel if no match found
+ *
+ * Description:
+ * ip6ip6_tnl_locate() first tries to locate an existing tunnel
+ * based on @p. If this is unsuccessful, but @create is set a new
+ * tunnel device is created and registered for use.
+ * Unlike ip6ip6_tnl_lookup() the search does not look at the device's
+ * IFF_UP flag and never falls back to the wildcard tunnel.
+ *
+ * Return:
+ * 0 if tunnel located or created,
+ * -EINVAL if parameters incorrect,
+ * -EEXIST if @create is set but a matching tunnel already exists
+ * (@pt still points at the existing tunnel),
+ * -ENODEV if no matching tunnel available,
+ * or an error from ip6_tnl_create()
+ **/
+
+static int
+ip6ip6_tnl_locate(struct ip6_tnl_parm *p, struct ip6_tnl **pt, int create)
+{
+ struct in6_addr *remote = &p->raddr;
+ struct in6_addr *local = &p->laddr;
+ struct ip6_tnl *t;
+
+ if (p->proto != IPPROTO_IPV6)
+ return -EINVAL;
+
+ for (t = *ip6ip6_bucket(p); t; t = t->next) {
+ if (ipv6_addr_equal(local, &t->parms.laddr) &&
+ ipv6_addr_equal(remote, &t->parms.raddr)) {
+ *pt = t;
+ return (create ? -EEXIST : 0);
+ }
+ }
+ if (!create)
+ return -ENODEV;
+
+ return ip6_tnl_create(p, pt);
+}
+
+/**
+ * ip6ip6_tnl_dev_uninit - tunnel device uninitializer
+ * @dev: the device to be destroyed
+ *
+ * Description:
+ * Takes the tunnel out of the tunnel lists (the fallback device is
+ * cleared from the wildcard slot under the write lock, every other
+ * tunnel is unlinked from its bucket), drops the cached route and
+ * puts one reference on @dev.
+ **/
+
+static void
+ip6ip6_tnl_dev_uninit(struct net_device *dev)
+{
+	struct ip6_tnl *tunnel = dev->priv;
+
+	if (dev != ip6ip6_fb_tnl_dev) {
+		ip6ip6_tnl_unlink(tunnel);
+	} else {
+		write_lock_bh(&ip6ip6_lock);
+		tnls_wc[0] = NULL;
+		write_unlock_bh(&ip6ip6_lock);
+	}
+
+	ip6_tnl_dst_reset(tunnel);
+	dev_put(dev);
+}
+
+/**
+ * parse_tlv_tnl_enc_lim - handle encapsulation limit option
+ * @skb: received socket buffer
+ * @raw: start of the IPv6 header whose extension headers are scanned
+ *
+ * Description:
+ * Walks the extension header chain that follows the IPv6 header at
+ * @raw. Inside each destination options header the TLVs are scanned
+ * for a tunnel encapsulation limit option of length 1. The walk stops
+ * at a non-extension header, at NEXTHDR_NONE, at a fragment header
+ * with a non-zero frag_off field, or when a header cannot be pulled.
+ *
+ * Return:
+ * 0 if none was found,
+ * else index to encapsulation limit (byte offset from @raw)
+ **/
+
+static __u16
+parse_tlv_tnl_enc_lim(struct sk_buff *skb, __u8 * raw)
+{
+ struct ipv6hdr *ipv6h = (struct ipv6hdr *) raw;
+ __u8 nexthdr = ipv6h->nexthdr;
+ __u16 off = sizeof (*ipv6h); /* offset of the header being examined, relative to raw */
+
+ while (ipv6_ext_hdr(nexthdr) && nexthdr != NEXTHDR_NONE) {
+ __u16 optlen = 0;
+ struct ipv6_opt_hdr *hdr;
+ if (raw + off + sizeof (*hdr) > skb->data &&
+ !pskb_may_pull(skb, raw - skb->data + off + sizeof (*hdr)))
+ break;
+
+ hdr = (struct ipv6_opt_hdr *) (raw + off);
+ if (nexthdr == NEXTHDR_FRAGMENT) {
+ struct frag_hdr *frag_hdr = (struct frag_hdr *) hdr;
+ if (frag_hdr->frag_off)
+ break;
+ optlen = 8; /* fixed length used for the fragment header */
+ } else if (nexthdr == NEXTHDR_AUTH) {
+ optlen = (hdr->hdrlen + 2) << 2; /* AH length is computed in 4-byte units here */
+ } else {
+ optlen = ipv6_optlen(hdr);
+ }
+ if (nexthdr == NEXTHDR_DEST) {
+ __u16 i = off + 2; /* first TLV starts after the two leading header bytes */
+ while (1) {
+ struct ipv6_tlv_tnl_enc_lim *tel;
+
+ /* No more room for encapsulation limit */
+ if (i + sizeof (*tel) > off + optlen)
+ break;
+
+ tel = (struct ipv6_tlv_tnl_enc_lim *) &raw[i];
+ /* return index of option if found and valid */
+ if (tel->type == IPV6_TLV_TNL_ENCAP_LIMIT &&
+ tel->length == 1)
+ return i;
+ /* else jump to next option */
+ if (tel->type)
+ i += tel->length + 2; /* type byte + length byte + data */
+ else
+ i++; /* type 0 is treated as a single-byte option */
+ }
+ }
+ nexthdr = hdr->nexthdr;
+ off += optlen;
+ }
+ return 0;
+}
+
+/**
+ * ip6ip6_err - tunnel error handler
+ * @skb: buffer whose data starts at the IPv6 header the error refers to
+ * @opt: unused
+ * @type: ICMPv6 type of the received error
+ * @code: ICMPv6 code of the received error
+ * @offset: offset in @skb data at which the encapsulated packet starts
+ * @info: ICMPv6 info field; converted with ntohl() before use here
+ *
+ * Description:
+ * ip6ip6_err() should handle errors in the tunnel according
+ * to the specifications in RFC 2473.
+ * The tunnel is looked up from the addresses of the header at
+ * skb->data. Depending on the error an ICMPv6 message is relayed
+ * towards the source of the encapsulated packet: address unreachable
+ * by default, or packet too big carrying the new tunnel MTU. The
+ * whole function runs under the read side of ip6ip6_lock.
+ **/
+
+static void
+ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+	   int type, int code, int offset, __u32 info)
+{
+	struct ipv6hdr *ipv6h = (struct ipv6hdr *) skb->data;
+	struct ip6_tnl *t;
+	int rel_msg = 0;
+	int rel_type = ICMPV6_DEST_UNREACH;
+	int rel_code = ICMPV6_ADDR_UNREACH;
+	__u32 rel_info = 0;
+	/* 32 bits wide: header size plus a 16-bit payload length can
+	   exceed what a __u16 holds. */
+	__u32 len;
+
+	/* If the packet doesn't contain the original IPv6 header we are
+	   in trouble since we might need the source address for further
+	   processing of the error. */
+
+	read_lock(&ip6ip6_lock);
+	if ((t = ip6ip6_tnl_lookup(&ipv6h->daddr, &ipv6h->saddr)) == NULL)
+		goto out;
+
+	switch (type) {
+		__u32 teli;
+		struct ipv6_tlv_tnl_enc_lim *tel;
+		__u32 mtu;
+	case ICMPV6_DEST_UNREACH:
+		if (net_ratelimit())
+			printk(KERN_WARNING
+			       "%s: Path to destination invalid "
+			       "or inactive!\n", t->parms.name);
+		rel_msg = 1;
+		break;
+	case ICMPV6_TIME_EXCEED:
+		if (code == ICMPV6_EXC_HOPLIMIT) {
+			if (net_ratelimit())
+				printk(KERN_WARNING
+				       "%s: Too small hop limit or "
+				       "routing loop in tunnel!\n",
+				       t->parms.name);
+			rel_msg = 1;
+		}
+		break;
+	case ICMPV6_PARAMPROB:
+		/* ignore if parameter problem not caused by a tunnel
+		   encapsulation limit sub-option */
+		if (code != ICMPV6_HDR_FIELD) {
+			break;
+		}
+		teli = parse_tlv_tnl_enc_lim(skb, skb->data);
+
+		/* the pointer in info must refer to the limit byte, which
+		   sits two bytes into the option found at teli */
+		if (teli && teli == ntohl(info) - 2) {
+			tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli];
+			if (tel->encap_limit == 0) {
+				if (net_ratelimit())
+					printk(KERN_WARNING
+					       "%s: Too small encapsulation "
+					       "limit or routing loop in "
+					       "tunnel!\n", t->parms.name);
+				rel_msg = 1;
+			}
+		}
+		break;
+	case ICMPV6_PKT_TOOBIG:
+		mtu = ntohl(info) - offset;
+		if (mtu < IPV6_MIN_MTU)
+			mtu = IPV6_MIN_MTU;
+		t->dev->mtu = mtu;
+
+		/* payload_len is stored in network byte order and has to
+		   be converted before it is compared with the MTU */
+		len = sizeof (*ipv6h) + ntohs(ipv6h->payload_len);
+		if (len > mtu) {
+			rel_type = ICMPV6_PKT_TOOBIG;
+			rel_code = 0;
+			rel_info = mtu;
+			rel_msg = 1;
+		}
+		break;
+	}
+	if (rel_msg && pskb_may_pull(skb, offset + sizeof (*ipv6h))) {
+		struct rt6_info *rt;
+		struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
+		if (!skb2)
+			goto out;
+
+		dst_release(skb2->dst);
+		skb2->dst = NULL;
+		/* make the clone start at the encapsulated packet */
+		skb_pull(skb2, offset);
+		skb2->nh.raw = skb2->data;
+
+		/* Try to guess incoming interface */
+		rt = rt6_lookup(&skb2->nh.ipv6h->saddr, NULL, 0, 0);
+
+		if (rt && rt->rt6i_dev)
+			skb2->dev = rt->rt6i_dev;
+
+		icmpv6_send(skb2, rel_type, rel_code, rel_info, skb2->dev);
+
+		if (rt)
+			dst_release(&rt->u.dst);
+
+		kfree_skb(skb2);
+	}
+out:
+	read_unlock(&ip6ip6_lock);
+}
+
+/*
+ * Propagate congestion marking inwards: when the outer header's DS field
+ * is marked CE, set CE on the header at skb->nh as well.
+ */
+static inline void ip6ip6_ecn_decapsulate(struct ipv6hdr *outer_iph,
+					  struct sk_buff *skb)
+{
+	if (!INET_ECN_is_ce(ipv6_get_dsfield(outer_iph)))
+		return;
+
+	IP6_ECN_set_ce(skb->nh.ipv6h);
+}
+
+/**
+ * ip6ip6_rcv - decapsulate IPv6 packet and retransmit it locally
+ * @pskb: pointer to the received socket buffer
+ * @nhoffp: unused
+ *
+ * Description:
+ * Looks up the tunnel for the outer source/destination addresses under
+ * the read side of ip6ip6_lock. On a match the buffer is re-labelled
+ * as an IPv6 packet received on the tunnel device and fed to
+ * netif_rx(). Without a match an ICMPv6 address-unreachable error is
+ * sent.
+ *
+ * Return:
+ * 0 if the buffer was consumed here (delivered, or freed because the
+ * xfrm policy check failed),
+ * 1 otherwise; the buffer is not freed by this function in that case
+ **/
+
+static int
+ip6ip6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
+{
+	struct sk_buff *skb = *pskb;
+	struct ipv6hdr *ipv6h;
+	struct ip6_tnl *t;
+
+	if (!pskb_may_pull(skb, sizeof (*ipv6h)))
+		goto discard;
+
+	ipv6h = skb->nh.ipv6h;
+
+	read_lock(&ip6ip6_lock);
+
+	if ((t = ip6ip6_tnl_lookup(&ipv6h->saddr, &ipv6h->daddr)) != NULL) {
+		if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
+			/* every exit taken after read_lock() must drop
+			   the lock again, this one included */
+			read_unlock(&ip6ip6_lock);
+			kfree_skb(skb);
+			return 0;
+		}
+
+		if (!(t->parms.flags & IP6_TNL_F_CAP_RCV)) {
+			t->stat.rx_dropped++;
+			read_unlock(&ip6ip6_lock);
+			goto discard;
+		}
+		secpath_reset(skb);
+		/* the outer header becomes the "mac" header, the inner
+		   packet starts at skb->data */
+		skb->mac.raw = skb->nh.raw;
+		skb->nh.raw = skb->data;
+		skb->protocol = htons(ETH_P_IPV6);
+		skb->pkt_type = PACKET_HOST;
+		memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
+		skb->dev = t->dev;
+		dst_release(skb->dst);
+		skb->dst = NULL;
+		/* ipv6h still refers to the outer header here */
+		if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY)
+			ipv6_copy_dscp(ipv6h, skb->nh.ipv6h);
+		ip6ip6_ecn_decapsulate(ipv6h, skb);
+		t->stat.rx_packets++;
+		t->stat.rx_bytes += skb->len;
+		netif_rx(skb);
+		read_unlock(&ip6ip6_lock);
+		return 0;
+	}
+	read_unlock(&ip6ip6_lock);
+	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
+discard:
+	return 1;
+}
+
+/*
+ * Allocate (GFP_ATOMIC) a zeroed ipv6_txoptions followed by 8 bytes that
+ * serve as its dst0opt header.  The header is filled with a tunnel
+ * encapsulation limit option carrying @encap_limit, followed by a PadN
+ * option.  Returns NULL if the allocation fails; the caller frees the
+ * result with kfree().
+ */
+static inline struct ipv6_txoptions *create_tel(__u8 encap_limit)
+{
+	int total = sizeof(struct ipv6_txoptions) + 8;
+	struct ipv6_txoptions *txopt = kmalloc(total, GFP_ATOMIC);
+	struct ipv6_tlv_tnl_enc_lim *lim;
+	__u8 *bytes;
+
+	if (txopt == NULL)
+		return NULL;
+
+	memset(txopt, 0, total);
+	txopt->tot_len = total;
+	txopt->opt_nflen = 8;
+	/* the option header lives directly behind the txoptions struct */
+	txopt->dst0opt = (struct ipv6_opt_hdr *) (txopt + 1);
+
+	bytes = (__u8 *) txopt->dst0opt;
+	lim = (struct ipv6_tlv_tnl_enc_lim *) (txopt->dst0opt + 1);
+
+	lim->type = IPV6_TLV_TNL_ENCAP_LIMIT;
+	lim->length = 1;
+	lim->encap_limit = encap_limit;
+
+	/* PadN with one byte of padding data */
+	bytes[5] = IPV6_TLV_PADN;
+	bytes[6] = 1;
+
+	return txopt;
+}
+
+/**
+ * ip6ip6_tnl_addr_conflict - compare packet addresses to tunnel's own
+ * @t: the outgoing tunnel device
+ * @hdr: IPv6 header from the incoming packet
+ *
+ * Description:
+ * Guards against the most trivial tunneling loop: a packet whose
+ * source address is the tunnel's own remote end-point.
+ *
+ * Return:
+ * result of ipv6_addr_equal() on the tunnel's remote address and the
+ * packet's source address (non-zero means conflict)
+ **/
+
+static inline int
+ip6ip6_tnl_addr_conflict(struct ip6_tnl *t, struct ipv6hdr *hdr)
+{
+	struct in6_addr *exit_point = &t->parms.raddr;
+	struct in6_addr *pkt_src = &hdr->saddr;
+
+	return ipv6_addr_equal(exit_point, pkt_src);
+}
+
+/**
+ * ip6ip6_tnl_xmit - encapsulate packet and send
+ * @skb: the outgoing socket buffer
+ * @dev: the outgoing tunnel device
+ *
+ * Description:
+ * Build new header and do some sanity checks on the packet before sending
+ * it.
+ * The steps are: recursion and capability checks, evaluation of an
+ * encapsulation limit option in the inner packet, route selection
+ * (cached route or a fresh lookup), MTU handling, making room for the
+ * outer header, writing the outer header and handing the packet to
+ * the NF_IP6_LOCAL_OUT hook. On every path the buffer is either sent
+ * or freed, and the tunnel's recursion counter is decremented again.
+ *
+ * Return:
+ * 0
+ **/
+
+static int
+ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ struct ip6_tnl *t = (struct ip6_tnl *) dev->priv;
+ struct net_device_stats *stats = &t->stat;
+ struct ipv6hdr *ipv6h = skb->nh.ipv6h;
+ struct ipv6_txoptions *opt = NULL;
+ int encap_limit = -1; /* negative: no encapsulation limit option is added */
+ __u16 offset;
+ struct flowi fl;
+ struct dst_entry *dst;
+ struct net_device *tdev;
+ int mtu;
+ int max_headroom = sizeof(struct ipv6hdr);
+ u8 proto;
+ int err;
+ int pkt_len;
+ int dsfield;
+
+ if (t->recursion++) { /* already transmitting through this tunnel */
+ stats->collisions++;
+ goto tx_err;
+ }
+ if (skb->protocol != htons(ETH_P_IPV6) ||
+ !(t->parms.flags & IP6_TNL_F_CAP_XMIT) ||
+ ip6ip6_tnl_addr_conflict(t, ipv6h)) {
+ goto tx_err;
+ }
+ if ((offset = parse_tlv_tnl_enc_lim(skb, skb->nh.raw)) > 0) {
+ struct ipv6_tlv_tnl_enc_lim *tel;
+ tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->nh.raw[offset];
+ if (tel->encap_limit == 0) {
+ icmpv6_send(skb, ICMPV6_PARAMPROB,
+ ICMPV6_HDR_FIELD, offset + 2, skb->dev); /* pointer = offset of the limit byte inside the option */
+ goto tx_err;
+ }
+ encap_limit = tel->encap_limit - 1; /* outer packet carries the inner limit minus one */
+ } else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) {
+ encap_limit = t->parms.encap_limit;
+ }
+ memcpy(&fl, &t->fl, sizeof (fl)); /* start from the flow template set up in ip6ip6_tnl_link_config() */
+ proto = fl.proto;
+
+ dsfield = ipv6_get_dsfield(ipv6h);
+ if ((t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS))
+ fl.fl6_flowlabel |= (*(__u32 *) ipv6h & IPV6_TCLASS_MASK);
+ if ((t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL))
+ fl.fl6_flowlabel |= (*(__u32 *) ipv6h & IPV6_FLOWLABEL_MASK);
+
+ if (encap_limit >= 0 && (opt = create_tel(encap_limit)) == NULL)
+ goto tx_err;
+
+ if ((dst = ip6_tnl_dst_check(t)) != NULL)
+ dst_hold(dst);
+ else
+ dst = ip6_route_output(NULL, &fl);
+
+ if (dst->error || xfrm_lookup(&dst, &fl, NULL, 0) < 0)
+ goto tx_err_link_failure;
+
+ tdev = dst->dev;
+
+ if (tdev == dev) { /* route points back at the tunnel device itself */
+ stats->collisions++;
+ if (net_ratelimit())
+ printk(KERN_WARNING
+ "%s: Local routing loop detected!\n",
+ t->parms.name);
+ goto tx_err_dst_release;
+ }
+ mtu = dst_mtu(dst) - sizeof (*ipv6h);
+ if (opt) {
+ max_headroom += 8; /* the option header built by create_tel() is 8 bytes */
+ mtu -= 8;
+ }
+ if (mtu < IPV6_MIN_MTU)
+ mtu = IPV6_MIN_MTU;
+ if (skb->dst && mtu < dst_mtu(skb->dst)) {
+ struct rt6_info *rt = (struct rt6_info *) skb->dst;
+ rt->rt6i_flags |= RTF_MODIFIED;
+ rt->u.dst.metrics[RTAX_MTU-1] = mtu; /* lower the MTU recorded on the inner packet's route */
+ }
+ if (skb->len > mtu) {
+ icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
+ goto tx_err_dst_release;
+ }
+
+ /*
+ * Okay, now see if we can stuff it in the buffer as-is.
+ */
+ max_headroom += LL_RESERVED_SPACE(tdev);
+
+ if (skb_headroom(skb) < max_headroom ||
+ skb_cloned(skb) || skb_shared(skb)) {
+ struct sk_buff *new_skb;
+
+ if (!(new_skb = skb_realloc_headroom(skb, max_headroom)))
+ goto tx_err_dst_release;
+
+ if (skb->sk)
+ skb_set_owner_w(new_skb, skb->sk);
+ kfree_skb(skb);
+ skb = new_skb;
+ }
+ dst_release(skb->dst);
+ skb->dst = dst_clone(dst);
+
+ skb->h.raw = skb->nh.raw; /* the inner IPv6 header becomes the payload of the outer one */
+
+ if (opt)
+ ipv6_push_nfrag_opts(skb, opt, &proto, NULL);
+
+ skb->nh.raw = skb_push(skb, sizeof(struct ipv6hdr));
+ ipv6h = skb->nh.ipv6h; /* from here on ipv6h is the outer header */
+ *(u32*)ipv6h = fl.fl6_flowlabel | htonl(0x60000000); /* version 6 plus traffic class / flow label bits */
+ dsfield = INET_ECN_encapsulate(0, dsfield);
+ ipv6_change_dsfield(ipv6h, ~INET_ECN_MASK, dsfield);
+ ipv6h->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
+ ipv6h->hop_limit = t->parms.hop_limit;
+ ipv6h->nexthdr = proto;
+ ipv6_addr_copy(&ipv6h->saddr, &fl.fl6_src);
+ ipv6_addr_copy(&ipv6h->daddr, &fl.fl6_dst);
+ nf_reset(skb);
+ pkt_len = skb->len; /* saved before the hook is called; skb is not touched afterwards */
+ err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL,
+ skb->dst->dev, dst_output);
+
+ if (err == NET_XMIT_SUCCESS || err == NET_XMIT_CN) {
+ stats->tx_bytes += pkt_len;
+ stats->tx_packets++;
+ } else {
+ stats->tx_errors++;
+ stats->tx_aborted_errors++;
+ }
+ ip6_tnl_dst_store(t, dst); /* the reference held on dst moves into the tunnel's route cache */
+
+ if (opt)
+ kfree(opt);
+
+ t->recursion--;
+ return 0;
+tx_err_link_failure:
+ stats->tx_carrier_errors++;
+ dst_link_failure(skb);
+tx_err_dst_release:
+ dst_release(dst);
+ if (opt)
+ kfree(opt);
+tx_err:
+ stats->tx_errors++;
+ stats->tx_dropped++;
+ kfree_skb(skb);
+ t->recursion--;
+ return 0;
+}
+
+/*
+ * Recompute the IP6_TNL_F_CAP_XMIT / IP6_TNL_F_CAP_RCV bits of the tunnel
+ * parameters.  Both bits are cleared first.  They can only be set again
+ * when neither end-point is the unspecified address and, of the
+ * unicast/loopback/link-local/mapped/reserved type bits, the two
+ * addresses together carry exactly "unicast".  In addition
+ * ipv6_chk_addr() must succeed for the local address (on the link
+ * device, if one is configured) and must fail for the remote address.
+ */
+static void ip6_tnl_set_cap(struct ip6_tnl *t)
+{
+	struct ip6_tnl_parm *parm = &t->parms;
+	struct in6_addr *local = &parm->laddr;
+	struct in6_addr *remote = &parm->raddr;
+	int local_type = ipv6_addr_type(local);
+	int remote_type = ipv6_addr_type(remote);
+	struct net_device *link_dev = NULL;
+	int local_ok = 1;
+	int remote_ok = 1;
+
+	parm->flags &= ~(IP6_TNL_F_CAP_XMIT|IP6_TNL_F_CAP_RCV);
+
+	if (local_type == IPV6_ADDR_ANY || remote_type == IPV6_ADDR_ANY)
+		return;
+
+	if (((local_type | remote_type) &
+	     (IPV6_ADDR_UNICAST |
+	      IPV6_ADDR_LOOPBACK | IPV6_ADDR_LINKLOCAL |
+	      IPV6_ADDR_MAPPED | IPV6_ADDR_RESERVED)) != IPV6_ADDR_UNICAST)
+		return;
+
+	if (parm->link)
+		link_dev = dev_get_by_index(parm->link);
+
+	if ((local_type & IPV6_ADDR_UNICAST) &&
+	    !ipv6_chk_addr(local, link_dev, 0))
+		local_ok = 0;
+
+	if ((remote_type & IPV6_ADDR_UNICAST) &&
+	    ipv6_chk_addr(remote, NULL, 0))
+		remote_ok = 0;
+
+	if (local_ok && remote_ok) {
+		if (local_type & IPV6_ADDR_UNICAST)
+			parm->flags |= IP6_TNL_F_CAP_XMIT;
+		if (remote_type & IPV6_ADDR_UNICAST)
+			parm->flags |= IP6_TNL_F_CAP_RCV;
+	}
+
+	if (link_dev)
+		dev_put(link_dev);
+}
+
+/*
+ * Derive the device and flow settings from the tunnel parameters:
+ * device/broadcast address, the flowi template used for transmission,
+ * the capability flags, IFF_POINTOPOINT and iflink.  When the tunnel can
+ * transmit, the route to the remote end-point is looked up and, if it
+ * names a device, hard_header_len and mtu are adjusted to that device.
+ */
+static void ip6ip6_tnl_link_config(struct ip6_tnl *t)
+{
+	struct ip6_tnl_parm *parm = &t->parms;
+	struct net_device *tnl_dev = t->dev;
+	struct flowi *tmpl = &t->fl;
+	struct rt6_info *route;
+
+	memcpy(&tnl_dev->dev_addr, &parm->laddr, sizeof(struct in6_addr));
+	memcpy(&tnl_dev->broadcast, &parm->raddr, sizeof(struct in6_addr));
+
+	/* Set up flowi template */
+	ipv6_addr_copy(&tmpl->fl6_src, &parm->laddr);
+	ipv6_addr_copy(&tmpl->fl6_dst, &parm->raddr);
+	tmpl->oif = parm->link;
+	tmpl->fl6_flowlabel = 0;
+
+	/* configured values are used unless the original packet's are */
+	if (!(parm->flags & IP6_TNL_F_USE_ORIG_TCLASS))
+		tmpl->fl6_flowlabel |= IPV6_TCLASS_MASK & parm->flowinfo;
+	if (!(parm->flags & IP6_TNL_F_USE_ORIG_FLOWLABEL))
+		tmpl->fl6_flowlabel |= IPV6_FLOWLABEL_MASK & parm->flowinfo;
+
+	ip6_tnl_set_cap(t);
+
+	if ((parm->flags & IP6_TNL_F_CAP_XMIT) &&
+	    (parm->flags & IP6_TNL_F_CAP_RCV))
+		tnl_dev->flags |= IFF_POINTOPOINT;
+	else
+		tnl_dev->flags &= ~IFF_POINTOPOINT;
+
+	tnl_dev->iflink = parm->link;
+
+	if (!(parm->flags & IP6_TNL_F_CAP_XMIT))
+		return;
+
+	route = rt6_lookup(&parm->raddr, &parm->laddr, parm->link, 0);
+	if (route == NULL)
+		return;
+
+	if (route->rt6i_dev) {
+		struct net_device *under = route->rt6i_dev;
+
+		tnl_dev->hard_header_len = under->hard_header_len +
+			sizeof (struct ipv6hdr);
+
+		tnl_dev->mtu = under->mtu - sizeof (struct ipv6hdr);
+
+		if (tnl_dev->mtu < IPV6_MIN_MTU)
+			tnl_dev->mtu = IPV6_MIN_MTU;
+	}
+	dst_release(&route->u.dst);
+}
+
+/**
+ * ip6ip6_tnl_change - update the tunnel parameters
+ * @t: tunnel to be changed
+ * @p: tunnel configuration parameters
+ *
+ * Description:
+ * Copies the end-point addresses, flags, hop limit, encapsulation
+ * limit and flowinfo from @p into the tunnel and then re-runs
+ * ip6ip6_tnl_link_config(). Other fields of the tunnel parameters
+ * are left as they are.
+ *
+ * Return: 0
+ **/
+
+static int
+ip6ip6_tnl_change(struct ip6_tnl *t, struct ip6_tnl_parm *p)
+{
+	struct ip6_tnl_parm *cur = &t->parms;
+
+	ipv6_addr_copy(&cur->laddr, &p->laddr);
+	ipv6_addr_copy(&cur->raddr, &p->raddr);
+
+	cur->flags = p->flags;
+	cur->hop_limit = p->hop_limit;
+	cur->encap_limit = p->encap_limit;
+	cur->flowinfo = p->flowinfo;
+
+	ip6ip6_tnl_link_config(t);
+
+	return 0;
+}
+
+/**
+ * ip6ip6_tnl_ioctl - configure ipv6 tunnels from userspace
+ * @dev: virtual device associated with tunnel
+ * @ifr: parameters passed from userspace
+ * @cmd: command to be performed
+ *
+ * Description:
+ * ip6ip6_tnl_ioctl() is used for managing IPv6 tunnels
+ * from userspace.
+ *
+ * The possible commands are the following:
+ * %SIOCGETTUNNEL: get tunnel parameters for device
+ * %SIOCADDTUNNEL: add tunnel matching given tunnel parameters
+ * %SIOCCHGTUNNEL: change tunnel parameters to those given
+ * %SIOCDELTUNNEL: delete tunnel
+ *
+ * The fallback device "ip6tnl0", created during module
+ * initialization, can be used for creating other tunnel devices.
+ *
+ * When a get, change or delete command is issued on the fallback
+ * device, the tunnel to operate on is located from the parameters
+ * supplied by userspace; on any other device those commands operate
+ * on that device's own tunnel.
+ *
+ * Return:
+ * 0 on success,
+ * %-EFAULT if unable to copy data to or from userspace,
+ * %-EPERM if current process hasn't %CAP_NET_ADMIN set
+ * %-EINVAL if passed tunnel parameters are invalid,
+ * %-EEXIST if changing a tunnel's parameters would cause a conflict
+ * %-ENODEV if attempting to change or delete a nonexisting device
+ **/
+
+static int
+ip6ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+{
+ int err = 0;
+ int create;
+ struct ip6_tnl_parm p;
+ struct ip6_tnl *t = NULL;
+
+ switch (cmd) {
+ case SIOCGETTUNNEL:
+ if (dev == ip6ip6_fb_tnl_dev) {
+ if (copy_from_user(&p,
+ ifr->ifr_ifru.ifru_data,
+ sizeof (p))) {
+ err = -EFAULT;
+ break;
+ }
+ if ((err = ip6ip6_tnl_locate(&p, &t, 0)) == -ENODEV)
+ t = (struct ip6_tnl *) dev->priv; /* no match: report the fallback tunnel's own parameters */
+ else if (err)
+ break;
+ } else
+ t = (struct ip6_tnl *) dev->priv;
+
+ memcpy(&p, &t->parms, sizeof (p));
+ if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof (p))) {
+ err = -EFAULT;
+ }
+ break;
+ case SIOCADDTUNNEL:
+ case SIOCCHGTUNNEL:
+ err = -EPERM;
+ create = (cmd == SIOCADDTUNNEL);
+ if (!capable(CAP_NET_ADMIN))
+ break;
+ if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p))) {
+ err = -EFAULT;
+ break;
+ }
+ if (!create && dev != ip6ip6_fb_tnl_dev) {
+ t = (struct ip6_tnl *) dev->priv; /* change request on a regular tunnel device targets that device */
+ }
+ if (!t && (err = ip6ip6_tnl_locate(&p, &t, create))) {
+ break;
+ }
+ if (cmd == SIOCCHGTUNNEL) {
+ if (t->dev != dev) {
+ err = -EEXIST;
+ break;
+ }
+ ip6ip6_tnl_unlink(t); /* new addresses may hash to a different bucket, so unlink before and relink after the change */
+ err = ip6ip6_tnl_change(t, &p);
+ ip6ip6_tnl_link(t);
+ netdev_state_change(dev);
+ }
+ if (copy_to_user(ifr->ifr_ifru.ifru_data,
+ &t->parms, sizeof (p))) {
+ err = -EFAULT;
+ } else {
+ err = 0;
+ }
+ break;
+ case SIOCDELTUNNEL:
+ err = -EPERM;
+ if (!capable(CAP_NET_ADMIN))
+ break;
+
+ if (dev == ip6ip6_fb_tnl_dev) {
+ if (copy_from_user(&p, ifr->ifr_ifru.ifru_data,
+ sizeof (p))) {
+ err = -EFAULT;
+ break;
+ }
+ err = ip6ip6_tnl_locate(&p, &t, 0);
+ if (err)
+ break;
+ if (t == ip6ip6_fb_tnl_dev->priv) { /* the fallback tunnel itself cannot be deleted this way */
+ err = -EPERM;
+ break;
+ }
+ } else {
+ t = (struct ip6_tnl *) dev->priv;
+ }
+ err = unregister_netdevice(t->dev);
+ break;
+ default:
+ err = -EINVAL;
+ }
+ return err;
+}
+
+/**
+ * ip6ip6_tnl_get_stats - return the stats for tunnel device
+ * @dev: virtual device associated with tunnel
+ *
+ * Return: address of the statistics block kept in the tunnel's
+ * private data
+ **/
+
+static struct net_device_stats *
+ip6ip6_tnl_get_stats(struct net_device *dev)
+{
+	struct ip6_tnl *tunnel = (struct ip6_tnl *) dev->priv;
+
+	return &tunnel->stat;
+}
+
+/**
+ * ip6ip6_tnl_change_mtu - change mtu manually for tunnel device
+ * @dev: virtual device associated with tunnel
+ * @new_mtu: the new mtu
+ *
+ * Description:
+ * Any value of at least %IPV6_MIN_MTU is accepted; no upper bound is
+ * checked here.
+ *
+ * Return:
+ * 0 on success,
+ * %-EINVAL if mtu too small
+ **/
+
+static int
+ip6ip6_tnl_change_mtu(struct net_device *dev, int new_mtu)
+{
+	if (new_mtu >= IPV6_MIN_MTU) {
+		dev->mtu = new_mtu;
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+/**
+ * ip6ip6_tnl_dev_setup - setup virtual tunnel device
+ * @dev: virtual device associated with tunnel
+ *
+ * Description:
+ * Installs the tunnel's device callbacks and the default link
+ * parameters (type, header length, MTU, flags, address length).
+ **/
+
+static void ip6ip6_tnl_dev_setup(struct net_device *dev)
+{
+	SET_MODULE_OWNER(dev);
+
+	/* device callbacks */
+	dev->hard_start_xmit = ip6ip6_tnl_xmit;
+	dev->get_stats = ip6ip6_tnl_get_stats;
+	dev->do_ioctl = ip6ip6_tnl_ioctl;
+	dev->change_mtu = ip6ip6_tnl_change_mtu;
+	dev->uninit = ip6ip6_tnl_dev_uninit;
+	dev->destructor = free_netdev;
+
+	/* link parameters */
+	dev->type = ARPHRD_TUNNEL6;
+	dev->addr_len = sizeof(struct in6_addr);
+	dev->flags |= IFF_NOARP;
+	dev->hard_header_len = LL_MAX_HEADER + sizeof (struct ipv6hdr);
+	dev->mtu = ETH_DATA_LEN - sizeof (struct ipv6hdr);
+}
+
+
+/**
+ * ip6ip6_tnl_dev_init_gen - general initializer for all tunnel devices
+ * @dev: virtual device associated with tunnel
+ *
+ * Description:
+ * Ties the private tunnel structure to @dev, records the device name
+ * in the tunnel parameters and sets the flow template's protocol to
+ * %IPPROTO_IPV6.
+ **/
+
+static inline void
+ip6ip6_tnl_dev_init_gen(struct net_device *dev)
+{
+	struct ip6_tnl *tunnel = (struct ip6_tnl *) dev->priv;
+
+	tunnel->dev = dev;
+	tunnel->fl.proto = IPPROTO_IPV6;
+	strcpy(tunnel->parms.name, dev->name);
+}
+
+/**
+ * ip6ip6_tnl_dev_init - initializer for all non fallback tunnel devices
+ * @dev: virtual device associated with tunnel
+ *
+ * Description:
+ * Runs the generic initializer and then configures the link from the
+ * tunnel parameters.
+ *
+ * Return: 0
+ **/
+
+static int
+ip6ip6_tnl_dev_init(struct net_device *dev)
+{
+	ip6ip6_tnl_dev_init_gen(dev);
+	ip6ip6_tnl_link_config((struct ip6_tnl *) dev->priv);
+
+	return 0;
+}
+
+/**
+ * ip6ip6_fb_tnl_dev_init - initializer for fallback tunnel device
+ * @dev: fallback device
+ *
+ * Description:
+ * Runs the generic initializer, takes a reference on @dev and places
+ * the fallback tunnel in the wildcard slot.
+ *
+ * Return: 0
+ **/
+
+static int
+ip6ip6_fb_tnl_dev_init(struct net_device *dev)
+{
+	ip6ip6_tnl_dev_init_gen(dev);
+	dev_hold(dev);
+	tnls_wc[0] = (struct ip6_tnl *) dev->priv;
+
+	return 0;
+}
+
+static struct xfrm6_tunnel ip6ip6_handler = { /* registered in ip6_tunnel_init(), deregistered in ip6_tunnel_cleanup() */
+ .handler = ip6ip6_rcv, /* receive path: decapsulation */
+ .err_handler = ip6ip6_err, /* ICMPv6 error handling for the tunnel */
+};
+
+/**
+ * ip6_tunnel_init - register protocol and reserve needed resources
+ *
+ * Description:
+ * Registers the xfrm6 tunnel handler, then allocates and registers
+ * the fallback device "ip6tnl0". If the device cannot be set up the
+ * handler is deregistered again.
+ *
+ * Return:
+ * 0 on success,
+ * %-EAGAIN if the handler could not be registered,
+ * %-ENOMEM if the fallback device could not be allocated,
+ * or the error returned by register_netdev()
+ **/
+
+static int __init ip6_tunnel_init(void)
+{
+	int err = -ENOMEM;
+
+	if (xfrm6_tunnel_register(&ip6ip6_handler) < 0) {
+		printk(KERN_ERR "ip6ip6 init: can't register tunnel\n");
+		return -EAGAIN;
+	}
+
+	ip6ip6_fb_tnl_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6tnl0",
+					 ip6ip6_tnl_dev_setup);
+
+	if (ip6ip6_fb_tnl_dev != NULL) {
+		ip6ip6_fb_tnl_dev->init = ip6ip6_fb_tnl_dev_init;
+
+		err = register_netdev(ip6ip6_fb_tnl_dev);
+		if (!err)
+			return 0;
+
+		free_netdev(ip6ip6_fb_tnl_dev);
+	}
+
+	/* undo the handler registration on any failure */
+	xfrm6_tunnel_deregister(&ip6ip6_handler);
+	return err;
+}
+
+/**
+ * ip6_tunnel_cleanup - free resources and unregister protocol
+ *
+ * Description:
+ * Deregisters the xfrm6 tunnel handler (a failure is only logged) and
+ * unregisters the fallback device.
+ **/
+
+static void __exit ip6_tunnel_cleanup(void)
+{
+	int rc = xfrm6_tunnel_deregister(&ip6ip6_handler);
+
+	if (rc < 0)
+		printk(KERN_INFO "ip6ip6 close: can't deregister tunnel\n");
+
+	unregister_netdev(ip6ip6_fb_tnl_dev);
+}
+
+module_init(ip6_tunnel_init); /* entry point run when the module is initialised */
+module_exit(ip6_tunnel_cleanup); /* counterpart run when the module is removed */
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
new file mode 100644
index 000000000000..6cde5310cd76
--- /dev/null
+++ b/net/ipv6/ipcomp6.c
@@ -0,0 +1,524 @@
+/*
+ * IP Payload Compression Protocol (IPComp) for IPv6 - RFC3173
+ *
+ * Copyright (C)2003 USAGI/WIDE Project
+ *
+ * Author Mitsuru KANDA <mk@linux-ipv6.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+/*
+ * [Memo]
+ *
+ * Outbound:
+ * The compression of IP datagram MUST be done before AH/ESP processing,
+ * fragmentation, and the addition of Hop-by-Hop/Routing header.
+ *
+ * Inbound:
+ * The decompression of IP datagram MUST be done after the reassembly,
+ * AH/ESP processing.
+ */
+#include <linux/config.h>
+#include <linux/module.h>
+#include <net/ip.h>
+#include <net/xfrm.h>
+#include <net/ipcomp.h>
+#include <asm/scatterlist.h>
+#include <asm/semaphore.h>
+#include <linux/crypto.h>
+#include <linux/pfkeyv2.h>
+#include <linux/random.h>
+#include <linux/percpu.h>
+#include <linux/smp.h>
+#include <linux/list.h>
+#include <linux/vmalloc.h>
+#include <linux/rtnetlink.h>
+#include <net/icmp.h>
+#include <net/ipv6.h>
+#include <linux/ipv6.h>
+#include <linux/icmpv6.h>
+
+/*
+ * One set of per-CPU compression transforms, shared by every state that
+ * asks for the same algorithm name (see ipcomp6_alloc_tfms()).
+ */
+struct ipcomp6_tfms {
+ struct list_head list; /* link in ipcomp6_tfms_list */
+ struct crypto_tfm **tfms; /* per-CPU array obtained from alloc_percpu() */
+ int users; /* number of states sharing this entry */
+};
+
+/*
+ * Shared resources.  ipcomp6_init_state() and ipcomp6_destroy() hold
+ * ipcomp6_resource_sem around every allocation and release of the scratch
+ * buffers and the transform list.
+ */
+static DECLARE_MUTEX(ipcomp6_resource_sem);
+static void **ipcomp6_scratches; /* per-CPU scratch buffer pointers */
+static int ipcomp6_scratch_users; /* reference count on ipcomp6_scratches */
+static LIST_HEAD(ipcomp6_tfms_list); /* all struct ipcomp6_tfms entries */
+
+/*
+ * ipcomp6_input - strip the IPComp header and decompress the payload
+ * @x: state whose ->data holds the struct ipcomp_data for this SA
+ * @decap: unused here
+ * @skb: packet; skb->data points at the IPComp header
+ *
+ * The headers between skb->nh.raw and skb->h.raw are moved forward over
+ * the IPComp header, the payload is decompressed into this CPU's scratch
+ * buffer and then copied back into the (expanded) skb.
+ *
+ * Returns the next-header value taken from the IPComp header on success,
+ * or a negative errno: -ENOMEM when linearising the skb or saving the
+ * headers fails, -EINVAL when decompression fails or does not yield more
+ * than the compressed size, or the pskb_expand_head() error.
+ */
+static int ipcomp6_input(struct xfrm_state *x, struct xfrm_decap_state *decap, struct sk_buff *skb)
+{
+	int err = 0;
+	u8 nexthdr = 0;
+	int hdr_len = skb->h.raw - skb->nh.raw;
+	unsigned char *tmp_hdr = NULL;
+	struct ipv6hdr *iph;
+	int plen, dlen;
+	struct ipcomp_data *ipcd = x->data;
+	u8 *start, *scratch;
+	struct crypto_tfm *tfm;
+	int cpu;
+
+	if ((skb_is_nonlinear(skb) || skb_cloned(skb)) &&
+	    skb_linearize(skb, GFP_ATOMIC) != 0) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	skb->ip_summed = CHECKSUM_NONE;
+
+	/* Remove ipcomp header and decompress original payload */
+	iph = skb->nh.ipv6h;
+	tmp_hdr = kmalloc(hdr_len, GFP_ATOMIC);
+	if (!tmp_hdr) {
+		/*
+		 * Report the failure.  Leaving err at 0 here would return
+		 * nexthdr (still 0) and make the packet look successfully
+		 * processed.
+		 */
+		err = -ENOMEM;
+		goto out;
+	}
+	memcpy(tmp_hdr, iph, hdr_len);
+	nexthdr = *(u8 *)skb->data;
+	skb_pull(skb, sizeof(struct ipv6_comp_hdr));
+	skb->nh.raw += sizeof(struct ipv6_comp_hdr);
+	memcpy(skb->nh.raw, tmp_hdr, hdr_len);
+	iph = skb->nh.ipv6h;
+	iph->payload_len = htons(ntohs(iph->payload_len) - sizeof(struct ipv6_comp_hdr));
+	skb->h.raw = skb->data;
+
+	/* decompression */
+	plen = skb->len;
+	dlen = IPCOMP_SCRATCH_SIZE;
+	start = skb->data;
+
+	/* get_cpu()/put_cpu() bracket the use of the per-CPU buffers */
+	cpu = get_cpu();
+	scratch = *per_cpu_ptr(ipcomp6_scratches, cpu);
+	tfm = *per_cpu_ptr(ipcd->tfms, cpu);
+
+	err = crypto_comp_decompress(tfm, start, plen, scratch, &dlen);
+	if (err) {
+		err = -EINVAL;
+		goto out_put_cpu;
+	}
+
+	/* the result must be larger than compressed payload plus header */
+	if (dlen < (plen + sizeof(struct ipv6_comp_hdr))) {
+		err = -EINVAL;
+		goto out_put_cpu;
+	}
+
+	err = pskb_expand_head(skb, 0, dlen - plen, GFP_ATOMIC);
+	if (err) {
+		goto out_put_cpu;
+	}
+
+	skb_put(skb, dlen - plen);
+	memcpy(skb->data, scratch, dlen);
+
+	/* the head may have been reallocated, so reload the header pointer */
+	iph = skb->nh.ipv6h;
+	iph->payload_len = htons(skb->len);
+
+out_put_cpu:
+	put_cpu();
+out:
+	if (tmp_hdr)
+		kfree(tmp_hdr);
+	if (err)
+		goto error_out;
+	return nexthdr;
+error_out:
+	return err;
+}
+
+/*
+ * ipcomp6_output - compress the payload and insert an IPComp header
+ * @x: state whose ->data holds the struct ipcomp_data for this SA
+ * @skb: packet; skb->h.raw marks the start of the payload to compress
+ *
+ * The packet is left untouched when the payload is below the configured
+ * threshold, when the skb cannot be linearised, when compression fails,
+ * or when the compressed payload plus header would not be smaller than
+ * the original payload.
+ *
+ * Always returns 0.
+ */
+static int ipcomp6_output(struct xfrm_state *x, struct sk_buff *skb)
+{
+	int err;
+	struct ipv6hdr *top_iph;
+	int hdr_len;
+	struct ipv6_comp_hdr *ipch;
+	struct ipcomp_data *ipcd = x->data;
+	int plen, dlen;
+	u8 *start, *scratch;
+	struct crypto_tfm *tfm;
+	int cpu;
+
+	hdr_len = skb->h.raw - skb->data;
+
+	/* check whether datagram len is larger than threshold */
+	if ((skb->len - hdr_len) < ipcd->threshold) {
+		goto out_ok;
+	}
+
+	if ((skb_is_nonlinear(skb) || skb_cloned(skb)) &&
+	    skb_linearize(skb, GFP_ATOMIC) != 0) {
+		goto out_ok;
+	}
+
+	/* compression */
+	plen = skb->len - hdr_len;
+	dlen = IPCOMP_SCRATCH_SIZE;
+	start = skb->h.raw;
+
+	/* get_cpu()/put_cpu() bracket the use of the per-CPU buffers */
+	cpu = get_cpu();
+	scratch = *per_cpu_ptr(ipcomp6_scratches, cpu);
+	tfm = *per_cpu_ptr(ipcd->tfms, cpu);
+
+	err = crypto_comp_compress(tfm, start, plen, scratch, &dlen);
+	if (err || (dlen + sizeof(struct ipv6_comp_hdr)) >= plen) {
+		put_cpu();
+		goto out_ok;
+	}
+	/*
+	 * Leave room for the IPComp header in front of the compressed data.
+	 * The IPv6 header type is used throughout so that every size in
+	 * this function refers to the same structure.
+	 */
+	memcpy(start + sizeof(struct ipv6_comp_hdr), scratch, dlen);
+	put_cpu();
+	pskb_trim(skb, hdr_len + dlen + sizeof(struct ipv6_comp_hdr));
+
+	/* insert ipcomp header and replace datagram */
+	top_iph = (struct ipv6hdr *)skb->data;
+
+	top_iph->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
+
+	ipch = (struct ipv6_comp_hdr *)start;
+	/* skb->nh.raw points at the next-header byte to be rewritten */
+	ipch->nexthdr = *skb->nh.raw;
+	ipch->flags = 0;
+	ipch->cpi = htons((u16 )ntohl(x->id.spi));
+	*skb->nh.raw = IPPROTO_COMP;
+
+out_ok:
+	return 0;
+}
+
+/*
+ * ipcomp6_err - ICMPv6 error handler for IPComp
+ *
+ * Only destination-unreachable and packet-too-big errors are looked at.
+ * The CPI from the offending IPComp header is widened to an SPI and used
+ * to look up the state; if one exists a debug line is logged and the
+ * reference is dropped again.
+ */
+static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+			int type, int code, int offset, __u32 info)
+{
+	struct ipv6hdr *iph;
+	struct ipv6_comp_hdr *ipcomph;
+	struct xfrm_state *x;
+	u32 spi;
+
+	switch (type) {
+	case ICMPV6_DEST_UNREACH:
+	case ICMPV6_PKT_TOOBIG:
+		break;
+	default:
+		return;
+	}
+
+	iph = (struct ipv6hdr*)skb->data;
+	ipcomph = (struct ipv6_comp_hdr*)(skb->data+offset);
+
+	spi = ntohl(ntohs(ipcomph->cpi));
+	x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, spi, IPPROTO_COMP, AF_INET6);
+	if (x == NULL)
+		return;
+
+	printk(KERN_DEBUG "pmtu discovery on SA IPCOMP/%08x/"
+	       "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
+	       spi, NIP6(iph->daddr));
+	xfrm_state_put(x);
+}
+
+/*
+ * ipcomp6_tunnel_create - build the IPv6-in-IPv6 tunnel state for @x
+ * @x: IPComp state in tunnel mode
+ *
+ * Allocates a new state with protocol IPPROTO_IPV6, copies the addresses
+ * and selector from @x, allocates a tunnel SPI for the source address and
+ * initialises the state through its xfrm type.
+ *
+ * Returns the new state with tunnel_users set to 1, or NULL when the
+ * allocation, the type lookup or the type's init_state() fails.
+ */
+static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x)
+{
+	struct xfrm_state *t = NULL;
+
+	t = xfrm_state_alloc();
+	if (!t)
+		goto out;
+
+	t->id.proto = IPPROTO_IPV6;
+	t->id.spi = xfrm6_tunnel_alloc_spi((xfrm_address_t *)&x->props.saddr);
+	memcpy(t->id.daddr.a6, x->id.daddr.a6, sizeof(struct in6_addr));
+	memcpy(&t->sel, &x->sel, sizeof(t->sel));
+	t->props.family = AF_INET6;
+	t->props.mode = 1;
+	memcpy(t->props.saddr.a6, x->props.saddr.a6, sizeof(struct in6_addr));
+
+	t->type = xfrm_get_type(IPPROTO_IPV6, t->props.family);
+	if (t->type == NULL)
+		goto error;
+
+	if (t->type->init_state(t, NULL))
+		goto error;
+
+	t->km.state = XFRM_STATE_VALID;
+	atomic_set(&t->tunnel_users, 1);
+
+out:
+	return t;
+
+error:
+	xfrm_state_put(t);
+	/*
+	 * The reference is gone; hand back NULL so the caller takes its
+	 * failure path instead of inserting and using a released state.
+	 */
+	t = NULL;
+	goto out;
+}
+
+/*
+ * ipcomp6_tunnel_attach - bind @x to its IPv6-in-IPv6 tunnel state
+ *
+ * Looks up an existing tunnel state by the SPI registered for the source
+ * address of @x; if none is found a new one is created, inserted and
+ * held.  The tunnel is stored in x->tunnel and its tunnel_users count is
+ * incremented.
+ *
+ * Returns 0 on success or -EINVAL when no tunnel state could be created.
+ */
+static int ipcomp6_tunnel_attach(struct xfrm_state *x)
+{
+	struct xfrm_state *tunnel = NULL;
+	u32 spi;
+
+	spi = xfrm6_tunnel_spi_lookup((xfrm_address_t *)&x->props.saddr);
+	if (spi != 0)
+		tunnel = xfrm_state_lookup((xfrm_address_t *)&x->id.daddr,
+					   spi, IPPROTO_IPV6, AF_INET6);
+
+	if (tunnel == NULL) {
+		tunnel = ipcomp6_tunnel_create(x);
+		if (tunnel == NULL)
+			return -EINVAL;
+
+		xfrm_state_insert(tunnel);
+		xfrm_state_hold(tunnel);
+	}
+
+	x->tunnel = tunnel;
+	atomic_inc(&tunnel->tunnel_users);
+
+	return 0;
+}
+
+/*
+ * ipcomp6_free_scratches - drop one reference on the scratch buffers
+ *
+ * When the last user goes away every per-CPU buffer and the per-CPU
+ * pointer array are released.  Per-CPU slots that were never filled in
+ * (a partially completed allocation) are skipped.
+ */
+static void ipcomp6_free_scratches(void)
+{
+	int i;
+	void **scratches;
+
+	if (--ipcomp6_scratch_users)
+		return;
+
+	scratches = ipcomp6_scratches;
+	if (!scratches)
+		return;
+
+	/*
+	 * Forget the array before releasing it.  ipcomp6_alloc_scratches()
+	 * only overwrites the global after a successful alloc_percpu(), so a
+	 * stale value left here would be freed a second time by the cleanup
+	 * that follows a failed allocation.
+	 */
+	ipcomp6_scratches = NULL;
+
+	for_each_cpu(i) {
+		void *scratch = *per_cpu_ptr(scratches, i);
+		if (scratch)
+			vfree(scratch);
+	}
+
+	free_percpu(scratches);
+}
+
+/*
+ * ipcomp6_alloc_scratches - take a reference on the scratch buffers
+ *
+ * The first user allocates the per-CPU pointer array and one
+ * IPCOMP_SCRATCH_SIZE buffer per CPU; later users only bump the count.
+ *
+ * Returns the per-CPU array, or NULL on allocation failure.  The user
+ * count is incremented even when NULL is returned, and a partially filled
+ * array stays published in ipcomp6_scratches, so the caller is expected
+ * to undo a failure with ipcomp6_free_scratches().
+ */
+static void **ipcomp6_alloc_scratches(void)
+{
+ int i;
+ void **scratches;
+
+ /* Not the first user: just count the reference. */
+ if (ipcomp6_scratch_users++)
+ return ipcomp6_scratches;
+
+ scratches = alloc_percpu(void *);
+ if (!scratches)
+ return NULL;
+
+ /* Publish before filling in so a failure below can be cleaned up. */
+ ipcomp6_scratches = scratches;
+
+ for_each_cpu(i) {
+ void *scratch = vmalloc(IPCOMP_SCRATCH_SIZE);
+ if (!scratch)
+ return NULL;
+ *per_cpu_ptr(scratches, i) = scratch;
+ }
+
+ return scratches;
+}
+
+/*
+ * ipcomp6_free_tfms - drop one reference on a set of per-CPU transforms
+ * @tfms: per-CPU array as returned by ipcomp6_alloc_tfms(); may be NULL
+ *        when called from that function's error path
+ *
+ * Finds the list entry that owns @tfms and decrements its user count.
+ * When the count reaches zero the entry is unlinked and freed, then every
+ * per-CPU transform that was allocated and the array itself are released.
+ */
+static void ipcomp6_free_tfms(struct crypto_tfm **tfms)
+{
+ struct ipcomp6_tfms *pos;
+ int cpu;
+
+ list_for_each_entry(pos, &ipcomp6_tfms_list, list) {
+ if (pos->tfms == tfms)
+ break;
+ }
+
+ /*
+  * NOTE(review): after list_for_each_entry() the cursor is presumably
+  * never NULL, even when nothing matched, so this check may not catch
+  * a missing entry - confirm against the list macro.
+  */
+ BUG_TRAP(pos);
+
+ if (--pos->users)
+ return;
+
+ list_del(&pos->list);
+ kfree(pos);
+
+ /* Nothing more to release if the per-CPU array was never allocated. */
+ if (!tfms)
+ return;
+
+ for_each_cpu(cpu) {
+ struct crypto_tfm *tfm = *per_cpu_ptr(tfms, cpu);
+ if (tfm)
+ crypto_free_tfm(tfm);
+ }
+ free_percpu(tfms);
+}
+
+/*
+ * ipcomp6_alloc_tfms - get per-CPU transforms for a compression algorithm
+ * @alg_name: algorithm name handed to crypto_alloc_tfm()
+ *
+ * Reuses an existing list entry whose transform reports the same
+ * algorithm name, bumping its user count.  Otherwise a new entry is added
+ * to ipcomp6_tfms_list and one transform is allocated for every CPU.
+ *
+ * Returns the per-CPU array, or NULL on allocation failure.
+ */
+static struct crypto_tfm **ipcomp6_alloc_tfms(const char *alg_name)
+{
+ struct ipcomp6_tfms *pos;
+ struct crypto_tfm **tfms;
+ int cpu;
+
+ /* This can be any valid CPU ID so we don't need locking. */
+ cpu = smp_processor_id();
+
+ list_for_each_entry(pos, &ipcomp6_tfms_list, list) {
+ struct crypto_tfm *tfm;
+
+ tfms = pos->tfms;
+ tfm = *per_cpu_ptr(tfms, cpu);
+
+ if (!strcmp(crypto_tfm_alg_name(tfm), alg_name)) {
+ pos->users++;
+ return tfms;
+ }
+ }
+
+ pos = kmalloc(sizeof(*pos), GFP_KERNEL);
+ if (!pos)
+ return NULL;
+
+ /*
+  * Link the entry in before allocating the transforms: the error path
+  * below relies on ipcomp6_free_tfms() finding it on the list.
+  */
+ pos->users = 1;
+ INIT_LIST_HEAD(&pos->list);
+ list_add(&pos->list, &ipcomp6_tfms_list);
+
+ pos->tfms = tfms = alloc_percpu(struct crypto_tfm *);
+ if (!tfms)
+ goto error;
+
+ for_each_cpu(cpu) {
+ struct crypto_tfm *tfm = crypto_alloc_tfm(alg_name, 0);
+ if (!tfm)
+ goto error;
+ *per_cpu_ptr(tfms, cpu) = tfm;
+ }
+
+ return tfms;
+
+error:
+ /* Drops the single reference, which unlinks and frees the entry. */
+ ipcomp6_free_tfms(tfms);
+ return NULL;
+}
+
+/*
+ * ipcomp6_free_data - release the shared resources referenced by @ipcd
+ *
+ * Drops the transform reference, if one was taken, and one reference on
+ * the scratch buffers.  @ipcd itself is not freed.
+ */
+static void ipcomp6_free_data(struct ipcomp_data *ipcd)
+{
+	struct crypto_tfm **tfms = ipcd->tfms;
+
+	if (tfms != NULL)
+		ipcomp6_free_tfms(tfms);
+
+	ipcomp6_free_scratches();
+}
+
+/*
+ * ipcomp6_destroy - xfrm type destructor for IPComp states
+ *
+ * Does nothing when the state carries no private data.  Otherwise it
+ * detaches the tunnel state, releases the shared transforms and scratch
+ * buffers under ipcomp6_resource_sem, frees the private data and gives
+ * back the tunnel SPI registered for the state's source address.
+ */
+static void ipcomp6_destroy(struct xfrm_state *x)
+{
+ struct ipcomp_data *ipcd = x->data;
+ if (!ipcd)
+ return;
+ xfrm_state_delete_tunnel(x);
+ down(&ipcomp6_resource_sem);
+ ipcomp6_free_data(ipcd);
+ up(&ipcomp6_resource_sem);
+ kfree(ipcd);
+
+ xfrm6_tunnel_free_spi((xfrm_address_t *)&x->props.saddr);
+}
+
+/*
+ * ipcomp6_init_state - xfrm type initialiser for IPComp states
+ * @x: state being set up; must carry a compression algorithm and no encap
+ * @args: unused here
+ *
+ * Allocates the private data, takes references on the shared scratch
+ * buffers and per-CPU transforms, attaches a tunnel state when
+ * x->props.mode is set, and records the algorithm's compression threshold.
+ *
+ * Returns 0 on success, -EINVAL when no compression algorithm is given or
+ * encapsulation is requested, -ENOMEM on allocation failure, or the error
+ * from ipcomp6_tunnel_attach().
+ */
+static int ipcomp6_init_state(struct xfrm_state *x, void *args)
+{
+ int err;
+ struct ipcomp_data *ipcd;
+ struct xfrm_algo_desc *calg_desc;
+
+ err = -EINVAL;
+ if (!x->calg)
+ goto out;
+
+ if (x->encap)
+ goto out;
+
+ err = -ENOMEM;
+ ipcd = kmalloc(sizeof(*ipcd), GFP_KERNEL);
+ if (!ipcd)
+ goto out;
+
+ memset(ipcd, 0, sizeof(*ipcd));
+ x->props.header_len = 0;
+ if (x->props.mode)
+ x->props.header_len += sizeof(struct ipv6hdr);
+
+ /* The "error" label below is entered with the semaphore still held. */
+ down(&ipcomp6_resource_sem);
+ if (!ipcomp6_alloc_scratches())
+ goto error;
+
+ ipcd->tfms = ipcomp6_alloc_tfms(x->calg->alg_name);
+ if (!ipcd->tfms)
+ goto error;
+ up(&ipcomp6_resource_sem);
+
+ if (x->props.mode) {
+ err = ipcomp6_tunnel_attach(x);
+ if (err)
+ goto error_tunnel;
+ }
+
+ calg_desc = xfrm_calg_get_byname(x->calg->alg_name, 0);
+ BUG_ON(!calg_desc);
+ ipcd->threshold = calg_desc->uinfo.comp.threshold;
+ x->data = ipcd;
+ err = 0;
+out:
+ return err;
+error_tunnel:
+ /* Reached without the semaphore; retake it before falling through. */
+ down(&ipcomp6_resource_sem);
+error:
+ ipcomp6_free_data(ipcd);
+ up(&ipcomp6_resource_sem);
+ kfree(ipcd);
+
+ goto out;
+}
+
+/* xfrm type operations for IPComp, registered for AF_INET6 in ipcomp6_init(). */
+static struct xfrm_type ipcomp6_type =
+{
+ .description = "IPCOMP6",
+ .owner = THIS_MODULE,
+ .proto = IPPROTO_COMP,
+ .init_state = ipcomp6_init_state,
+ .destructor = ipcomp6_destroy,
+ .input = ipcomp6_input,
+ .output = ipcomp6_output,
+};
+
+/*
+ * IPv6 protocol hooks for IPPROTO_COMP: packets go to xfrm6_rcv(), ICMPv6
+ * errors to ipcomp6_err().
+ */
+static struct inet6_protocol ipcomp6_protocol =
+{
+ .handler = xfrm6_rcv,
+ .err_handler = ipcomp6_err,
+ .flags = INET6_PROTO_NOPOLICY,
+};
+
+/*
+ * ipcomp6_init - module entry point
+ *
+ * Registers the xfrm type and then the IPv6 protocol handler; the type is
+ * unregistered again when the protocol cannot be added.
+ *
+ * Returns 0 on success or -EAGAIN when either registration fails.
+ */
+static int __init ipcomp6_init(void)
+{
+	int rc;
+
+	rc = xfrm_register_type(&ipcomp6_type, AF_INET6);
+	if (rc < 0) {
+		printk(KERN_INFO "ipcomp6 init: can't add xfrm type\n");
+		return -EAGAIN;
+	}
+
+	rc = inet6_add_protocol(&ipcomp6_protocol, IPPROTO_COMP);
+	if (rc >= 0)
+		return 0;
+
+	printk(KERN_INFO "ipcomp6 init: can't add protocol\n");
+	xfrm_unregister_type(&ipcomp6_type, AF_INET6);
+	return -EAGAIN;
+}
+
+/*
+ * ipcomp6_fini - module exit point
+ *
+ * Removes the protocol handler and the xfrm type in the reverse order of
+ * ipcomp6_init(); failures are only logged.
+ */
+static void __exit ipcomp6_fini(void)
+{
+	int rc;
+
+	rc = inet6_del_protocol(&ipcomp6_protocol, IPPROTO_COMP);
+	if (rc < 0)
+		printk(KERN_INFO "ipv6 ipcomp close: can't remove protocol\n");
+
+	rc = xfrm_unregister_type(&ipcomp6_type, AF_INET6);
+	if (rc < 0)
+		printk(KERN_INFO "ipv6 ipcomp close: can't remove xfrm type\n");
+}
+
+module_init(ipcomp6_init);
+module_exit(ipcomp6_fini);
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("IP Payload Compression Protocol (IPComp) for IPv6 - RFC3173");
+MODULE_AUTHOR("Mitsuru KANDA <mk@linux-ipv6.org>");
+
+
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
new file mode 100644
index 000000000000..279ab86be662
--- /dev/null
+++ b/net/ipv6/ipv6_sockglue.c
@@ -0,0 +1,704 @@
+/*
+ * IPv6 BSD socket options interface
+ * Linux INET6 implementation
+ *
+ * Authors:
+ * Pedro Roque <roque@di.fc.ul.pt>
+ *
+ * Based on linux/net/ipv4/ip_sockglue.c
+ *
+ * $Id: ipv6_sockglue.c,v 1.41 2002/02/01 22:01:04 davem Exp $
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * FIXME: Make the setsockopt code POSIX compliant: That is
+ *
+ * o Return -EINVAL for setsockopt of short lengths
+ * o Truncate getsockopt returns
+ * o Return an optlen of the truncated length if need be
+ *
+ * Changes:
+ * David L Stevens <dlstevens@us.ibm.com>:
+ * - added multicast source filtering API for MLDv2
+ */
+
+#include <linux/module.h>
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/sched.h>
+#include <linux/net.h>
+#include <linux/in6.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <linux/init.h>
+#include <linux/sysctl.h>
+#include <linux/netfilter.h>
+
+#include <net/sock.h>
+#include <net/snmp.h>
+#include <net/ipv6.h>
+#include <net/ndisc.h>
+#include <net/protocol.h>
+#include <net/transp_v6.h>
+#include <net/ip6_route.h>
+#include <net/addrconf.h>
+#include <net/inet_common.h>
+#include <net/tcp.h>
+#include <net/udp.h>
+#include <net/xfrm.h>
+
+#include <asm/uaccess.h>
+
+/* IPv6 statistics counters. */
+DEFINE_SNMP_STAT(struct ipstats_mib, ipv6_statistics);
+
+/* Packet hook that hands ETH_P_IPV6 frames to ipv6_rcv(). */
+static struct packet_type ipv6_packet_type = {
+ .type = __constant_htons(ETH_P_IPV6),
+ .func = ipv6_rcv,
+};
+
+/* Chain of router-alert sockets; modified under ip6_ra_lock in ip6_ra_control(). */
+struct ip6_ra_chain *ip6_ra_chain;
+DEFINE_RWLOCK(ip6_ra_lock);
+
+/*
+ * ip6_ra_control - add or remove a socket on the router-alert chain
+ * @sk: socket; must be SOCK_RAW bound to IPPROTO_RAW
+ * @sel: value stored with the entry; a negative value requests removal
+ * @destructor: optional callback invoked on the socket at removal
+ *
+ * Returns 0 on success, -EINVAL for an unsuitable socket, -EADDRINUSE
+ * when adding a socket that is already on the chain, and -ENOBUFS when
+ * the new entry could not be allocated.  -ENOBUFS is also what a removal
+ * request returns when the socket is not on the chain, because no entry
+ * is allocated in that case.
+ */
+int ip6_ra_control(struct sock *sk, int sel, void (*destructor)(struct sock *))
+{
+ struct ip6_ra_chain *ra, *new_ra, **rap;
+
+ /* RA packet may be delivered ONLY to IPPROTO_RAW socket */
+ if (sk->sk_type != SOCK_RAW || inet_sk(sk)->num != IPPROTO_RAW)
+ return -EINVAL;
+
+ /* Allocate up front, before the lock is taken. */
+ new_ra = (sel>=0) ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL;
+
+ write_lock_bh(&ip6_ra_lock);
+ for (rap = &ip6_ra_chain; (ra=*rap) != NULL; rap = &ra->next) {
+ if (ra->sk == sk) {
+ if (sel>=0) {
+ write_unlock_bh(&ip6_ra_lock);
+ if (new_ra)
+ kfree(new_ra);
+ return -EADDRINUSE;
+ }
+
+ /* Removal: unlink under the lock, tear down after dropping it. */
+ *rap = ra->next;
+ write_unlock_bh(&ip6_ra_lock);
+
+ if (ra->destructor)
+ ra->destructor(sk);
+ sock_put(sk);
+ kfree(ra);
+ return 0;
+ }
+ }
+ if (new_ra == NULL) {
+ write_unlock_bh(&ip6_ra_lock);
+ return -ENOBUFS;
+ }
+ /* The loop ended with ra == NULL and rap at the tail link: append there. */
+ new_ra->sk = sk;
+ new_ra->sel = sel;
+ new_ra->destructor = destructor;
+ new_ra->next = ra;
+ *rap = new_ra;
+ sock_hold(sk);
+ write_unlock_bh(&ip6_ra_lock);
+ return 0;
+}
+
+/* Multicast source-filter helpers used by the socket option handlers below. */
+extern int ip6_mc_source(int add, int omode, struct sock *sk,
+ struct group_source_req *pgsr);
+extern int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf);
+extern int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
+ struct group_filter __user *optval, int __user *optlen);
+
+
+/*
+ * ipv6_setsockopt - set a SOL_IPV6 socket option
+ * @sk: socket
+ * @level: option level; SOL_IP on a non-raw socket is passed on to
+ *         udp_prot.setsockopt(), anything other than SOL_IPV6 fails
+ * @optname: option to set
+ * @optval: user pointer to the option value; NULL is read as the value 0
+ * @optlen: length of the user buffer
+ *
+ * The first int of @optval is fetched up front and used by the simple
+ * integer and boolean options; options that take a structure copy it in
+ * themselves and reject a buffer that is too short for it.
+ *
+ * Returns 0 on success or a negative errno.  Options not handled here
+ * yield -ENOPROTOOPT unless CONFIG_NETFILTER is set, in which case they
+ * are offered to nf_setsockopt().
+ */
+int ipv6_setsockopt(struct sock *sk, int level, int optname,
+		    char __user *optval, int optlen)
+{
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	int val, valbool;
+	int retv = -ENOPROTOOPT;
+
+	if (level == SOL_IP && sk->sk_type != SOCK_RAW)
+		return udp_prot.setsockopt(sk, level, optname, optval, optlen);
+
+	if(level!=SOL_IPV6)
+		goto out;
+
+	if (optval == NULL)
+		val=0;
+	else if (get_user(val, (int __user *) optval))
+		return -EFAULT;
+
+	valbool = (val!=0);
+
+	lock_sock(sk);
+
+	switch (optname) {
+
+	case IPV6_ADDRFORM:
+		/* Turn a connected, v4-mapped IPv6 socket into an IPv4 one. */
+		if (val == PF_INET) {
+			struct ipv6_txoptions *opt;
+			struct sk_buff *pktopt;
+
+			if (sk->sk_protocol != IPPROTO_UDP &&
+			    sk->sk_protocol != IPPROTO_TCP)
+				break;
+
+			if (sk->sk_state != TCP_ESTABLISHED) {
+				retv = -ENOTCONN;
+				break;
+			}
+
+			if (ipv6_only_sock(sk) ||
+			    !(ipv6_addr_type(&np->daddr) & IPV6_ADDR_MAPPED)) {
+				retv = -EADDRNOTAVAIL;
+				break;
+			}
+
+			fl6_free_socklist(sk);
+			ipv6_sock_mc_close(sk);
+
+			if (sk->sk_protocol == IPPROTO_TCP) {
+				struct tcp_sock *tp = tcp_sk(sk);
+
+				local_bh_disable();
+				sock_prot_dec_use(sk->sk_prot);
+				sock_prot_inc_use(&tcp_prot);
+				local_bh_enable();
+				sk->sk_prot = &tcp_prot;
+				tp->af_specific = &ipv4_specific;
+				sk->sk_socket->ops = &inet_stream_ops;
+				sk->sk_family = PF_INET;
+				tcp_sync_mss(sk, tp->pmtu_cookie);
+			} else {
+				local_bh_disable();
+				sock_prot_dec_use(sk->sk_prot);
+				sock_prot_inc_use(&udp_prot);
+				local_bh_enable();
+				sk->sk_prot = &udp_prot;
+				sk->sk_socket->ops = &inet_dgram_ops;
+				sk->sk_family = PF_INET;
+			}
+			/* Release the IPv6-only per-socket state. */
+			opt = xchg(&np->opt, NULL);
+			if (opt)
+				sock_kfree_s(sk, opt, opt->tot_len);
+			pktopt = xchg(&np->pktoptions, NULL);
+			if (pktopt)
+				kfree_skb(pktopt);
+
+			sk->sk_destruct = inet_sock_destruct;
+#ifdef INET_REFCNT_DEBUG
+			atomic_dec(&inet6_sock_nr);
+#endif
+			module_put(THIS_MODULE);
+			retv = 0;
+			break;
+		}
+		goto e_inval;
+
+	case IPV6_V6ONLY:
+		/* Only allowed while inet_sk(sk)->num is still zero. */
+		if (inet_sk(sk)->num)
+			goto e_inval;
+		np->ipv6only = valbool;
+		retv = 0;
+		break;
+
+	case IPV6_PKTINFO:
+		np->rxopt.bits.rxinfo = valbool;
+		retv = 0;
+		break;
+
+	case IPV6_HOPLIMIT:
+		np->rxopt.bits.rxhlim = valbool;
+		retv = 0;
+		break;
+
+	case IPV6_RTHDR:
+		if (val < 0 || val > 2)
+			goto e_inval;
+		np->rxopt.bits.srcrt = val;
+		retv = 0;
+		break;
+
+	case IPV6_HOPOPTS:
+		np->rxopt.bits.hopopts = valbool;
+		retv = 0;
+		break;
+
+	case IPV6_DSTOPTS:
+		np->rxopt.bits.dstopts = valbool;
+		retv = 0;
+		break;
+
+	case IPV6_FLOWINFO:
+		np->rxopt.bits.rxflow = valbool;
+		retv = 0;
+		break;
+
+	case IPV6_PKTOPTIONS:
+	{
+		struct ipv6_txoptions *opt = NULL;
+		struct msghdr msg;
+		struct flowi fl;
+		int junk;
+
+		fl.fl6_flowlabel = 0;
+		fl.oif = sk->sk_bound_dev_if;
+
+		/* A zero length clears the options currently installed. */
+		if (optlen == 0)
+			goto update;
+
+		/* 1K is probably excessive
+		 * 1K is surely not enough, 2K per standard header is 16K.
+		 */
+		retv = -EINVAL;
+		if (optlen > 64*1024)
+			break;
+
+		opt = sock_kmalloc(sk, sizeof(*opt) + optlen, GFP_KERNEL);
+		retv = -ENOBUFS;
+		if (opt == NULL)
+			break;
+
+		memset(opt, 0, sizeof(*opt));
+		opt->tot_len = sizeof(*opt) + optlen;
+		retv = -EFAULT;
+		if (copy_from_user(opt+1, optval, optlen))
+			goto done;
+
+		msg.msg_controllen = optlen;
+		msg.msg_control = (void*)(opt+1);
+
+		retv = datagram_send_ctl(&msg, &fl, opt, &junk);
+		if (retv)
+			goto done;
+update:
+		retv = 0;
+		if (sk->sk_type == SOCK_STREAM) {
+			if (opt) {
+				struct tcp_sock *tp = tcp_sk(sk);
+				if (!((1 << sk->sk_state) &
+				      (TCPF_LISTEN | TCPF_CLOSE))
+				    && inet_sk(sk)->daddr != LOOPBACK4_IPV6) {
+					tp->ext_header_len = opt->opt_flen + opt->opt_nflen;
+					tcp_sync_mss(sk, tp->pmtu_cookie);
+				}
+			}
+			opt = xchg(&np->opt, opt);
+			sk_dst_reset(sk);
+		} else {
+			write_lock(&sk->sk_dst_lock);
+			opt = xchg(&np->opt, opt);
+			write_unlock(&sk->sk_dst_lock);
+			sk_dst_reset(sk);
+		}
+
+done:
+		/*
+		 * After the exchange opt is the previous option block; on the
+		 * error paths it is the one that was just allocated.
+		 */
+		if (opt)
+			sock_kfree_s(sk, opt, opt->tot_len);
+		break;
+	}
+	case IPV6_UNICAST_HOPS:
+		if (val > 255 || val < -1)
+			goto e_inval;
+		np->hop_limit = val;
+		retv = 0;
+		break;
+
+	case IPV6_MULTICAST_HOPS:
+		if (sk->sk_type == SOCK_STREAM)
+			goto e_inval;
+		if (val > 255 || val < -1)
+			goto e_inval;
+		np->mcast_hops = val;
+		retv = 0;
+		break;
+
+	case IPV6_MULTICAST_LOOP:
+		np->mc_loop = valbool;
+		retv = 0;
+		break;
+
+	case IPV6_MULTICAST_IF:
+		if (sk->sk_type == SOCK_STREAM)
+			goto e_inval;
+		if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != val)
+			goto e_inval;
+
+		if (__dev_get_by_index(val) == NULL) {
+			retv = -ENODEV;
+			break;
+		}
+		np->mcast_oif = val;
+		retv = 0;
+		break;
+	case IPV6_ADD_MEMBERSHIP:
+	case IPV6_DROP_MEMBERSHIP:
+	{
+		struct ipv6_mreq mreq;
+
+		/* Reject a buffer too short to hold the request. */
+		if (optlen < sizeof(struct ipv6_mreq))
+			goto e_inval;
+
+		retv = -EFAULT;
+		if (copy_from_user(&mreq, optval, sizeof(struct ipv6_mreq)))
+			break;
+
+		if (optname == IPV6_ADD_MEMBERSHIP)
+			retv = ipv6_sock_mc_join(sk, mreq.ipv6mr_ifindex, &mreq.ipv6mr_multiaddr);
+		else
+			retv = ipv6_sock_mc_drop(sk, mreq.ipv6mr_ifindex, &mreq.ipv6mr_multiaddr);
+		break;
+	}
+	case IPV6_JOIN_ANYCAST:
+	case IPV6_LEAVE_ANYCAST:
+	{
+		struct ipv6_mreq mreq;
+
+		if (optlen != sizeof(struct ipv6_mreq))
+			goto e_inval;
+
+		retv = -EFAULT;
+		if (copy_from_user(&mreq, optval, sizeof(struct ipv6_mreq)))
+			break;
+
+		if (optname == IPV6_JOIN_ANYCAST)
+			retv = ipv6_sock_ac_join(sk, mreq.ipv6mr_ifindex, &mreq.ipv6mr_acaddr);
+		else
+			retv = ipv6_sock_ac_drop(sk, mreq.ipv6mr_ifindex, &mreq.ipv6mr_acaddr);
+		break;
+	}
+	case MCAST_JOIN_GROUP:
+	case MCAST_LEAVE_GROUP:
+	{
+		struct group_req greq;
+		struct sockaddr_in6 *psin6;
+
+		/* Reject a buffer too short to hold the request. */
+		if (optlen < sizeof(struct group_req))
+			goto e_inval;
+
+		retv = -EFAULT;
+		if (copy_from_user(&greq, optval, sizeof(struct group_req)))
+			break;
+		if (greq.gr_group.ss_family != AF_INET6) {
+			retv = -EADDRNOTAVAIL;
+			break;
+		}
+		psin6 = (struct sockaddr_in6 *)&greq.gr_group;
+		if (optname == MCAST_JOIN_GROUP)
+			retv = ipv6_sock_mc_join(sk, greq.gr_interface,
+						 &psin6->sin6_addr);
+		else
+			retv = ipv6_sock_mc_drop(sk, greq.gr_interface,
+						 &psin6->sin6_addr);
+		break;
+	}
+	case MCAST_JOIN_SOURCE_GROUP:
+	case MCAST_LEAVE_SOURCE_GROUP:
+	case MCAST_BLOCK_SOURCE:
+	case MCAST_UNBLOCK_SOURCE:
+	{
+		struct group_source_req greqs;
+		int omode, add;
+
+		if (optlen != sizeof(struct group_source_req))
+			goto e_inval;
+		if (copy_from_user(&greqs, optval, sizeof(greqs))) {
+			retv = -EFAULT;
+			break;
+		}
+		if (greqs.gsr_group.ss_family != AF_INET6 ||
+		    greqs.gsr_source.ss_family != AF_INET6) {
+			retv = -EADDRNOTAVAIL;
+			break;
+		}
+		if (optname == MCAST_BLOCK_SOURCE) {
+			omode = MCAST_EXCLUDE;
+			add = 1;
+		} else if (optname == MCAST_UNBLOCK_SOURCE) {
+			omode = MCAST_EXCLUDE;
+			add = 0;
+		} else if (optname == MCAST_JOIN_SOURCE_GROUP) {
+			struct sockaddr_in6 *psin6;
+
+			/* Join the group itself before adding the source. */
+			psin6 = (struct sockaddr_in6 *)&greqs.gsr_group;
+			retv = ipv6_sock_mc_join(sk, greqs.gsr_interface,
+						 &psin6->sin6_addr);
+			if (retv)
+				break;
+			omode = MCAST_INCLUDE;
+			add = 1;
+		} else /* MCAST_LEAVE_SOURCE_GROUP */ {
+			omode = MCAST_INCLUDE;
+			add = 0;
+		}
+		retv = ip6_mc_source(add, omode, sk, &greqs);
+		break;
+	}
+	case MCAST_MSFILTER:
+	{
+		extern int sysctl_optmem_max;
+		extern int sysctl_mld_max_msf;
+		struct group_filter *gsf;
+
+		if (optlen < GROUP_FILTER_SIZE(0))
+			goto e_inval;
+		if (optlen > sysctl_optmem_max) {
+			retv = -ENOBUFS;
+			break;
+		}
+		gsf = kmalloc(optlen, GFP_KERNEL);
+		if (!gsf) {
+			retv = -ENOBUFS;
+			break;
+		}
+		retv = -EFAULT;
+		if (copy_from_user(gsf, optval, optlen)) {
+			kfree(gsf);
+			break;
+		}
+		/* numsrc >= (4G-140)/128 overflow in 32 bits */
+		if (gsf->gf_numsrc >= 0x1ffffffU ||
+		    gsf->gf_numsrc > sysctl_mld_max_msf) {
+			kfree(gsf);
+			retv = -ENOBUFS;
+			break;
+		}
+		/* The source list claimed by the header must fit the buffer. */
+		if (GROUP_FILTER_SIZE(gsf->gf_numsrc) > optlen) {
+			kfree(gsf);
+			retv = -EINVAL;
+			break;
+		}
+		retv = ip6_mc_msfilter(sk, gsf);
+		kfree(gsf);
+
+		break;
+	}
+	case IPV6_ROUTER_ALERT:
+		retv = ip6_ra_control(sk, val, NULL);
+		break;
+	case IPV6_MTU_DISCOVER:
+		if (val<0 || val>2)
+			goto e_inval;
+		np->pmtudisc = val;
+		retv = 0;
+		break;
+	case IPV6_MTU:
+		if (val && val < IPV6_MIN_MTU)
+			goto e_inval;
+		np->frag_size = val;
+		retv = 0;
+		break;
+	case IPV6_RECVERR:
+		np->recverr = valbool;
+		/* Turning the option off discards queued errors. */
+		if (!val)
+			skb_queue_purge(&sk->sk_error_queue);
+		retv = 0;
+		break;
+	case IPV6_FLOWINFO_SEND:
+		np->sndflow = valbool;
+		retv = 0;
+		break;
+	case IPV6_FLOWLABEL_MGR:
+		retv = ipv6_flowlabel_opt(sk, optval, optlen);
+		break;
+	case IPV6_IPSEC_POLICY:
+	case IPV6_XFRM_POLICY:
+		retv = xfrm_user_policy(sk, optname, optval, optlen);
+		break;
+
+#ifdef CONFIG_NETFILTER
+	default:
+		retv = nf_setsockopt(sk, PF_INET6, optname, optval,
+				     optlen);
+		break;
+#endif
+
+	}
+	release_sock(sk);
+
+out:
+	return retv;
+
+e_inval:
+	release_sock(sk);
+	return -EINVAL;
+}
+
+/*
+ * ipv6_getsockopt - read a SOL_IPV6 socket option
+ * @sk: socket
+ * @level: option level; SOL_IP on a non-raw socket is passed on to
+ *         udp_prot.getsockopt(), anything other than SOL_IPV6 fails with
+ *         -ENOPROTOOPT
+ * @optname: option to read
+ * @optval: user buffer receiving the value
+ * @optlen: user pointer holding the buffer length on entry and the number
+ *          of bytes written on return
+ *
+ * Integer options are truncated to at most sizeof(int) bytes.
+ * MCAST_MSFILTER and IPV6_PKTOPTIONS produce their own output and return
+ * directly.  Options not handled here go to nf_getsockopt() when
+ * CONFIG_NETFILTER is set and fail with -EINVAL otherwise.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+int ipv6_getsockopt(struct sock *sk, int level, int optname,
+ char __user *optval, int __user *optlen)
+{
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ int len;
+ int val;
+
+ if (level == SOL_IP && sk->sk_type != SOCK_RAW)
+ return udp_prot.getsockopt(sk, level, optname, optval, optlen);
+ if(level!=SOL_IPV6)
+ return -ENOPROTOOPT;
+ if (get_user(len, optlen))
+ return -EFAULT;
+ switch (optname) {
+ case IPV6_ADDRFORM:
+ if (sk->sk_protocol != IPPROTO_UDP &&
+ sk->sk_protocol != IPPROTO_TCP)
+ return -EINVAL;
+ if (sk->sk_state != TCP_ESTABLISHED)
+ return -ENOTCONN;
+ val = sk->sk_family;
+ break;
+ case MCAST_MSFILTER:
+ {
+ struct group_filter gsf;
+ int err;
+
+ /* Only the fixed header is read here; ip6_mc_msfget() writes the result. */
+ if (len < GROUP_FILTER_SIZE(0))
+ return -EINVAL;
+ if (copy_from_user(&gsf, optval, GROUP_FILTER_SIZE(0)))
+ return -EFAULT;
+ lock_sock(sk);
+ err = ip6_mc_msfget(sk, &gsf,
+ (struct group_filter __user *)optval, optlen);
+ release_sock(sk);
+ return err;
+ }
+
+ case IPV6_PKTOPTIONS:
+ {
+ struct msghdr msg;
+ struct sk_buff *skb;
+
+ if (sk->sk_type != SOCK_STREAM)
+ return -ENOPROTOOPT;
+
+ /* Build the reply as control messages written straight to optval. */
+ msg.msg_control = optval;
+ msg.msg_controllen = len;
+ msg.msg_flags = 0;
+
+ /* Take a reference so the skb can be used after the lock is dropped. */
+ lock_sock(sk);
+ skb = np->pktoptions;
+ if (skb)
+ atomic_inc(&skb->users);
+ release_sock(sk);
+
+ if (skb) {
+ int err = datagram_recv_ctl(sk, &msg, skb);
+ kfree_skb(skb);
+ if (err)
+ return err;
+ } else {
+ /* No saved packet options: report values from the socket itself. */
+ if (np->rxopt.bits.rxinfo) {
+ struct in6_pktinfo src_info;
+ src_info.ipi6_ifindex = np->mcast_oif;
+ ipv6_addr_copy(&src_info.ipi6_addr, &np->daddr);
+ put_cmsg(&msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info);
+ }
+ if (np->rxopt.bits.rxhlim) {
+ int hlim = np->mcast_hops;
+ put_cmsg(&msg, SOL_IPV6, IPV6_HOPLIMIT, sizeof(hlim), &hlim);
+ }
+ }
+ /* msg_controllen now holds the unused space; report the bytes used. */
+ len -= msg.msg_controllen;
+ return put_user(len, optlen);
+ }
+ case IPV6_MTU:
+ {
+ struct dst_entry *dst;
+ val = 0;
+ lock_sock(sk);
+ dst = sk_dst_get(sk);
+ if (dst) {
+ val = dst_mtu(dst);
+ dst_release(dst);
+ }
+ release_sock(sk);
+ /* Without a cached route there is no MTU to report. */
+ if (!val)
+ return -ENOTCONN;
+ break;
+ }
+
+ case IPV6_V6ONLY:
+ val = np->ipv6only;
+ break;
+
+ case IPV6_PKTINFO:
+ val = np->rxopt.bits.rxinfo;
+ break;
+
+ case IPV6_HOPLIMIT:
+ val = np->rxopt.bits.rxhlim;
+ break;
+
+ case IPV6_RTHDR:
+ val = np->rxopt.bits.srcrt;
+ break;
+
+ case IPV6_HOPOPTS:
+ val = np->rxopt.bits.hopopts;
+ break;
+
+ case IPV6_DSTOPTS:
+ val = np->rxopt.bits.dstopts;
+ break;
+
+ case IPV6_FLOWINFO:
+ val = np->rxopt.bits.rxflow;
+ break;
+
+ case IPV6_UNICAST_HOPS:
+ val = np->hop_limit;
+ break;
+
+ case IPV6_MULTICAST_HOPS:
+ val = np->mcast_hops;
+ break;
+
+ case IPV6_MULTICAST_LOOP:
+ val = np->mc_loop;
+ break;
+
+ case IPV6_MULTICAST_IF:
+ val = np->mcast_oif;
+ break;
+
+ case IPV6_MTU_DISCOVER:
+ val = np->pmtudisc;
+ break;
+
+ case IPV6_RECVERR:
+ val = np->recverr;
+ break;
+
+ case IPV6_FLOWINFO_SEND:
+ val = np->sndflow;
+ break;
+
+ default:
+#ifdef CONFIG_NETFILTER
+ lock_sock(sk);
+ val = nf_getsockopt(sk, PF_INET6, optname, optval,
+ &len);
+ release_sock(sk);
+ if (val >= 0)
+ val = put_user(len, optlen);
+ return val;
+#else
+ return -EINVAL;
+#endif
+ }
+ /* Common exit for integer options: copy out at most sizeof(int) bytes. */
+ len = min_t(unsigned int, sizeof(int), len);
+ if(put_user(len, optlen))
+ return -EFAULT;
+ if(copy_to_user(optval,&val,len))
+ return -EFAULT;
+ return 0;
+}
+
+/* Register the ETH_P_IPV6 packet handler (ipv6_packet_type). */
+void __init ipv6_packet_init(void)
+{
+ dev_add_pack(&ipv6_packet_type);
+}
+
+/* Remove the ETH_P_IPV6 packet handler added by ipv6_packet_init(). */
+void ipv6_packet_cleanup(void)
+{
+ dev_remove_pack(&ipv6_packet_type);
+}
diff --git a/net/ipv6/ipv6_syms.c b/net/ipv6/ipv6_syms.c
new file mode 100644
index 000000000000..2f4c91ddc9a3
--- /dev/null
+++ b/net/ipv6/ipv6_syms.c
@@ -0,0 +1,41 @@
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <net/protocol.h>
+#include <net/ipv6.h>
+#include <net/addrconf.h>
+#include <net/ip6_route.h>
+#include <net/xfrm.h>
+
+/*
+ * Symbols exported by the IPv6 code.  ip6_route_me_harder is exported
+ * only with CONFIG_NETFILTER and xfrm6_rcv only with CONFIG_XFRM.
+ */
+EXPORT_SYMBOL(ipv6_addr_type);
+EXPORT_SYMBOL(icmpv6_send);
+EXPORT_SYMBOL(icmpv6_statistics);
+EXPORT_SYMBOL(icmpv6_err_convert);
+EXPORT_SYMBOL(ndisc_mc_map);
+EXPORT_SYMBOL(register_inet6addr_notifier);
+EXPORT_SYMBOL(unregister_inet6addr_notifier);
+EXPORT_SYMBOL(ip6_route_output);
+#ifdef CONFIG_NETFILTER
+EXPORT_SYMBOL(ip6_route_me_harder);
+#endif
+EXPORT_SYMBOL(addrconf_lock);
+EXPORT_SYMBOL(ipv6_setsockopt);
+EXPORT_SYMBOL(ipv6_getsockopt);
+EXPORT_SYMBOL(inet6_register_protosw);
+EXPORT_SYMBOL(inet6_unregister_protosw);
+EXPORT_SYMBOL(inet6_add_protocol);
+EXPORT_SYMBOL(inet6_del_protocol);
+EXPORT_SYMBOL(ip6_xmit);
+EXPORT_SYMBOL(inet6_release);
+EXPORT_SYMBOL(inet6_bind);
+EXPORT_SYMBOL(inet6_getname);
+EXPORT_SYMBOL(inet6_ioctl);
+EXPORT_SYMBOL(ipv6_get_saddr);
+EXPORT_SYMBOL(ipv6_chk_addr);
+EXPORT_SYMBOL(in6_dev_finish_destroy);
+#ifdef CONFIG_XFRM
+EXPORT_SYMBOL(xfrm6_rcv);
+#endif
+EXPORT_SYMBOL(rt6_lookup);
+EXPORT_SYMBOL(fl6_sock_lookup);
+EXPORT_SYMBOL(ipv6_push_nfrag_opts);
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
new file mode 100644
index 000000000000..393b6e6f50a9
--- /dev/null
+++ b/net/ipv6/mcast.c
@@ -0,0 +1,2499 @@
+/*
+ * Multicast support for IPv6
+ * Linux INET6 implementation
+ *
+ * Authors:
+ * Pedro Roque <roque@di.fc.ul.pt>
+ *
+ * $Id: mcast.c,v 1.40 2002/02/08 03:57:19 davem Exp $
+ *
+ * Based on linux/ipv4/igmp.c and linux/ipv4/ip_sockglue.c
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+/* Changes:
+ *
+ * yoshfuji : fix format of router-alert option
+ * YOSHIFUJI Hideaki @USAGI:
+ * Fixed source address for MLD message based on
+ * <draft-ietf-magma-mld-source-05.txt>.
+ * YOSHIFUJI Hideaki @USAGI:
+ * - Ignore Queries for invalid addresses.
+ * - MLD for link-local addresses.
+ * David L Stevens <dlstevens@us.ibm.com>:
+ * - MLDv2 support
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/jiffies.h>
+#include <linux/times.h>
+#include <linux/net.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <linux/route.h>
+#include <linux/init.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv6.h>
+
+#include <net/sock.h>
+#include <net/snmp.h>
+
+#include <net/ipv6.h>
+#include <net/protocol.h>
+#include <net/if_inet6.h>
+#include <net/ndisc.h>
+#include <net/addrconf.h>
+#include <net/ip6_route.h>
+
+#include <net/ip6_checksum.h>
+
+/*
+ * Debug verbosity. Set to 3 to get tracing: MDBG(x) then expands to
+ * "printk x", so x must be a parenthesised printk argument list, e.g.
+ * MDBG((KERN_DEBUG "...")). Below 3, MDBG() expands to nothing.
+ */
+#define MCAST_DEBUG 2
+
+#if MCAST_DEBUG >= 3
+#define MDBG(x) printk x
+#else
+#define MDBG(x)
+#endif
+
+/*
+ * These header formats should be in a separate include file, but icmpv6.h
+ * doesn't have in6_addr defined in all cases, there is no __u128, and no
+ * other files reference these.
+ *
+ * +-DLS 4/14/03
+ */
+
+/* Multicast Listener Discovery version 2 headers */
+
+/*
+ * One multicast address (group) record inside an MLDv2 report.
+ * grec_nsrcs is declared __u16 with no byte-order annotation; the sibling
+ * 16-bit header fields are accessed with ntohs()/htons() in this file, so it
+ * is presumably network byte order as well - confirm.
+ */
+struct mld2_grec {
+ __u8 grec_type; /* record type (MLD2_* value passed to add_grhead()) */
+ __u8 grec_auxwords; /* written as 0 by add_grhead() */
+ __u16 grec_nsrcs; /* number of entries in grec_src[] */
+ struct in6_addr grec_mca; /* multicast address this record is about */
+ struct in6_addr grec_src[0]; /* trailing source addresses */
+};
+
+/*
+ * MLDv2 report header, followed by ngrec group records.
+ * mld_newpack() fills in type = ICMPV6_MLD2_REPORT and zeroes the rest;
+ * mld_sendpack() computes csum; add_grhead() bumps ngrec using
+ * htons()/ntohs(), i.e. ngrec is kept in network byte order.
+ */
+struct mld2_report {
+ __u8 type; /* ICMPv6 message type */
+ __u8 resv1; /* reserved, sent as 0 */
+ __u16 csum; /* ICMPv6 checksum */
+ __u16 resv2; /* reserved, sent as 0 */
+ __u16 ngrec; /* number of group records that follow */
+ struct mld2_grec grec[0]; /* first group record */
+};
+
+/*
+ * MLDv2 query as it appears on the wire, overlaid on the ICMPv6 header by
+ * igmp6_event_query(). The 16-bit fields are read with ntohs() there.
+ */
+struct mld2_query {
+ __u8 type; /* ICMPv6 message type */
+ __u8 code; /* ICMPv6 code */
+ __u16 csum; /* ICMPv6 checksum */
+ __u16 mrc; /* maximum response code; decoded with MLDV2_MRC() */
+ __u16 resv1; /* reserved */
+ struct in6_addr mca; /* queried multicast address */
+ /* One byte of flags; field order depends on bitfield endianness. */
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+ __u8 qrv:3,
+ suppress:1,
+ resv2:4;
+#elif defined(__BIG_ENDIAN_BITFIELD)
+ __u8 resv2:4,
+ suppress:1,
+ qrv:3;
+#else
+#error "Please fix <asm/byteorder.h>"
+#endif
+ /* qrv is copied into idev->mc_qrv by igmp6_event_query() when non-zero */
+ __u8 qqic; /* presumably the code MLDV2_QQIC() decodes - confirm */
+ __u16 nsrcs; /* number of entries in srcs[] */
+ struct in6_addr srcs[0]; /* trailing source addresses */
+};
+
+/* Destination address used for MLDv2 reports built by mld_newpack(). */
+static struct in6_addr mld2_all_mcr = MLD2_ALL_MCR_INIT;
+
+/* Big mc list lock for all the sockets */
+static DEFINE_RWLOCK(ipv6_sk_mc_lock);
+
+/* Socket whose sock is used by mld_newpack() to allocate and build reports. */
+static struct socket *igmp6_socket;
+
+/* Drop one user of a group on an inet6_dev; defined further down. */
+int __ipv6_dev_mc_dec(struct inet6_dev *idev, struct in6_addr *addr);
+
+/* Forward declarations for helpers used before their definition. */
+static void igmp6_join_group(struct ifmcaddr6 *ma);
+static void igmp6_leave_group(struct ifmcaddr6 *ma);
+static void igmp6_timer_handler(unsigned long data);
+
+static void mld_gq_timer_expire(unsigned long data);
+static void mld_ifc_timer_expire(unsigned long data);
+static void mld_ifc_event(struct inet6_dev *idev);
+static void mld_add_delrec(struct inet6_dev *idev, struct ifmcaddr6 *pmc);
+static void mld_del_delrec(struct inet6_dev *idev, struct in6_addr *addr);
+static void mld_clear_delrec(struct inet6_dev *idev);
+static int sf_setstate(struct ifmcaddr6 *pmc);
+static void sf_markstate(struct ifmcaddr6 *pmc);
+static void ip6_mc_clear_src(struct ifmcaddr6 *pmc);
+static int ip6_mc_del_src(struct inet6_dev *idev, struct in6_addr *pmca,
+ int sfmode, int sfcount, struct in6_addr *psfsrc,
+ int delta);
+static int ip6_mc_add_src(struct inet6_dev *idev, struct in6_addr *pmca,
+ int sfmode, int sfcount, struct in6_addr *psfsrc,
+ int delta);
+static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml,
+ struct inet6_dev *idev);
+
+
+/* Interval constant, in jiffies (10 seconds). */
+#define IGMP6_UNSOLICITED_IVAL (10*HZ)
+/* Default value for the "qrv" setting (see idev->mc_qrv). */
+#define MLD_QRV_DEFAULT 2
+
+/*
+ * True when MLDv1 behaviour must be used on idev: version 1 is forced
+ * globally or per device, or idev->mc_v1_seen is set and that jiffies
+ * deadline has not passed yet (igmp6_event_query() sets it on a v1 query).
+ */
+#define MLD_V1_SEEN(idev) (ipv6_devconf.force_mld_version == 1 || \
+ (idev)->cnf.force_mld_version == 1 || \
+ ((idev)->mc_v1_seen && \
+ time_before(jiffies, (idev)->mc_v1_seen)))
+
+/*
+ * Decoding of the MLDv2 exponent-encoded fields.
+ *
+ * MLDV2_MASK(value, nb)  keeps the low nb bits of value.
+ * MLDV2_EXP(thresh, nbmant, nbexp, value)
+ *	values below thresh are taken literally; otherwise the field holds
+ *	a 1-bit marker, an nbexp-bit exponent and an nbmant-bit mantissa,
+ *	and decodes to (mant | (1 << nbmant)) << (exp + nbexp).
+ *
+ * The implicit leading bit sits directly above the mantissa, i.e. at
+ * 1 << nbmant (0x10 for QQIC, 0x1000 for the Maximum Response Code).
+ * Using 1 << (nbmant + nbexp) there would inflate every encoded value
+ * by roughly a factor of eight.
+ */
+#define MLDV2_MASK(value, nb) ((nb)>=32 ? (value) : ((1<<(nb))-1) & (value))
+#define MLDV2_EXP(thresh, nbmant, nbexp, value) \
+	((value) < (thresh) ? (value) : \
+	((MLDV2_MASK(value, nbmant) | (1<<(nbmant))) << \
+	(MLDV2_MASK((value) >> (nbmant), nbexp) + (nbexp))))
+
+/* Querier's Query Interval Code: 4-bit mantissa, 3-bit exponent. */
+#define MLDV2_QQIC(value) MLDV2_EXP(0x80, 4, 3, value)
+/* Maximum Response Code: 12-bit mantissa, 3-bit exponent. */
+#define MLDV2_MRC(value) MLDV2_EXP(0x8000, 12, 3, value)
+
+/* Default for sysctl_mld_max_msf. */
+#define IPV6_MLD_MAX_MSF 10
+
+/*
+ * Upper bound on the number of sources in one socket's source filter;
+ * ip6_mc_source() refuses to add a source once sl_count reaches it.
+ */
+int sysctl_mld_max_msf = IPV6_MLD_MAX_MSF;
+
+/*
+ *	Join a socket to a multicast group.
+ *
+ *	@sk:      socket doing the join
+ *	@ifindex: interface to join on; 0 means "pick the device from a
+ *	          route lookup on the group address"
+ *	@addr:    group address, must be multicast
+ *
+ *	A new ipv6_mc_socklist entry (mode MCAST_EXCLUDE, no source list) is
+ *	allocated, the group's use count on the device is raised through
+ *	ipv6_dev_mc_inc(), and the entry is pushed on the head of the
+ *	socket's ipv6_mc_list under ipv6_sk_mc_lock.
+ *
+ *	Returns 0 on success, -EINVAL for a non-multicast address, -ENOMEM
+ *	if the entry cannot be allocated, -ENODEV if no device is found, or
+ *	the error from ipv6_dev_mc_inc().
+ */
+
+int ipv6_sock_mc_join(struct sock *sk, int ifindex, struct in6_addr *addr)
+{
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct ipv6_mc_socklist *entry;
+	struct net_device *dev;
+	int err;
+
+	if (!ipv6_addr_is_multicast(addr))
+		return -EINVAL;
+
+	entry = sock_kmalloc(sk, sizeof(struct ipv6_mc_socklist), GFP_KERNEL);
+	if (!entry)
+		return -ENOMEM;
+
+	entry->next = NULL;
+	ipv6_addr_copy(&entry->addr, addr);
+
+	if (ifindex != 0) {
+		dev = dev_get_by_index(ifindex);
+	} else {
+		struct rt6_info *rt = rt6_lookup(addr, NULL, 0, 0);
+
+		dev = NULL;
+		if (rt) {
+			/* keep the device alive after the route is released */
+			dev = rt->rt6i_dev;
+			dev_hold(dev);
+			dst_release(&rt->u.dst);
+		}
+	}
+
+	if (!dev) {
+		err = -ENODEV;
+		goto out_free;
+	}
+
+	entry->ifindex = dev->ifindex;
+	entry->sfmode = MCAST_EXCLUDE;
+	entry->sflist = NULL;
+
+	/* add to, or bump the use count of, the device's group list */
+	err = ipv6_dev_mc_inc(dev, addr);
+	if (err)
+		goto out_put;
+
+	write_lock_bh(&ipv6_sk_mc_lock);
+	entry->next = np->ipv6_mc_list;
+	np->ipv6_mc_list = entry;
+	write_unlock_bh(&ipv6_sk_mc_lock);
+
+	dev_put(dev);
+	return 0;
+
+out_put:
+	dev_put(dev);
+out_free:
+	sock_kfree_s(sk, entry, sizeof(*entry));
+	return err;
+}
+
+/*
+ * socket leave on multicast group
+ *
+ * Finds the first entry on the socket's ipv6_mc_list whose address equals
+ * addr (and whose ifindex matches, unless ifindex is 0), unlinks it under
+ * ipv6_sk_mc_lock, then - with the lock released - drops the socket's
+ * source filter and the device group reference, and frees the entry.
+ *
+ * Returns 0 if an entry was removed, -ENOENT if none matched.
+ */
+int ipv6_sock_mc_drop(struct sock *sk, int ifindex, struct in6_addr *addr)
+{
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct ipv6_mc_socklist *mc_lst, **lnk;
+
+ write_lock_bh(&ipv6_sk_mc_lock);
+ /* lnk always points at the link that refers to mc_lst, so unlinking is a single store */
+ for (lnk = &np->ipv6_mc_list; (mc_lst = *lnk) !=NULL ; lnk = &mc_lst->next) {
+ if ((ifindex == 0 || mc_lst->ifindex == ifindex) &&
+ ipv6_addr_equal(&mc_lst->addr, addr)) {
+ struct net_device *dev;
+
+ *lnk = mc_lst->next;
+ write_unlock_bh(&ipv6_sk_mc_lock);
+
+ /* the device may be gone by now; the entry is freed either way */
+ if ((dev = dev_get_by_index(mc_lst->ifindex)) != NULL) {
+ struct inet6_dev *idev = in6_dev_get(dev);
+
+ if (idev) {
+ (void) ip6_mc_leave_src(sk,mc_lst,idev);
+ __ipv6_dev_mc_dec(idev, &mc_lst->addr);
+ in6_dev_put(idev);
+ }
+ dev_put(dev);
+ }
+ sock_kfree_s(sk, mc_lst, sizeof(*mc_lst));
+ return 0;
+ }
+ }
+ write_unlock_bh(&ipv6_sk_mc_lock);
+
+ return -ENOENT;
+}
+
+/*
+ * Find the inet6_dev to use for multicast group 'group'.
+ *
+ * With ifindex == 0 the device comes from a route lookup on the group
+ * address; otherwise it is looked up by index.
+ *
+ * Returns NULL when no device or inet6_dev is found, or when the inet6_dev
+ * is marked dead (all references taken here are dropped in that case).
+ *
+ * On success the inet6_dev is returned WITH idev->lock read-locked (bh
+ * disabled) and with a reference held on both the inet6_dev and the
+ * net_device. The callers in this file undo that with
+ * read_unlock_bh(&idev->lock), in6_dev_put(idev) and dev_put(idev->dev).
+ */
+static struct inet6_dev *ip6_mc_find_dev(struct in6_addr *group, int ifindex)
+{
+ struct net_device *dev = NULL;
+ struct inet6_dev *idev = NULL;
+
+ if (ifindex == 0) {
+ struct rt6_info *rt;
+
+ rt = rt6_lookup(group, NULL, 0, 0);
+ if (rt) {
+ dev = rt->rt6i_dev;
+ dev_hold(dev);
+ dst_release(&rt->u.dst);
+ }
+ } else
+ dev = dev_get_by_index(ifindex);
+
+ if (!dev)
+ return NULL;
+ idev = in6_dev_get(dev);
+ if (!idev) {
+ dev_put(dev);
+ return NULL;
+ }
+ read_lock_bh(&idev->lock);
+ if (idev->dead) {
+ read_unlock_bh(&idev->lock);
+ in6_dev_put(idev);
+ dev_put(dev);
+ return NULL;
+ }
+ /* success: lock and both references are handed to the caller */
+ return idev;
+}
+
+/*
+ * Drop every multicast membership held by a socket.
+ *
+ * Entries are popped from the head of the socket's ipv6_mc_list one at a
+ * time. ipv6_sk_mc_lock is held only while unlinking; it is released
+ * around the per-entry teardown (source filter leave, device group
+ * decrement, free) and re-taken before looking at the list again.
+ */
+void ipv6_sock_mc_close(struct sock *sk)
+{
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct ipv6_mc_socklist *mc_lst;
+
+ write_lock_bh(&ipv6_sk_mc_lock);
+ while ((mc_lst = np->ipv6_mc_list) != NULL) {
+ struct net_device *dev;
+
+ np->ipv6_mc_list = mc_lst->next;
+ write_unlock_bh(&ipv6_sk_mc_lock);
+
+ /* the device may have disappeared; the entry is freed regardless */
+ dev = dev_get_by_index(mc_lst->ifindex);
+ if (dev) {
+ struct inet6_dev *idev = in6_dev_get(dev);
+
+ if (idev) {
+ (void) ip6_mc_leave_src(sk, mc_lst, idev);
+ __ipv6_dev_mc_dec(idev, &mc_lst->addr);
+ in6_dev_put(idev);
+ }
+ dev_put(dev);
+ }
+
+ sock_kfree_s(sk, mc_lst, sizeof(*mc_lst));
+
+ write_lock_bh(&ipv6_sk_mc_lock);
+ }
+ write_unlock_bh(&ipv6_sk_mc_lock);
+}
+
+/*
+ * Add or remove a single source in a socket's source filter for a group.
+ *
+ * @add:   non-zero to add pgsr->gsr_source, zero to remove it
+ * @omode: filter mode the operation applies to (compared with, and
+ *         possibly stored in, the membership's sfmode)
+ * @sk:    socket owning the membership
+ * @pgsr:  group, source and interface; both addresses must be AF_INET6
+ *
+ * The socket must already have joined the group. If a source list exists,
+ * omode must equal the current mode; with no source list a mode switch is
+ * allowed and is mirrored to the interface filter first.
+ *
+ * Returns 0 on success, -EINVAL for bad address family or non-multicast
+ * group, -ENODEV if no device is found, -ENOBUFS if the filter is full
+ * (sysctl_mld_max_msf) or allocation fails, and -EADDRNOTAVAIL otherwise
+ * (no prior join, mode mismatch, source missing on delete, source already
+ * present on add).
+ *
+ * Runs with idev->lock read-held between ip6_mc_find_dev() and "done".
+ */
+int ip6_mc_source(int add, int omode, struct sock *sk,
+ struct group_source_req *pgsr)
+{
+ struct in6_addr *source, *group;
+ struct ipv6_mc_socklist *pmc;
+ struct net_device *dev;
+ struct inet6_dev *idev;
+ struct ipv6_pinfo *inet6 = inet6_sk(sk);
+ struct ip6_sf_socklist *psl;
+ int i, j, rv;
+ int err;
+
+ if (pgsr->gsr_group.ss_family != AF_INET6 ||
+ pgsr->gsr_source.ss_family != AF_INET6)
+ return -EINVAL;
+
+ source = &((struct sockaddr_in6 *)&pgsr->gsr_source)->sin6_addr;
+ group = &((struct sockaddr_in6 *)&pgsr->gsr_group)->sin6_addr;
+
+ if (!ipv6_addr_is_multicast(group))
+ return -EINVAL;
+
+ /* on success this returns with idev->lock read-held and refs taken */
+ idev = ip6_mc_find_dev(group, pgsr->gsr_interface);
+ if (!idev)
+ return -ENODEV;
+ dev = idev->dev;
+
+ err = -EADDRNOTAVAIL;
+
+ /* find the socket's membership; interface 0 matches any interface */
+ for (pmc=inet6->ipv6_mc_list; pmc; pmc=pmc->next) {
+ if (pgsr->gsr_interface && pmc->ifindex != pgsr->gsr_interface)
+ continue;
+ if (ipv6_addr_equal(&pmc->addr, group))
+ break;
+ }
+ if (!pmc) /* must have a prior join */
+ goto done;
+ /* if a source filter was set, must be the same mode as before */
+ if (pmc->sflist) {
+ if (pmc->sfmode != omode)
+ goto done;
+ } else if (pmc->sfmode != omode) {
+ /* allow mode switches for empty-set filters */
+ ip6_mc_add_src(idev, group, omode, 0, NULL, 0);
+ ip6_mc_del_src(idev, group, pmc->sfmode, 0, NULL, 0);
+ pmc->sfmode = omode;
+ }
+
+ psl = pmc->sflist;
+ if (!add) {
+ if (!psl)
+ goto done;
+ /* rv stays non-zero when the list is empty or has no match */
+ rv = !0;
+ for (i=0; i<psl->sl_count; i++) {
+ rv = memcmp(&psl->sl_addr[i], source,
+ sizeof(struct in6_addr));
+ if (rv == 0)
+ break;
+ }
+ if (rv) /* source not found */
+ goto done;
+
+ /* update the interface filter */
+ ip6_mc_del_src(idev, group, omode, 1, source, 1);
+
+ /* close the gap left by entry i */
+ for (j=i+1; j<psl->sl_count; j++)
+ psl->sl_addr[j-1] = psl->sl_addr[j];
+ psl->sl_count--;
+ err = 0;
+ goto done;
+ }
+ /* else, add a new source to the filter */
+
+ if (psl && psl->sl_count >= sysctl_mld_max_msf) {
+ err = -ENOBUFS;
+ goto done;
+ }
+ /* no list yet, or list full: grow by IP6_SFBLOCK entries */
+ if (!psl || psl->sl_count == psl->sl_max) {
+ struct ip6_sf_socklist *newpsl;
+ int count = IP6_SFBLOCK;
+
+ if (psl)
+ count += psl->sl_max;
+ newpsl = (struct ip6_sf_socklist *)sock_kmalloc(sk,
+ IP6_SFLSIZE(count), GFP_ATOMIC);
+ if (!newpsl) {
+ err = -ENOBUFS;
+ goto done;
+ }
+ newpsl->sl_max = count;
+ /* equals the old sl_count (old list was full), or 0 for a new list */
+ newpsl->sl_count = count - IP6_SFBLOCK;
+ if (psl) {
+ for (i=0; i<psl->sl_count; i++)
+ newpsl->sl_addr[i] = psl->sl_addr[i];
+ sock_kfree_s(sk, psl, IP6_SFLSIZE(psl->sl_max));
+ }
+ pmc->sflist = psl = newpsl;
+ }
+ rv = 1; /* > 0 for insert logic below if sl_count is 0 */
+ for (i=0; i<psl->sl_count; i++) {
+ rv = memcmp(&psl->sl_addr[i], source, sizeof(struct in6_addr));
+ if (rv == 0)
+ break;
+ }
+ if (rv == 0) /* address already there is an error */
+ goto done;
+ /* i == sl_count here, so this loop moves nothing and the source is appended */
+ for (j=psl->sl_count-1; j>=i; j--)
+ psl->sl_addr[j+1] = psl->sl_addr[j];
+ psl->sl_addr[i] = *source;
+ psl->sl_count++;
+ err = 0;
+ /* update the interface list */
+ ip6_mc_add_src(idev, group, omode, 1, source, 1);
+done:
+ /* undo what ip6_mc_find_dev() handed us */
+ read_unlock_bh(&idev->lock);
+ in6_dev_put(idev);
+ dev_put(dev);
+ return err;
+}
+
+/*
+ * Replace a socket's whole source filter for a group.
+ *
+ * @sk:  socket owning the membership
+ * @gsf: group, interface, new filter mode (MCAST_INCLUDE or MCAST_EXCLUDE)
+ *       and gf_numsrc source addresses in gf_slist[]
+ *
+ * The new list (if any) is installed in the interface filter first; only
+ * when that succeeds is the old list removed from the interface filter and
+ * freed. The membership must match gf_interface exactly (no wildcard).
+ *
+ * Returns 0 on success, -EINVAL for a non-multicast group or bad mode,
+ * -ENODEV if no device is found, -EADDRNOTAVAIL without a prior join,
+ * -ENOBUFS on allocation failure, or the error from ip6_mc_add_src().
+ *
+ * NOTE(review): gf_group.ss_family is not checked here and gf_slist[] is
+ * read for gf_numsrc entries - presumably the caller validated both.
+ */
+int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf)
+{
+ struct in6_addr *group;
+ struct ipv6_mc_socklist *pmc;
+ struct net_device *dev;
+ struct inet6_dev *idev;
+ struct ipv6_pinfo *inet6 = inet6_sk(sk);
+ struct ip6_sf_socklist *newpsl, *psl;
+ int i, err;
+
+ group = &((struct sockaddr_in6 *)&gsf->gf_group)->sin6_addr;
+
+ if (!ipv6_addr_is_multicast(group))
+ return -EINVAL;
+ if (gsf->gf_fmode != MCAST_INCLUDE &&
+ gsf->gf_fmode != MCAST_EXCLUDE)
+ return -EINVAL;
+
+ /* on success this returns with idev->lock read-held and refs taken */
+ idev = ip6_mc_find_dev(group, gsf->gf_interface);
+
+ if (!idev)
+ return -ENODEV;
+ dev = idev->dev;
+ err = -EADDRNOTAVAIL;
+
+ for (pmc=inet6->ipv6_mc_list; pmc; pmc=pmc->next) {
+ if (pmc->ifindex != gsf->gf_interface)
+ continue;
+ if (ipv6_addr_equal(&pmc->addr, group))
+ break;
+ }
+ if (!pmc) /* must have a prior join */
+ goto done;
+ if (gsf->gf_numsrc) {
+ newpsl = (struct ip6_sf_socklist *)sock_kmalloc(sk,
+ IP6_SFLSIZE(gsf->gf_numsrc), GFP_ATOMIC);
+ if (!newpsl) {
+ err = -ENOBUFS;
+ goto done;
+ }
+ newpsl->sl_max = newpsl->sl_count = gsf->gf_numsrc;
+ for (i=0; i<newpsl->sl_count; ++i) {
+ struct sockaddr_in6 *psin6;
+
+ psin6 = (struct sockaddr_in6 *)&gsf->gf_slist[i];
+ newpsl->sl_addr[i] = psin6->sin6_addr;
+ }
+ /* install the new sources before removing the old ones */
+ err = ip6_mc_add_src(idev, group, gsf->gf_fmode,
+ newpsl->sl_count, newpsl->sl_addr, 0);
+ if (err) {
+ sock_kfree_s(sk, newpsl, IP6_SFLSIZE(newpsl->sl_max));
+ goto done;
+ }
+ } else
+ newpsl = NULL;
+ /* err is 0 here only if the gf_numsrc branch ran; otherwise it is still -EADDRNOTAVAIL */
+ psl = pmc->sflist;
+ if (psl) {
+ (void) ip6_mc_del_src(idev, group, pmc->sfmode,
+ psl->sl_count, psl->sl_addr, 0);
+ sock_kfree_s(sk, psl, IP6_SFLSIZE(psl->sl_max));
+ } else
+ (void) ip6_mc_del_src(idev, group, pmc->sfmode, 0, NULL, 0);
+ pmc->sflist = newpsl;
+ pmc->sfmode = gsf->gf_fmode;
+done:
+ read_unlock_bh(&idev->lock);
+ in6_dev_put(idev);
+ dev_put(dev);
+ return err;
+}
+
+/*
+ * Copy a socket's source filter for a group out to user space.
+ *
+ * @sk:     socket owning the membership
+ * @gsf:    kernel copy of the request; gf_numsrc is the number of slots the
+ *          caller provided. gf_fmode and gf_numsrc are overwritten with the
+ *          current mode and the total number of sources.
+ * @optval: user buffer receiving the header and up to gf_numsrc sources
+ * @optlen: user int receiving GROUP_FILTER_SIZE(number of sources copied)
+ *
+ * Returns 0 on success, -EINVAL for a non-multicast group, -ENODEV if no
+ * device is found, -EADDRNOTAVAIL without a prior join on exactly
+ * gf_interface, or -EFAULT if a user copy fails.
+ */
+int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
+ struct group_filter __user *optval, int __user *optlen)
+{
+ int err, i, count, copycount;
+ struct in6_addr *group;
+ struct ipv6_mc_socklist *pmc;
+ struct inet6_dev *idev;
+ struct net_device *dev;
+ struct ipv6_pinfo *inet6 = inet6_sk(sk);
+ struct ip6_sf_socklist *psl;
+
+ group = &((struct sockaddr_in6 *)&gsf->gf_group)->sin6_addr;
+
+ if (!ipv6_addr_is_multicast(group))
+ return -EINVAL;
+
+ /* on success this returns with idev->lock read-held and refs taken */
+ idev = ip6_mc_find_dev(group, gsf->gf_interface);
+
+ if (!idev)
+ return -ENODEV;
+
+ dev = idev->dev;
+
+ err = -EADDRNOTAVAIL;
+
+ for (pmc=inet6->ipv6_mc_list; pmc; pmc=pmc->next) {
+ if (pmc->ifindex != gsf->gf_interface)
+ continue;
+ if (ipv6_addr_equal(group, &pmc->addr))
+ break;
+ }
+ if (!pmc) /* must have a prior join */
+ goto done;
+ gsf->gf_fmode = pmc->sfmode;
+ psl = pmc->sflist;
+ count = psl ? psl->sl_count : 0;
+ /* release lock and references before touching user memory */
+ read_unlock_bh(&idev->lock);
+ in6_dev_put(idev);
+ dev_put(dev);
+
+ /* copy out at most as many sources as the caller has room for */
+ copycount = count < gsf->gf_numsrc ? count : gsf->gf_numsrc;
+ gsf->gf_numsrc = count;
+ if (put_user(GROUP_FILTER_SIZE(copycount), optlen) ||
+ copy_to_user(optval, gsf, GROUP_FILTER_SIZE(0))) {
+ return -EFAULT;
+ }
+ /*
+ * NOTE(review): psl is dereferenced below after idev->lock was dropped
+ * and without ipv6_sk_mc_lock; this presumably relies on the caller
+ * serialising socket-option calls on sk - confirm.
+ */
+ for (i=0; i<copycount; i++) {
+ struct sockaddr_in6 *psin6;
+ struct sockaddr_storage ss;
+
+ psin6 = (struct sockaddr_in6 *)&ss;
+ /* zero the whole storage so no stack bytes leak to user space */
+ memset(&ss, 0, sizeof(ss));
+ psin6->sin6_family = AF_INET6;
+ psin6->sin6_addr = psl->sl_addr[i];
+ if (copy_to_user(&optval->gf_slist[i], &ss, sizeof(ss)))
+ return -EFAULT;
+ }
+ return 0;
+done:
+ read_unlock_bh(&idev->lock);
+ in6_dev_put(idev);
+ dev_put(dev);
+ return err;
+}
+
+/*
+ * Decide whether a packet for group mc_addr coming from src_addr passes
+ * the socket's source filter.
+ *
+ * Returns 1 (accept) when the socket has no membership entry for the
+ * group. With an entry but no source list, the packet is accepted only in
+ * MCAST_EXCLUDE mode. With a source list, INCLUDE mode accepts listed
+ * sources and EXCLUDE mode accepts unlisted ones. Returns 0 to reject.
+ *
+ * The membership list is walked under ipv6_sk_mc_lock (read side).
+ */
+int inet6_mc_check(struct sock *sk, struct in6_addr *mc_addr,
+	struct in6_addr *src_addr)
+{
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct ipv6_mc_socklist *entry;
+	struct ip6_sf_socklist *filter;
+	int allowed = 1;
+
+	read_lock(&ipv6_sk_mc_lock);
+
+	entry = np->ipv6_mc_list;
+	while (entry && !ipv6_addr_equal(&entry->addr, mc_addr))
+		entry = entry->next;
+
+	if (entry) {
+		filter = entry->sflist;
+		if (filter) {
+			int listed = 0;
+			int k;
+
+			for (k = 0; k < filter->sl_count; k++) {
+				if (ipv6_addr_equal(&filter->sl_addr[k], src_addr)) {
+					listed = 1;
+					break;
+				}
+			}
+			if (entry->sfmode == MCAST_INCLUDE && !listed)
+				allowed = 0;
+			if (entry->sfmode == MCAST_EXCLUDE && listed)
+				allowed = 0;
+		} else {
+			allowed = entry->sfmode == MCAST_EXCLUDE;
+		}
+	}
+
+	read_unlock(&ipv6_sk_mc_lock);
+
+	return allowed;
+}
+
+/*
+ * Drop one reference on a device multicast entry. When the count reaches
+ * zero, the entry's reference on its inet6_dev is released and the entry
+ * is freed.
+ */
+static void ma_put(struct ifmcaddr6 *mc)
+{
+	if (!atomic_dec_and_test(&mc->mca_refcnt))
+		return;
+
+	in6_dev_put(mc->idev);
+	kfree(mc);
+}
+
+/*
+ * Activate a group on its device: load the mapped link-layer multicast
+ * address into the device filter (once, tracked by MAF_LOADED) and, if the
+ * device is up and the group is reportable, announce the membership -
+ * through igmp6_join_group() when MLDv1 is in effect, otherwise by arming
+ * the MLDv2 interface-change machinery.
+ */
+static void igmp6_group_added(struct ifmcaddr6 *mc)
+{
+ struct net_device *dev = mc->idev->dev;
+ char buf[MAX_ADDR_LEN];
+
+ spin_lock_bh(&mc->mca_lock);
+ if (!(mc->mca_flags&MAF_LOADED)) {
+ mc->mca_flags |= MAF_LOADED;
+ /* only program the device if the address maps to a link-layer one */
+ if (ndisc_mc_map(&mc->mca_addr, buf, dev, 0) == 0)
+ dev_mc_add(dev, buf, dev->addr_len, 0);
+ }
+ spin_unlock_bh(&mc->mca_lock);
+
+ if (!(dev->flags & IFF_UP) || (mc->mca_flags & MAF_NOREPORT))
+ return;
+
+ if (MLD_V1_SEEN(mc->idev)) {
+ igmp6_join_group(mc);
+ return;
+ }
+ /* else v2 */
+
+ /* number of change reports still to send for this group */
+ mc->mca_crcount = mc->idev->mc_qrv;
+ mld_ifc_event(mc->idev);
+}
+
+/*
+ * Deactivate a group on its device: remove the mapped link-layer address
+ * from the device filter (if MAF_LOADED), send a leave unless the group is
+ * MAF_NOREPORT or the inet6_dev is dead, cancel the group's report timer
+ * and clear its source filter state.
+ *
+ * mca_lock is dropped around igmp6_leave_group() and re-taken afterwards;
+ * the MAF_NOREPORT path jumps to "done" with the lock still held.
+ */
+static void igmp6_group_dropped(struct ifmcaddr6 *mc)
+{
+ struct net_device *dev = mc->idev->dev;
+ char buf[MAX_ADDR_LEN];
+
+ spin_lock_bh(&mc->mca_lock);
+ if (mc->mca_flags&MAF_LOADED) {
+ mc->mca_flags &= ~MAF_LOADED;
+ if (ndisc_mc_map(&mc->mca_addr, buf, dev, 0) == 0)
+ dev_mc_delete(dev, buf, dev->addr_len, 0);
+ }
+
+ if (mc->mca_flags & MAF_NOREPORT)
+ goto done;
+ spin_unlock_bh(&mc->mca_lock);
+
+ if (!mc->idev->dead)
+ igmp6_leave_group(mc);
+
+ spin_lock_bh(&mc->mca_lock);
+ /* a pending timer held a reference on mc; give it back */
+ if (del_timer(&mc->mca_timer))
+ atomic_dec(&mc->mca_refcnt);
+done:
+ ip6_mc_clear_src(mc);
+ spin_unlock_bh(&mc->mca_lock);
+}
+
+/*
+ * deleted ifmcaddr6 manipulation
+ *
+ * mld_add_delrec() records a group that is being deleted on idev->mc_tomb.
+ * A fresh record is allocated (silently skipped on allocation failure),
+ * takes its own reference on idev, and copies address and filter mode from
+ * im. For INCLUDE-mode groups im's mca_tomb and mca_sources lists are
+ * moved (not copied) into the record, leaving im with empty lists.
+ */
+static void mld_add_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im)
+{
+ struct ifmcaddr6 *pmc;
+
+ /* this is an "ifmcaddr6" for convenience; only the fields below
+ * are actually used. In particular, the refcnt and users are not
+ * used for management of the delete list. Using the same structure
+ * for deleted items allows change reports to use common code with
+ * non-deleted or query-response MCA's.
+ */
+ pmc = (struct ifmcaddr6 *)kmalloc(sizeof(*pmc), GFP_ATOMIC);
+ if (!pmc)
+ return;
+ memset(pmc, 0, sizeof(*pmc));
+ spin_lock_bh(&im->mca_lock);
+ spin_lock_init(&pmc->mca_lock);
+ pmc->idev = im->idev;
+ in6_dev_hold(idev);
+ pmc->mca_addr = im->mca_addr;
+ pmc->mca_crcount = idev->mc_qrv;
+ pmc->mca_sfmode = im->mca_sfmode;
+ if (pmc->mca_sfmode == MCAST_INCLUDE) {
+ struct ip6_sf_list *psf;
+
+ /* ownership of both source lists passes to the tomb record */
+ pmc->mca_tomb = im->mca_tomb;
+ pmc->mca_sources = im->mca_sources;
+ im->mca_tomb = im->mca_sources = NULL;
+ for (psf=pmc->mca_sources; psf; psf=psf->sf_next)
+ psf->sf_crcount = pmc->mca_crcount;
+ }
+ spin_unlock_bh(&im->mca_lock);
+
+ /* push on the head of the device's tomb list */
+ write_lock_bh(&idev->mc_lock);
+ pmc->next = idev->mc_tomb;
+ idev->mc_tomb = pmc;
+ write_unlock_bh(&idev->mc_lock);
+}
+
+/*
+ * Remove the deleted-group record for multicast address pmca from
+ * idev->mc_tomb, if there is one, and free it.
+ *
+ * The record is unlinked under idev->mc_lock and torn down after the lock
+ * has been released. A record created by mld_add_delrec() owns both an
+ * mca_tomb and an mca_sources list (both are handed over for INCLUDE-mode
+ * groups), so both lists are freed here; freeing only mca_tomb would leak
+ * every entry still on mca_sources.
+ */
+static void mld_del_delrec(struct inet6_dev *idev, struct in6_addr *pmca)
+{
+	struct ifmcaddr6 *pmc, *pmc_prev;
+	struct ip6_sf_list *psf, *psf_next;
+
+	write_lock_bh(&idev->mc_lock);
+	pmc_prev = NULL;
+	for (pmc=idev->mc_tomb; pmc; pmc=pmc->next) {
+		if (ipv6_addr_equal(&pmc->mca_addr, pmca))
+			break;
+		pmc_prev = pmc;
+	}
+	if (pmc) {
+		if (pmc_prev)
+			pmc_prev->next = pmc->next;
+		else
+			idev->mc_tomb = pmc->next;
+	}
+	write_unlock_bh(&idev->mc_lock);
+
+	if (!pmc)
+		return;
+
+	/* the record is private now: release everything it owns */
+	for (psf=pmc->mca_tomb; psf; psf=psf_next) {
+		psf_next = psf->sf_next;
+		kfree(psf);
+	}
+	for (psf=pmc->mca_sources; psf; psf=psf_next) {
+		psf_next = psf->sf_next;
+		kfree(psf);
+	}
+	in6_dev_put(pmc->idev);
+	kfree(pmc);
+}
+
+/*
+ * Discard all pending deletion state on a device: every record on
+ * idev->mc_tomb is freed (together with its source lists and its idev
+ * reference), and the mca_tomb list of every live group on idev->mc_list
+ * is freed as well.
+ */
+static void mld_clear_delrec(struct inet6_dev *idev)
+{
+ struct ifmcaddr6 *pmc, *nextpmc;
+
+ /* detach the whole tomb list under the lock, free it outside */
+ write_lock_bh(&idev->mc_lock);
+ pmc = idev->mc_tomb;
+ idev->mc_tomb = NULL;
+ write_unlock_bh(&idev->mc_lock);
+
+ for (; pmc; pmc = nextpmc) {
+ nextpmc = pmc->next;
+ ip6_mc_clear_src(pmc);
+ in6_dev_put(pmc->idev);
+ kfree(pmc);
+ }
+
+ /* clear dead sources, too */
+ read_lock_bh(&idev->lock);
+ for (pmc=idev->mc_list; pmc; pmc=pmc->next) {
+ struct ip6_sf_list *psf, *psf_next;
+
+ /* same pattern: detach under mca_lock, free afterwards */
+ spin_lock_bh(&pmc->mca_lock);
+ psf = pmc->mca_tomb;
+ pmc->mca_tomb = NULL;
+ spin_unlock_bh(&pmc->mca_lock);
+ for (; psf; psf=psf_next) {
+ psf_next = psf->sf_next;
+ kfree(psf);
+ }
+ }
+ read_unlock_bh(&idev->lock);
+}
+
+
+/*
+ * device multicast group inc (add if not found)
+ *
+ * If the group is already on the device's mc_list its user count is
+ * raised and an (EXCLUDE, empty) filter reference is added. Otherwise a
+ * new ifmcaddr6 is created in (EXCLUDE, empty) mode, linked at the head of
+ * the list, any pending deletion record for the address is dropped, and
+ * the group is activated through igmp6_group_added().
+ *
+ * Returns 0 on success, -EINVAL if dev has no inet6_dev, -ENODEV if the
+ * inet6_dev is dead, -ENOMEM on allocation failure.
+ */
+int ipv6_dev_mc_inc(struct net_device *dev, struct in6_addr *addr)
+{
+ struct ifmcaddr6 *mc;
+ struct inet6_dev *idev;
+
+ idev = in6_dev_get(dev);
+
+ if (idev == NULL)
+ return -EINVAL;
+
+ write_lock_bh(&idev->lock);
+ if (idev->dead) {
+ write_unlock_bh(&idev->lock);
+ in6_dev_put(idev);
+ return -ENODEV;
+ }
+
+ for (mc = idev->mc_list; mc; mc = mc->next) {
+ if (ipv6_addr_equal(&mc->mca_addr, addr)) {
+ mc->mca_users++;
+ write_unlock_bh(&idev->lock);
+ ip6_mc_add_src(idev, &mc->mca_addr, MCAST_EXCLUDE, 0,
+ NULL, 0);
+ in6_dev_put(idev);
+ return 0;
+ }
+ }
+
+ /*
+ * not found: create a new one.
+ */
+
+ mc = kmalloc(sizeof(struct ifmcaddr6), GFP_ATOMIC);
+
+ if (mc == NULL) {
+ write_unlock_bh(&idev->lock);
+ in6_dev_put(idev);
+ return -ENOMEM;
+ }
+
+ memset(mc, 0, sizeof(struct ifmcaddr6));
+ init_timer(&mc->mca_timer);
+ mc->mca_timer.function = igmp6_timer_handler;
+ mc->mca_timer.data = (unsigned long) mc;
+
+ ipv6_addr_copy(&mc->mca_addr, addr);
+ /* the idev reference taken above now belongs to mc (see ma_put()) */
+ mc->idev = idev;
+ mc->mca_users = 1;
+ /* mca_stamp should be updated upon changes */
+ mc->mca_cstamp = mc->mca_tstamp = jiffies;
+ /* two references: one is dropped by ma_put() below, the other presumably belongs to the mc_list linkage */
+ atomic_set(&mc->mca_refcnt, 2);
+ spin_lock_init(&mc->mca_lock);
+
+ /* initial mode is (EX, empty) */
+ mc->mca_sfmode = MCAST_EXCLUDE;
+ mc->mca_sfcount[MCAST_EXCLUDE] = 1;
+
+ /* all-nodes and sub-link-local scope groups are never reported */
+ if (ipv6_addr_is_ll_all_nodes(&mc->mca_addr) ||
+ IPV6_ADDR_MC_SCOPE(&mc->mca_addr) < IPV6_ADDR_SCOPE_LINKLOCAL)
+ mc->mca_flags |= MAF_NOREPORT;
+
+ mc->next = idev->mc_list;
+ idev->mc_list = mc;
+ write_unlock_bh(&idev->lock);
+
+ mld_del_delrec(idev, &mc->mca_addr);
+ igmp6_group_added(mc);
+ ma_put(mc);
+ return 0;
+}
+
+/*
+ * device multicast group del
+ *
+ * Drops one user of group addr on idev. When the user count reaches zero
+ * the entry is unlinked from idev->mc_list under idev->lock, then - with
+ * the lock released - deactivated through igmp6_group_dropped() and
+ * released with ma_put().
+ *
+ * Returns 0 if the group was found, -ENOENT otherwise.
+ */
+int __ipv6_dev_mc_dec(struct inet6_dev *idev, struct in6_addr *addr)
+{
+ struct ifmcaddr6 *ma, **map;
+
+ write_lock_bh(&idev->lock);
+ /* map always points at the link that refers to ma */
+ for (map = &idev->mc_list; (ma=*map) != NULL; map = &ma->next) {
+ if (ipv6_addr_equal(&ma->mca_addr, addr)) {
+ if (--ma->mca_users == 0) {
+ *map = ma->next;
+ write_unlock_bh(&idev->lock);
+
+ igmp6_group_dropped(ma);
+
+ ma_put(ma);
+ return 0;
+ }
+ write_unlock_bh(&idev->lock);
+ return 0;
+ }
+ }
+ write_unlock_bh(&idev->lock);
+
+ return -ENOENT;
+}
+
+/*
+ * Device-based wrapper around __ipv6_dev_mc_dec(): looks up the device's
+ * inet6_dev, drops one user of group addr on it and releases the
+ * inet6_dev reference again.
+ *
+ * Returns -ENODEV if dev has no inet6_dev, otherwise whatever
+ * __ipv6_dev_mc_dec() returns.
+ */
+int ipv6_dev_mc_dec(struct net_device *dev, struct in6_addr *addr)
+{
+	struct inet6_dev *idev;
+	int err = -ENODEV;
+
+	idev = in6_dev_get(dev);
+	if (idev) {
+		err = __ipv6_dev_mc_dec(idev, addr);
+		in6_dev_put(idev);
+	}
+
+	return err;
+}
+
+/*
+ *	Identify MLD packets for MLD filter exceptions.
+ *
+ *	Returns 1 when nexthdr is ICMPv6, a full ICMPv6 header can be pulled
+ *	from the skb, and the ICMPv6 type is one of the MLD message types
+ *	(query, report, reduction, MLDv2 report). Returns 0 otherwise.
+ */
+int ipv6_is_mld(struct sk_buff *skb, int nexthdr)
+{
+	struct icmp6hdr *icmph;
+
+	if (nexthdr != IPPROTO_ICMPV6 ||
+	    !pskb_may_pull(skb, sizeof(struct icmp6hdr)))
+		return 0;
+
+	/* read the header only after the pull, which may move skb data */
+	icmph = (struct icmp6hdr *)skb->h.raw;
+
+	if (icmph->icmp6_type == ICMPV6_MGM_QUERY ||
+	    icmph->icmp6_type == ICMPV6_MGM_REPORT ||
+	    icmph->icmp6_type == ICMPV6_MGM_REDUCTION ||
+	    icmph->icmp6_type == ICMPV6_MLD2_REPORT)
+		return 1;
+
+	return 0;
+}
+
+/*
+ * check if the interface/address pair is valid
+ *
+ * Returns non-zero when dev has joined 'group' and traffic from src_addr
+ * is not filtered out by the group's interface source filter; 0 otherwise
+ * (including when dev has no inet6_dev or has not joined the group).
+ * A NULL or unspecified src_addr is never filtered.
+ */
+int ipv6_chk_mcast_addr(struct net_device *dev, struct in6_addr *group,
+ struct in6_addr *src_addr)
+{
+ struct inet6_dev *idev;
+ struct ifmcaddr6 *mc;
+ int rv = 0;
+
+ idev = in6_dev_get(dev);
+ if (idev) {
+ read_lock_bh(&idev->lock);
+ for (mc = idev->mc_list; mc; mc=mc->next) {
+ if (ipv6_addr_equal(&mc->mca_addr, group))
+ break;
+ }
+ if (mc) {
+ if (src_addr && !ipv6_addr_any(src_addr)) {
+ struct ip6_sf_list *psf;
+
+ spin_lock_bh(&mc->mca_lock);
+ for (psf=mc->mca_sources;psf;psf=psf->sf_next) {
+ if (ipv6_addr_equal(&psf->sf_addr, src_addr))
+ break;
+ }
+ /*
+ * Known source: accept if it has any INCLUDE count, or
+ * if its EXCLUDE count differs from the group's EXCLUDE
+ * count. Unknown source: accept only if the group has
+ * a non-zero EXCLUDE count.
+ */
+ if (psf)
+ rv = psf->sf_count[MCAST_INCLUDE] ||
+ psf->sf_count[MCAST_EXCLUDE] !=
+ mc->mca_sfcount[MCAST_EXCLUDE];
+ else
+ rv = mc->mca_sfcount[MCAST_EXCLUDE] !=0;
+ spin_unlock_bh(&mc->mca_lock);
+ } else
+ rv = 1; /* don't filter unspecified source */
+ }
+ read_unlock_bh(&idev->lock);
+ in6_dev_put(idev);
+ }
+ return rv;
+}
+
+/*
+ * Arm the device's general-query timer at a random point within
+ * idev->mc_maxdelay jiffies (plus 2) and flag it as running. If the timer
+ * was not already pending, a reference on idev is taken for it.
+ *
+ * idev->mc_maxdelay must be non-zero (it is the modulus); the caller in
+ * igmp6_event_query() forces it to at least 1 before calling.
+ */
+static void mld_gq_start_timer(struct inet6_dev *idev)
+{
+ int tv = net_random() % idev->mc_maxdelay;
+
+ idev->mc_gq_running = 1;
+ if (!mod_timer(&idev->mc_gq_timer, jiffies+tv+2))
+ in6_dev_hold(idev);
+}
+
+/*
+ * Arm the device's interface-change timer at a random point within 'delay'
+ * jiffies (plus 2). If the timer was not already pending, a reference on
+ * idev is taken for it.
+ *
+ * NOTE(review): delay is used as a modulus, so callers presumably never
+ * pass 0 - confirm.
+ */
+static void mld_ifc_start_timer(struct inet6_dev *idev, int delay)
+{
+ int tv = net_random() % delay;
+
+ if (!mod_timer(&idev->mc_ifc_timer, jiffies+tv+2))
+ in6_dev_hold(idev);
+}
+
+/*
+ * IGMP handling (alias multicast ICMPv6 messages)
+ */
+
+/*
+ * Schedule a report for group ma in response to a query.
+ *
+ * @ma:       group being queried; igmp6_event_query() calls this with
+ *            ma->mca_lock held
+ * @resptime: maximum response delay in jiffies
+ *
+ * If a report timer is already pending and would fire within resptime,
+ * that remaining time is kept. Otherwise a new random delay below resptime
+ * is chosen (1 jiffy when resptime is 0).
+ */
+static void igmp6_group_queried(struct ifmcaddr6 *ma, unsigned long resptime)
+{
+ unsigned long delay = resptime;
+
+ /* Do not start timer for these addresses */
+ if (ipv6_addr_is_ll_all_nodes(&ma->mca_addr) ||
+ IPV6_ADDR_MC_SCOPE(&ma->mca_addr) < IPV6_ADDR_SCOPE_LINKLOCAL)
+ return;
+
+ /* a pending timer holds a reference; take it back and note the time left */
+ if (del_timer(&ma->mca_timer)) {
+ atomic_dec(&ma->mca_refcnt);
+ delay = ma->mca_timer.expires - jiffies;
+ }
+
+ if (delay >= resptime) {
+ if (resptime)
+ delay = net_random() % resptime;
+ else
+ delay = 1;
+ }
+ ma->mca_timer.expires = jiffies + delay;
+ /* mod_timer() returning 0 means the timer was inactive: it now needs its own reference */
+ if (!mod_timer(&ma->mca_timer, jiffies + delay))
+ atomic_inc(&ma->mca_refcnt);
+ ma->mca_flags |= MAF_TIMER_RUNNING;
+}
+
+/*
+ * Flag (sf_gsresp = 1) every source of group pmc that appears in the
+ * srcs[] array of nsrcs addresses. The walk over pmc->mca_sources stops
+ * early once nsrcs sources have been flagged.
+ */
+static void mld_marksources(struct ifmcaddr6 *pmc, int nsrcs,
+	struct in6_addr *srcs)
+{
+	struct ip6_sf_list *cur = pmc->mca_sources;
+	int matched = 0;
+
+	while (cur && matched != nsrcs) {
+		int k;
+
+		for (k = 0; k < nsrcs; k++) {
+			if (!ipv6_addr_equal(&srcs[k], &cur->sf_addr))
+				continue;
+			cur->sf_gsresp = 1;
+			matched++;
+			break;
+		}
+		cur = cur->sf_next;
+	}
+}
+
+/*
+ * Handle a received MLD query.
+ *
+ * The MLD payload length decides the version: exactly 24 bytes is treated
+ * as MLDv1, 28 bytes or more as MLDv2, anything else is rejected.
+ * An MLDv1 query switches the device to v1 compatibility for a while and
+ * cancels pending v2 state. An MLDv2 general query only arms the general
+ * query timer. All other queries schedule per-group report timers through
+ * igmp6_group_queried().
+ *
+ * Returns 0 when the query was processed (or the device has no inet6_dev),
+ * -EINVAL for malformed or unacceptable queries.
+ *
+ * NOTE(review): mlh2 is derived from skb->h.raw before pskb_may_pull();
+ * if the pull can reallocate the header, mlh2 may be stale while hdr
+ * (read afterwards) is not - confirm.
+ * NOTE(review): mlh2->nsrcs entries of mlh2->srcs[] are handed to
+ * mld_marksources() without a visible check that they lie within the
+ * packet - confirm this is validated elsewhere.
+ */
+int igmp6_event_query(struct sk_buff *skb)
+{
+ struct mld2_query *mlh2 = (struct mld2_query *) skb->h.raw;
+ struct ifmcaddr6 *ma;
+ struct in6_addr *group;
+ unsigned long max_delay;
+ struct inet6_dev *idev;
+ struct icmp6hdr *hdr;
+ int group_type;
+ int mark = 0;
+ int len;
+
+ if (!pskb_may_pull(skb, sizeof(struct in6_addr)))
+ return -EINVAL;
+
+ /* compute payload length excluding extension headers */
+ len = ntohs(skb->nh.ipv6h->payload_len) + sizeof(struct ipv6hdr);
+ len -= (char *)skb->h.raw - (char *)skb->nh.ipv6h;
+
+ /* Drop queries with not link local source */
+ if (!(ipv6_addr_type(&skb->nh.ipv6h->saddr)&IPV6_ADDR_LINKLOCAL))
+ return -EINVAL;
+
+ idev = in6_dev_get(skb->dev);
+
+ if (idev == NULL)
+ return 0;
+
+ hdr = (struct icmp6hdr *) skb->h.raw;
+ /* the queried group address directly follows the ICMPv6 header */
+ group = (struct in6_addr *) (hdr + 1);
+ group_type = ipv6_addr_type(group);
+
+ /* only the unspecified address (general query) or a multicast group is valid */
+ if (group_type != IPV6_ADDR_ANY &&
+ !(group_type&IPV6_ADDR_MULTICAST)) {
+ in6_dev_put(idev);
+ return -EINVAL;
+ }
+
+ if (len == 24) {
+ int switchback;
+ /* MLDv1 router present */
+
+ /* Translate milliseconds to jiffies */
+ max_delay = (ntohs(hdr->icmp6_maxdelay)*HZ)/1000;
+
+ /* stay in v1 mode until this jiffies deadline (see MLD_V1_SEEN) */
+ switchback = (idev->mc_qrv + 1) * max_delay;
+ idev->mc_v1_seen = jiffies + switchback;
+
+ /* cancel the interface change timer */
+ idev->mc_ifc_count = 0;
+ if (del_timer(&idev->mc_ifc_timer))
+ __in6_dev_put(idev);
+ /* clear deleted report items */
+ mld_clear_delrec(idev);
+ } else if (len >= 28) {
+ /* MLDv2: decode the response code, milliseconds to jiffies, at least 1 */
+ max_delay = (MLDV2_MRC(ntohs(mlh2->mrc))*HZ)/1000;
+ if (!max_delay)
+ max_delay = 1;
+ idev->mc_maxdelay = max_delay;
+ if (mlh2->qrv)
+ idev->mc_qrv = mlh2->qrv;
+ if (group_type == IPV6_ADDR_ANY) { /* general query */
+ if (mlh2->nsrcs) {
+ in6_dev_put(idev);
+ return -EINVAL; /* no sources allowed */
+ }
+ mld_gq_start_timer(idev);
+ in6_dev_put(idev);
+ return 0;
+ }
+ /* mark sources to include, if group & source-specific */
+ mark = mlh2->nsrcs != 0;
+ } else {
+ in6_dev_put(idev);
+ return -EINVAL;
+ }
+
+ read_lock_bh(&idev->lock);
+ if (group_type == IPV6_ADDR_ANY) {
+ /* (v1) general query: schedule a report for every group */
+ for (ma = idev->mc_list; ma; ma=ma->next) {
+ spin_lock_bh(&ma->mca_lock);
+ igmp6_group_queried(ma, max_delay);
+ spin_unlock_bh(&ma->mca_lock);
+ }
+ } else {
+ /* group-specific query: only the matching group is handled */
+ for (ma = idev->mc_list; ma; ma=ma->next) {
+ if (group_type != IPV6_ADDR_ANY &&
+ !ipv6_addr_equal(group, &ma->mca_addr))
+ continue;
+ spin_lock_bh(&ma->mca_lock);
+ if (ma->mca_flags & MAF_TIMER_RUNNING) {
+ /* gsquery <- gsquery && mark */
+ if (!mark)
+ ma->mca_flags &= ~MAF_GSQUERY;
+ } else {
+ /* gsquery <- mark */
+ if (mark)
+ ma->mca_flags |= MAF_GSQUERY;
+ else
+ ma->mca_flags &= ~MAF_GSQUERY;
+ }
+ if (ma->mca_flags & MAF_GSQUERY)
+ mld_marksources(ma, ntohs(mlh2->nsrcs),
+ mlh2->srcs);
+ igmp6_group_queried(ma, max_delay);
+ spin_unlock_bh(&ma->mca_lock);
+ if (group_type != IPV6_ADDR_ANY)
+ break;
+ }
+ }
+ read_unlock_bh(&idev->lock);
+ in6_dev_put(idev);
+
+ return 0;
+}
+
+
+/*
+ * Handle a received MLD report from another node: if we are a member of
+ * the reported group, cancel our own pending report for it and clear the
+ * MAF_LAST_REPORTER and MAF_TIMER_RUNNING flags.
+ *
+ * Returns 0 when processed or ignored (looped-back packet), -EINVAL for a
+ * truncated packet or a source that is neither unspecified nor link-local,
+ * -ENODEV if the receiving device has no inet6_dev.
+ */
+int igmp6_event_report(struct sk_buff *skb)
+{
+ struct ifmcaddr6 *ma;
+ struct in6_addr *addrp;
+ struct inet6_dev *idev;
+ struct icmp6hdr *hdr;
+ int addr_type;
+
+ /* Our own report looped back. Ignore it. */
+ if (skb->pkt_type == PACKET_LOOPBACK)
+ return 0;
+
+ if (!pskb_may_pull(skb, sizeof(struct in6_addr)))
+ return -EINVAL;
+
+ hdr = (struct icmp6hdr*) skb->h.raw;
+
+ /* Drop reports with not link local source */
+ addr_type = ipv6_addr_type(&skb->nh.ipv6h->saddr);
+ if (addr_type != IPV6_ADDR_ANY &&
+ !(addr_type&IPV6_ADDR_LINKLOCAL))
+ return -EINVAL;
+
+ /* the reported group address directly follows the ICMPv6 header */
+ addrp = (struct in6_addr *) (hdr + 1);
+
+ idev = in6_dev_get(skb->dev);
+ if (idev == NULL)
+ return -ENODEV;
+
+ /*
+ * Cancel the timer for this group
+ */
+
+ read_lock_bh(&idev->lock);
+ for (ma = idev->mc_list; ma; ma=ma->next) {
+ if (ipv6_addr_equal(&ma->mca_addr, addrp)) {
+ /* plain spin_lock: bottom halves are already off via read_lock_bh() */
+ spin_lock(&ma->mca_lock);
+ if (del_timer(&ma->mca_timer))
+ atomic_dec(&ma->mca_refcnt);
+ ma->mca_flags &= ~(MAF_LAST_REPORTER|MAF_TIMER_RUNNING);
+ spin_unlock(&ma->mca_lock);
+ break;
+ }
+ }
+ read_unlock_bh(&idev->lock);
+ in6_dev_put(idev);
+ return 0;
+}
+
+/*
+ * Decide whether source psf of group pmc belongs in an MLDv2 group record
+ * of the given type.
+ *
+ * @type:     MLD2_* record type being built
+ * @gdeleted: non-zero when the record is built for a deleted group
+ *            (presumably a tomb entry - confirm against callers)
+ * @sdeleted: non-zero when psf comes from the deleted-source list
+ *            (add_grec() selects pmc->mca_tomb when this is set)
+ *
+ * Returns non-zero to include the source, 0 to leave it out. Unknown
+ * record types yield 0.
+ */
+static int is_in(struct ifmcaddr6 *pmc, struct ip6_sf_list *psf, int type,
+ int gdeleted, int sdeleted)
+{
+ switch (type) {
+ case MLD2_MODE_IS_INCLUDE:
+ case MLD2_MODE_IS_EXCLUDE:
+ /* current-state records never carry deleted items */
+ if (gdeleted || sdeleted)
+ return 0;
+ /* for a group-and-source query, only sources marked by it */
+ return !((pmc->mca_flags & MAF_GSQUERY) && !psf->sf_gsresp);
+ case MLD2_CHANGE_TO_INCLUDE:
+ if (gdeleted || sdeleted)
+ return 0;
+ return psf->sf_count[MCAST_INCLUDE] != 0;
+ case MLD2_CHANGE_TO_EXCLUDE:
+ if (gdeleted || sdeleted)
+ return 0;
+ if (pmc->mca_sfcount[MCAST_EXCLUDE] == 0 ||
+ psf->sf_count[MCAST_INCLUDE])
+ return 0;
+ /* source must be excluded by every EXCLUDE-mode user of the group */
+ return pmc->mca_sfcount[MCAST_EXCLUDE] ==
+ psf->sf_count[MCAST_EXCLUDE];
+ case MLD2_ALLOW_NEW_SOURCES:
+ /* only sources with change reports still outstanding */
+ if (gdeleted || !psf->sf_crcount)
+ return 0;
+ return (pmc->mca_sfmode == MCAST_INCLUDE) ^ sdeleted;
+ case MLD2_BLOCK_OLD_SOURCES:
+ if (pmc->mca_sfmode == MCAST_INCLUDE)
+ return gdeleted || (psf->sf_crcount && sdeleted);
+ return psf->sf_crcount && !gdeleted && !sdeleted;
+ }
+ return 0;
+}
+
+/*
+ * Count the sources on pmc->mca_sources that is_in() selects for a group
+ * record of the given type and deletion flags.
+ */
+static int
+mld_scount(struct ifmcaddr6 *pmc, int type, int gdeleted, int sdeleted)
+{
+	struct ip6_sf_list *cur = pmc->mca_sources;
+	int total = 0;
+
+	while (cur) {
+		if (is_in(pmc, cur, type, gdeleted, sdeleted))
+			total++;
+		cur = cur->sf_next;
+	}
+	return total;
+}
+
+/*
+ * Allocate and pre-fill an skb for an MLDv2 report on dev.
+ *
+ * The skb gets link-layer headroom, an optional link-layer header towards
+ * the mapped address of mld2_all_mcr, an IPv6 header with next header
+ * NEXTHDR_HOP, the 8-byte "ra" block below, and an MLDv2 report header
+ * with zero group records. skb->h.raw points at the report header.
+ *
+ * @size: payload bytes to allocate in addition to the link-layer reserve
+ *
+ * Returns the skb, or NULL if allocation or link-layer header
+ * construction fails.
+ */
+static struct sk_buff *mld_newpack(struct net_device *dev, int size)
+{
+ struct sock *sk = igmp6_socket->sk;
+ struct sk_buff *skb;
+ struct mld2_report *pmr;
+ struct in6_addr addr_buf;
+ int err;
+ /* hop-by-hop header: next header ICMPv6, length 0, router alert option, PadN */
+ u8 ra[8] = { IPPROTO_ICMPV6, 0,
+ IPV6_TLV_ROUTERALERT, 2, 0, 0,
+ IPV6_TLV_PADN, 0 };
+
+ /* we assume size > sizeof(ra) here */
+ skb = sock_alloc_send_skb(sk, size + LL_RESERVED_SPACE(dev), 1, &err);
+
+ if (skb == 0)
+ return NULL;
+
+ skb_reserve(skb, LL_RESERVED_SPACE(dev));
+ if (dev->hard_header) {
+ unsigned char ha[MAX_ADDR_LEN];
+
+ ndisc_mc_map(&mld2_all_mcr, ha, dev, 1);
+ if (dev->hard_header(skb, dev, ETH_P_IPV6,ha,NULL,size) < 0) {
+ kfree_skb(skb);
+ return NULL;
+ }
+ }
+
+ if (ipv6_get_lladdr(dev, &addr_buf)) {
+ /* <draft-ietf-magma-mld-source-05.txt>:
+ * use unspecified address as the source address
+ * when a valid link-local address is not available.
+ */
+ memset(&addr_buf, 0, sizeof(addr_buf));
+ }
+
+ /* payload length is passed as 0 here; mld_sendpack() fills it in later */
+ ip6_nd_hdr(sk, skb, dev, &addr_buf, &mld2_all_mcr, NEXTHDR_HOP, 0);
+
+ memcpy(skb_put(skb, sizeof(ra)), ra, sizeof(ra));
+
+ pmr =(struct mld2_report *)skb_put(skb, sizeof(*pmr));
+ skb->h.raw = (unsigned char *)pmr;
+ pmr->type = ICMPV6_MLD2_REPORT;
+ pmr->resv1 = 0;
+ pmr->csum = 0;
+ pmr->resv2 = 0;
+ pmr->ngrec = 0;
+ return skb;
+}
+
+/*
+ * Finalise and transmit an MLDv2 report built with mld_newpack() and
+ * add_grhead()/add_grec(): fill in the IPv6 payload length and the ICMPv6
+ * checksum, pass the skb through the NF_IP6_LOCAL_OUT hook to
+ * dev_queue_xmit(), and update the output statistics.
+ */
+static void mld_sendpack(struct sk_buff *skb)
+{
+ struct ipv6hdr *pip6 = skb->nh.ipv6h;
+ struct mld2_report *pmr = (struct mld2_report *)skb->h.raw;
+ int payload_len, mldlen;
+ /* looked up before the skb is handed off; released at the end */
+ struct inet6_dev *idev = in6_dev_get(skb->dev);
+ int err;
+
+ IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
+ /* everything after the IPv6 header, including the hop-by-hop block */
+ payload_len = skb->tail - (unsigned char *)skb->nh.ipv6h -
+ sizeof(struct ipv6hdr);
+ /* the MLD message itself, from the report header to the end */
+ mldlen = skb->tail - skb->h.raw;
+ pip6->payload_len = htons(payload_len);
+
+ pmr->csum = csum_ipv6_magic(&pip6->saddr, &pip6->daddr, mldlen,
+ IPPROTO_ICMPV6, csum_partial(skb->h.raw, mldlen, 0));
+ err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dev,
+ dev_queue_xmit);
+ if (!err) {
+ ICMP6_INC_STATS(idev,ICMP6_MIB_OUTMSGS);
+ IP6_INC_STATS(IPSTATS_MIB_OUTMCASTPKTS);
+ } else
+ IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
+
+ if (likely(idev != NULL))
+ in6_dev_put(idev);
+}
+
+ /*
+  * Size in bytes of the group record add_grec() would emit for @pmc:
+  * one record header plus one source address for every source counted
+  * by mld_scount() for the given @type/@gdel/@sdel.
+  *
+  * Each source is appended by add_grec() as a full struct in6_addr, so
+  * that is the per-source size used here (a 4-byte multiplier would
+  * underestimate the record and defeat the "does it fit" check).
+  */
+ static int grec_size(struct ifmcaddr6 *pmc, int type, int gdel, int sdel)
+ {
+ 	return sizeof(struct mld2_grec) +
+ 	       sizeof(struct in6_addr) * mld_scount(pmc, type, gdel, sdel);
+ }
+
+ /*
+  * Append a group record header for @pmc to @skb, allocating a fresh
+  * report via mld_newpack() when @skb is NULL.
+  *
+  * The new record has type @type, no auxiliary words and a source count
+  * of zero; the report's record counter is incremented. A pointer to
+  * the record is stored in *@ppgr so the caller can fill in the source
+  * count later. Returns the skb, or NULL if a packet was needed and
+  * could not be allocated (in which case *@ppgr is left untouched).
+  */
+ static struct sk_buff *add_grhead(struct sk_buff *skb, struct ifmcaddr6 *pmc,
+ 	int type, struct mld2_grec **ppgr)
+ {
+ 	struct net_device *dev = pmc->idev->dev;
+ 	struct mld2_report *report;
+ 	struct mld2_grec *rec;
+
+ 	if (!skb) {
+ 		skb = mld_newpack(dev, dev->mtu);
+ 		if (!skb)
+ 			return NULL;
+ 	}
+
+ 	rec = (struct mld2_grec *)skb_put(skb, sizeof(struct mld2_grec));
+ 	rec->grec_type = type;
+ 	rec->grec_auxwords = 0;
+ 	rec->grec_nsrcs = 0;
+ 	rec->grec_mca = pmc->mca_addr;	/* structure copy */
+
+ 	/* one more record in this report (counter kept in network order) */
+ 	report = (struct mld2_report *)skb->h.raw;
+ 	report->ngrec = htons(ntohs(report->ngrec) + 1);
+
+ 	*ppgr = rec;
+ 	return skb;
+ }
+
+ /*
+ * Bytes still usable in a report skb: 0 for a NULL skb; otherwise the
+ * device MTU minus the current skb length when the skb has a device,
+ * else the skb's tailroom. Note that (skb) is evaluated more than once.
+ */
+ #define AVAILABLE(skb) ((skb) ? ((skb)->dev ? (skb)->dev->mtu - (skb)->len : \
+ skb_tailroom(skb)) : 0)
+
+ /*
+  * Append a group record of @type for @pmc to the report in @skb,
+  * sending full packets and starting new ones as needed.
+  *
+  * @skb:      report under construction, or NULL if none yet
+  * @pmc:      multicast address entry the record describes
+  * @type:     MLD2_* record type
+  * @gdeleted: passed through to is_in() when selecting sources
+  * @sdeleted: non-zero to walk pmc->mca_tomb instead of pmc->mca_sources
+  *
+  * Sources are selected with is_in(). For ALLOW/BLOCK records each
+  * emitted source has its sf_crcount decremented, and when @sdeleted or
+  * @gdeleted is set an entry reaching zero is unlinked and freed.
+  *
+  * Returns the (possibly new) report skb, or NULL when there is none,
+  * including when a packet could not be allocated.
+  */
+ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
+ 	int type, int gdeleted, int sdeleted)
+ {
+ 	struct net_device *dev = pmc->idev->dev;
+ 	struct mld2_report *pmr;
+ 	struct mld2_grec *pgr = NULL;
+ 	struct ip6_sf_list *psf, *psf_next, *psf_prev, **psf_list;
+ 	int scount, first, isquery, truncate;
+
+ 	if (pmc->mca_flags & MAF_NOREPORT)
+ 		return skb;
+
+ 	isquery = type == MLD2_MODE_IS_INCLUDE ||
+ 		  type == MLD2_MODE_IS_EXCLUDE;
+ 	truncate = type == MLD2_MODE_IS_EXCLUDE ||
+ 		   type == MLD2_CHANGE_TO_EXCLUDE;
+
+ 	psf_list = sdeleted ? &pmc->mca_tomb : &pmc->mca_sources;
+
+ 	if (!*psf_list) {
+ 		if (type == MLD2_ALLOW_NEW_SOURCES ||
+ 		    type == MLD2_BLOCK_OLD_SOURCES)
+ 			return skb;
+ 		if (pmc->mca_crcount || isquery) {
+ 			/* make sure we have room for group header and at
+ 			 * least one source.
+ 			 */
+ 			if (skb && AVAILABLE(skb) < sizeof(struct mld2_grec)+
+ 			    sizeof(struct in6_addr)) {
+ 				mld_sendpack(skb);
+ 				skb = NULL; /* add_grhead will get a new one */
+ 			}
+ 			skb = add_grhead(skb, pmc, type, &pgr);
+ 		}
+ 		return skb;
+ 	}
+ 	pmr = skb ? (struct mld2_report *)skb->h.raw : NULL;
+
+ 	/* EX and TO_EX get a fresh packet, if needed */
+ 	if (truncate) {
+ 		if (pmr && pmr->ngrec &&
+ 		    AVAILABLE(skb) < grec_size(pmc, type, gdeleted, sdeleted)) {
+ 			if (skb)
+ 				mld_sendpack(skb);
+ 			skb = mld_newpack(dev, dev->mtu);
+ 		}
+ 	}
+ 	first = 1;
+ 	scount = 0;
+ 	psf_prev = NULL;
+ 	for (psf=*psf_list; psf; psf=psf_next) {
+ 		struct in6_addr *psrc;
+
+ 		/* saved up front: psf may be freed further down */
+ 		psf_next = psf->sf_next;
+
+ 		if (!is_in(pmc, psf, type, gdeleted, sdeleted)) {
+ 			psf_prev = psf;
+ 			continue;
+ 		}
+
+ 		/* clear marks on query responses */
+ 		if (isquery)
+ 			psf->sf_gsresp = 0;
+
+ 		if (AVAILABLE(skb) < sizeof(*psrc) +
+ 		    first*sizeof(struct mld2_grec)) {
+ 			if (truncate && !first)
+ 				break;	/* truncate these */
+ 			if (pgr)
+ 				pgr->grec_nsrcs = htons(scount);
+ 			if (skb)
+ 				mld_sendpack(skb);
+ 			skb = mld_newpack(dev, dev->mtu);
+ 			first = 1;
+ 			scount = 0;
+ 		}
+ 		if (first) {
+ 			skb = add_grhead(skb, pmc, type, &pgr);
+ 			first = 0;
+ 			/* no packet could be allocated: give up rather
+ 			 * than dereference a NULL skb in skb_put() below
+ 			 */
+ 			if (!skb)
+ 				return NULL;
+ 		}
+ 		psrc = (struct in6_addr *)skb_put(skb, sizeof(*psrc));
+ 		*psrc = psf->sf_addr;
+ 		scount++;
+ 		if ((type == MLD2_ALLOW_NEW_SOURCES ||
+ 		     type == MLD2_BLOCK_OLD_SOURCES) && psf->sf_crcount) {
+ 			psf->sf_crcount--;
+ 			if ((sdeleted || gdeleted) && psf->sf_crcount == 0) {
+ 				if (psf_prev)
+ 					psf_prev->sf_next = psf->sf_next;
+ 				else
+ 					*psf_list = psf->sf_next;
+ 				kfree(psf);
+ 				continue;
+ 			}
+ 		}
+ 		psf_prev = psf;
+ 	}
+ 	if (pgr)
+ 		pgr->grec_nsrcs = htons(scount);
+
+ 	if (isquery)
+ 		pmc->mca_flags &= ~MAF_GSQUERY;	/* clear query state */
+ 	return skb;
+ }
+
+ /*
+  * Send a current-state report.
+  *
+  * With @pmc set, only that entry is reported. With @pmc NULL, every
+  * entry on idev->mc_list that is not flagged MAF_NOREPORT is reported,
+  * walking the list under idev->lock (read side). Each entry is handed
+  * to add_grec() under its own mca_lock, as MODE_IS_EXCLUDE when it has
+  * any exclude-mode users and MODE_IS_INCLUDE otherwise.
+  */
+ static void mld_send_report(struct inet6_dev *idev, struct ifmcaddr6 *pmc)
+ {
+ 	struct sk_buff *skb = NULL;
+ 	int type;
+
+ 	if (pmc) {
+ 		spin_lock_bh(&pmc->mca_lock);
+ 		type = pmc->mca_sfcount[MCAST_EXCLUDE] ?
+ 			MLD2_MODE_IS_EXCLUDE : MLD2_MODE_IS_INCLUDE;
+ 		skb = add_grec(skb, pmc, type, 0, 0);
+ 		spin_unlock_bh(&pmc->mca_lock);
+ 	} else {
+ 		struct ifmcaddr6 *cur;
+
+ 		read_lock_bh(&idev->lock);
+ 		for (cur = idev->mc_list; cur; cur = cur->next) {
+ 			if (cur->mca_flags & MAF_NOREPORT)
+ 				continue;
+ 			spin_lock_bh(&cur->mca_lock);
+ 			type = cur->mca_sfcount[MCAST_EXCLUDE] ?
+ 				MLD2_MODE_IS_EXCLUDE : MLD2_MODE_IS_INCLUDE;
+ 			skb = add_grec(skb, cur, type, 0, 0);
+ 			spin_unlock_bh(&cur->mca_lock);
+ 		}
+ 		read_unlock_bh(&idev->lock);
+ 	}
+
+ 	/* flush whatever is left in the last, partially filled packet */
+ 	if (skb)
+ 		mld_sendpack(skb);
+ }
+
+/*
+ * remove zero-count source records from a source filter list
+ */
+static void mld_clear_zeros(struct ip6_sf_list **ppsf)
+{
+ struct ip6_sf_list *psf_prev, *psf_next, *psf;
+
+ psf_prev = NULL;
+ for (psf=*ppsf; psf; psf = psf_next) {
+ psf_next = psf->sf_next;
+ if (psf->sf_crcount == 0) {
+ if (psf_prev)
+ psf_prev->sf_next = psf->sf_next;
+ else
+ *ppsf = psf->sf_next;
+ kfree(psf);
+ } else
+ psf_prev = psf;
+ }
+}
+
+ /*
+ * Build and send MLDv2 state-change records for @idev.
+ *
+ * Pass 1 walks idev->mc_tomb (deleted addresses) under idev->mc_lock,
+ * emitting records, counting down mca_crcount and freeing tomb entries
+ * that have nothing left to report. Pass 2 walks idev->mc_list and,
+ * per entry under its mca_lock, emits source-change records followed by
+ * a filter-mode-change record while mca_crcount is non-zero.
+ * idev->lock is read-held across both passes.
+ */
+ static void mld_send_cr(struct inet6_dev *idev)
+ {
+ struct ifmcaddr6 *pmc, *pmc_prev, *pmc_next;
+ struct sk_buff *skb = NULL;
+ int type, dtype;
+
+ read_lock_bh(&idev->lock);
+ write_lock_bh(&idev->mc_lock);
+
+ /* deleted MCA's */
+ pmc_prev = NULL;
+ for (pmc=idev->mc_tomb; pmc; pmc=pmc_next) {
+ /* saved up front: pmc may be freed at the end of this iteration */
+ pmc_next = pmc->next;
+ if (pmc->mca_sfmode == MCAST_INCLUDE) {
+ type = MLD2_BLOCK_OLD_SOURCES;
+ dtype = MLD2_BLOCK_OLD_SOURCES;
+ /* once for mca_sources (sdeleted=0), once for mca_tomb (sdeleted=1) */
+ skb = add_grec(skb, pmc, type, 1, 0);
+ skb = add_grec(skb, pmc, dtype, 1, 1);
+ }
+ if (pmc->mca_crcount) {
+ pmc->mca_crcount--;
+ if (pmc->mca_sfmode == MCAST_EXCLUDE) {
+ type = MLD2_CHANGE_TO_INCLUDE;
+ skb = add_grec(skb, pmc, type, 1, 0);
+ }
+ if (pmc->mca_crcount == 0) {
+ mld_clear_zeros(&pmc->mca_tomb);
+ mld_clear_zeros(&pmc->mca_sources);
+ }
+ }
+ /* nothing left to report for this address: unlink and free it */
+ if (pmc->mca_crcount == 0 && !pmc->mca_tomb &&
+ !pmc->mca_sources) {
+ if (pmc_prev)
+ pmc_prev->next = pmc_next;
+ else
+ idev->mc_tomb = pmc_next;
+ in6_dev_put(pmc->idev);
+ kfree(pmc);
+ } else
+ pmc_prev = pmc;
+ }
+ write_unlock_bh(&idev->mc_lock);
+
+ /* change recs */
+ for (pmc=idev->mc_list; pmc; pmc=pmc->next) {
+ spin_lock_bh(&pmc->mca_lock);
+ /* type is used for the live source list, dtype for deleted sources */
+ if (pmc->mca_sfcount[MCAST_EXCLUDE]) {
+ type = MLD2_BLOCK_OLD_SOURCES;
+ dtype = MLD2_ALLOW_NEW_SOURCES;
+ } else {
+ type = MLD2_ALLOW_NEW_SOURCES;
+ dtype = MLD2_BLOCK_OLD_SOURCES;
+ }
+ skb = add_grec(skb, pmc, type, 0, 0);
+ skb = add_grec(skb, pmc, dtype, 0, 1); /* deleted sources */
+
+ /* filter mode changes */
+ if (pmc->mca_crcount) {
+ pmc->mca_crcount--;
+ if (pmc->mca_sfmode == MCAST_EXCLUDE)
+ type = MLD2_CHANGE_TO_EXCLUDE;
+ else
+ type = MLD2_CHANGE_TO_INCLUDE;
+ skb = add_grec(skb, pmc, type, 0, 0);
+ }
+ spin_unlock_bh(&pmc->mca_lock);
+ }
+ read_unlock_bh(&idev->lock);
+ if (!skb)
+ return;
+ /* flush the last, partially filled packet */
+ (void) mld_sendpack(skb);
+ }
+
+ /*
+ * Build and transmit a single ICMPv6 group-membership message of @type
+ * for multicast address @addr on @dev.
+ *
+ * The message carries @addr in its body. It is addressed to @addr
+ * itself, except for ICMPV6_MGM_REDUCTION which is addressed to the
+ * address produced by ipv6_addr_all_routers(). An 8-byte hop-by-hop
+ * block with a router-alert option precedes the ICMPv6 header.
+ * Failures are only reflected in the SNMP discard counter.
+ */
+ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
+ {
+ struct sock *sk = igmp6_socket->sk;
+ struct inet6_dev *idev;
+ struct sk_buff *skb;
+ struct icmp6hdr *hdr;
+ struct in6_addr *snd_addr;
+ struct in6_addr *addrp;
+ struct in6_addr addr_buf;
+ struct in6_addr all_routers;
+ int err, len, payload_len, full_len;
+ /* hop-by-hop block: next header ICMPv6, router-alert TLV, PadN TLV */
+ u8 ra[8] = { IPPROTO_ICMPV6, 0,
+ IPV6_TLV_ROUTERALERT, 2, 0, 0,
+ IPV6_TLV_PADN, 0 };
+
+ IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
+ snd_addr = addr;
+ if (type == ICMPV6_MGM_REDUCTION) {
+ snd_addr = &all_routers;
+ ipv6_addr_all_routers(&all_routers);
+ }
+
+ /* len: ICMPv6 message; payload_len: plus hop-by-hop; full_len: plus IPv6 header */
+ len = sizeof(struct icmp6hdr) + sizeof(struct in6_addr);
+ payload_len = len + sizeof(ra);
+ full_len = sizeof(struct ipv6hdr) + payload_len;
+
+ skb = sock_alloc_send_skb(sk, LL_RESERVED_SPACE(dev) + full_len, 1, &err);
+
+ if (skb == NULL) {
+ IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
+ return;
+ }
+
+ skb_reserve(skb, LL_RESERVED_SPACE(dev));
+ if (dev->hard_header) {
+ unsigned char ha[MAX_ADDR_LEN];
+ ndisc_mc_map(snd_addr, ha, dev, 1);
+ if (dev->hard_header(skb, dev, ETH_P_IPV6, ha, NULL, full_len) < 0)
+ goto out;
+ }
+
+ if (ipv6_get_lladdr(dev, &addr_buf)) {
+ /* <draft-ietf-magma-mld-source-05.txt>:
+ * use unspecified address as the source address
+ * when a valid link-local address is not available.
+ */
+ memset(&addr_buf, 0, sizeof(addr_buf));
+ }
+
+ ip6_nd_hdr(sk, skb, dev, &addr_buf, snd_addr, NEXTHDR_HOP, payload_len);
+
+ memcpy(skb_put(skb, sizeof(ra)), ra, sizeof(ra));
+
+ hdr = (struct icmp6hdr *) skb_put(skb, sizeof(struct icmp6hdr));
+ memset(hdr, 0, sizeof(struct icmp6hdr));
+ hdr->icmp6_type = type;
+
+ /* message body: the multicast address being reported */
+ addrp = (struct in6_addr *) skb_put(skb, sizeof(struct in6_addr));
+ ipv6_addr_copy(addrp, addr);
+
+ hdr->icmp6_cksum = csum_ipv6_magic(&addr_buf, snd_addr, len,
+ IPPROTO_ICMPV6,
+ csum_partial((__u8 *) hdr, len, 0));
+
+ /* looked up before the skb is passed to NF_HOOK below */
+ idev = in6_dev_get(skb->dev);
+
+ err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dev,
+ dev_queue_xmit);
+ if (!err) {
+ if (type == ICMPV6_MGM_REDUCTION)
+ ICMP6_INC_STATS(idev, ICMP6_MIB_OUTGROUPMEMBREDUCTIONS);
+ else
+ ICMP6_INC_STATS(idev, ICMP6_MIB_OUTGROUPMEMBRESPONSES);
+ ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS);
+ IP6_INC_STATS(IPSTATS_MIB_OUTMCASTPKTS);
+ } else
+ IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
+
+ if (likely(idev != NULL))
+ in6_dev_put(idev);
+ return;
+
+ /* reached only when building the link-layer header failed */
+ out:
+ IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
+ kfree_skb(skb);
+ }
+
+ /*
+ * Drop one @sfmode reference on source @psfsrc in pmc->mca_sources.
+ *
+ * Returns -ESRCH if the source is not listed or has no @sfmode
+ * reference. When both the include and exclude counts reach zero the
+ * entry is unlinked; it is then either moved to pmc->mca_tomb with
+ * sf_crcount set to idev->mc_qrv (return value 1) or freed (return
+ * value 0). Returns 0 as well when references remain.
+ */
+ static int ip6_mc_del1_src(struct ifmcaddr6 *pmc, int sfmode,
+ struct in6_addr *psfsrc)
+ {
+ struct ip6_sf_list *psf, *psf_prev;
+ int rv = 0;
+
+ psf_prev = NULL;
+ for (psf=pmc->mca_sources; psf; psf=psf->sf_next) {
+ if (ipv6_addr_equal(&psf->sf_addr, psfsrc))
+ break;
+ psf_prev = psf;
+ }
+ if (!psf || psf->sf_count[sfmode] == 0) {
+ /* source filter not found, or count wrong => bug */
+ return -ESRCH;
+ }
+ psf->sf_count[sfmode]--;
+ if (!psf->sf_count[MCAST_INCLUDE] && !psf->sf_count[MCAST_EXCLUDE]) {
+ struct inet6_dev *idev = pmc->idev;
+
+ /* no more filters for this source */
+ if (psf_prev)
+ psf_prev->sf_next = psf->sf_next;
+ else
+ pmc->mca_sources = psf->sf_next;
+ /* keep the entry on the tomb list only if it was marked sf_oldin,
+ * reports are enabled for this address and MLD_V1_SEEN() is false
+ */
+ if (psf->sf_oldin && !(pmc->mca_flags & MAF_NOREPORT) &&
+ !MLD_V1_SEEN(idev)) {
+ psf->sf_crcount = idev->mc_qrv;
+ psf->sf_next = pmc->mca_tomb;
+ pmc->mca_tomb = psf;
+ rv = 1;
+ } else
+ kfree(psf);
+ }
+ return rv;
+ }
+
+ /*
+ * Remove a list of source filters from the entry for @pmca on @idev.
+ *
+ * @sfmode: filter mode the references were taken in
+ * @sfcount: number of addresses in @psfsrc
+ * @delta: when zero, the per-address mca_sfcount[@sfmode] is also
+ * decremented (and -EINVAL returned if it is already zero)
+ *
+ * Returns 0, -ENODEV for a NULL @idev, -ESRCH if @pmca is not on
+ * idev->mc_list, -EINVAL as above, or the first error returned by
+ * ip6_mc_del1_src(). Schedules a change report through mld_ifc_event()
+ * when the filter mode or the reportable source state changed.
+ */
+ static int ip6_mc_del_src(struct inet6_dev *idev, struct in6_addr *pmca,
+ int sfmode, int sfcount, struct in6_addr *psfsrc,
+ int delta)
+ {
+ struct ifmcaddr6 *pmc;
+ int changerec = 0;
+ int i, err;
+
+ if (!idev)
+ return -ENODEV;
+ read_lock_bh(&idev->lock);
+ for (pmc=idev->mc_list; pmc; pmc=pmc->next) {
+ if (ipv6_addr_equal(pmca, &pmc->mca_addr))
+ break;
+ }
+ if (!pmc) {
+ /* MCA not found?? bug */
+ read_unlock_bh(&idev->lock);
+ return -ESRCH;
+ }
+ spin_lock_bh(&pmc->mca_lock);
+ /* record the current per-source state so sf_setstate() can diff it */
+ sf_markstate(pmc);
+ if (!delta) {
+ if (!pmc->mca_sfcount[sfmode]) {
+ spin_unlock_bh(&pmc->mca_lock);
+ read_unlock_bh(&idev->lock);
+ return -EINVAL;
+ }
+ pmc->mca_sfcount[sfmode]--;
+ }
+ err = 0;
+ /* all sources are processed even after an error; the first error wins */
+ for (i=0; i<sfcount; i++) {
+ int rv = ip6_mc_del1_src(pmc, sfmode, &psfsrc[i]);
+
+ changerec |= rv > 0;
+ if (!err && rv < 0)
+ err = rv;
+ }
+ if (pmc->mca_sfmode == MCAST_EXCLUDE &&
+ pmc->mca_sfcount[MCAST_EXCLUDE] == 0 &&
+ pmc->mca_sfcount[MCAST_INCLUDE]) {
+ struct ip6_sf_list *psf;
+
+ /* filter mode change */
+ pmc->mca_sfmode = MCAST_INCLUDE;
+ pmc->mca_crcount = idev->mc_qrv;
+ idev->mc_ifc_count = pmc->mca_crcount;
+ for (psf=pmc->mca_sources; psf; psf = psf->sf_next)
+ psf->sf_crcount = 0;
+ mld_ifc_event(pmc->idev);
+ } else if (sf_setstate(pmc) || changerec)
+ mld_ifc_event(pmc->idev);
+ spin_unlock_bh(&pmc->mca_lock);
+ read_unlock_bh(&idev->lock);
+ return err;
+ }
+
+/*
+ * Add multicast single-source filter to the interface list
+ */
+static int ip6_mc_add1_src(struct ifmcaddr6 *pmc, int sfmode,
+ struct in6_addr *psfsrc, int delta)
+{
+ struct ip6_sf_list *psf, *psf_prev;
+
+ psf_prev = NULL;
+ for (psf=pmc->mca_sources; psf; psf=psf->sf_next) {
+ if (ipv6_addr_equal(&psf->sf_addr, psfsrc))
+ break;
+ psf_prev = psf;
+ }
+ if (!psf) {
+ psf = (struct ip6_sf_list *)kmalloc(sizeof(*psf), GFP_ATOMIC);
+ if (!psf)
+ return -ENOBUFS;
+ memset(psf, 0, sizeof(*psf));
+ psf->sf_addr = *psfsrc;
+ if (psf_prev) {
+ psf_prev->sf_next = psf;
+ } else
+ pmc->mca_sources = psf;
+ }
+ psf->sf_count[sfmode]++;
+ return 0;
+}
+
+ /*
+  * Record in sf_oldin, for every source of @pmc, whether the source is
+  * currently "in": with exclude-mode users present, a source is in when
+  * all of them exclude it and nobody includes it; otherwise it is in
+  * when at least one user includes it. sf_setstate() compares against
+  * this snapshot.
+  */
+ static void sf_markstate(struct ifmcaddr6 *pmc)
+ {
+ 	int mca_xcount = pmc->mca_sfcount[MCAST_EXCLUDE];
+ 	struct ip6_sf_list *src;
+
+ 	for (src = pmc->mca_sources; src != NULL; src = src->sf_next) {
+ 		if (pmc->mca_sfcount[MCAST_EXCLUDE]) {
+ 			src->sf_oldin =
+ 				mca_xcount == src->sf_count[MCAST_EXCLUDE] &&
+ 				!src->sf_count[MCAST_INCLUDE];
+ 		} else {
+ 			src->sf_oldin = src->sf_count[MCAST_INCLUDE] != 0;
+ 		}
+ 	}
+ }
+
+ /*
+  * Re-evaluate the "in" state of every source of @pmc using the same
+  * rule as sf_markstate() and compare it with the sf_oldin snapshot.
+  * Each source whose state differs gets sf_crcount set to the device's
+  * mc_qrv. Returns the number of sources that changed.
+  */
+ static int sf_setstate(struct ifmcaddr6 *pmc)
+ {
+ 	int mca_xcount = pmc->mca_sfcount[MCAST_EXCLUDE];
+ 	int qrv = pmc->idev->mc_qrv;
+ 	struct ip6_sf_list *src;
+ 	int changed = 0;
+
+ 	for (src = pmc->mca_sources; src != NULL; src = src->sf_next) {
+ 		int now_in;
+
+ 		if (pmc->mca_sfcount[MCAST_EXCLUDE])
+ 			now_in = mca_xcount == src->sf_count[MCAST_EXCLUDE] &&
+ 				 !src->sf_count[MCAST_INCLUDE];
+ 		else
+ 			now_in = src->sf_count[MCAST_INCLUDE] != 0;
+
+ 		if (now_in == src->sf_oldin)
+ 			continue;
+
+ 		src->sf_crcount = qrv;
+ 		changed++;
+ 	}
+ 	return changed;
+ }
+
+/*
+ * Add multicast source filter list to the interface list
+ */
+static int ip6_mc_add_src(struct inet6_dev *idev, struct in6_addr *pmca,
+ int sfmode, int sfcount, struct in6_addr *psfsrc,
+ int delta)
+{
+ struct ifmcaddr6 *pmc;
+ int isexclude;
+ int i, err;
+
+ if (!idev)
+ return -ENODEV;
+ read_lock_bh(&idev->lock);
+ for (pmc=idev->mc_list; pmc; pmc=pmc->next) {
+ if (ipv6_addr_equal(pmca, &pmc->mca_addr))
+ break;
+ }
+ if (!pmc) {
+ /* MCA not found?? bug */
+ read_unlock_bh(&idev->lock);
+ return -ESRCH;
+ }
+ spin_lock_bh(&pmc->mca_lock);
+
+ sf_markstate(pmc);
+ isexclude = pmc->mca_sfmode == MCAST_EXCLUDE;
+ if (!delta)
+ pmc->mca_sfcount[sfmode]++;
+ err = 0;
+ for (i=0; i<sfcount; i++) {
+ err = ip6_mc_add1_src(pmc, sfmode, &psfsrc[i], delta);
+ if (err)
+ break;
+ }
+ if (err) {
+ int j;
+
+ if (!delta)
+ pmc->mca_sfcount[sfmode]--;
+ for (j=0; j<i; j++)
+ (void) ip6_mc_del1_src(pmc, sfmode, &psfsrc[i]);
+ } else if (isexclude != (pmc->mca_sfcount[MCAST_EXCLUDE] != 0)) {
+ struct inet6_dev *idev = pmc->idev;
+ struct ip6_sf_list *psf;
+
+ /* filter mode change */
+ if (pmc->mca_sfcount[MCAST_EXCLUDE])
+ pmc->mca_sfmode = MCAST_EXCLUDE;
+ else if (pmc->mca_sfcount[MCAST_INCLUDE])
+ pmc->mca_sfmode = MCAST_INCLUDE;
+ /* else no filters; keep old mode for reports */
+
+ pmc->mca_crcount = idev->mc_qrv;
+ idev->mc_ifc_count = pmc->mca_crcount;
+ for (psf=pmc->mca_sources; psf; psf = psf->sf_next)
+ psf->sf_crcount = 0;
+ mld_ifc_event(idev);
+ } else if (sf_setstate(pmc))
+ mld_ifc_event(idev);
+ spin_unlock_bh(&pmc->mca_lock);
+ read_unlock_bh(&idev->lock);
+ return err;
+}
+
+ /*
+  * Free every entry on @pmc's tomb and live source lists and reset the
+  * filter state: mode MCAST_EXCLUDE with an include count of 0 and an
+  * exclude count of 1.
+  */
+ static void ip6_mc_clear_src(struct ifmcaddr6 *pmc)
+ {
+ 	struct ip6_sf_list *psf, *nextpsf;
+
+ 	for (psf=pmc->mca_tomb; psf; psf=nextpsf) {
+ 		nextpsf = psf->sf_next;
+ 		kfree(psf);
+ 	}
+ 	pmc->mca_tomb = NULL;
+ 	for (psf=pmc->mca_sources; psf; psf=nextpsf) {
+ 		nextpsf = psf->sf_next;
+ 		kfree(psf);
+ 	}
+ 	pmc->mca_sources = NULL;
+ 	pmc->mca_sfmode = MCAST_EXCLUDE;
+ 	/* the include count must be cleared too; writing the exclude slot
+ 	 * twice would leave a stale include count behind
+ 	 */
+ 	pmc->mca_sfcount[MCAST_INCLUDE] = 0;
+ 	pmc->mca_sfcount[MCAST_EXCLUDE] = 1;
+ }
+
+
+ /*
+ * Announce membership of @ma: send an ICMPV6_MGM_REPORT immediately and
+ * (re)arm ma->mca_timer. Does nothing for MAF_NOREPORT entries.
+ */
+ static void igmp6_join_group(struct ifmcaddr6 *ma)
+ {
+ unsigned long delay;
+
+ if (ma->mca_flags & MAF_NOREPORT)
+ return;
+
+ igmp6_send(&ma->mca_addr, ma->idev->dev, ICMPV6_MGM_REPORT);
+
+ /* random delay below IGMP6_UNSOLICITED_IVAL */
+ delay = net_random() % IGMP6_UNSOLICITED_IVAL;
+
+ spin_lock_bh(&ma->mca_lock);
+ /* a timer that was already pending keeps its remaining time; the
+ * reference counted for it is dropped here and re-taken below
+ */
+ if (del_timer(&ma->mca_timer)) {
+ atomic_dec(&ma->mca_refcnt);
+ delay = ma->mca_timer.expires - jiffies;
+ }
+
+ /* mod_timer() returning 0: take a reference for the armed timer */
+ if (!mod_timer(&ma->mca_timer, jiffies + delay))
+ atomic_inc(&ma->mca_refcnt);
+ ma->mca_flags |= MAF_TIMER_RUNNING | MAF_LAST_REPORTER;
+ spin_unlock_bh(&ma->mca_lock);
+ }
+
+ /*
+  * Release the source filter state that socket membership @iml holds on
+  * @idev. With a source list attached, its sources are removed through
+  * ip6_mc_del_src() and the list is freed and detached from @iml;
+  * without one, only the any-source reference is dropped. Returns the
+  * result of ip6_mc_del_src().
+  */
+ static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml,
+ 	struct inet6_dev *idev)
+ {
+ 	int err;
+
+ 	if (iml->sflist != NULL) {
+ 		err = ip6_mc_del_src(idev, &iml->addr, iml->sfmode,
+ 				     iml->sflist->sl_count,
+ 				     iml->sflist->sl_addr, 0);
+ 		sock_kfree_s(sk, iml->sflist,
+ 			     IP6_SFLSIZE(iml->sflist->sl_max));
+ 		iml->sflist = NULL;
+ 		return err;
+ 	}
+
+ 	/* any-source empty exclude case */
+ 	return ip6_mc_del_src(idev, &iml->addr, iml->sfmode, 0, NULL, 0);
+ }
+
+ /*
+  * Signal that @ma is being left. When MLD_V1_SEEN() is false, a delete
+  * record is queued and a change report scheduled. Otherwise an
+  * ICMPV6_MGM_REDUCTION is sent, but only if this host is flagged as
+  * the last reporter for the address.
+  */
+ static void igmp6_leave_group(struct ifmcaddr6 *ma)
+ {
+ 	if (!MLD_V1_SEEN(ma->idev)) {
+ 		mld_add_delrec(ma->idev, ma);
+ 		mld_ifc_event(ma->idev);
+ 		return;
+ 	}
+
+ 	if (ma->mca_flags & MAF_LAST_REPORTER)
+ 		igmp6_send(&ma->mca_addr, ma->idev->dev,
+ 			   ICMPV6_MGM_REDUCTION);
+ }
+
+ /*
+ * Timer callback for idev->mc_gq_timer (@data is the inet6_dev, see
+ * ipv6_mc_init_dev()): clears mc_gq_running and sends a report covering
+ * all addresses of the device.
+ */
+ static void mld_gq_timer_expire(unsigned long data)
+ {
+ struct inet6_dev *idev = (struct inet6_dev *)data;
+
+ idev->mc_gq_running = 0;
+ mld_send_report(idev, NULL);
+ /* presumably pairs with a reference taken when the timer was armed - confirm against the arming code */
+ __in6_dev_put(idev);
+ }
+
+ /*
+  * Timer callback for idev->mc_ifc_timer (@data is the inet6_dev):
+  * sends the pending change records, counts down mc_ifc_count and
+  * re-arms the timer with mc_maxdelay while retransmissions remain.
+  */
+ static void mld_ifc_timer_expire(unsigned long data)
+ {
+ 	struct inet6_dev *idev = (struct inet6_dev *)data;
+
+ 	mld_send_cr(idev);
+
+ 	/* decrement only a non-zero count; re-arm if it is still non-zero */
+ 	if (idev->mc_ifc_count && --idev->mc_ifc_count)
+ 		mld_ifc_start_timer(idev, idev->mc_maxdelay);
+
+ 	__in6_dev_put(idev);
+ }
+
+ /*
+ * Note an interface state change on @idev: reset the retransmission
+ * counter to mc_qrv and start the change-report timer with a delay
+ * argument of 1. Does nothing when MLD_V1_SEEN() is true.
+ */
+ static void mld_ifc_event(struct inet6_dev *idev)
+ {
+ if (MLD_V1_SEEN(idev))
+ return;
+ idev->mc_ifc_count = idev->mc_qrv;
+ mld_ifc_start_timer(idev, 1);
+ }
+
+
+ /*
+ * Per-address report timer callback (@data is the ifmcaddr6): sends an
+ * ICMPV6_MGM_REPORT when MLD_V1_SEEN() is true, otherwise an MLDv2
+ * report for this address, then marks the entry as last reporter with
+ * no timer running.
+ */
+ static void igmp6_timer_handler(unsigned long data)
+ {
+ struct ifmcaddr6 *ma = (struct ifmcaddr6 *) data;
+
+ if (MLD_V1_SEEN(ma->idev))
+ igmp6_send(&ma->mca_addr, ma->idev->dev, ICMPV6_MGM_REPORT);
+ else
+ mld_send_report(ma->idev, ma);
+
+ spin_lock(&ma->mca_lock);
+ ma->mca_flags |= MAF_LAST_REPORTER;
+ ma->mca_flags &= ~MAF_TIMER_RUNNING;
+ spin_unlock(&ma->mca_lock);
+ /* presumably releases the reference counted for the armed timer (see igmp6_join_group) - confirm */
+ ma_put(ma);
+ }
+
+ /*
+ * Device going down: cancel both per-device MLD timers, call
+ * igmp6_group_dropped() for every address on idev->mc_list and clear
+ * the delete records.
+ */
+
+ void ipv6_mc_down(struct inet6_dev *idev)
+ {
+ struct ifmcaddr6 *i;
+
+ /* Withdraw multicast list */
+
+ read_lock_bh(&idev->lock);
+ idev->mc_ifc_count = 0;
+ /* del_timer() returning non-zero: drop the reference counted for that timer */
+ if (del_timer(&idev->mc_ifc_timer))
+ __in6_dev_put(idev);
+ idev->mc_gq_running = 0;
+ if (del_timer(&idev->mc_gq_timer))
+ __in6_dev_put(idev);
+
+ for (i = idev->mc_list; i; i=i->next)
+ igmp6_group_dropped(i);
+ read_unlock_bh(&idev->lock);
+
+ mld_clear_delrec(idev);
+ }
+
+
+ /*
+ * Device going up: call igmp6_group_added() for every address on
+ * idev->mc_list, with idev->lock read-held.
+ */
+
+ void ipv6_mc_up(struct inet6_dev *idev)
+ {
+ struct ifmcaddr6 *i;
+
+ /* Install multicast list, except for all-nodes (already installed) */
+
+ read_lock_bh(&idev->lock);
+ for (i = idev->mc_list; i; i=i->next)
+ igmp6_group_added(i);
+ read_unlock_bh(&idev->lock);
+ }
+
+ /*
+ * IPv6 device initialization: set up the multicast lock, both MLD
+ * timers (general-query and interface-change, each carrying @idev as
+ * its data) and the protocol defaults, then join the address produced
+ * by ipv6_addr_all_nodes().
+ */
+
+ void ipv6_mc_init_dev(struct inet6_dev *idev)
+ {
+ struct in6_addr maddr;
+
+ write_lock_bh(&idev->lock);
+ rwlock_init(&idev->mc_lock);
+ idev->mc_gq_running = 0;
+ init_timer(&idev->mc_gq_timer);
+ idev->mc_gq_timer.data = (unsigned long) idev;
+ idev->mc_gq_timer.function = &mld_gq_timer_expire;
+ idev->mc_tomb = NULL;
+ idev->mc_ifc_count = 0;
+ init_timer(&idev->mc_ifc_timer);
+ idev->mc_ifc_timer.data = (unsigned long) idev;
+ idev->mc_ifc_timer.function = &mld_ifc_timer_expire;
+ idev->mc_qrv = MLD_QRV_DEFAULT;
+ idev->mc_maxdelay = IGMP6_UNSOLICITED_IVAL;
+ idev->mc_v1_seen = 0;
+ write_unlock_bh(&idev->lock);
+
+ /* Add all-nodes address. */
+ ipv6_addr_all_nodes(&maddr);
+ ipv6_dev_mc_inc(idev->dev, &maddr);
+ }
+
+/*
+ * Device is about to be destroyed: clean up.
+ */
+
+void ipv6_mc_destroy_dev(struct inet6_dev *idev)
+{
+ struct ifmcaddr6 *i;
+ struct in6_addr maddr;
+
+ /* Deactivate timers */
+ ipv6_mc_down(idev);
+
+ /* Delete all-nodes address. */
+ ipv6_addr_all_nodes(&maddr);
+
+ /* We cannot call ipv6_dev_mc_dec() directly, our caller in
+ * addrconf.c has NULL'd out dev->ip6_ptr so in6_dev_get() will
+ * fail.
+ */
+ __ipv6_dev_mc_dec(idev, &maddr);
+
+ if (idev->cnf.forwarding) {
+ ipv6_addr_all_routers(&maddr);
+ __ipv6_dev_mc_dec(idev, &maddr);
+ }
+
+ write_lock_bh(&idev->lock);
+ while ((i = idev->mc_list) != NULL) {
+ idev->mc_list = i->next;
+ write_unlock_bh(&idev->lock);
+
+ igmp6_group_dropped(i);
+ ma_put(i);
+
+ write_lock_bh(&idev->lock);
+ }
+ write_unlock_bh(&idev->lock);
+}
+
+#ifdef CONFIG_PROC_FS
+ /*
+ * Cursor for the "igmp6" seq_file: the net_device currently being
+ * walked and its inet6_dev. While idev is non-NULL the iterator holds
+ * a reference on it and its lock read-held (see igmp6_mc_seq_stop()).
+ */
+ struct igmp6_mc_iter_state {
+ struct net_device *dev;
+ struct inet6_dev *idev;
+ };
+
+ /* fetch the iterator state stored in seq->private by igmp6_mc_seq_open() */
+ #define igmp6_mc_seq_private(seq) ((struct igmp6_mc_iter_state *)(seq)->private)
+
+ /*
+ * Position the iterator on the first multicast address of the first
+ * device (starting from dev_base) that has a non-empty mc_list.
+ *
+ * On success returns with state->idev set, a reference held on it and
+ * idev->lock read-held. Returns NULL, with state->idev NULL and no
+ * lock held, when no device has any address.
+ */
+ static inline struct ifmcaddr6 *igmp6_mc_get_first(struct seq_file *seq)
+ {
+ struct ifmcaddr6 *im = NULL;
+ struct igmp6_mc_iter_state *state = igmp6_mc_seq_private(seq);
+
+ for (state->dev = dev_base, state->idev = NULL;
+ state->dev;
+ state->dev = state->dev->next) {
+ struct inet6_dev *idev;
+ idev = in6_dev_get(state->dev);
+ if (!idev)
+ continue;
+ read_lock_bh(&idev->lock);
+ im = idev->mc_list;
+ if (im) {
+ /* keep reference and lock; the iterator now owns them */
+ state->idev = idev;
+ break;
+ }
+ read_unlock_bh(&idev->lock);
+ in6_dev_put(idev);
+ }
+ return im;
+ }
+
+ /*
+ * Advance from @im to the next multicast address, moving on to the
+ * following devices when the current list is exhausted. The lock and
+ * reference on the device being left are released; those of the device
+ * being entered are taken. Returns NULL, with state->idev NULL, at the
+ * end of the device list.
+ */
+ static struct ifmcaddr6 *igmp6_mc_get_next(struct seq_file *seq, struct ifmcaddr6 *im)
+ {
+ struct igmp6_mc_iter_state *state = igmp6_mc_seq_private(seq);
+
+ im = im->next;
+ while (!im) {
+ /* state->idev can be NULL here if the previous device had no inet6_dev */
+ if (likely(state->idev != NULL)) {
+ read_unlock_bh(&state->idev->lock);
+ in6_dev_put(state->idev);
+ }
+ state->dev = state->dev->next;
+ if (!state->dev) {
+ state->idev = NULL;
+ break;
+ }
+ state->idev = in6_dev_get(state->dev);
+ if (!state->idev)
+ continue;
+ read_lock_bh(&state->idev->lock);
+ im = state->idev->mc_list;
+ }
+ return im;
+ }
+
+ /*
+  * Return the entry @pos steps after the first one, or NULL when the
+  * sequence has fewer than @pos + 1 entries.
+  */
+ static struct ifmcaddr6 *igmp6_mc_get_idx(struct seq_file *seq, loff_t pos)
+ {
+ 	struct ifmcaddr6 *cur = igmp6_mc_get_first(seq);
+
+ 	/* step forward until pos is used up or the sequence ends */
+ 	while (cur && pos) {
+ 		cur = igmp6_mc_get_next(seq, cur);
+ 		if (cur)
+ 			--pos;
+ 	}
+ 	if (pos)
+ 		return NULL;
+ 	return cur;
+ }
+
+ /*
+ * seq_file .start: take dev_base_lock (read side; released in
+ * igmp6_mc_seq_stop()) and seek to entry *@pos.
+ */
+ static void *igmp6_mc_seq_start(struct seq_file *seq, loff_t *pos)
+ {
+ read_lock(&dev_base_lock);
+ return igmp6_mc_get_idx(seq, *pos);
+ }
+
+ /* seq_file .next: step to the entry after @v and bump the position. */
+ static void *igmp6_mc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+ {
+ struct ifmcaddr6 *im;
+ im = igmp6_mc_get_next(seq, v);
+ ++*pos;
+ return im;
+ }
+
+ /*
+ * seq_file .stop: release whatever the iterator still holds (the
+ * current inet6_dev's lock and reference, if any) and dev_base_lock.
+ */
+ static void igmp6_mc_seq_stop(struct seq_file *seq, void *v)
+ {
+ struct igmp6_mc_iter_state *state = igmp6_mc_seq_private(seq);
+ if (likely(state->idev != NULL)) {
+ read_unlock_bh(&state->idev->lock);
+ in6_dev_put(state->idev);
+ state->idev = NULL;
+ }
+ state->dev = NULL;
+ read_unlock(&dev_base_lock);
+ }
+
+ /*
+ * seq_file .show: print one line per multicast address: interface
+ * index and name, the address as 32 hex digits, user count, flags, and
+ * the time left on the report timer (0 when MAF_TIMER_RUNNING is not
+ * set).
+ */
+ static int igmp6_mc_seq_show(struct seq_file *seq, void *v)
+ {
+ struct ifmcaddr6 *im = (struct ifmcaddr6 *)v;
+ struct igmp6_mc_iter_state *state = igmp6_mc_seq_private(seq);
+
+ seq_printf(seq,
+ "%-4d %-15s %04x%04x%04x%04x%04x%04x%04x%04x %5d %08X %ld\n",
+ state->dev->ifindex, state->dev->name,
+ NIP6(im->mca_addr),
+ im->mca_users, im->mca_flags,
+ (im->mca_flags&MAF_TIMER_RUNNING) ?
+ jiffies_to_clock_t(im->mca_timer.expires-jiffies) : 0);
+ return 0;
+ }
+
+ /* seq_file iterator callbacks for the "igmp6" proc entry */
+ static struct seq_operations igmp6_mc_seq_ops = {
+ .start = igmp6_mc_seq_start,
+ .next = igmp6_mc_seq_next,
+ .stop = igmp6_mc_seq_stop,
+ .show = igmp6_mc_seq_show,
+ };
+
+ /*
+  * open() for the "igmp6" proc entry: allocate the iterator state, open
+  * the seq_file and attach the zeroed state as its private data.
+  * Returns 0, -ENOMEM if the state cannot be allocated, or the error
+  * from seq_open() (the state is freed in that case).
+  */
+ static int igmp6_mc_seq_open(struct inode *inode, struct file *file)
+ {
+ 	struct igmp6_mc_iter_state *state;
+ 	struct seq_file *seq;
+ 	int rc;
+
+ 	state = kmalloc(sizeof(*state), GFP_KERNEL);
+ 	if (!state)
+ 		return -ENOMEM;
+
+ 	rc = seq_open(file, &igmp6_mc_seq_ops);
+ 	if (rc) {
+ 		kfree(state);
+ 		return rc;
+ 	}
+
+ 	seq = file->private_data;
+ 	seq->private = state;
+ 	memset(state, 0, sizeof(*state));
+ 	return rc;
+ }
+
+ /* file operations for the "igmp6" proc entry; release is seq_release_private */
+ static struct file_operations igmp6_mc_seq_fops = {
+ .owner = THIS_MODULE,
+ .open = igmp6_mc_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release_private,
+ };
+
+ /*
+ * Cursor for the "mcfilter6" seq_file: current net_device, its
+ * inet6_dev and the multicast address whose source list is being
+ * walked. While im is non-NULL its mca_lock is held; while idev is
+ * non-NULL its lock is read-held and a reference is held (see
+ * igmp6_mcf_seq_stop()).
+ */
+ struct igmp6_mcf_iter_state {
+ struct net_device *dev;
+ struct inet6_dev *idev;
+ struct ifmcaddr6 *im;
+ };
+
+ /* fetch the iterator state stored in seq->private by igmp6_mcf_seq_open() */
+ #define igmp6_mcf_seq_private(seq) ((struct igmp6_mcf_iter_state *)(seq)->private)
+
+ /*
+ * Position the iterator on the first source filter entry.
+ *
+ * Walks the devices from dev_base and stops at the first one whose
+ * first multicast address has a non-empty source list. On success
+ * returns with state->im / state->idev set, im->mca_lock held,
+ * idev->lock read-held and a reference on idev. Returns NULL with
+ * nothing held otherwise.
+ *
+ * NOTE(review): only the first entry of each device's mc_list is
+ * examined here; later addresses of a device whose first address has
+ * no sources are not considered by this function.
+ */
+ static inline struct ip6_sf_list *igmp6_mcf_get_first(struct seq_file *seq)
+ {
+ struct ip6_sf_list *psf = NULL;
+ struct ifmcaddr6 *im = NULL;
+ struct igmp6_mcf_iter_state *state = igmp6_mcf_seq_private(seq);
+
+ for (state->dev = dev_base, state->idev = NULL, state->im = NULL;
+ state->dev;
+ state->dev = state->dev->next) {
+ struct inet6_dev *idev;
+ idev = in6_dev_get(state->dev);
+ if (unlikely(idev == NULL))
+ continue;
+ read_lock_bh(&idev->lock);
+ im = idev->mc_list;
+ if (likely(im != NULL)) {
+ spin_lock_bh(&im->mca_lock);
+ psf = im->mca_sources;
+ if (likely(psf != NULL)) {
+ /* keep both locks and the reference; the iterator now owns them */
+ state->im = im;
+ state->idev = idev;
+ break;
+ }
+ spin_unlock_bh(&im->mca_lock);
+ }
+ read_unlock_bh(&idev->lock);
+ in6_dev_put(idev);
+ }
+ return psf;
+ }
+
+ /*
+ * Advance from @psf to the next source filter entry, moving on to the
+ * next multicast address and then to the next device as lists run out.
+ * Locks and the device reference are handed over as the cursor moves.
+ * Returns NULL at the end of the device list, with state->im and
+ * state->idev both NULL.
+ */
+ static struct ip6_sf_list *igmp6_mcf_get_next(struct seq_file *seq, struct ip6_sf_list *psf)
+ {
+ struct igmp6_mcf_iter_state *state = igmp6_mcf_seq_private(seq);
+
+ psf = psf->sf_next;
+ while (!psf) {
+ /* done with this address: release it and try the next one */
+ spin_unlock_bh(&state->im->mca_lock);
+ state->im = state->im->next;
+ while (!state->im) {
+ if (likely(state->idev != NULL)) {
+ read_unlock_bh(&state->idev->lock);
+ in6_dev_put(state->idev);
+ }
+ state->dev = state->dev->next;
+ if (!state->dev) {
+ state->idev = NULL;
+ goto out;
+ }
+ state->idev = in6_dev_get(state->dev);
+ if (!state->idev)
+ continue;
+ read_lock_bh(&state->idev->lock);
+ state->im = state->idev->mc_list;
+ }
+ /* the loop above only falls through with state->im non-NULL, so this check never fires */
+ if (!state->im)
+ break;
+ spin_lock_bh(&state->im->mca_lock);
+ psf = state->im->mca_sources;
+ }
+ out:
+ return psf;
+ }
+
+ /*
+  * Return the source filter entry @pos steps after the first one, or
+  * NULL when the sequence has fewer than @pos + 1 entries.
+  */
+ static struct ip6_sf_list *igmp6_mcf_get_idx(struct seq_file *seq, loff_t pos)
+ {
+ 	struct ip6_sf_list *cur = igmp6_mcf_get_first(seq);
+
+ 	/* step forward until pos is used up or the sequence ends */
+ 	while (cur && pos) {
+ 		cur = igmp6_mcf_get_next(seq, cur);
+ 		if (cur)
+ 			--pos;
+ 	}
+ 	if (pos)
+ 		return NULL;
+ 	return cur;
+ }
+
+ /*
+ * seq_file .start: take dev_base_lock (read side; released in
+ * igmp6_mcf_seq_stop()). Position 0 is the header line, represented by
+ * SEQ_START_TOKEN; position N > 0 maps to source entry N - 1.
+ */
+ static void *igmp6_mcf_seq_start(struct seq_file *seq, loff_t *pos)
+ {
+ read_lock(&dev_base_lock);
+ return *pos ? igmp6_mcf_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
+ }
+
+ /*
+ * seq_file .next: after the header token return the first entry,
+ * otherwise the entry following @v; always bumps the position.
+ */
+ static void *igmp6_mcf_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+ {
+ struct ip6_sf_list *psf;
+ if (v == SEQ_START_TOKEN)
+ psf = igmp6_mcf_get_first(seq);
+ else
+ psf = igmp6_mcf_get_next(seq, v);
+ ++*pos;
+ return psf;
+ }
+
+ /*
+ * seq_file .stop: release whatever the iterator still holds (the
+ * current address's mca_lock, the inet6_dev's lock and reference) and
+ * then dev_base_lock.
+ */
+ static void igmp6_mcf_seq_stop(struct seq_file *seq, void *v)
+ {
+ struct igmp6_mcf_iter_state *state = igmp6_mcf_seq_private(seq);
+ if (likely(state->im != NULL)) {
+ spin_unlock_bh(&state->im->mca_lock);
+ state->im = NULL;
+ }
+ if (likely(state->idev != NULL)) {
+ read_unlock_bh(&state->idev->lock);
+ in6_dev_put(state->idev);
+ state->idev = NULL;
+ }
+ state->dev = NULL;
+ read_unlock(&dev_base_lock);
+ }
+
+ /*
+ * seq_file .show: print the column header for SEQ_START_TOKEN,
+ * otherwise one line for source entry @v: interface index and name,
+ * multicast address, source address and the include/exclude counts.
+ */
+ static int igmp6_mcf_seq_show(struct seq_file *seq, void *v)
+ {
+ struct ip6_sf_list *psf = (struct ip6_sf_list *)v;
+ struct igmp6_mcf_iter_state *state = igmp6_mcf_seq_private(seq);
+
+ if (v == SEQ_START_TOKEN) {
+ seq_printf(seq,
+ "%3s %6s "
+ "%32s %32s %6s %6s\n", "Idx",
+ "Device", "Multicast Address",
+ "Source Address", "INC", "EXC");
+ } else {
+ seq_printf(seq,
+ "%3d %6.6s "
+ "%04x%04x%04x%04x%04x%04x%04x%04x "
+ "%04x%04x%04x%04x%04x%04x%04x%04x "
+ "%6lu %6lu\n",
+ state->dev->ifindex, state->dev->name,
+ NIP6(state->im->mca_addr),
+ NIP6(psf->sf_addr),
+ psf->sf_count[MCAST_INCLUDE],
+ psf->sf_count[MCAST_EXCLUDE]);
+ }
+ return 0;
+ }
+
+ /* seq_file iterator callbacks for the "mcfilter6" proc entry */
+ static struct seq_operations igmp6_mcf_seq_ops = {
+ .start = igmp6_mcf_seq_start,
+ .next = igmp6_mcf_seq_next,
+ .stop = igmp6_mcf_seq_stop,
+ .show = igmp6_mcf_seq_show,
+ };
+
+ /*
+  * open() for the "mcfilter6" proc entry: allocate the iterator state,
+  * open the seq_file and attach the zeroed state as its private data.
+  * Returns 0, -ENOMEM if the state cannot be allocated, or the error
+  * from seq_open() (the state is freed in that case).
+  */
+ static int igmp6_mcf_seq_open(struct inode *inode, struct file *file)
+ {
+ 	struct igmp6_mcf_iter_state *state;
+ 	struct seq_file *seq;
+ 	int rc;
+
+ 	state = kmalloc(sizeof(*state), GFP_KERNEL);
+ 	if (!state)
+ 		return -ENOMEM;
+
+ 	rc = seq_open(file, &igmp6_mcf_seq_ops);
+ 	if (rc) {
+ 		kfree(state);
+ 		return rc;
+ 	}
+
+ 	seq = file->private_data;
+ 	seq->private = state;
+ 	memset(state, 0, sizeof(*state));
+ 	return rc;
+ }
+
+ /* file operations for the "mcfilter6" proc entry; release is seq_release_private */
+ static struct file_operations igmp6_mcf_seq_fops = {
+ .owner = THIS_MODULE,
+ .open = igmp6_mcf_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release_private,
+ };
+#endif
+
+ /*
+ * Create the kernel-internal raw ICMPv6 socket used to send MLD
+ * messages (igmp6_socket) and, with CONFIG_PROC_FS, register the
+ * "igmp6" and "mcfilter6" proc entries.
+ *
+ * Returns 0 on success or the error from sock_create_kern(). @ops is
+ * not used in this function.
+ */
+ int __init igmp6_init(struct net_proto_family *ops)
+ {
+ struct ipv6_pinfo *np;
+ struct sock *sk;
+ int err;
+
+ err = sock_create_kern(PF_INET6, SOCK_RAW, IPPROTO_ICMPV6, &igmp6_socket);
+ if (err < 0) {
+ printk(KERN_ERR
+ "Failed to initialize the IGMP6 control socket (err %d).\n",
+ err);
+ igmp6_socket = NULL; /* For safety. */
+ return err;
+ }
+
+ sk = igmp6_socket->sk;
+ /* allocations on behalf of this socket use GFP_ATOMIC */
+ sk->sk_allocation = GFP_ATOMIC;
+ sk->sk_prot->unhash(sk);
+
+ np = inet6_sk(sk);
+ np->hop_limit = 1;
+
+ #ifdef CONFIG_PROC_FS
+ /* NOTE(review): the return values of proc_net_fops_create() are not checked */
+ proc_net_fops_create("igmp6", S_IRUGO, &igmp6_mc_seq_fops);
+ proc_net_fops_create("mcfilter6", S_IRUGO, &igmp6_mcf_seq_fops);
+ #endif
+
+ return 0;
+ }
+
+ /*
+ * Undo igmp6_init(): release the control socket and, with
+ * CONFIG_PROC_FS, remove the two proc entries.
+ */
+ void igmp6_cleanup(void)
+ {
+ sock_release(igmp6_socket);
+ igmp6_socket = NULL; /* for safety */
+
+ #ifdef CONFIG_PROC_FS
+ proc_net_remove("mcfilter6");
+ proc_net_remove("igmp6");
+ #endif
+ }
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
new file mode 100644
index 000000000000..7c291f4e9edc
--- /dev/null
+++ b/net/ipv6/ndisc.c
@@ -0,0 +1,1690 @@
+/*
+ * Neighbour Discovery for IPv6
+ * Linux INET6 implementation
+ *
+ * Authors:
+ * Pedro Roque <roque@di.fc.ul.pt>
+ * Mike Shaver <shaver@ingenia.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+/*
+ * Changes:
+ *
+ * Lars Fenneberg : fixed MTU setting on receipt
+ * of an RA.
+ *
+ * Janos Farkas : kmalloc failure checks
+ * Alexey Kuznetsov : state machine reworked
+ * and moved to net/core.
+ * Pekka Savola : RFC2461 validation
+ * YOSHIFUJI Hideaki @USAGI : Verify ND options properly
+ */
+
+ /*
+ * Set to 3 to get tracing...
+ *
+ * ND_PRINTK0() is always enabled; ND_PRINTKn() (n = 1..3) prints only when
+ * ND_DEBUG >= n and otherwise expands to an empty statement. Every enabled
+ * variant goes through ND_PRINTK(), which only calls printk() when
+ * net_ratelimit() allows it.
+ */
+ #define ND_DEBUG 1
+
+ #define ND_PRINTK(fmt, args...) do { if (net_ratelimit()) { printk(fmt, ## args); } } while(0)
+ #define ND_NOPRINTK(x...) do { ; } while(0)
+ #define ND_PRINTK0 ND_PRINTK
+ #define ND_PRINTK1 ND_NOPRINTK
+ #define ND_PRINTK2 ND_NOPRINTK
+ #define ND_PRINTK3 ND_NOPRINTK
+ #if ND_DEBUG >= 1
+ #undef ND_PRINTK1
+ #define ND_PRINTK1 ND_PRINTK
+ #endif
+ #if ND_DEBUG >= 2
+ #undef ND_PRINTK2
+ #define ND_PRINTK2 ND_PRINTK
+ #endif
+ #if ND_DEBUG >= 3
+ #undef ND_PRINTK3
+ #define ND_PRINTK3 ND_PRINTK
+ #endif
+
+#include <linux/module.h>
+#include <linux/config.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/sched.h>
+#include <linux/net.h>
+#include <linux/in6.h>
+#include <linux/route.h>
+#include <linux/init.h>
+#include <linux/rcupdate.h>
+#ifdef CONFIG_SYSCTL
+#include <linux/sysctl.h>
+#endif
+
+#include <linux/if_arp.h>
+#include <linux/ipv6.h>
+#include <linux/icmpv6.h>
+#include <linux/jhash.h>
+
+#include <net/sock.h>
+#include <net/snmp.h>
+
+#include <net/ipv6.h>
+#include <net/protocol.h>
+#include <net/ndisc.h>
+#include <net/ip6_route.h>
+#include <net/addrconf.h>
+#include <net/icmp.h>
+
+#include <net/flow.h>
+#include <net/ip6_checksum.h>
+#include <linux/proc_fs.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv6.h>
+
+ static struct socket *ndisc_socket; /* its ->sk is used by the ndisc_send_*() functions to allocate and build outgoing ND packets */
+ /* Forward declarations of the callbacks referenced by the ops tables and nd_tbl below. */
+ static u32 ndisc_hash(const void *pkey, const struct net_device *dev);
+ static int ndisc_constructor(struct neighbour *neigh);
+ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb);
+ static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb);
+ static int pndisc_constructor(struct pneigh_entry *n);
+ static void pndisc_destructor(struct pneigh_entry *n);
+ static void pndisc_redo(struct sk_buff *skb);
+ /*
+ * Neighbour operations selected by ndisc_constructor() for devices that
+ * have a hard_header method but no hard_header_cache method.
+ */
+ static struct neigh_ops ndisc_generic_ops = {
+ .family = AF_INET6,
+ .solicit = ndisc_solicit,
+ .error_report = ndisc_error_report,
+ .output = neigh_resolve_output,
+ .connected_output = neigh_connected_output,
+ .hh_output = dev_queue_xmit,
+ .queue_xmit = dev_queue_xmit,
+ };
+ /*
+ * Neighbour operations selected by ndisc_constructor() for devices that
+ * provide hard_header_cache. Unlike ndisc_generic_ops, connected_output
+ * is neigh_resolve_output here.
+ */
+ static struct neigh_ops ndisc_hh_ops = {
+ .family = AF_INET6,
+ .solicit = ndisc_solicit,
+ .error_report = ndisc_error_report,
+ .output = neigh_resolve_output,
+ .connected_output = neigh_resolve_output,
+ .hh_output = dev_queue_xmit,
+ .queue_xmit = dev_queue_xmit,
+ };
+
+ /*
+ * Neighbour operations selected by ndisc_constructor() for devices without
+ * a hard_header method: every output path goes straight to dev_queue_xmit
+ * and no solicit/error_report callbacks are installed.
+ */
+ static struct neigh_ops ndisc_direct_ops = {
+ .family = AF_INET6,
+ .output = dev_queue_xmit,
+ .connected_output = dev_queue_xmit,
+ .hh_output = dev_queue_xmit,
+ .queue_xmit = dev_queue_xmit,
+ };
+ /*
+ * Neighbour table for IPv6 Neighbour Discovery. Entries are keyed by a
+ * struct in6_addr, hashed with ndisc_hash() and initialised by
+ * ndisc_constructor(); the p* hooks handle proxy entries.
+ */
+ struct neigh_table nd_tbl = {
+ .family = AF_INET6,
+ .entry_size = sizeof(struct neighbour) + sizeof(struct in6_addr), /* neighbour followed by its in6_addr key */
+ .key_len = sizeof(struct in6_addr),
+ .hash = ndisc_hash,
+ .constructor = ndisc_constructor,
+ .pconstructor = pndisc_constructor,
+ .pdestructor = pndisc_destructor,
+ .proxy_redo = pndisc_redo,
+ .id = "ndisc_cache",
+ .parms = { /* table-wide defaults; time values are written as multiples of HZ */
+ .tbl = &nd_tbl,
+ .base_reachable_time = 30 * HZ,
+ .retrans_time = 1 * HZ,
+ .gc_staletime = 60 * HZ,
+ .reachable_time = 30 * HZ,
+ .delay_probe_time = 5 * HZ,
+ .queue_len = 3,
+ .ucast_probes = 3,
+ .mcast_probes = 3,
+ .anycast_delay = 1 * HZ,
+ .proxy_delay = (8 * HZ) / 10,
+ .proxy_qlen = 64,
+ },
+ .gc_interval = 30 * HZ,
+ .gc_thresh1 = 128,
+ .gc_thresh2 = 512,
+ .gc_thresh3 = 1024,
+ };
+
+ /*
+ * ND options
+ *
+ * Result of ndisc_parse_options(): one slot per option type, each pointing
+ * at an option header inside the received packet (NULL when absent). The
+ * nd_opts_* macros below name the slots; nd_opts_pi_end is set to the last
+ * prefix-information option seen, while nd_opts_pi keeps the first.
+ */
+ struct ndisc_options {
+ struct nd_opt_hdr *nd_opt_array[__ND_OPT_MAX];
+ };
+
+ #define nd_opts_src_lladdr nd_opt_array[ND_OPT_SOURCE_LL_ADDR]
+ #define nd_opts_tgt_lladdr nd_opt_array[ND_OPT_TARGET_LL_ADDR]
+ #define nd_opts_pi nd_opt_array[ND_OPT_PREFIX_INFO]
+ #define nd_opts_pi_end nd_opt_array[__ND_OPT_PREFIX_INFO_END]
+ #define nd_opts_rh nd_opt_array[ND_OPT_REDIRECT_HDR]
+ #define nd_opts_mtu nd_opt_array[ND_OPT_MTU]
+
+ #define NDISC_OPT_SPACE(len) (((len)+2+7)&~7) /* len plus the 2-byte type/length header, rounded up to a multiple of 8 */
+
+/*
+ * Return the padding between the option length and the start of the
+ * link addr. Currently only IP-over-InfiniBand needs this, although
+ * if RFC 3831 IPv6-over-Fibre Channel is ever implemented it may
+ * also need a pad of 2.
+ */
+ static int ndisc_addr_option_pad(unsigned short type)
+ {
+ 	/* InfiniBand is the only hardware type that needs padding so far */
+ 	if (type == ARPHRD_INFINIBAND)
+ 		return 2;
+
+ 	return 0;
+ }
+
+ /*
+  * Total size in bytes of a link-layer address option for @dev: the
+  * device address plus any type-specific padding, passed through
+  * NDISC_OPT_SPACE().
+  */
+ static inline int ndisc_opt_addr_space(struct net_device *dev)
+ {
+ 	int payload = dev->addr_len + ndisc_addr_option_pad(dev->type);
+
+ 	return NDISC_OPT_SPACE(payload);
+ }
+
+ /*
+  * Write a link-layer address option at @opt.
+  *
+  * @opt:       start of the option inside the packet buffer
+  * @type:      option type stored in the first byte
+  * @data:      link-layer address to copy
+  * @data_len:  length of @data in bytes
+  * @addr_type: hardware type, used to look up the padding that precedes
+  *             the address
+  *
+  * Layout: type byte, length byte (in units of 8 bytes), pad bytes
+  * (zeroed), the address, then zero fill up to the option size.
+  * Returns a pointer just past the option.
+  *
+  * The option size must include the padding so that it matches what
+  * ndisc_opt_addr_space() reserves and what ndisc_opt_addr_data()
+  * accepts on receive; sizing it from @data_len alone would under-report
+  * the length and overrun the option whenever the pad pushes the contents
+  * across an 8-byte boundary.
+  */
+ static u8 *ndisc_fill_addr_option(u8 *opt, int type, void *data, int data_len,
+ 				  unsigned short addr_type)
+ {
+ 	int pad = ndisc_addr_option_pad(addr_type);
+ 	int space = NDISC_OPT_SPACE(data_len + pad);
+
+ 	opt[0] = type;
+ 	opt[1] = space>>3;
+
+ 	/* padding sits between the 2-byte header and the address */
+ 	memset(opt + 2, 0, pad);
+ 	opt += pad;
+ 	space -= pad;
+
+ 	memcpy(opt+2, data, data_len);
+ 	data_len += 2;
+ 	opt += data_len;
+ 	/* zero whatever remains up to the 8-byte aligned end */
+ 	if ((space -= data_len) > 0)
+ 		memset(opt, 0, space);
+ 	return opt + space;
+ }
+
+ /*
+  * Starting after @cur, find the next option with the same type as @cur,
+  * looking no further than @end (inclusive). Returns NULL when @cur or
+  * @end is NULL, when @cur is already at or past @end, or when no further
+  * option of that type is found.
+  */
+ static struct nd_opt_hdr *ndisc_next_option(struct nd_opt_hdr *cur,
+ 					    struct nd_opt_hdr *end)
+ {
+ 	int wanted;
+
+ 	if (cur == NULL || end == NULL || cur >= end)
+ 		return NULL;
+
+ 	wanted = cur->nd_opt_type;
+ 	for (;;) {
+ 		/* option length is counted in units of 8 bytes */
+ 		cur = (struct nd_opt_hdr *)((u8 *)cur + (cur->nd_opt_len << 3));
+ 		if (cur >= end || cur->nd_opt_type == wanted)
+ 			break;
+ 	}
+
+ 	if (cur > end)
+ 		return NULL;
+ 	return cur->nd_opt_type == wanted ? cur : NULL;
+ }
+
+ /*
+  * Walk the @opt_len bytes of ND options at @opt and record them in
+  * @ndopts, which is cleared first.
+  *
+  * Source/target link-layer address, MTU and redirected-header options
+  * keep their first occurrence; for prefix information both the first and
+  * the last occurrence are recorded. Other option types are skipped.
+  *
+  * Returns @ndopts, or NULL if an argument is invalid or an option is
+  * truncated or has a zero length.
+  */
+ static struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len,
+ 						 struct ndisc_options *ndopts)
+ {
+ 	struct nd_opt_hdr *hdr = (struct nd_opt_hdr *)opt;
+
+ 	if (hdr == NULL || opt_len < 0 || ndopts == NULL)
+ 		return NULL;
+
+ 	memset(ndopts, 0, sizeof(*ndopts));
+
+ 	while (opt_len != 0) {
+ 		int optbytes;
+
+ 		if (opt_len < sizeof(struct nd_opt_hdr))
+ 			return NULL;
+
+ 		/* length field counts units of 8 bytes */
+ 		optbytes = hdr->nd_opt_len << 3;
+ 		if (optbytes == 0 || opt_len < optbytes)
+ 			return NULL;
+
+ 		switch (hdr->nd_opt_type) {
+ 		case ND_OPT_PREFIX_INFO:
+ 			ndopts->nd_opts_pi_end = hdr;
+ 			if (ndopts->nd_opt_array[hdr->nd_opt_type] == NULL)
+ 				ndopts->nd_opt_array[hdr->nd_opt_type] = hdr;
+ 			break;
+ 		case ND_OPT_SOURCE_LL_ADDR:
+ 		case ND_OPT_TARGET_LL_ADDR:
+ 		case ND_OPT_MTU:
+ 		case ND_OPT_REDIRECT_HDR:
+ 			if (ndopts->nd_opt_array[hdr->nd_opt_type] == NULL) {
+ 				ndopts->nd_opt_array[hdr->nd_opt_type] = hdr;
+ 			} else {
+ 				ND_PRINTK2(KERN_WARNING
+ 					   "%s(): duplicated ND6 option found: type=%d\n",
+ 					   __FUNCTION__,
+ 					   hdr->nd_opt_type);
+ 			}
+ 			break;
+ 		default:
+ 			/*
+ 			 * Unknown options must be silently ignored,
+ 			 * to accommodate future extension to the protocol.
+ 			 */
+ 			ND_PRINTK2(KERN_NOTICE
+ 				   "%s(): ignored unsupported option; type=%d, len=%d\n",
+ 				   __FUNCTION__,
+ 				   hdr->nd_opt_type, hdr->nd_opt_len);
+ 		}
+
+ 		opt_len -= optbytes;
+ 		hdr = (struct nd_opt_hdr *)((u8 *)hdr + optbytes);
+ 	}
+
+ 	return ndopts;
+ }
+
+ /*
+  * Return a pointer to the link-layer address carried in option @p, or
+  * NULL when the option length does not match what @dev's address length
+  * and padding require.
+  */
+ static inline u8 *ndisc_opt_addr_data(struct nd_opt_hdr *p,
+ 				      struct net_device *dev)
+ {
+ 	int prepad = ndisc_addr_option_pad(dev->type);
+ 	int optbytes = p->nd_opt_len << 3;
+
+ 	if (optbytes != NDISC_OPT_SPACE(dev->addr_len + prepad))
+ 		return NULL;
+
+ 	/* the address follows the option header and the padding */
+ 	return (u8 *)(p + 1) + prepad;
+ }
+
+ /*
+  * Map the IPv6 multicast address @addr to a link-layer address in @buf
+  * according to the hardware type of @dev. For hardware types without a
+  * dedicated mapping, the device broadcast address is used when @dir is
+  * non-zero. Returns 0 on success and -EINVAL when no mapping applies.
+  */
+ int ndisc_mc_map(struct in6_addr *addr, char *buf, struct net_device *dev, int dir)
+ {
+ 	switch (dev->type) {
+ 	case ARPHRD_IEEE802_TR:
+ 		ipv6_tr_mc_map(addr,buf);
+ 		break;
+ 	case ARPHRD_ARCNET:
+ 		ipv6_arcnet_mc_map(addr, buf);
+ 		break;
+ 	case ARPHRD_INFINIBAND:
+ 		ipv6_ib_mc_map(addr, buf);
+ 		break;
+ 	case ARPHRD_ETHER:
+ 	case ARPHRD_IEEE802:	/* Not sure. Check it later. --ANK */
+ 	case ARPHRD_FDDI:
+ 		ipv6_eth_mc_map(addr, buf);
+ 		break;
+ 	default:
+ 		if (!dir)
+ 			return -EINVAL;
+ 		memcpy(buf, dev->broadcast, dev->addr_len);
+ 		break;
+ 	}
+
+ 	return 0;
+ }
+
+ /*
+  * Hash callback for nd_tbl: XOR-fold the 32-bit words of the in6_addr key
+  * and mix the result with the interface index and the table's hash_rnd.
+  */
+ static u32 ndisc_hash(const void *pkey, const struct net_device *dev)
+ {
+ 	const u32 *word = pkey;
+ 	u32 left = sizeof(struct in6_addr) / sizeof(u32);
+ 	u32 folded = 0;
+
+ 	while (left--)
+ 		folded ^= *word++;
+
+ 	return jhash_2words(folded, dev->ifindex, nd_tbl.hash_rnd);
+ }
+
+ /*
+  * Constructor callback for nd_tbl entries.
+  *
+  * Replaces the entry's parms with a clone of the device's nd_parms,
+  * classifies the entry as multicast or unicast, and selects the ops table
+  * and initial output function from the device capabilities.
+  * Returns 0, or -EINVAL when the device has no inet6_dev.
+  */
+ static int ndisc_constructor(struct neighbour *neigh)
+ {
+ 	struct in6_addr *key = (struct in6_addr*)&neigh->primary_key;
+ 	struct net_device *dev = neigh->dev;
+ 	int mcast = ipv6_addr_is_multicast(key);
+ 	struct neigh_parms *dev_parms;
+ 	struct inet6_dev *idev;
+
+ 	rcu_read_lock();
+ 	idev = in6_dev_get(dev);
+ 	if (!idev) {
+ 		rcu_read_unlock();
+ 		return -EINVAL;
+ 	}
+
+ 	dev_parms = idev->nd_parms;
+ 	__neigh_parms_put(neigh->parms);
+ 	neigh->parms = neigh_parms_clone(dev_parms);
+ 	rcu_read_unlock();
+
+ 	if (mcast)
+ 		neigh->type = RTN_MULTICAST;
+ 	else
+ 		neigh->type = RTN_UNICAST;
+
+ 	if (dev->hard_header != NULL) {
+ 		/* cases in which the link-layer address is known up front */
+ 		if (mcast) {
+ 			neigh->nud_state = NUD_NOARP;
+ 			ndisc_mc_map(key, neigh->ha, dev, 1);
+ 		} else if (dev->flags & (IFF_NOARP|IFF_LOOPBACK)) {
+ 			neigh->nud_state = NUD_NOARP;
+ 			memcpy(neigh->ha, dev->dev_addr, dev->addr_len);
+ 			if (dev->flags & IFF_LOOPBACK)
+ 				neigh->type = RTN_LOCAL;
+ 		} else if (dev->flags & IFF_POINTOPOINT) {
+ 			neigh->nud_state = NUD_NOARP;
+ 			memcpy(neigh->ha, dev->broadcast, dev->addr_len);
+ 		}
+
+ 		neigh->ops = dev->hard_header_cache ? &ndisc_hh_ops
+ 						    : &ndisc_generic_ops;
+
+ 		neigh->output = (neigh->nud_state & NUD_VALID)
+ 				? neigh->ops->connected_output
+ 				: neigh->ops->output;
+ 	} else {
+ 		/* no link-layer header to build: transmit directly */
+ 		neigh->nud_state = NUD_NOARP;
+ 		neigh->ops = &ndisc_direct_ops;
+ 		neigh->output = neigh->ops->queue_xmit;
+ 	}
+
+ 	in6_dev_put(idev);
+ 	return 0;
+ }
+
+ /*
+  * Proxy-entry constructor for nd_tbl: join the solicited-node multicast
+  * group derived from the proxied address on the entry's device.
+  * Returns 0, or -EINVAL if there is no device or it has no inet6_dev.
+  */
+ static int pndisc_constructor(struct pneigh_entry *n)
+ {
+ 	struct net_device *dev = n->dev;
+ 	struct in6_addr solicited_node;
+
+ 	if (!dev || !__in6_dev_get(dev))
+ 		return -EINVAL;
+
+ 	addrconf_addr_solict_mult((struct in6_addr*)&n->key, &solicited_node);
+ 	ipv6_dev_mc_inc(dev, &solicited_node);
+ 	return 0;
+ }
+
+ /*
+  * Proxy-entry destructor for nd_tbl: leave the solicited-node multicast
+  * group that pndisc_constructor() joined. Does nothing if there is no
+  * device or it has no inet6_dev.
+  */
+ static void pndisc_destructor(struct pneigh_entry *n)
+ {
+ 	struct net_device *dev = n->dev;
+ 	struct in6_addr solicited_node;
+
+ 	if (!dev || !__in6_dev_get(dev))
+ 		return;
+
+ 	addrconf_addr_solict_mult((struct in6_addr*)&n->key, &solicited_node);
+ 	ipv6_dev_mc_dec(dev, &solicited_node);
+ }
+
+/*
+ * Send a Neighbour Advertisement
+ */
+
+ /*
+  * Initialise @fl as an ICMPv6 flow from @saddr to @daddr carrying ICMPv6
+  * message type @type (code 0); every other field is zeroed.
+  */
+ static inline void ndisc_flow_init(struct flowi *fl, u8 type,
+ 			    struct in6_addr *saddr, struct in6_addr *daddr)
+ {
+ 	memset(fl, 0, sizeof(*fl));
+
+ 	fl->proto = IPPROTO_ICMPV6;
+ 	fl->fl_icmp_type = type;
+ 	fl->fl_icmp_code = 0;
+
+ 	ipv6_addr_copy(&fl->fl6_dst, daddr);
+ 	ipv6_addr_copy(&fl->fl6_src, saddr);
+ }
+
+ /*
+  * Build and transmit a Neighbour Advertisement for @solicited_addr to
+  * @daddr on @dev.
+  *
+  * @router, @solicited and @override set the corresponding NA flags;
+  * @inc_opt requests a target link-layer address option, which is dropped
+  * when the device has no link-layer address. The source address is
+  * @solicited_addr when it is assigned to @dev, otherwise one chosen by
+  * ipv6_dev_get_saddr(). Failures are silent apart from the log message
+  * on skb allocation failure.
+  */
+ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
+ 		   struct in6_addr *daddr, struct in6_addr *solicited_addr,
+ 		   int router, int solicited, int override, int inc_opt)
+ {
+ 	struct sock *sk = ndisc_socket->sk;
+ 	int len = sizeof(struct icmp6hdr) + sizeof(struct in6_addr);
+ 	struct in6_addr *src_addr = solicited_addr;
+ 	struct in6_addr picked_src;
+ 	struct inet6_ifaddr *own;
+ 	struct inet6_dev *idev;
+ 	struct dst_entry *dst;
+ 	struct sk_buff *skb;
+ 	struct nd_msg *na;
+ 	struct flowi fl;
+ 	int err;
+
+ 	/* for anycast or proxy, solicited_addr != src_addr */
+ 	own = ipv6_get_ifaddr(solicited_addr, dev, 1);
+ 	if (own != NULL) {
+ 		in6_ifa_put(own);
+ 	} else {
+ 		if (ipv6_dev_get_saddr(dev, daddr, &picked_src))
+ 			return;
+ 		src_addr = &picked_src;
+ 	}
+
+ 	ndisc_flow_init(&fl, NDISC_NEIGHBOUR_ADVERTISEMENT, src_addr, daddr);
+
+ 	dst = ndisc_dst_alloc(dev, neigh, daddr, ip6_output);
+ 	if (dst == NULL)
+ 		return;
+
+ 	err = xfrm_lookup(&dst, &fl, NULL, 0);
+ 	if (err < 0) {
+ 		dst_release(dst);
+ 		return;
+ 	}
+
+ 	/* no link-layer address, no option */
+ 	if (inc_opt && !dev->addr_len)
+ 		inc_opt = 0;
+ 	if (inc_opt)
+ 		len += ndisc_opt_addr_space(dev);
+
+ 	skb = sock_alloc_send_skb(sk, MAX_HEADER + len + LL_RESERVED_SPACE(dev),
+ 				  1, &err);
+ 	if (!skb) {
+ 		ND_PRINTK0(KERN_ERR
+ 			   "ICMPv6 NA: %s() failed to allocate an skb.\n",
+ 			   __FUNCTION__);
+ 		dst_release(dst);
+ 		return;
+ 	}
+
+ 	skb_reserve(skb, LL_RESERVED_SPACE(dev));
+ 	ip6_nd_hdr(sk, skb, dev, src_addr, daddr, IPPROTO_ICMPV6, len);
+
+ 	na = (struct nd_msg *)skb_put(skb, len);
+ 	skb->h.raw = (unsigned char*)na;
+
+ 	na->icmph.icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT;
+ 	na->icmph.icmp6_code = 0;
+ 	na->icmph.icmp6_cksum = 0;
+
+ 	/* clear the flags word first, then set the individual flags */
+ 	na->icmph.icmp6_unused = 0;
+ 	na->icmph.icmp6_router = router;
+ 	na->icmph.icmp6_solicited = solicited;
+ 	na->icmph.icmp6_override = !!override;
+
+ 	/* Set the target address. */
+ 	ipv6_addr_copy(&na->target, solicited_addr);
+
+ 	if (inc_opt)
+ 		ndisc_fill_addr_option(na->opt, ND_OPT_TARGET_LL_ADDR,
+ 				       dev->dev_addr, dev->addr_len, dev->type);
+
+ 	/* checksum */
+ 	na->icmph.icmp6_cksum = csum_ipv6_magic(src_addr, daddr, len,
+ 						IPPROTO_ICMPV6,
+ 						csum_partial((__u8 *) na,
+ 							     len, 0));
+
+ 	skb->dst = dst;
+ 	idev = in6_dev_get(dst->dev);
+ 	IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
+ 	err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, dst_output);
+ 	if (err == 0) {
+ 		ICMP6_INC_STATS(idev, ICMP6_MIB_OUTNEIGHBORADVERTISEMENTS);
+ 		ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS);
+ 	}
+
+ 	if (likely(idev != NULL))
+ 		in6_dev_put(idev);
+ }
+
+ /*
+  * Build and transmit a Neighbour Solicitation for @solicit to @daddr on
+  * @dev.
+  *
+  * @neigh is passed through to ndisc_dst_alloc() and may be NULL.
+  * @saddr is the source address to use; when NULL, a link-local address
+  * of @dev is looked up and nothing is sent if there is none. A source
+  * link-layer address option is included only when the device has a
+  * link-layer address and the source address is not unspecified.
+  *
+  * Failures are silent apart from the log message on skb allocation
+  * failure, which is tagged "NS" so it can be told apart from the
+  * advertisement sender.
+  */
+ void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh,
+ 		   struct in6_addr *solicit,
+ 		   struct in6_addr *daddr, struct in6_addr *saddr)
+ {
+ 	struct flowi fl;
+ 	struct dst_entry* dst;
+ 	struct inet6_dev *idev;
+ 	struct sock *sk = ndisc_socket->sk;
+ 	struct sk_buff *skb;
+ 	struct nd_msg *msg;
+ 	struct in6_addr addr_buf;
+ 	int len;
+ 	int err;
+ 	int send_llinfo;
+
+ 	if (saddr == NULL) {
+ 		if (ipv6_get_lladdr(dev, &addr_buf))
+ 			return;
+ 		saddr = &addr_buf;
+ 	}
+
+ 	ndisc_flow_init(&fl, NDISC_NEIGHBOUR_SOLICITATION, saddr, daddr);
+
+ 	dst = ndisc_dst_alloc(dev, neigh, daddr, ip6_output);
+ 	if (!dst)
+ 		return;
+
+ 	err = xfrm_lookup(&dst, &fl, NULL, 0);
+ 	if (err < 0) {
+ 		dst_release(dst);
+ 		return;
+ 	}
+
+ 	len = sizeof(struct icmp6hdr) + sizeof(struct in6_addr);
+ 	send_llinfo = dev->addr_len && !ipv6_addr_any(saddr);
+ 	if (send_llinfo)
+ 		len += ndisc_opt_addr_space(dev);
+
+ 	skb = sock_alloc_send_skb(sk, MAX_HEADER + len + LL_RESERVED_SPACE(dev),
+ 				  1, &err);
+ 	if (skb == NULL) {
+ 		ND_PRINTK0(KERN_ERR
+ 			   "ICMPv6 NS: %s() failed to allocate an skb.\n",
+ 			   __FUNCTION__);
+ 		dst_release(dst);
+ 		return;
+ 	}
+
+ 	skb_reserve(skb, LL_RESERVED_SPACE(dev));
+ 	ip6_nd_hdr(sk, skb, dev, saddr, daddr, IPPROTO_ICMPV6, len);
+
+ 	msg = (struct nd_msg *)skb_put(skb, len);
+ 	skb->h.raw = (unsigned char*)msg;
+ 	msg->icmph.icmp6_type = NDISC_NEIGHBOUR_SOLICITATION;
+ 	msg->icmph.icmp6_code = 0;
+ 	msg->icmph.icmp6_cksum = 0;
+ 	msg->icmph.icmp6_unused = 0;
+
+ 	/* Set the target address. */
+ 	ipv6_addr_copy(&msg->target, solicit);
+
+ 	if (send_llinfo)
+ 		ndisc_fill_addr_option(msg->opt, ND_OPT_SOURCE_LL_ADDR, dev->dev_addr,
+ 				       dev->addr_len, dev->type);
+
+ 	/* checksum, using the source address ip6_nd_hdr() put in the header */
+ 	msg->icmph.icmp6_cksum = csum_ipv6_magic(&skb->nh.ipv6h->saddr,
+ 						 daddr, len,
+ 						 IPPROTO_ICMPV6,
+ 						 csum_partial((__u8 *) msg,
+ 							      len, 0));
+ 	/* send it! */
+ 	skb->dst = dst;
+ 	idev = in6_dev_get(dst->dev);
+ 	IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
+ 	err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, dst_output);
+ 	if (!err) {
+ 		ICMP6_INC_STATS(idev, ICMP6_MIB_OUTNEIGHBORSOLICITS);
+ 		ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS);
+ 	}
+
+ 	if (likely(idev != NULL))
+ 		in6_dev_put(idev);
+ }
+
+ /*
+  * Build and transmit a Router Solicitation from @saddr to @daddr on @dev.
+  * A source link-layer address option is appended whenever the device has
+  * a link-layer address. Failures are silent apart from the log message
+  * on skb allocation failure.
+  */
+ void ndisc_send_rs(struct net_device *dev, struct in6_addr *saddr,
+ 		   struct in6_addr *daddr)
+ {
+ 	struct sock *sk = ndisc_socket->sk;
+ 	int len = sizeof(struct icmp6hdr);
+ 	struct inet6_dev *idev;
+ 	struct dst_entry *dst;
+ 	struct icmp6hdr *rs;
+ 	struct sk_buff *skb;
+ 	struct flowi fl;
+ 	int err;
+
+ 	ndisc_flow_init(&fl, NDISC_ROUTER_SOLICITATION, saddr, daddr);
+
+ 	dst = ndisc_dst_alloc(dev, NULL, daddr, ip6_output);
+ 	if (dst == NULL)
+ 		return;
+
+ 	err = xfrm_lookup(&dst, &fl, NULL, 0);
+ 	if (err < 0) {
+ 		dst_release(dst);
+ 		return;
+ 	}
+
+ 	if (dev->addr_len)
+ 		len += ndisc_opt_addr_space(dev);
+
+ 	skb = sock_alloc_send_skb(sk, MAX_HEADER + len + LL_RESERVED_SPACE(dev),
+ 				  1, &err);
+ 	if (!skb) {
+ 		ND_PRINTK0(KERN_ERR
+ 			   "ICMPv6 RS: %s() failed to allocate an skb.\n",
+ 			   __FUNCTION__);
+ 		dst_release(dst);
+ 		return;
+ 	}
+
+ 	skb_reserve(skb, LL_RESERVED_SPACE(dev));
+ 	ip6_nd_hdr(sk, skb, dev, saddr, daddr, IPPROTO_ICMPV6, len);
+
+ 	rs = (struct icmp6hdr *)skb_put(skb, len);
+ 	skb->h.raw = (unsigned char*)rs;
+ 	rs->icmp6_type = NDISC_ROUTER_SOLICITATION;
+ 	rs->icmp6_code = 0;
+ 	rs->icmp6_cksum = 0;
+ 	rs->icmp6_unused = 0;
+
+ 	/* the option, if any, directly follows the ICMPv6 header */
+ 	if (dev->addr_len)
+ 		ndisc_fill_addr_option((u8*) (rs + 1), ND_OPT_SOURCE_LL_ADDR,
+ 				       dev->dev_addr, dev->addr_len, dev->type);
+
+ 	/* checksum */
+ 	rs->icmp6_cksum = csum_ipv6_magic(&skb->nh.ipv6h->saddr, daddr, len,
+ 					  IPPROTO_ICMPV6,
+ 					  csum_partial((__u8 *) rs, len, 0));
+
+ 	/* send it! */
+ 	skb->dst = dst;
+ 	idev = in6_dev_get(dst->dev);
+ 	IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
+ 	err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, dst_output);
+ 	if (err == 0) {
+ 		ICMP6_INC_STATS(idev, ICMP6_MIB_OUTROUTERSOLICITS);
+ 		ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS);
+ 	}
+
+ 	if (likely(idev != NULL))
+ 		in6_dev_put(idev);
+ }
+
+
+ /*
+  * error_report callback of the ND neigh_ops: signal a link failure for
+  * the packet's route, then free the packet.
+  */
+ static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb)
+ {
+ 	/* "The sender MUST return an ICMP destination unreachable" */
+ 	dst_link_failure(skb);
+
+ 	kfree_skb(skb);
+ }
+
+/* Called with locked neigh: either read or both */
+
+ /*
+  * solicit callback of the ND neigh_ops: send the next probe for @neigh.
+  *
+  * The probes sent so far decide the kind of probe: the first
+  * ucast_probes are unicast solicitations to the target, the next
+  * app_probes are handed to neigh_app_ns() (only with CONFIG_ARPD), and
+  * anything beyond that is a solicitation to the target's solicited-node
+  * multicast address. The source address of @skb is reused when it is one
+  * of our addresses on the device.
+  */
+ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb)
+ {
+ 	struct in6_addr *target = (struct in6_addr *)&neigh->primary_key;
+ 	struct net_device *dev = neigh->dev;
+ 	int remaining = atomic_read(&neigh->probes);
+ 	struct in6_addr *saddr = NULL;
+ 	struct in6_addr mcaddr;
+
+ 	if (skb && ipv6_chk_addr(&skb->nh.ipv6h->saddr, dev, 1))
+ 		saddr = &skb->nh.ipv6h->saddr;
+
+ 	remaining -= neigh->parms->ucast_probes;
+ 	if (remaining < 0) {
+ 		if (!(neigh->nud_state & NUD_VALID)) {
+ 			ND_PRINTK1(KERN_DEBUG
+ 				   "%s(): trying to ucast probe in NUD_INVALID: "
+ 				   "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
+ 				   __FUNCTION__,
+ 				   NIP6(*target));
+ 		}
+ 		ndisc_send_ns(dev, neigh, target, target, saddr);
+ 		return;
+ 	}
+
+ 	remaining -= neigh->parms->app_probes;
+ 	if (remaining < 0) {
+ #ifdef CONFIG_ARPD
+ 		neigh_app_ns(neigh);
+ #endif
+ 		return;
+ 	}
+
+ 	addrconf_addr_solict_mult(target, &mcaddr);
+ 	ndisc_send_ns(dev, NULL, target, &mcaddr, saddr);
+ }
+ /*
+ * Handle a received Neighbour Solicitation.
+ *
+ * Validates the message (non-multicast target, DAD destination, options),
+ * then answers with a Neighbour Advertisement when the target is one of our
+ * addresses, an anycast address on the device, or a proxied address while
+ * forwarding. A solicitation from the unspecified address for a tentative
+ * address of ours is reported as a DAD failure.
+ */
+ static void ndisc_recv_ns(struct sk_buff *skb)
+ {
+ struct nd_msg *msg = (struct nd_msg *)skb->h.raw;
+ struct in6_addr *saddr = &skb->nh.ipv6h->saddr;
+ struct in6_addr *daddr = &skb->nh.ipv6h->daddr;
+ u8 *lladdr = NULL;
+ u32 ndoptlen = skb->tail - msg->opt;
+ struct ndisc_options ndopts;
+ struct net_device *dev = skb->dev;
+ struct inet6_ifaddr *ifp;
+ struct inet6_dev *idev = NULL;
+ struct neighbour *neigh;
+ int dad = ipv6_addr_any(saddr); /* unspecified source: the sender is doing DAD */
+ int inc; /* non-zero when the solicitation was sent to a multicast address */
+
+ if (ipv6_addr_is_multicast(&msg->target)) {
+ ND_PRINTK2(KERN_WARNING
+ "ICMPv6 NS: multicast target address");
+ return;
+ }
+
+ /*
+ * RFC2461 7.1.1:
+ * DAD has to be destined for solicited node multicast address.
+ */
+ if (dad &&
+ !(daddr->s6_addr32[0] == htonl(0xff020000) &&
+ daddr->s6_addr32[1] == htonl(0x00000000) &&
+ daddr->s6_addr32[2] == htonl(0x00000001) &&
+ daddr->s6_addr [12] == 0xff )) {
+ ND_PRINTK2(KERN_WARNING
+ "ICMPv6 NS: bad DAD packet (wrong destination)\n");
+ return;
+ }
+
+ if (!ndisc_parse_options(msg->opt, ndoptlen, &ndopts)) {
+ ND_PRINTK2(KERN_WARNING
+ "ICMPv6 NS: invalid ND options\n");
+ return;
+ }
+
+ if (ndopts.nd_opts_src_lladdr) {
+ lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr, dev);
+ if (!lladdr) {
+ ND_PRINTK2(KERN_WARNING
+ "ICMPv6 NS: invalid link-layer address length\n");
+ return;
+ }
+
+ /* RFC2461 7.1.1:
+ * If the IP source address is the unspecified address,
+ * there MUST NOT be source link-layer address option
+ * in the message.
+ */
+ if (dad) {
+ ND_PRINTK2(KERN_WARNING
+ "ICMPv6 NS: bad DAD packet (link-layer address option)\n");
+ return;
+ }
+ }
+
+ inc = ipv6_addr_is_multicast(daddr);
+
+ if ((ifp = ipv6_get_ifaddr(&msg->target, dev, 1)) != NULL) {
+ if (ifp->flags & IFA_F_TENTATIVE) {
+ /* Address is tentative. If the source is the
+ unspecified address, someone else is doing DAD
+ for it; otherwise we ignore solicitations
+ until the DAD timer expires.
+ */
+ if (!dad)
+ goto out;
+ if (dev->type == ARPHRD_IEEE802_TR) {
+ unsigned char *sadr = skb->mac.raw;
+ if (((sadr[8] ^ dev->dev_addr[0]) & 0x7f) == 0 &&
+ sadr[9] == dev->dev_addr[1] &&
+ sadr[10] == dev->dev_addr[2] &&
+ sadr[11] == dev->dev_addr[3] &&
+ sadr[12] == dev->dev_addr[4] &&
+ sadr[13] == dev->dev_addr[5]) {
+ /* looped-back to us */
+ goto out;
+ }
+ }
+ addrconf_dad_failure(ifp); /* NOTE(review): ifp is not put on this path - presumably addrconf_dad_failure() consumes the reference; confirm */
+ return;
+ }
+
+ idev = ifp->idev; /* borrowed from ifp; only ifp is released at out */
+ } else {
+ idev = in6_dev_get(dev);
+ if (!idev) {
+ /* XXX: count this drop? */
+ return;
+ }
+
+ if (ipv6_chk_acast_addr(dev, &msg->target) ||
+ (idev->cnf.forwarding &&
+ pneigh_lookup(&nd_tbl, &msg->target, dev, 0))) {
+ if (skb->stamp.tv_sec != LOCALLY_ENQUEUED &&
+ skb->pkt_type != PACKET_HOST &&
+ inc != 0 &&
+ idev->nd_parms->proxy_delay != 0) {
+ /*
+ * for anycast or proxy,
+ * sender should delay its response
+ * by a random time between 0 and
+ * MAX_ANYCAST_DELAY_TIME seconds.
+ * (RFC2461) -- yoshfuji
+ */
+ struct sk_buff *n = skb_clone(skb, GFP_ATOMIC);
+ if (n)
+ pneigh_enqueue(&nd_tbl, idev->nd_parms, n);
+ goto out;
+ }
+ } else
+ goto out;
+ }
+
+ if (dad) {
+ struct in6_addr maddr;
+
+ ipv6_addr_all_nodes(&maddr); /* reply to a DAD probe goes to the all-nodes address */
+ ndisc_send_na(dev, NULL, &maddr, &msg->target,
+ idev->cnf.forwarding, 0, (ifp != NULL), 1);
+ goto out;
+ }
+
+ if (inc)
+ NEIGH_CACHE_STAT_INC(&nd_tbl, rcv_probes_mcast);
+ else
+ NEIGH_CACHE_STAT_INC(&nd_tbl, rcv_probes_ucast);
+
+ /*
+ * update / create cache entry
+ * for the source address
+ */
+ neigh = __neigh_lookup(&nd_tbl, saddr, dev,
+ !inc || lladdr || !dev->addr_len);
+ if (neigh)
+ neigh_update(neigh, lladdr, NUD_STALE,
+ NEIGH_UPDATE_F_WEAK_OVERRIDE|
+ NEIGH_UPDATE_F_OVERRIDE);
+ if (neigh || !dev->hard_header) {
+ ndisc_send_na(dev, neigh, saddr, &msg->target,
+ idev->cnf.forwarding,
+ 1, (ifp != NULL && inc), inc);
+ if (neigh)
+ neigh_release(neigh);
+ }
+
+ out:
+ if (ifp) /* drop whichever reference was taken above: the address or the device */
+ in6_ifa_put(ifp);
+ else
+ in6_dev_put(idev);
+
+ return;
+ }
+
+ /*
+  * Handle a received Neighbour Advertisement.
+  *
+  * Drops the message when it is too short, has a multicast target, is a
+  * solicited advertisement sent to a multicast destination, or carries
+  * invalid options. An advertisement for one of our own addresses is
+  * either a DAD failure (tentative address) or is logged and ignored.
+  * Otherwise an existing neighbour entry for the target is updated from
+  * the advertisement's flags; if the neighbour stopped being a router,
+  * the default route through the sender is removed.
+  */
+ static void ndisc_recv_na(struct sk_buff *skb)
+ {
+ 	struct nd_msg *msg = (struct nd_msg *)skb->h.raw;
+ 	struct in6_addr *saddr = &skb->nh.ipv6h->saddr;
+ 	struct in6_addr *daddr = &skb->nh.ipv6h->daddr;
+ 	u8 *lladdr = NULL;
+ 	u32 ndoptlen = skb->tail - msg->opt;
+ 	struct ndisc_options ndopts;
+ 	struct net_device *dev = skb->dev;
+ 	struct inet6_ifaddr *ifp;
+ 	struct neighbour *neigh;
+
+ 	if (skb->len < sizeof(struct nd_msg)) {
+ 		ND_PRINTK2(KERN_WARNING
+ 			   "ICMPv6 NA: packet too short\n");
+ 		return;
+ 	}
+
+ 	if (ipv6_addr_is_multicast(&msg->target)) {
+ 		ND_PRINTK2(KERN_WARNING
+ 			   "ICMPv6 NA: target address is multicast.\n");
+ 		return;
+ 	}
+
+ 	if (ipv6_addr_is_multicast(daddr) &&
+ 	    msg->icmph.icmp6_solicited) {
+ 		ND_PRINTK2(KERN_WARNING
+ 			   "ICMPv6 NA: solicited NA is multicasted.\n");
+ 		return;
+ 	}
+
+ 	if (!ndisc_parse_options(msg->opt, ndoptlen, &ndopts)) {
+ 		/* tagged NA: this is the advertisement handler */
+ 		ND_PRINTK2(KERN_WARNING
+ 			   "ICMPv6 NA: invalid ND option\n");
+ 		return;
+ 	}
+ 	if (ndopts.nd_opts_tgt_lladdr) {
+ 		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr, dev);
+ 		if (!lladdr) {
+ 			ND_PRINTK2(KERN_WARNING
+ 				   "ICMPv6 NA: invalid link-layer address length\n");
+ 			return;
+ 		}
+ 	}
+ 	if ((ifp = ipv6_get_ifaddr(&msg->target, dev, 1))) {
+ 		if (ifp->flags & IFA_F_TENTATIVE) {
+ 			/*
+ 			 * NOTE(review): ifp is not put on this path;
+ 			 * presumably addrconf_dad_failure() consumes the
+ 			 * reference - confirm.
+ 			 */
+ 			addrconf_dad_failure(ifp);
+ 			return;
+ 		}
+ 		/* What should we do now? The advertisement
+ 		   is invalid, but the ndisc specs say nothing
+ 		   about it. It could be a misconfiguration, or
+ 		   a smart proxy agent trying to help us :-)
+ 		 */
+ 		ND_PRINTK1(KERN_WARNING
+ 			   "ICMPv6 NA: someone advertises our address on %s!\n",
+ 			   ifp->idev->dev->name);
+ 		in6_ifa_put(ifp);
+ 		return;
+ 	}
+ 	neigh = neigh_lookup(&nd_tbl, &msg->target, dev);
+
+ 	if (neigh) {
+ 		/* remembered to detect a router -> host transition below */
+ 		u8 old_flags = neigh->flags;
+
+ 		if (neigh->nud_state & NUD_FAILED)
+ 			goto out;
+
+ 		neigh_update(neigh, lladdr,
+ 			     msg->icmph.icmp6_solicited ? NUD_REACHABLE : NUD_STALE,
+ 			     NEIGH_UPDATE_F_WEAK_OVERRIDE|
+ 			     (msg->icmph.icmp6_override ? NEIGH_UPDATE_F_OVERRIDE : 0)|
+ 			     NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
+ 			     (msg->icmph.icmp6_router ? NEIGH_UPDATE_F_ISROUTER : 0));
+
+ 		if ((old_flags & ~neigh->flags) & NTF_ROUTER) {
+ 			/*
+ 			 * Change: router to host
+ 			 */
+ 			struct rt6_info *rt;
+ 			rt = rt6_get_dflt_router(saddr, dev);
+ 			if (rt)
+ 				ip6_del_rt(rt, NULL, NULL);
+ 		}
+
+ out:
+ 		neigh_release(neigh);
+ 	}
+ }
+
+ /*
+  * Handle a received Router Solicitation.
+  *
+  * Only acted upon when forwarding is enabled on the receiving interface
+  * and the source address is not unspecified. In that case the neighbour
+  * entry for the sender is created or updated (state STALE) from the
+  * source link-layer address option, if one is present and valid.
+  *
+  * ND_PRINTKn() already rate-limits through net_ratelimit(), so the
+  * diagnostics are not wrapped in a second net_ratelimit() check here.
+  */
+ static void ndisc_recv_rs(struct sk_buff *skb)
+ {
+ 	struct rs_msg *rs_msg = (struct rs_msg *) skb->h.raw;
+ 	unsigned long ndoptlen;
+ 	struct neighbour *neigh;
+ 	struct inet6_dev *idev;
+ 	struct in6_addr *saddr = &skb->nh.ipv6h->saddr;
+ 	struct ndisc_options ndopts;
+ 	u8 *lladdr = NULL;
+
+ 	if (skb->len < sizeof(*rs_msg))
+ 		return;
+
+ 	/* computed only after the length check so it cannot wrap */
+ 	ndoptlen = skb->len - sizeof(*rs_msg);
+
+ 	idev = in6_dev_get(skb->dev);
+ 	if (!idev) {
+ 		ND_PRINTK1("ICMP6 RS: can't find in6 device\n");
+ 		return;
+ 	}
+
+ 	/* Don't accept RS if we're not in router mode */
+ 	if (!idev->cnf.forwarding)
+ 		goto out;
+
+ 	/*
+ 	 * Don't update NCE if src = ::;
+ 	 * this implies that the source node has no ip address assigned yet.
+ 	 */
+ 	if (ipv6_addr_any(saddr))
+ 		goto out;
+
+ 	/* Parse ND options */
+ 	if (!ndisc_parse_options(rs_msg->opt, ndoptlen, &ndopts)) {
+ 		ND_PRINTK2("ICMP6 RS: invalid ND option, ignored\n");
+ 		goto out;
+ 	}
+
+ 	if (ndopts.nd_opts_src_lladdr) {
+ 		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr,
+ 					     skb->dev);
+ 		if (!lladdr)
+ 			goto out;
+ 	}
+
+ 	neigh = __neigh_lookup(&nd_tbl, saddr, skb->dev, 1);
+ 	if (neigh) {
+ 		neigh_update(neigh, lladdr, NUD_STALE,
+ 			     NEIGH_UPDATE_F_WEAK_OVERRIDE|
+ 			     NEIGH_UPDATE_F_OVERRIDE|
+ 			     NEIGH_UPDATE_F_OVERRIDE_ISROUTER);
+ 		neigh_release(neigh);
+ 	}
+ out:
+ 	in6_dev_put(idev);
+ }
+ /*
+ * Handle a received Router Advertisement.
+ *
+ * The RA is ignored unless it comes from a link-local source, is long
+ * enough, the interface has an inet6_dev with forwarding off and accept_ra
+ * on, and its options parse. It then, in order: records the RA flags on the
+ * interface, adds/refreshes/removes the default route through the sender
+ * according to the router lifetime, applies hop limit, retransmit and
+ * reachable timers, updates the sender's neighbour entry, and processes the
+ * prefix-information and MTU options.
+ */
+ static void ndisc_router_discovery(struct sk_buff *skb)
+ {
+ struct ra_msg *ra_msg = (struct ra_msg *) skb->h.raw;
+ struct neighbour *neigh = NULL;
+ struct inet6_dev *in6_dev;
+ struct rt6_info *rt;
+ int lifetime;
+ struct ndisc_options ndopts;
+ int optlen;
+
+ __u8 * opt = (__u8 *)(ra_msg + 1);
+
+ optlen = (skb->tail - skb->h.raw) - sizeof(struct ra_msg);
+
+ if (!(ipv6_addr_type(&skb->nh.ipv6h->saddr) & IPV6_ADDR_LINKLOCAL)) {
+ ND_PRINTK2(KERN_WARNING
+ "ICMPv6 RA: source address is not link-local.\n");
+ return;
+ }
+ if (optlen < 0) {
+ ND_PRINTK2(KERN_WARNING
+ "ICMPv6 RA: packet too short\n");
+ return;
+ }
+
+ /*
+ * set the RA_RECV flag in the interface
+ */
+
+ in6_dev = in6_dev_get(skb->dev);
+ if (in6_dev == NULL) {
+ ND_PRINTK0(KERN_ERR
+ "ICMPv6 RA: can't find inet6 device for %s.\n",
+ skb->dev->name);
+ return;
+ }
+ if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_ra) {
+ in6_dev_put(in6_dev);
+ return;
+ }
+
+ if (!ndisc_parse_options(opt, optlen, &ndopts)) {
+ in6_dev_put(in6_dev);
+ ND_PRINTK2(KERN_WARNING
+ "ICMP6 RA: invalid ND options\n");
+ return;
+ }
+
+ if (in6_dev->if_flags & IF_RS_SENT) {
+ /*
+ * flag that an RA was received after an RS was sent
+ * out on this interface.
+ */
+ in6_dev->if_flags |= IF_RA_RCVD;
+ }
+
+ /*
+ * Remember the managed/otherconf flags from most recently
+ * received RA message (RFC 2462) -- yoshfuji
+ */
+ in6_dev->if_flags = (in6_dev->if_flags & ~(IF_RA_MANAGED |
+ IF_RA_OTHERCONF)) |
+ (ra_msg->icmph.icmp6_addrconf_managed ?
+ IF_RA_MANAGED : 0) |
+ (ra_msg->icmph.icmp6_addrconf_other ?
+ IF_RA_OTHERCONF : 0);
+
+ lifetime = ntohs(ra_msg->icmph.icmp6_rt_lifetime);
+
+ rt = rt6_get_dflt_router(&skb->nh.ipv6h->saddr, skb->dev);
+
+ if (rt)
+ neigh = rt->rt6i_nexthop;
+
+ if (rt && lifetime == 0) { /* zero lifetime: the sender is no longer a default router */
+ neigh_clone(neigh); /* take our own reference on neigh before the route is deleted; released at out */
+ ip6_del_rt(rt, NULL, NULL);
+ rt = NULL;
+ }
+
+ if (rt == NULL && lifetime) {
+ ND_PRINTK3(KERN_DEBUG
+ "ICMPv6 RA: adding default router.\n");
+
+ rt = rt6_add_dflt_router(&skb->nh.ipv6h->saddr, skb->dev);
+ if (rt == NULL) {
+ ND_PRINTK0(KERN_ERR
+ "ICMPv6 RA: %s() failed to add default route.\n",
+ __FUNCTION__);
+ in6_dev_put(in6_dev);
+ return;
+ }
+
+ neigh = rt->rt6i_nexthop;
+ if (neigh == NULL) {
+ ND_PRINTK0(KERN_ERR
+ "ICMPv6 RA: %s() got default router without neighbour.\n",
+ __FUNCTION__);
+ dst_release(&rt->u.dst);
+ in6_dev_put(in6_dev);
+ return;
+ }
+ neigh->flags |= NTF_ROUTER;
+ }
+
+ if (rt)
+ rt->rt6i_expires = jiffies + (HZ * lifetime); /* lifetime is multiplied by HZ to convert it to jiffies */
+
+ if (ra_msg->icmph.icmp6_hop_limit) {
+ in6_dev->cnf.hop_limit = ra_msg->icmph.icmp6_hop_limit;
+ if (rt)
+ rt->u.dst.metrics[RTAX_HOPLIMIT-1] = ra_msg->icmph.icmp6_hop_limit;
+ }
+
+ /*
+ * Update Reachable Time and Retrans Timer
+ */
+
+ if (in6_dev->nd_parms) {
+ unsigned long rtime = ntohl(ra_msg->retrans_timer);
+
+ if (rtime && rtime/1000 < MAX_SCHEDULE_TIMEOUT/HZ) { /* skip zero and values whose conversion below could overflow */
+ rtime = (rtime*HZ)/1000; /* value divided by 1000 and scaled by HZ: presumably milliseconds to jiffies */
+ if (rtime < HZ/10)
+ rtime = HZ/10;
+ in6_dev->nd_parms->retrans_time = rtime;
+ in6_dev->tstamp = jiffies;
+ inet6_ifinfo_notify(RTM_NEWLINK, in6_dev);
+ }
+
+ rtime = ntohl(ra_msg->reachable_time);
+ if (rtime && rtime/1000 < MAX_SCHEDULE_TIMEOUT/(3*HZ)) { /* tighter bound because gc_staletime below is 3 * rtime */
+ rtime = (rtime*HZ)/1000;
+
+ if (rtime < HZ/10)
+ rtime = HZ/10;
+
+ if (rtime != in6_dev->nd_parms->base_reachable_time) {
+ in6_dev->nd_parms->base_reachable_time = rtime;
+ in6_dev->nd_parms->gc_staletime = 3 * rtime;
+ in6_dev->nd_parms->reachable_time = neigh_rand_reach_time(rtime);
+ in6_dev->tstamp = jiffies;
+ inet6_ifinfo_notify(RTM_NEWLINK, in6_dev);
+ }
+ }
+ }
+
+ /*
+ * Process options.
+ */
+
+ if (!neigh)
+ neigh = __neigh_lookup(&nd_tbl, &skb->nh.ipv6h->saddr,
+ skb->dev, 1);
+ if (neigh) {
+ u8 *lladdr = NULL;
+ if (ndopts.nd_opts_src_lladdr) {
+ lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr,
+ skb->dev);
+ if (!lladdr) {
+ ND_PRINTK2(KERN_WARNING
+ "ICMPv6 RA: invalid link-layer address length\n");
+ goto out;
+ }
+ }
+ neigh_update(neigh, lladdr, NUD_STALE,
+ NEIGH_UPDATE_F_WEAK_OVERRIDE|
+ NEIGH_UPDATE_F_OVERRIDE|
+ NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
+ NEIGH_UPDATE_F_ISROUTER);
+ }
+
+ if (ndopts.nd_opts_pi) {
+ struct nd_opt_hdr *p;
+ for (p = ndopts.nd_opts_pi; /* walk every prefix-information option, first to last */
+ p;
+ p = ndisc_next_option(p, ndopts.nd_opts_pi_end)) {
+ addrconf_prefix_rcv(skb->dev, (u8*)p, (p->nd_opt_len) << 3);
+ }
+ }
+
+ if (ndopts.nd_opts_mtu) {
+ u32 mtu;
+
+ memcpy(&mtu, ((u8*)(ndopts.nd_opts_mtu+1))+2, sizeof(mtu)); /* the MTU value sits 2 bytes past the option header */
+ mtu = ntohl(mtu);
+
+ if (mtu < IPV6_MIN_MTU || mtu > skb->dev->mtu) {
+ ND_PRINTK2(KERN_WARNING
+ "ICMPv6 RA: invalid mtu: %d\n",
+ mtu);
+ } else if (in6_dev->cnf.mtu6 != mtu) {
+ in6_dev->cnf.mtu6 = mtu;
+
+ if (rt)
+ rt->u.dst.metrics[RTAX_MTU-1] = mtu;
+
+ rt6_mtu_change(skb->dev, mtu);
+ }
+ }
+
+ if (ndopts.nd_opts_tgt_lladdr || ndopts.nd_opts_rh) {
+ ND_PRINTK2(KERN_WARNING
+ "ICMPv6 RA: invalid RA options");
+ }
+ out:
+ if (rt) /* NOTE(review): if rt was set but rt->rt6i_nexthop was NULL, the neigh obtained from __neigh_lookup() above is not released here - confirm whether that can happen */
+ dst_release(&rt->u.dst);
+ else if (neigh)
+ neigh_release(neigh);
+ in6_dev_put(in6_dev);
+ }
+
+ /*
+  * Handle a received Redirect message.
+  *
+  * The message is dropped unless it comes from a link-local source, is
+  * long enough, names a non-multicast destination, and has a target that
+  * is either the destination itself (on-link) or a link-local address.
+  * The interface must not be forwarding and must accept redirects. A
+  * valid redirect is handed to rt6_redirect() together with the
+  * neighbour entry for the target and the optional target link-layer
+  * address.
+  */
+ static void ndisc_redirect_rcv(struct sk_buff *skb)
+ {
+ 	struct in6_addr *src = &skb->nh.ipv6h->saddr;
+ 	struct in6_addr *target;	/* new first hop to destination */
+ 	struct in6_addr *dest;
+ 	struct ndisc_options ndopts;
+ 	struct inet6_dev *idev;
+ 	struct neighbour *nexthop;
+ 	u8 *lladdr = NULL;
+ 	int on_link = 0;
+ 	int optlen;
+
+ 	if (!(ipv6_addr_type(src) & IPV6_ADDR_LINKLOCAL)) {
+ 		ND_PRINTK2(KERN_WARNING
+ 			   "ICMPv6 Redirect: source address is not link-local.\n");
+ 		return;
+ 	}
+
+ 	/* bytes left after the ICMPv6 header and the two addresses */
+ 	optlen = skb->tail - skb->h.raw;
+ 	optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
+ 	if (optlen < 0) {
+ 		ND_PRINTK2(KERN_WARNING
+ 			   "ICMPv6 Redirect: packet too short\n");
+ 		return;
+ 	}
+
+ 	/* target and destination follow the ICMPv6 header back to back */
+ 	target = (struct in6_addr *) ((struct icmp6hdr *) skb->h.raw + 1);
+ 	dest = target + 1;
+
+ 	if (ipv6_addr_is_multicast(dest)) {
+ 		ND_PRINTK2(KERN_WARNING
+ 			   "ICMPv6 Redirect: destination address is multicast.\n");
+ 		return;
+ 	}
+
+ 	if (ipv6_addr_equal(dest, target)) {
+ 		on_link = 1;
+ 	} else if (!(ipv6_addr_type(target) & IPV6_ADDR_LINKLOCAL)) {
+ 		ND_PRINTK2(KERN_WARNING
+ 			   "ICMPv6 Redirect: target address is not link-local.\n");
+ 		return;
+ 	}
+
+ 	idev = in6_dev_get(skb->dev);
+ 	if (idev == NULL)
+ 		return;
+ 	if (idev->cnf.forwarding || !idev->cnf.accept_redirects)
+ 		goto put_idev;
+
+ 	/* RFC2461 8.1:
+ 	 *	The IP source address of the Redirect MUST be the same as the current
+ 	 *	first-hop router for the specified ICMP Destination Address.
+ 	 */
+
+ 	if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) {
+ 		ND_PRINTK2(KERN_WARNING
+ 			   "ICMPv6 Redirect: invalid ND options\n");
+ 		goto put_idev;
+ 	}
+
+ 	if (ndopts.nd_opts_tgt_lladdr) {
+ 		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
+ 					     skb->dev);
+ 		if (lladdr == NULL) {
+ 			ND_PRINTK2(KERN_WARNING
+ 				   "ICMPv6 Redirect: invalid link-layer address length\n");
+ 			goto put_idev;
+ 		}
+ 	}
+
+ 	nexthop = __neigh_lookup(&nd_tbl, target, skb->dev, 1);
+ 	if (nexthop != NULL) {
+ 		rt6_redirect(dest, src, nexthop, lladdr, on_link);
+ 		neigh_release(nexthop);
+ 	}
+
+ put_idev:
+ 	in6_dev_put(idev);
+ }
+
+/*
+ * Build and transmit an ICMPv6 Redirect to the sender of @skb.
+ *
+ * @skb:    packet that triggered the redirect.  Its IPv6 source address is
+ *          the destination of the Redirect, its IPv6 destination address is
+ *          copied into the Redirect's destination field, and as much of the
+ *          packet as fits is embedded in a redirected-header option.
+ * @neigh:  neighbour entry; when it is NUD_VALID and the device has a
+ *          link-layer address, neigh->ha is sent as a target link-layer
+ *          address option.
+ * @target: address written into the Redirect's target field.
+ *
+ * Nothing is returned; on any failure the route reference is dropped and
+ * the function returns without sending.
+ */
+void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
+ struct in6_addr *target)
+{
+ struct sock *sk = ndisc_socket->sk;
+ int len = sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
+ struct sk_buff *buff;
+ struct icmp6hdr *icmph;
+ struct in6_addr saddr_buf;
+ struct in6_addr *addrp;
+ struct net_device *dev;
+ struct rt6_info *rt;
+ struct dst_entry *dst;
+ struct inet6_dev *idev;
+ struct flowi fl;
+ u8 *opt;
+ int rd_len;
+ int err;
+ int hlen;
+ u8 ha_buf[MAX_ADDR_LEN], *ha = NULL;
+
+ dev = skb->dev;
+
+ /* The Redirect is sent from a link-local address of the receiving device. */
+ if (ipv6_get_lladdr(dev, &saddr_buf)) {
+ ND_PRINTK2(KERN_WARNING
+ "ICMPv6 Redirect: no link-local address on %s\n",
+ dev->name);
+ return;
+ }
+
+ /* Route back to the original sender of skb. */
+ ndisc_flow_init(&fl, NDISC_REDIRECT, &saddr_buf, &skb->nh.ipv6h->saddr);
+
+ dst = ip6_route_output(NULL, &fl);
+ if (dst == NULL)
+ return;
+
+ err = xfrm_lookup(&dst, &fl, NULL, 0);
+ if (err) {
+ dst_release(dst);
+ return;
+ }
+
+ rt = (struct rt6_info *) dst;
+
+ /* A route through a gateway means the sender is not directly reachable. */
+ if (rt->rt6i_flags & RTF_GATEWAY) {
+ ND_PRINTK2(KERN_WARNING
+ "ICMPv6 Redirect: destination is not a neighbour.\n");
+ dst_release(dst);
+ return;
+ }
+ /* NOTE(review): presumably rate-limits redirects per route to one per second — confirm against xrlim_allow(). */
+ if (!xrlim_allow(dst, 1*HZ)) {
+ dst_release(dst);
+ return;
+ }
+
+ /* Snapshot the neighbour's link-layer address under its lock, if valid. */
+ if (dev->addr_len) {
+ read_lock_bh(&neigh->lock);
+ if (neigh->nud_state & NUD_VALID) {
+ memcpy(ha_buf, neigh->ha, dev->addr_len);
+ read_unlock_bh(&neigh->lock);
+ ha = ha_buf;
+ len += ndisc_opt_addr_space(dev);
+ } else
+ read_unlock_bh(&neigh->lock);
+ }
+
+ /*
+ * Size of the redirected-header option: 8 bytes of option header plus
+ * the original packet, capped so the whole Redirect fits in
+ * IPV6_MIN_MTU, then rounded down to a multiple of 8 because the
+ * option length field below is expressed in 8-octet units.
+ */
+ rd_len = min_t(unsigned int,
+ IPV6_MIN_MTU-sizeof(struct ipv6hdr)-len, skb->len + 8);
+ rd_len &= ~0x7;
+ len += rd_len;
+
+ buff = sock_alloc_send_skb(sk, MAX_HEADER + len + LL_RESERVED_SPACE(dev),
+ 1, &err);
+ if (buff == NULL) {
+ ND_PRINTK0(KERN_ERR
+ "ICMPv6 Redirect: %s() failed to allocate an skb.\n",
+ __FUNCTION__);
+ dst_release(dst);
+ return;
+ }
+
+ /* NOTE(review): hlen is assigned here but never read in this function. */
+ hlen = 0;
+
+ skb_reserve(buff, LL_RESERVED_SPACE(dev));
+ ip6_nd_hdr(sk, buff, dev, &saddr_buf, &skb->nh.ipv6h->saddr,
+ IPPROTO_ICMPV6, len);
+
+ icmph = (struct icmp6hdr *)skb_put(buff, len);
+ buff->h.raw = (unsigned char*)icmph;
+
+ memset(icmph, 0, sizeof(struct icmp6hdr));
+ icmph->icmp6_type = NDISC_REDIRECT;
+
+ /*
+ * copy target and destination addresses
+ */
+
+ addrp = (struct in6_addr *)(icmph + 1);
+ ipv6_addr_copy(addrp, target);
+ addrp++;
+ ipv6_addr_copy(addrp, &skb->nh.ipv6h->daddr);
+
+ opt = (u8*) (addrp + 1);
+
+ /*
+ * include target_address option
+ */
+
+ if (ha)
+ opt = ndisc_fill_addr_option(opt, ND_OPT_TARGET_LL_ADDR, ha,
+ dev->addr_len, dev->type);
+
+ /*
+ * build redirect option and copy skb over to the new packet.
+ */
+
+ /* 8-byte option header: type, length in 8-octet units, 6 zero bytes. */
+ memset(opt, 0, 8);
+ *(opt++) = ND_OPT_REDIRECT_HDR;
+ *(opt++) = (rd_len >> 3);
+ opt += 6;
+
+ /* rd_len includes the 8-byte option header written above. */
+ memcpy(opt, skb->nh.ipv6h, rd_len - 8);
+
+ icmph->icmp6_cksum = csum_ipv6_magic(&saddr_buf, &skb->nh.ipv6h->saddr,
+ len, IPPROTO_ICMPV6,
+ csum_partial((u8 *) icmph, len, 0));
+
+ /* The route reference is handed over to buff from here on. */
+ buff->dst = dst;
+ idev = in6_dev_get(dst->dev);
+ IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
+ err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, buff, NULL, dst->dev, dst_output);
+ if (!err) {
+ ICMP6_INC_STATS(idev, ICMP6_MIB_OUTREDIRECTS);
+ ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS);
+ }
+
+ if (likely(idev != NULL))
+ in6_dev_put(idev);
+}
+
+/*
+ * Re-run neighbour discovery input processing on a deferred skb and then
+ * free it; ndisc_rcv() itself does not free the skb.
+ */
+static void pndisc_redo(struct sk_buff *skb)
+{
+ ndisc_rcv(skb);
+ kfree_skb(skb);
+}
+
+/*
+ * Entry point for received neighbour discovery messages.
+ *
+ * Linearizes the skb, exposes the data from the transport header onwards,
+ * rejects messages whose hop limit is not 255 or whose ICMPv6 code is not 0,
+ * and dispatches on the ICMPv6 type.  Unknown types are ignored.
+ * Always returns 0; the skb is not freed here.
+ */
+int ndisc_rcv(struct sk_buff *skb)
+{
+	struct nd_msg *msg;
+
+	/* Make the whole packet linear; give up quietly if that fails. */
+	if (!pskb_may_pull(skb, skb->len))
+		return 0;
+
+	msg = (struct nd_msg *) skb->h.raw;
+
+	/* Move skb->data back to the start of the ICMPv6 header. */
+	__skb_push(skb, skb->data-skb->h.raw);
+
+	if (skb->nh.ipv6h->hop_limit != 255) {
+		ND_PRINTK2(KERN_WARNING
+			   "ICMPv6 NDISC: invalid hop-limit: %d\n",
+			   skb->nh.ipv6h->hop_limit);
+		return 0;
+	}
+
+	if (msg->icmph.icmp6_code != 0) {
+		ND_PRINTK2(KERN_WARNING
+			   "ICMPv6 NDISC: invalid ICMPv6 code: %d\n",
+			   msg->icmph.icmp6_code);
+		return 0;
+	}
+
+	switch (msg->icmph.icmp6_type) {
+	case NDISC_NEIGHBOUR_SOLICITATION:
+		ndisc_recv_ns(skb);
+		break;
+	case NDISC_NEIGHBOUR_ADVERTISEMENT:
+		ndisc_recv_na(skb);
+		break;
+	case NDISC_ROUTER_SOLICITATION:
+		ndisc_recv_rs(skb);
+		break;
+	case NDISC_ROUTER_ADVERTISEMENT:
+		ndisc_router_discovery(skb);
+		break;
+	case NDISC_REDIRECT:
+		ndisc_redirect_rcv(skb);
+		break;
+	default:
+		/* Not a neighbour discovery type handled here. */
+		break;
+	}
+
+	return 0;
+}
+
+/*
+ * Netdevice notifier callback.  When a device changes its address or goes
+ * down, update the neighbour table for that device and run the IPv6 FIB
+ * garbage collector.  All other events are ignored.  Always returns
+ * NOTIFY_DONE.
+ */
+static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
+{
+	struct net_device *dev = ptr;
+
+	if (event == NETDEV_CHANGEADDR) {
+		neigh_changeaddr(&nd_tbl, dev);
+		fib6_run_gc(~0UL);
+	} else if (event == NETDEV_DOWN) {
+		neigh_ifdown(&nd_tbl, dev);
+		fib6_run_gc(~0UL);
+	}
+
+	return NOTIFY_DONE;
+}
+
+/* Notifier block that routes netdevice events to ndisc_netdev_event(). */
+static struct notifier_block ndisc_netdev_notifier = {
+ .notifier_call = ndisc_netdev_event,
+};
+
+#ifdef CONFIG_SYSCTL
+/*
+ * Log a warning that the current process touched a deprecated neighbour
+ * sysctl and should use the "_ms" variant instead.
+ *
+ * @ctl:      table entry that was accessed (its procname is printed).
+ * @func:     label for the access path, printed in parentheses.
+ * @dev_name: device name component of the sysctl path, or "default".
+ *
+ * At most five warnings are ever printed, and no warning is printed when
+ * the process name equals that of the most recently warned process.
+ */
+static void ndisc_warn_deprecated_sysctl(struct ctl_table *ctl,
+					 const char *func, const char *dev_name)
+{
+	static char warncomm[TASK_COMM_LEN];
+	static int warned;
+
+	if (warned >= 5)
+		return;
+	if (!strcmp(warncomm, current->comm))
+		return;
+
+	strcpy(warncomm, current->comm);
+	printk(KERN_WARNING
+		"process `%s' is using deprecated sysctl (%s) "
+		"net.ipv6.neigh.%s.%s; "
+		"Use net.ipv6.neigh.%s.%s_ms "
+		"instead.\n",
+		warncomm, func,
+		dev_name, ctl->procname,
+		dev_name, ctl->procname);
+	warned++;
+}
+
+/*
+ * proc_handler-style callback for the per-device neighbour timing sysctls
+ * (it receives filp/buffer/lenp/ppos and delegates to the proc_dointvec*
+ * helpers).
+ *
+ * Accesses to the deprecated NET_NEIGH_RETRANS_TIME and
+ * NET_NEIGH_REACHABLE_TIME entries trigger a warning.  Because this is the
+ * procfs access path, the warning is labelled "procfs"; the original code
+ * labelled it "syscall", swapped with the sysctl(2) strategy routine.
+ *
+ * After a successful write on a device-bound entry, the randomized
+ * reachable_time is recomputed if a reachable-time entry was written, the
+ * device timestamp is refreshed and an RTM_NEWLINK notification is sent.
+ *
+ * Returns the result of the proc helper, or -1 for an unknown ctl_name.
+ */
+int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, struct file * filp, void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	struct net_device *dev = ctl->extra1;
+	struct inet6_dev *idev;
+	int ret;
+
+	if (ctl->ctl_name == NET_NEIGH_RETRANS_TIME ||
+	    ctl->ctl_name == NET_NEIGH_REACHABLE_TIME)
+		ndisc_warn_deprecated_sysctl(ctl, "procfs", dev ? dev->name : "default");
+
+	switch (ctl->ctl_name) {
+	case NET_NEIGH_RETRANS_TIME:
+		ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+		break;
+	case NET_NEIGH_REACHABLE_TIME:
+		ret = proc_dointvec_jiffies(ctl, write,
+					    filp, buffer, lenp, ppos);
+		break;
+	case NET_NEIGH_RETRANS_TIME_MS:
+	case NET_NEIGH_REACHABLE_TIME_MS:
+		ret = proc_dointvec_ms_jiffies(ctl, write,
+					       filp, buffer, lenp, ppos);
+		break;
+	default:
+		ret = -1;
+	}
+
+	/* Only a successful write to a per-device entry has side effects. */
+	if (write && ret == 0 && dev && (idev = in6_dev_get(dev)) != NULL) {
+		if (ctl->ctl_name == NET_NEIGH_REACHABLE_TIME ||
+		    ctl->ctl_name == NET_NEIGH_REACHABLE_TIME_MS)
+			idev->nd_parms->reachable_time = neigh_rand_reach_time(idev->nd_parms->base_reachable_time);
+		idev->tstamp = jiffies;
+		inet6_ifinfo_notify(RTM_NEWLINK, idev);
+		in6_dev_put(idev);
+	}
+	return ret;
+}
+
+/*
+ * Strategy routine for the per-device neighbour timing sysctls (it receives
+ * the name vector and old/new value buffers and delegates to the
+ * sysctl_jiffies / sysctl_ms_jiffies helpers).
+ *
+ * Accesses to the deprecated NET_NEIGH_RETRANS_TIME and
+ * NET_NEIGH_REACHABLE_TIME entries trigger a warning.  Because this is the
+ * binary sysctl access path, the warning is labelled "syscall"; the
+ * original code labelled it "procfs", swapped with the proc handler.
+ *
+ * When a new value was supplied, the helper returned a positive value and
+ * the entry is device-bound, the randomized reachable_time is recomputed if
+ * a reachable-time entry was written, the device timestamp is refreshed and
+ * an RTM_NEWLINK notification is sent.
+ *
+ * Returns the helper's result, or 0 for entries that have no helper here
+ * (including NET_NEIGH_RETRANS_TIME).
+ */
+static int ndisc_ifinfo_sysctl_strategy(ctl_table *ctl, int __user *name,
+					int nlen, void __user *oldval,
+					size_t __user *oldlenp,
+					void __user *newval, size_t newlen,
+					void **context)
+{
+	struct net_device *dev = ctl->extra1;
+	struct inet6_dev *idev;
+	int ret;
+
+	if (ctl->ctl_name == NET_NEIGH_RETRANS_TIME ||
+	    ctl->ctl_name == NET_NEIGH_REACHABLE_TIME)
+		ndisc_warn_deprecated_sysctl(ctl, "syscall", dev ? dev->name : "default");
+
+	switch (ctl->ctl_name) {
+	case NET_NEIGH_REACHABLE_TIME:
+		ret = sysctl_jiffies(ctl, name, nlen,
+				     oldval, oldlenp, newval, newlen,
+				     context);
+		break;
+	case NET_NEIGH_RETRANS_TIME_MS:
+	case NET_NEIGH_REACHABLE_TIME_MS:
+		ret = sysctl_ms_jiffies(ctl, name, nlen,
+					oldval, oldlenp, newval, newlen,
+					context);
+		break;
+	default:
+		ret = 0;
+	}
+
+	if (newval && newlen && ret > 0 &&
+	    dev && (idev = in6_dev_get(dev)) != NULL) {
+		if (ctl->ctl_name == NET_NEIGH_REACHABLE_TIME ||
+		    ctl->ctl_name == NET_NEIGH_REACHABLE_TIME_MS)
+			idev->nd_parms->reachable_time = neigh_rand_reach_time(idev->nd_parms->base_reachable_time);
+		idev->tstamp = jiffies;
+		inet6_ifinfo_notify(RTM_NEWLINK, idev);
+		in6_dev_put(idev);
+	}
+
+	return ret;
+}
+
+#endif
+
+/*
+ * Set up neighbour discovery: create the kernel ICMPv6 raw control socket,
+ * initialize the neighbour table, register the neighbour sysctls (when
+ * CONFIG_SYSCTL is set) and register the netdevice notifier.
+ *
+ * Returns 0 on success or the negative error from sock_create_kern().
+ * The @ops argument is not used in this function.
+ */
+int __init ndisc_init(struct net_proto_family *ops)
+{
+ struct ipv6_pinfo *np;
+ struct sock *sk;
+ int err;
+
+ err = sock_create_kern(PF_INET6, SOCK_RAW, IPPROTO_ICMPV6, &ndisc_socket);
+ if (err < 0) {
+ ND_PRINTK0(KERN_ERR
+ "ICMPv6 NDISC: Failed to initialize the control socket (err %d).\n",
+ err);
+ ndisc_socket = NULL; /* For safety. */
+ return err;
+ }
+
+ sk = ndisc_socket->sk;
+ np = inet6_sk(sk);
+ /* Allocations made through this socket must not sleep. */
+ sk->sk_allocation = GFP_ATOMIC;
+ /* ndisc_rcv() discards messages whose hop limit is not 255. */
+ np->hop_limit = 255;
+ /* Do not loopback ndisc messages */
+ np->mc_loop = 0;
+ /* NOTE(review): presumably unhashed so the control socket never receives packets itself — confirm. */
+ sk->sk_prot->unhash(sk);
+
+ /*
+ * Initialize the neighbour table
+ */
+
+ neigh_table_init(&nd_tbl);
+
+#ifdef CONFIG_SYSCTL
+ neigh_sysctl_register(NULL, &nd_tbl.parms, NET_IPV6, NET_IPV6_NEIGH,
+ "ipv6",
+ &ndisc_ifinfo_sysctl_change,
+ &ndisc_ifinfo_sysctl_strategy);
+#endif
+
+ register_netdevice_notifier(&ndisc_netdev_notifier);
+ return 0;
+}
+
+/*
+ * Undo ndisc_init(): unregister the netdevice notifier, remove the
+ * neighbour sysctls, tear down the neighbour table and release the control
+ * socket.
+ *
+ * The notifier is unregistered first so that ndisc_netdev_event() can no
+ * longer be invoked against a neighbour table that is being cleared.  The
+ * original code never unregistered it, leaving a dangling notifier entry
+ * after cleanup.
+ */
+void ndisc_cleanup(void)
+{
+	unregister_netdevice_notifier(&ndisc_netdev_notifier);
+#ifdef CONFIG_SYSCTL
+	neigh_sysctl_unregister(&nd_tbl.parms);
+#endif
+	neigh_table_clear(&nd_tbl);
+	sock_release(ndisc_socket);
+	ndisc_socket = NULL; /* For safety. */
+}
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
new file mode 100644
index 000000000000..77ec704c9ee3
--- /dev/null
+++ b/net/ipv6/netfilter/Kconfig
@@ -0,0 +1,242 @@
+#
+# IP netfilter configuration
+#
+
+menu "IPv6: Netfilter Configuration (EXPERIMENTAL)"
+ depends on INET && IPV6 && NETFILTER && EXPERIMENTAL
+
+#tristate 'Connection tracking (required for masq/NAT)' CONFIG_IP6_NF_CONNTRACK
+#if [ "$CONFIG_IP6_NF_CONNTRACK" != "n" ]; then
+# dep_tristate ' FTP protocol support' CONFIG_IP6_NF_FTP $CONFIG_IP6_NF_CONNTRACK
+#fi
+config IP6_NF_QUEUE
+ tristate "Userspace queueing via NETLINK"
+ ---help---
+
+ This option adds a queue handler to the kernel for IPv6
+ packets, which lets us receive the packets filtered
+ with the QUEUE target using libiptc, as we can already do
+ for IPv4.
+
+ (C) Fernando Anton 2001
+ IPv64 Project - Work based in IPv64 draft by Arturo Azcorra.
+ Universidad Carlos III de Madrid
+ Universidad Politecnica de Alcala de Henares
+ email: <fanton@it.uc3m.es>.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP6_NF_IPTABLES
+ tristate "IP6 tables support (required for filtering/masq/NAT)"
+ help
+ ip6tables is a general, extensible packet identification framework.
+ Currently only the packet filtering and packet mangling subsystems
+ for IPv6 use this, but connection tracking is going to follow.
+ Say 'Y' or 'M' here if you want to use either of those.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+# The simple matches.
+config IP6_NF_MATCH_LIMIT
+ tristate "limit match support"
+ depends on IP6_NF_IPTABLES
+ help
+ limit matching allows you to control the rate at which a rule can be
+ matched: mainly useful in combination with the LOG target ("LOG
+ target support", below) and to avoid some Denial of Service attacks.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP6_NF_MATCH_MAC
+ tristate "MAC address match support"
+ depends on IP6_NF_IPTABLES
+ help
+ mac matching allows you to match packets based on the source
+ Ethernet address of the packet.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP6_NF_MATCH_RT
+ tristate "Routing header match support"
+ depends on IP6_NF_IPTABLES
+ help
+ rt matching allows you to match packets based on the routing
+ header of the packet.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP6_NF_MATCH_OPTS
+ tristate "Hop-by-hop and Dst opts header match support"
+ depends on IP6_NF_IPTABLES
+ help
+ This allows one to match packets based on the hop-by-hop
+ and destination options headers of a packet.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP6_NF_MATCH_FRAG
+ tristate "Fragmentation header match support"
+ depends on IP6_NF_IPTABLES
+ help
+ frag matching allows you to match packets based on the fragmentation
+ header of the packet.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP6_NF_MATCH_HL
+ tristate "HL match support"
+ depends on IP6_NF_IPTABLES
+ help
+ HL matching allows you to match packets based on the hop
+ limit of the packet.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP6_NF_MATCH_MULTIPORT
+ tristate "Multiple port match support"
+ depends on IP6_NF_IPTABLES
+ help
+ Multiport matching allows you to match TCP or UDP packets based on
+ a series of source or destination ports: normally a rule can only
+ match a single range of ports.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP6_NF_MATCH_OWNER
+ tristate "Owner match support"
+ depends on IP6_NF_IPTABLES
+ help
+ Packet owner matching allows you to match locally-generated packets
+ based on who created them: the user, group, process or session.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+# dep_tristate ' MAC address match support' CONFIG_IP6_NF_MATCH_MAC $CONFIG_IP6_NF_IPTABLES
+config IP6_NF_MATCH_MARK
+ tristate "netfilter MARK match support"
+ depends on IP6_NF_IPTABLES
+ help
+ Netfilter mark matching allows you to match packets based on the
+ `nfmark' value in the packet. This can be set by the MARK target
+ (see below).
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP6_NF_MATCH_IPV6HEADER
+ tristate "IPv6 Extension Headers Match"
+ depends on IP6_NF_IPTABLES
+ help
+ This module allows one to match packets based upon
+ the ipv6 extension headers.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP6_NF_MATCH_AHESP
+ tristate "AH/ESP match support"
+ depends on IP6_NF_IPTABLES
+ help
+ This module allows one to match AH and ESP packets.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP6_NF_MATCH_LENGTH
+ tristate "Packet Length match support"
+ depends on IP6_NF_IPTABLES
+ help
+ This option allows you to match the length of a packet against a
+ specific value or range of values.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP6_NF_MATCH_EUI64
+ tristate "EUI64 address check"
+ depends on IP6_NF_IPTABLES
+ help
+ This module performs checking on the IPv6 source address.
+ It compares the last 64 bits with the EUI64 address (derived
+ from the MAC address).
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP6_NF_MATCH_PHYSDEV
+ tristate "Physdev match support"
+ depends on IP6_NF_IPTABLES && BRIDGE_NETFILTER
+ help
+ Physdev packet matching matches against the physical bridge ports
+ the IP packet arrived on or will leave by.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+# dep_tristate ' Multiple port match support' CONFIG_IP6_NF_MATCH_MULTIPORT $CONFIG_IP6_NF_IPTABLES
+# dep_tristate ' TOS match support' CONFIG_IP6_NF_MATCH_TOS $CONFIG_IP6_NF_IPTABLES
+# if [ "$CONFIG_IP6_NF_CONNTRACK" != "n" ]; then
+# dep_tristate ' Connection state match support' CONFIG_IP6_NF_MATCH_STATE $CONFIG_IP6_NF_CONNTRACK $CONFIG_IP6_NF_IPTABLES
+# fi
+# if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
+# dep_tristate ' Unclean match support (EXPERIMENTAL)' CONFIG_IP6_NF_MATCH_UNCLEAN $CONFIG_IP6_NF_IPTABLES
+# dep_tristate ' Owner match support (EXPERIMENTAL)' CONFIG_IP6_NF_MATCH_OWNER $CONFIG_IP6_NF_IPTABLES
+# fi
+# The targets
+config IP6_NF_FILTER
+ tristate "Packet filtering"
+ depends on IP6_NF_IPTABLES
+ help
+ Packet filtering defines a table `filter', which has a series of
+ rules for simple packet filtering at local input, forwarding and
+ local output. See the man page for iptables(8).
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+config IP6_NF_TARGET_LOG
+ tristate "LOG target support"
+ depends on IP6_NF_FILTER
+ help
+ This option adds a `LOG' target, which allows you to create rules in
+ any iptables table which records the packet header to the syslog.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+# if [ "$CONFIG_IP6_NF_FILTER" != "n" ]; then
+# dep_tristate ' REJECT target support' CONFIG_IP6_NF_TARGET_REJECT $CONFIG_IP6_NF_FILTER
+# if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
+# dep_tristate ' MIRROR target support (EXPERIMENTAL)' CONFIG_IP6_NF_TARGET_MIRROR $CONFIG_IP6_NF_FILTER
+# fi
+# fi
+config IP6_NF_MANGLE
+ tristate "Packet mangling"
+ depends on IP6_NF_IPTABLES
+ help
+ This option adds a `mangle' table to iptables: see the man page for
+ iptables(8). This table is used for various packet alterations
+ which can affect how the packet is routed.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+# dep_tristate ' TOS target support' CONFIG_IP6_NF_TARGET_TOS $CONFIG_IP_NF_MANGLE
+config IP6_NF_TARGET_MARK
+ tristate "MARK target support"
+ depends on IP6_NF_MANGLE
+ help
+ This option adds a `MARK' target, which allows you to create rules
+ in the `mangle' table which alter the netfilter mark (nfmark) field
+ associated with the packet prior to routing. This can change
+ the routing method (see `Use netfilter MARK value as routing
+ key') and can also be used by other subsystems to change their
+ behavior.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
+#dep_tristate ' LOG target support' CONFIG_IP6_NF_TARGET_LOG $CONFIG_IP6_NF_IPTABLES
+config IP6_NF_RAW
+ tristate 'raw table support (required for TRACE)'
+ depends on IP6_NF_IPTABLES
+ help
+ This option adds a `raw' table to ip6tables. This table is the very
+ first in the netfilter framework and hooks in at the PREROUTING
+ and OUTPUT chains.
+
+ If you want to compile it as a module, say M here and read
+ <file:Documentation/modules.txt>. If unsure, say `N'.
+
+endmenu
+
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
new file mode 100644
index 000000000000..2e51714953b6
--- /dev/null
+++ b/net/ipv6/netfilter/Makefile
@@ -0,0 +1,26 @@
+#
+# Makefile for the netfilter modules on top of IPv6.
+#
+
+# Link order matters here.
+obj-$(CONFIG_IP6_NF_IPTABLES) += ip6_tables.o
+obj-$(CONFIG_IP6_NF_MATCH_LIMIT) += ip6t_limit.o
+obj-$(CONFIG_IP6_NF_MATCH_MARK) += ip6t_mark.o
+obj-$(CONFIG_IP6_NF_MATCH_LENGTH) += ip6t_length.o
+obj-$(CONFIG_IP6_NF_MATCH_MAC) += ip6t_mac.o
+obj-$(CONFIG_IP6_NF_MATCH_RT) += ip6t_rt.o
+obj-$(CONFIG_IP6_NF_MATCH_OPTS) += ip6t_hbh.o ip6t_dst.o
+obj-$(CONFIG_IP6_NF_MATCH_IPV6HEADER) += ip6t_ipv6header.o
+obj-$(CONFIG_IP6_NF_MATCH_FRAG) += ip6t_frag.o
+obj-$(CONFIG_IP6_NF_MATCH_AHESP) += ip6t_esp.o ip6t_ah.o
+obj-$(CONFIG_IP6_NF_MATCH_EUI64) += ip6t_eui64.o
+obj-$(CONFIG_IP6_NF_MATCH_MULTIPORT) += ip6t_multiport.o
+obj-$(CONFIG_IP6_NF_MATCH_OWNER) += ip6t_owner.o
+obj-$(CONFIG_IP6_NF_MATCH_PHYSDEV) += ip6t_physdev.o
+obj-$(CONFIG_IP6_NF_FILTER) += ip6table_filter.o
+obj-$(CONFIG_IP6_NF_MANGLE) += ip6table_mangle.o
+obj-$(CONFIG_IP6_NF_TARGET_MARK) += ip6t_MARK.o
+obj-$(CONFIG_IP6_NF_QUEUE) += ip6_queue.o
+obj-$(CONFIG_IP6_NF_TARGET_LOG) += ip6t_LOG.o
+obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o
+obj-$(CONFIG_IP6_NF_MATCH_HL) += ip6t_hl.o
diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c
new file mode 100644
index 000000000000..c54830b89593
--- /dev/null
+++ b/net/ipv6/netfilter/ip6_queue.c
@@ -0,0 +1,741 @@
+/*
+ * This is a module which is used for queueing IPv6 packets and
+ * communicating with userspace via netlink.
+ *
+ * (C) 2001 Fernando Anton, this code is GPL.
+ * IPv64 Project - Work based in IPv64 draft by Arturo Azcorra.
+ * Universidad Carlos III de Madrid - Leganes (Madrid) - Spain
+ * Universidad Politecnica de Alcala de Henares - Alcala de H. (Madrid) - Spain
+ * email: fanton@it.uc3m.es
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * 2001-11-06: First try. Working with ip_queue.c for IPv4 and trying
+ * to adapt it to IPv6
+ * HEAVILY based in ipqueue.c by James Morris. It's just
+ * a little modified version of it, so he's nearly the
+ * real coder of this.
+ * Few changes needed, mainly the hard_routing code and
+ * the netlink socket protocol (we're NETLINK_IP6_FW).
+ * 2002-06-25: Code cleanup. [JM: ported cleanup over from ip_queue.c]
+ * 2005-02-04: Added /proc counter for dropped packets; fixed so
+ * packets aren't delivered to user space if they're going
+ * to be dropped.
+ */
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/init.h>
+#include <linux/ipv6.h>
+#include <linux/notifier.h>
+#include <linux/netdevice.h>
+#include <linux/netfilter.h>
+#include <linux/netlink.h>
+#include <linux/spinlock.h>
+#include <linux/sysctl.h>
+#include <linux/proc_fs.h>
+#include <net/sock.h>
+#include <net/ipv6.h>
+#include <net/ip6_route.h>
+#include <linux/netfilter_ipv4/ip_queue.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+
+/* Initial value of queue_maxlen, the limit on queued entries. */
+#define IPQ_QMAX_DEFAULT 1024
+#define IPQ_PROC_FS_NAME "ip6_queue"
+/* ctl_name and procname of the sysctl entry that exposes queue_maxlen. */
+#define NET_IPQ_QMAX 2088
+#define NET_IPQ_QMAX_NAME "ip6_queue_maxlen"
+
+/*
+ * IPv6 addresses of a packet as they were when it was queued.  Filled in
+ * for packets queued on the local-out hook and compared again after
+ * userspace mangling to decide whether the packet must be re-routed.
+ */
+struct ipq_rt_info {
+ struct in6_addr daddr;
+ struct in6_addr saddr;
+};
+
+/* One packet waiting for a verdict from userspace. */
+struct ipq_queue_entry {
+ struct list_head list; /* linkage on queue_list */
+ struct nf_info *info; /* netfilter state handed back to nf_reinject() */
+ struct sk_buff *skb; /* the queued packet */
+ struct ipq_rt_info rt_info; /* original addresses, see ipq_rt_info */
+};
+
+/* Predicate used to select queue entries; non-zero means "match". */
+typedef int (*ipq_cmpfn)(struct ipq_queue_entry *, unsigned long);
+
+/* Module state; writers in this file update it under queue_lock. */
+static unsigned char copy_mode = IPQ_COPY_NONE; /* what is copied to userspace */
+static unsigned int queue_maxlen = IPQ_QMAX_DEFAULT; /* limit on queue_total */
+static DEFINE_RWLOCK(queue_lock);
+static int peer_pid; /* netlink pid of the bound userspace peer, 0 if none */
+static unsigned int copy_range; /* max payload bytes copied in IPQ_COPY_PACKET mode */
+static unsigned int queue_total; /* number of entries on queue_list */
+static unsigned int queue_dropped = 0; /* packets refused because the queue was full */
+static unsigned int queue_user_dropped = 0; /* packets whose netlink delivery failed */
+static struct sock *ipqnl; /* kernel netlink socket */
+static LIST_HEAD(queue_list);
+static DECLARE_MUTEX(ipqnl_sem); /* serializes draining of the netlink receive queue */
+
+/*
+ * Hand the entry's packet back to netfilter with the given verdict and free
+ * the entry.  The entry must already have been removed from the queue.
+ */
+static void
+ipq_issue_verdict(struct ipq_queue_entry *entry, int verdict)
+{
+ nf_reinject(entry->skb, entry->info, verdict);
+ kfree(entry);
+}
+
+/*
+ * Add an entry at the head of queue_list and bump the entry count.
+ * The only caller in this file holds queue_lock for writing.
+ */
+static inline void
+__ipq_enqueue_entry(struct ipq_queue_entry *entry)
+{
+ list_add(&entry->list, &queue_list);
+ queue_total++;
+}
+
+/*
+ * Find and return a queued entry matched by cmpfn, or return the last
+ * entry if cmpfn is NULL.
+ *
+ * The list is walked from the tail; since new entries are added at the
+ * head, the tail holds the oldest entry.  Returns NULL when nothing
+ * matches or the queue is empty.
+ *
+ * list_entry() is used to get from the list node to its container, so the
+ * lookup no longer depends on 'list' being the first member of
+ * struct ipq_queue_entry.
+ */
+static inline struct ipq_queue_entry *
+__ipq_find_entry(ipq_cmpfn cmpfn, unsigned long data)
+{
+	struct list_head *p;
+
+	list_for_each_prev(p, &queue_list) {
+		struct ipq_queue_entry *entry =
+			list_entry(p, struct ipq_queue_entry, list);
+
+		if (!cmpfn || cmpfn(entry, data))
+			return entry;
+	}
+	return NULL;
+}
+
+/* Unlink an entry from queue_list and decrement the entry count. */
+static inline void
+__ipq_dequeue_entry(struct ipq_queue_entry *entry)
+{
+ list_del(&entry->list);
+ queue_total--;
+}
+
+/*
+ * Look up an entry with __ipq_find_entry() and, if one is found, remove it
+ * from the queue.  Returns the removed entry or NULL.
+ */
+static inline struct ipq_queue_entry *
+__ipq_find_dequeue_entry(ipq_cmpfn cmpfn, unsigned long data)
+{
+	struct ipq_queue_entry *found = __ipq_find_entry(cmpfn, data);
+
+	if (found != NULL)
+		__ipq_dequeue_entry(found);
+
+	return found;
+}
+
+
+/* Dequeue every entry and issue the given verdict for each of them. */
+static inline void
+__ipq_flush(int verdict)
+{
+	for (;;) {
+		struct ipq_queue_entry *victim;
+
+		victim = __ipq_find_dequeue_entry(NULL, 0);
+		if (!victim)
+			break;
+		ipq_issue_verdict(victim, verdict);
+	}
+}
+
+/*
+ * Set the copy mode and range.
+ *
+ * IPQ_COPY_NONE and IPQ_COPY_META reset the range to 0; IPQ_COPY_PACKET
+ * stores the requested range clamped to 0xFFFF.  Any other mode leaves the
+ * state untouched.  Returns 0 on success or -EINVAL for an unknown mode.
+ */
+static inline int
+__ipq_set_mode(unsigned char mode, unsigned int range)
+{
+	if (mode == IPQ_COPY_NONE || mode == IPQ_COPY_META) {
+		copy_mode = mode;
+		copy_range = 0;
+		return 0;
+	}
+
+	if (mode == IPQ_COPY_PACKET) {
+		copy_mode = mode;
+		copy_range = (range > 0xFFFF) ? 0xFFFF : range;
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * Return to the unbound state: forget the peer, drop the timestamp
+ * request taken when the peer bound, switch to IPQ_COPY_NONE and drop every
+ * queued packet.
+ */
+static inline void
+__ipq_reset(void)
+{
+ peer_pid = 0;
+ net_disable_timestamp();
+ __ipq_set_mode(IPQ_COPY_NONE, 0);
+ __ipq_flush(NF_DROP);
+}
+
+/* Locked wrapper: find and dequeue an entry while holding queue_lock for writing. */
+static struct ipq_queue_entry *
+ipq_find_dequeue_entry(ipq_cmpfn cmpfn, unsigned long data)
+{
+ struct ipq_queue_entry *entry;
+
+ write_lock_bh(&queue_lock);
+ entry = __ipq_find_dequeue_entry(cmpfn, data);
+ write_unlock_bh(&queue_lock);
+ return entry;
+}
+
+/* Locked wrapper: flush the whole queue with the given verdict. */
+static void
+ipq_flush(int verdict)
+{
+ write_lock_bh(&queue_lock);
+ __ipq_flush(verdict);
+ write_unlock_bh(&queue_lock);
+}
+
+/*
+ * Build the netlink IPQM_PACKET message describing a queued packet.
+ *
+ * @entry: queue entry whose skb and netfilter info are described.
+ * @errp:  set to -EINVAL on failure (also when the allocation fails).
+ *
+ * Depending on copy_mode the message carries only metadata or metadata
+ * plus up to copy_range bytes of packet data.  Returns the new skb, or
+ * NULL on failure.
+ */
+static struct sk_buff *
+ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp)
+{
+ unsigned char *old_tail;
+ size_t size = 0;
+ size_t data_len = 0;
+ struct sk_buff *skb;
+ struct ipq_packet_msg *pmsg;
+ struct nlmsghdr *nlh;
+
+ /* copy_mode and copy_range are sampled together under the lock. */
+ read_lock_bh(&queue_lock);
+
+ switch (copy_mode) {
+ case IPQ_COPY_META:
+ case IPQ_COPY_NONE:
+ size = NLMSG_SPACE(sizeof(*pmsg));
+ data_len = 0;
+ break;
+
+ case IPQ_COPY_PACKET:
+ /* A range of 0, or one larger than the packet, means "whole packet". */
+ if (copy_range == 0 || copy_range > entry->skb->len)
+ data_len = entry->skb->len;
+ else
+ data_len = copy_range;
+
+ size = NLMSG_SPACE(sizeof(*pmsg) + data_len);
+ break;
+
+ default:
+ *errp = -EINVAL;
+ read_unlock_bh(&queue_lock);
+ return NULL;
+ }
+
+ read_unlock_bh(&queue_lock);
+
+ skb = alloc_skb(size, GFP_ATOMIC);
+ if (!skb)
+ goto nlmsg_failure;
+
+ old_tail= skb->tail;
+ /* NOTE(review): NLMSG_PUT presumably jumps to nlmsg_failure when the skb lacks room — confirm against the macro. */
+ nlh = NLMSG_PUT(skb, 0, 0, IPQM_PACKET, size - sizeof(*nlh));
+ pmsg = NLMSG_DATA(nlh);
+ memset(pmsg, 0, sizeof(*pmsg));
+
+ /* The entry's address doubles as the packet id; see id_cmp(). */
+ pmsg->packet_id = (unsigned long )entry;
+ pmsg->data_len = data_len;
+ pmsg->timestamp_sec = entry->skb->stamp.tv_sec;
+ pmsg->timestamp_usec = entry->skb->stamp.tv_usec;
+ pmsg->mark = entry->skb->nfmark;
+ pmsg->hook = entry->info->hook;
+ pmsg->hw_protocol = entry->skb->protocol;
+
+ if (entry->info->indev)
+ strcpy(pmsg->indev_name, entry->info->indev->name);
+ else
+ pmsg->indev_name[0] = '\0';
+
+ if (entry->info->outdev)
+ strcpy(pmsg->outdev_name, entry->info->outdev->name);
+ else
+ pmsg->outdev_name[0] = '\0';
+
+ /* Hardware address info is only reported for packets with an input device. */
+ if (entry->info->indev && entry->skb->dev) {
+ pmsg->hw_type = entry->skb->dev->type;
+ if (entry->skb->dev->hard_header_parse)
+ pmsg->hw_addrlen =
+ entry->skb->dev->hard_header_parse(entry->skb,
+ pmsg->hw_addr);
+ }
+
+ if (data_len)
+ if (skb_copy_bits(entry->skb, 0, pmsg->payload, data_len))
+ BUG();
+
+ nlh->nlmsg_len = skb->tail - old_tail;
+ return skb;
+
+nlmsg_failure:
+ /* skb is NULL here when alloc_skb() failed. */
+ if (skb)
+ kfree_skb(skb);
+ *errp = -EINVAL;
+ printk(KERN_ERR "ip6_queue: error creating packet message\n");
+ return NULL;
+}
+
+/*
+ * Queue a packet for a userspace verdict.
+ *
+ * @skb:  packet to queue.
+ * @info: netfilter state needed to reinject the packet later.
+ * @data: unused here.
+ *
+ * Builds the netlink message, and with queue_lock held checks that a peer
+ * is bound and that the queue is not full, sends the message and only then
+ * links the entry into the queue.
+ *
+ * Returns the non-negative result of netlink_unicast() on success, or a
+ * negative value: -EAGAIN when no copy mode is set, -ENOMEM when the entry
+ * cannot be allocated, -ENOSPC when the queue is full, -EINVAL when no
+ * peer is bound or the message cannot be built, or the netlink_unicast()
+ * error.  On failure the entry is freed and the skb is left to the caller.
+ *
+ * The queue-full warning now reads "full at" (it said "fill at") and
+ * prints the unsigned counters with %u instead of %d.
+ */
+static int
+ipq_enqueue_packet(struct sk_buff *skb, struct nf_info *info, void *data)
+{
+	int status = -EINVAL;
+	struct sk_buff *nskb;
+	struct ipq_queue_entry *entry;
+
+	if (copy_mode == IPQ_COPY_NONE)
+		return -EAGAIN;
+
+	entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
+	if (entry == NULL) {
+		printk(KERN_ERR "ip6_queue: OOM in ipq_enqueue_packet()\n");
+		return -ENOMEM;
+	}
+
+	entry->info = info;
+	entry->skb = skb;
+
+	/* Remember the addresses so a later mangle can detect a change. */
+	if (entry->info->hook == NF_IP_LOCAL_OUT) {
+		struct ipv6hdr *iph = skb->nh.ipv6h;
+
+		entry->rt_info.daddr = iph->daddr;
+		entry->rt_info.saddr = iph->saddr;
+	}
+
+	nskb = ipq_build_packet_message(entry, &status);
+	if (nskb == NULL)
+		goto err_out_free;
+
+	write_lock_bh(&queue_lock);
+
+	if (!peer_pid)
+		goto err_out_free_nskb;
+
+	if (queue_total >= queue_maxlen) {
+		queue_dropped++;
+		status = -ENOSPC;
+		if (net_ratelimit())
+			printk (KERN_WARNING "ip6_queue: full at %u entries, "
+				"dropping packet(s). Dropped: %u\n", queue_total,
+				queue_dropped);
+		goto err_out_free_nskb;
+	}
+
+	/* netlink_unicast will either free the nskb or attach it to a socket */
+	status = netlink_unicast(ipqnl, nskb, peer_pid, MSG_DONTWAIT);
+	if (status < 0) {
+		queue_user_dropped++;
+		goto err_out_unlock;
+	}
+
+	__ipq_enqueue_entry(entry);
+
+	write_unlock_bh(&queue_lock);
+	return status;
+
+err_out_free_nskb:
+	kfree_skb(nskb);
+
+err_out_unlock:
+	write_unlock_bh(&queue_lock);
+
+err_out_free:
+	kfree(entry);
+	return status;
+}
+
+/*
+ * Replace the queued packet's contents with the payload supplied in a
+ * verdict message.
+ *
+ * @v: verdict message carrying data_len bytes of replacement payload.
+ * @e: queue entry whose skb is rewritten (e->skb may be replaced).
+ *
+ * Returns 0 on success or when the payload is too short to hold an IPv6
+ * header (the packet is then left untouched), -EINVAL when the payload
+ * exceeds 0xFFFF bytes, -ENOMEM on allocation failure, or the result of
+ * ip6_route_me_harder() when re-routing is needed.
+ */
+static int
+ipq_mangle_ipv6(ipq_verdict_msg_t *v, struct ipq_queue_entry *e)
+{
+ int diff;
+ /* Only used for its size in the length check below. */
+ struct ipv6hdr *user_iph = (struct ipv6hdr *)v->payload;
+
+ if (v->data_len < sizeof(*user_iph))
+ return 0;
+ /* Resize the skb to exactly data_len bytes: trim, or grow the tail. */
+ diff = v->data_len - e->skb->len;
+ if (diff < 0)
+ skb_trim(e->skb, v->data_len);
+ else if (diff > 0) {
+ if (v->data_len > 0xFFFF)
+ return -EINVAL;
+ if (diff > skb_tailroom(e->skb)) {
+ struct sk_buff *newskb;
+
+ /* Not enough tailroom: copy into a larger skb and swap it in. */
+ newskb = skb_copy_expand(e->skb,
+ skb_headroom(e->skb),
+ diff,
+ GFP_ATOMIC);
+ if (newskb == NULL) {
+ printk(KERN_WARNING "ip6_queue: OOM "
+ "in mangle, dropping packet\n");
+ return -ENOMEM;
+ }
+ if (e->skb->sk)
+ skb_set_owner_w(newskb, e->skb->sk);
+ kfree_skb(e->skb);
+ e->skb = newskb;
+ }
+ skb_put(e->skb, diff);
+ }
+ if (!skb_ip_make_writable(&e->skb, v->data_len))
+ return -ENOMEM;
+ memcpy(e->skb->data, v->payload, v->data_len);
+ e->skb->nfcache |= NFC_ALTERED;
+
+ /*
+ * Extra routing may needed on local out, as the QUEUE target never
+ * returns control to the table.
+ * Not a nice way to cmp, but works
+ */
+ if (e->info->hook == NF_IP_LOCAL_OUT) {
+ struct ipv6hdr *iph = e->skb->nh.ipv6h;
+ /* Compare against the addresses saved when the packet was queued. */
+ if (!ipv6_addr_equal(&iph->daddr, &e->rt_info.daddr) ||
+ !ipv6_addr_equal(&iph->saddr, &e->rt_info.saddr))
+ return ip6_route_me_harder(e->skb);
+ }
+ return 0;
+}
+
+/*
+ * ipq_cmpfn matching an entry by packet id.  The id handed to userspace is
+ * the entry's own address, so the comparison is a pointer/integer match.
+ */
+static inline int
+id_cmp(struct ipq_queue_entry *e, unsigned long id)
+{
+	unsigned long entry_id = (unsigned long )e;
+
+	return (entry_id == id);
+}
+
+/*
+ * Apply a userspace verdict to the queued packet it names.
+ *
+ * @vmsg: verdict message (verdict value, packet id, optional payload).
+ * @len:  number of payload bytes that followed the peer message.
+ *
+ * When a replacement payload of exactly @len bytes is present the packet
+ * is mangled first; a mangling failure turns the verdict into NF_DROP.
+ * Returns 0 on success, -EINVAL for an out-of-range verdict, or -ENOENT
+ * when no queued packet has the given id.
+ */
+static int
+ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len)
+{
+	struct ipq_queue_entry *entry;
+	int verdict;
+
+	if (vmsg->value > NF_MAX_VERDICT)
+		return -EINVAL;
+
+	entry = ipq_find_dequeue_entry(id_cmp, vmsg->id);
+	if (entry == NULL)
+		return -ENOENT;
+
+	verdict = vmsg->value;
+
+	if (vmsg->data_len && vmsg->data_len == len) {
+		if (ipq_mangle_ipv6(vmsg, entry) < 0)
+			verdict = NF_DROP;
+	}
+
+	ipq_issue_verdict(entry, verdict);
+	return 0;
+}
+
+/* Locked wrapper: set the copy mode and range while holding queue_lock for writing. */
+static int
+ipq_set_mode(unsigned char mode, unsigned int range)
+{
+ int status;
+
+ write_lock_bh(&queue_lock);
+ status = __ipq_set_mode(mode, range);
+ write_unlock_bh(&queue_lock);
+ return status;
+}
+
+/*
+ * Dispatch one message received from the userspace peer.
+ *
+ * @pmsg: message body.
+ * @type: netlink message type (IPQM_MODE or IPQM_VERDICT).
+ * @len:  length of the body in bytes.
+ *
+ * Returns the handler's result, or -EINVAL when the body is shorter than
+ * a peer message, the type is unknown, or a verdict value is out of range.
+ */
+static int
+ipq_receive_peer(struct ipq_peer_msg *pmsg,
+		 unsigned char type, unsigned int len)
+{
+	if (len < sizeof(*pmsg))
+		return -EINVAL;
+
+	if (type == IPQM_MODE)
+		return ipq_set_mode(pmsg->msg.mode.value,
+				    pmsg->msg.mode.range);
+
+	if (type == IPQM_VERDICT) {
+		if (pmsg->msg.verdict.value > NF_MAX_VERDICT)
+			return -EINVAL;
+
+		/* Whatever follows the fixed part is replacement payload. */
+		return ipq_set_verdict(&pmsg->msg.verdict,
+				       len - sizeof(*pmsg));
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * ipq_cmpfn matching entries whose input or output device has the given
+ * interface index.  Returns 1 on a match, 0 otherwise.
+ */
+static int
+dev_cmp(struct ipq_queue_entry *entry, unsigned long ifindex)
+{
+	if (entry->info->indev && entry->info->indev->ifindex == ifindex)
+		return 1;
+
+	if (entry->info->outdev && entry->info->outdev->ifindex == ifindex)
+		return 1;
+
+	return 0;
+}
+
+/* Drop every queued packet that entered or would leave via the given interface. */
+static void
+ipq_dev_drop(int ifindex)
+{
+	for (;;) {
+		struct ipq_queue_entry *victim;
+
+		victim = ipq_find_dequeue_entry(dev_cmp, ifindex);
+		if (victim == NULL)
+			break;
+		ipq_issue_verdict(victim, NF_DROP);
+	}
+}
+
+/*
+ * Send a netlink ack carrying 'err' and return from the enclosing function.
+ * Expects 'skb' and 'nlh' to be in scope at the point of use.
+ */
+#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0)
+
+/*
+ * Validate and process one netlink skb received from userspace.
+ *
+ * Messages that are too short or whose nlmsg_len is inconsistent with the
+ * skb are dropped without an ack.  Other failures are reported with a
+ * netlink ack carrying the error.  The first accepted sender becomes the
+ * bound peer; messages from any other pid are then refused with -EBUSY.
+ *
+ * The payload length passed to ipq_receive_peer() is derived from the
+ * validated nlmsg_len.  The original code used skb->len, so any bytes
+ * trailing the netlink message were counted as message payload.
+ */
+static inline void
+ipq_rcv_skb(struct sk_buff *skb)
+{
+	int status, type, pid, flags, nlmsglen, skblen;
+	struct nlmsghdr *nlh;
+
+	skblen = skb->len;
+	if (skblen < sizeof(*nlh))
+		return;
+
+	nlh = (struct nlmsghdr *)skb->data;
+	nlmsglen = nlh->nlmsg_len;
+	if (nlmsglen < sizeof(*nlh) || skblen < nlmsglen)
+		return;
+
+	pid = nlh->nlmsg_pid;
+	flags = nlh->nlmsg_flags;
+
+	if(pid <= 0 || !(flags & NLM_F_REQUEST) || flags & NLM_F_MULTI)
+		RCV_SKB_FAIL(-EINVAL);
+
+	if (flags & MSG_TRUNC)
+		RCV_SKB_FAIL(-ECOMM);
+
+	type = nlh->nlmsg_type;
+	if (type < NLMSG_NOOP || type >= IPQM_MAX)
+		RCV_SKB_FAIL(-EINVAL);
+
+	/* Types up to IPQM_BASE are not ip_queue requests; ignore them. */
+	if (type <= IPQM_BASE)
+		return;
+
+	if (security_netlink_recv(skb))
+		RCV_SKB_FAIL(-EPERM);
+
+	write_lock_bh(&queue_lock);
+
+	if (peer_pid) {
+		if (peer_pid != pid) {
+			write_unlock_bh(&queue_lock);
+			RCV_SKB_FAIL(-EBUSY);
+		}
+	} else {
+		/* First peer to talk to us: bind to it. */
+		net_enable_timestamp();
+		peer_pid = pid;
+	}
+
+	write_unlock_bh(&queue_lock);
+
+	/* nlmsglen >= sizeof(*nlh) was checked above, so this cannot go negative. */
+	status = ipq_receive_peer(NLMSG_DATA(nlh), type,
+				  nlmsglen - NLMSG_LENGTH(0));
+	if (status < 0)
+		RCV_SKB_FAIL(status);
+
+	if (flags & NLM_F_ACK)
+		netlink_ack(skb, nlh, 0);
+	return;
+}
+
+/*
+ * Receive callback passed to netlink_kernel_create(): drains the socket's
+ * receive queue, processing and freeing each skb.
+ *
+ * ipqnl_sem is taken with down_trylock(); if it cannot be taken the function
+ * returns without processing anything (presumably because another context is
+ * already draining the queue -- confirm against the other ipqnl_sem users).
+ * The outer loop re-checks the queue after releasing the semaphore so that
+ * messages queued in the meantime are still processed.
+ */
+static void
+ipq_rcv_sk(struct sock *sk, int len)
+{
+ do {
+ struct sk_buff *skb;
+
+ if (down_trylock(&ipqnl_sem))
+ return;
+
+ while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
+ ipq_rcv_skb(skb);
+ kfree_skb(skb);
+ }
+
+ up(&ipqnl_sem);
+
+ } while (ipqnl && ipqnl->sk_receive_queue.qlen);
+}
+
+/*
+ * Netdevice notifier callback. ptr is the struct net_device the event
+ * refers to. On NETDEV_DOWN all queued packets associated with that
+ * device are dropped; every other event is ignored.
+ * Always returns NOTIFY_DONE.
+ */
+static int
+ipq_rcv_dev_event(struct notifier_block *this,
+		  unsigned long event, void *ptr)
+{
+	struct net_device *dev = ptr;
+
+	if (event != NETDEV_DOWN)
+		return NOTIFY_DONE;
+
+	ipq_dev_drop(dev->ifindex);
+	return NOTIFY_DONE;
+}
+
+/* Hooks ipq_rcv_dev_event() into the netdevice notifier chain. */
+static struct notifier_block ipq_dev_notifier = {
+	.notifier_call	= ipq_rcv_dev_event,
+};
+
+/*
+ * Netlink notifier callback. ptr is a struct netlink_notify. When a
+ * NETLINK_IP6_FW socket with a non-zero pid is released and that pid is
+ * the current peer, the queue state is reset under queue_lock.
+ * Always returns NOTIFY_DONE.
+ */
+static int
+ipq_rcv_nl_event(struct notifier_block *this,
+		 unsigned long event, void *ptr)
+{
+	struct netlink_notify *n = ptr;
+
+	if (event != NETLINK_URELEASE ||
+	    n->protocol != NETLINK_IP6_FW || !n->pid)
+		return NOTIFY_DONE;
+
+	write_lock_bh(&queue_lock);
+	if (n->pid == peer_pid)
+		__ipq_reset();
+	write_unlock_bh(&queue_lock);
+
+	return NOTIFY_DONE;
+}
+
+/* Hooks ipq_rcv_nl_event() into the netlink notifier chain. */
+static struct notifier_block ipq_nl_notifier = {
+	.notifier_call	= ipq_rcv_nl_event,
+};
+
+/* Handle returned by register_sysctl_table(); needed to unregister again. */
+static struct ctl_table_header *ipq_sysctl_header;
+
+/* Leaf table: a single integer tunable backed by queue_maxlen, readable by
+ * everyone and writable by the owner (mode 0644). */
+static ctl_table ipq_table[] = {
+ {
+ .ctl_name = NET_IPQ_QMAX,
+ .procname = NET_IPQ_QMAX_NAME,
+ .data = &queue_maxlen,
+ .maxlen = sizeof(queue_maxlen),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ { .ctl_name = 0 }
+};
+
+/* Directory "ipv6" containing the leaf table above. */
+static ctl_table ipq_dir_table[] = {
+ {
+ .ctl_name = NET_IPV6,
+ .procname = "ipv6",
+ .mode = 0555,
+ .child = ipq_table
+ },
+ { .ctl_name = 0 }
+};
+
+/* Root directory "net"; this is the table handed to register_sysctl_table(). */
+static ctl_table ipq_root_table[] = {
+ {
+ .ctl_name = CTL_NET,
+ .procname = "net",
+ .mode = 0555,
+ .child = ipq_dir_table
+ },
+ { .ctl_name = 0 }
+};
+
+/*
+ * Read handler registered for the proc entry: formats the queue state into
+ * buffer and reports which part of it the caller should consume.
+ *
+ * buffer: destination for the formatted text
+ * start:  set to buffer + offset
+ * offset: number of bytes the reader has already consumed
+ * length: maximum number of bytes the reader wants
+ *
+ * Returns the number of bytes available at *start, clamped to [0, length].
+ */
+static int
+ipq_get_info(char *buffer, char **start, off_t offset, int length)
+{
+ int len;
+
+ /* Take the read lock so the values printed belong to one snapshot. */
+ read_lock_bh(&queue_lock);
+
+ len = sprintf(buffer,
+ "Peer PID : %d\n"
+ "Copy mode : %hu\n"
+ "Copy range : %u\n"
+ "Queue length : %u\n"
+ "Queue max. length : %u\n"
+ "Queue dropped : %u\n"
+ "Netfilter dropped : %u\n",
+ peer_pid,
+ copy_mode,
+ copy_range,
+ queue_total,
+ queue_maxlen,
+ queue_dropped,
+ queue_user_dropped);
+
+ read_unlock_bh(&queue_lock);
+
+ /* Skip what was already read and clamp the remainder. */
+ *start = buffer + offset;
+ len -= offset;
+ if (len > length)
+ len = length;
+ else if (len < 0)
+ len = 0;
+ return len;
+}
+
+/*
+ * Combined module set-up and tear-down.
+ *
+ * init != 0: register the netlink notifier, create the netlink socket and
+ *            the proc entry, register the netdevice notifier and sysctl
+ *            table, and finally install the PF_INET6 queue handler. On
+ *            failure, the steps completed so far are undone by jumping into
+ *            the label ladder below.
+ * init == 0: run the complete ladder from the top (full tear-down).
+ *
+ * Returns the result of nf_register_queue_handler() on successful set-up, a
+ * negative error code on failed set-up, and -ENOMEM (the initial value of
+ * status) after a tear-down; fini() ignores the return value.
+ *
+ * NOTE(review): the results of register_netdevice_notifier() and
+ * register_sysctl_table() are not checked.
+ */
+static int
+init_or_cleanup(int init)
+{
+ int status = -ENOMEM;
+ struct proc_dir_entry *proc;
+
+ if (!init)
+ goto cleanup;
+
+ netlink_register_notifier(&ipq_nl_notifier);
+ ipqnl = netlink_kernel_create(NETLINK_IP6_FW, ipq_rcv_sk);
+ if (ipqnl == NULL) {
+ printk(KERN_ERR "ip6_queue: failed to create netlink socket\n");
+ goto cleanup_netlink_notifier;
+ }
+
+ proc = proc_net_create(IPQ_PROC_FS_NAME, 0, ipq_get_info);
+ if (proc)
+ proc->owner = THIS_MODULE;
+ else {
+ printk(KERN_ERR "ip6_queue: failed to create proc entry\n");
+ goto cleanup_ipqnl;
+ }
+
+ register_netdevice_notifier(&ipq_dev_notifier);
+ ipq_sysctl_header = register_sysctl_table(ipq_root_table, 0);
+
+ status = nf_register_queue_handler(PF_INET6, ipq_enqueue_packet, NULL);
+ if (status < 0) {
+ printk(KERN_ERR "ip6_queue: failed to register queue handler\n");
+ goto cleanup_sysctl;
+ }
+ return status;
+
+ /* Tear-down ladder: each label undoes one set-up stage and falls through
+ * to the labels for the earlier stages. */
+cleanup:
+ nf_unregister_queue_handler(PF_INET6);
+ synchronize_net();
+ /* Issue NF_DROP for whatever is still queued. */
+ ipq_flush(NF_DROP);
+
+cleanup_sysctl:
+ unregister_sysctl_table(ipq_sysctl_header);
+ unregister_netdevice_notifier(&ipq_dev_notifier);
+ proc_net_remove(IPQ_PROC_FS_NAME);
+
+cleanup_ipqnl:
+ sock_release(ipqnl->sk_socket);
+ /* Taking and releasing ipqnl_sem waits for a receive pass that may still
+ * hold it (see ipq_rcv_sk()). */
+ down(&ipqnl_sem);
+ up(&ipqnl_sem);
+
+cleanup_netlink_notifier:
+ netlink_unregister_notifier(&ipq_nl_notifier);
+ return status;
+}
+
+/* Module entry point: run the set-up half of init_or_cleanup(). */
+static int __init init(void)
+{
+
+ return init_or_cleanup(1);
+}
+
+/* Module exit point: run the tear-down half of init_or_cleanup(). */
+static void __exit fini(void)
+{
+ init_or_cleanup(0);
+}
+
+MODULE_DESCRIPTION("IPv6 packet queue handler");
+MODULE_LICENSE("GPL");
+
+module_init(init);
+module_exit(fini);
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
new file mode 100644
index 000000000000..c735276fdd5f
--- /dev/null
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -0,0 +1,1970 @@
+/*
+ * Packet matching code.
+ *
+ * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
+ * Copyright (C) 2000-2002 Netfilter core team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * 19 Jan 2002 Harald Welte <laforge@gnumonks.org>
+ * - increase module usage count as soon as we have rules inside
+ * a table
+ * 06 Jun 2002 Andras Kis-Szabo <kisza@sch.bme.hu>
+ * - new extension header parser code
+ */
+#include <linux/config.h>
+#include <linux/skbuff.h>
+#include <linux/kmod.h>
+#include <linux/vmalloc.h>
+#include <linux/netdevice.h>
+#include <linux/module.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/icmpv6.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <asm/uaccess.h>
+#include <asm/semaphore.h>
+#include <linux/proc_fs.h>
+
+#include <linux/netfilter_ipv6/ip6_tables.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
+MODULE_DESCRIPTION("IPv6 packet filter");
+
+#define IPV6_HDR_LEN (sizeof(struct ipv6hdr))
+#define IPV6_OPTHDR_LEN (sizeof(struct ipv6_opt_hdr))
+
+/*#define DEBUG_IP_FIREWALL*/
+/*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
+/*#define DEBUG_IP_FIREWALL_USER*/
+
+/* dprintf(): debug output, compiled to nothing unless DEBUG_IP_FIREWALL is
+ * defined. */
+#ifdef DEBUG_IP_FIREWALL
+#define dprintf(format, args...) printk(format , ## args)
+#else
+#define dprintf(format, args...)
+#endif
+
+/* duprintf(): same idea, controlled by DEBUG_IP_FIREWALL_USER. Because the
+ * disabled form expands to nothing, its arguments are not evaluated. */
+#ifdef DEBUG_IP_FIREWALL_USER
+#define duprintf(format, args...) printk(format , ## args)
+#else
+#define duprintf(format, args...)
+#endif
+
+/* IP_NF_ASSERT(x): with CONFIG_NETFILTER_DEBUG, print function, file and
+ * line when x is false (execution continues); otherwise expands to nothing,
+ * so x is not evaluated. */
+#ifdef CONFIG_NETFILTER_DEBUG
+#define IP_NF_ASSERT(x) \
+do { \
+ if (!(x)) \
+ printk("IP_NF_ASSERT: %s:%s:%u\n", \
+ __FUNCTION__, __FILE__, __LINE__); \
+} while(0)
+#else
+#define IP_NF_ASSERT(x)
+#endif
+/* Round x up to the next multiple of SMP_CACHE_BYTES (the mask arithmetic
+ * relies on SMP_CACHE_BYTES being a power of two). */
+#define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
+
+static DECLARE_MUTEX(ip6t_mutex);
+
+/* Must have mutex */
+#define ASSERT_READ_LOCK(x) IP_NF_ASSERT(down_trylock(&ip6t_mutex) != 0)
+#define ASSERT_WRITE_LOCK(x) IP_NF_ASSERT(down_trylock(&ip6t_mutex) != 0)
+#include <linux/netfilter_ipv4/lockhelp.h>
+#include <linux/netfilter_ipv4/listhelp.h>
+
+#if 0
+/* All the better to debug you with... */
+#define static
+#define inline
+#endif
+
+/* Locking is simple: we assume at worst case there will be one packet
+ in user context and one from bottom halves (or soft irq if Alexey's
+ softnet patch was applied).
+
+ We keep a set of rules for each CPU, so we can avoid write-locking
+ them; doing a read_lock_bh() stops packets coming through if we're
+ in user context.
+
+ To be cache friendly on SMP, we arrange them like so:
+ [ n-entries ]
+ ... cache-align padding ...
+ [ n-entries ]
+
+ Hence the start of CPU p's copy is entries + TABLE_OFFSET(t, p), see
+ the TABLE_OFFSET() macro below. */
+
+/* The table itself: a fixed header followed by the rule blob. The blob is
+ * allocated with room for one SMP_ALIGN(size)-sized copy per possible CPU
+ * (see translate_table() and TABLE_OFFSET()). */
+struct ip6t_table_info
+{
+ /* Size per table */
+ unsigned int size;
+ /* Number of entries: FIXME. --RR */
+ unsigned int number;
+ /* Initial number of entries. Needed for module usage count */
+ unsigned int initial_entries;
+
+ /* Entry points and underflows, as byte offsets into one copy of the
+ * rule blob, indexed by hook number */
+ unsigned int hook_entry[NF_IP6_NUMHOOKS];
+ unsigned int underflow[NF_IP6_NUMHOOKS];
+
+ /* ip6t_entry tables: one per CPU */
+ char entries[0] ____cacheline_aligned;
+};
+
+/* Lists searched by name through find_inlist_lock(): targets, matches and
+ * tables. */
+static LIST_HEAD(ip6t_target);
+static LIST_HEAD(ip6t_match);
+static LIST_HEAD(ip6t_tables);
+/* Add b bytes and p packets to counter c. */
+#define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0)
+
+/* Byte offset of CPU p's copy of the rules inside t->entries. Without
+ * CONFIG_SMP the offset is always 0. */
+#ifdef CONFIG_SMP
+#define TABLE_OFFSET(t,p) (SMP_ALIGN((t)->size)*(p))
+#else
+#define TABLE_OFFSET(t,p) 0
+#endif
+
+#if 0
+#define down(x) do { printk("DOWN:%u:" #x "\n", __LINE__); down(x); } while(0)
+#define down_interruptible(x) ({ int __r; printk("DOWNi:%u:" #x "\n", __LINE__); __r = down_interruptible(x); if (__r != 0) printk("ABORT-DOWNi:%u\n", __LINE__); __r; })
+#define up(x) do { printk("UP:%u:" #x "\n", __LINE__); up(x); } while(0)
+#endif
+
+/*
+ * Compare two IPv6 addresses under a mask, byte by byte.
+ * Returns 0 when addr1 and addr2 agree on every bit set in mask,
+ * 1 as soon as a differing masked bit is found.
+ */
+static int ip6_masked_addrcmp(struct in6_addr addr1, struct in6_addr mask,
+			      struct in6_addr addr2)
+{
+	int idx = 0;
+
+	while (idx < 16) {
+		/* Bits that differ and are selected by the mask. */
+		if ((addr1.s6_addr[idx] ^ addr2.s6_addr[idx])
+		    & mask.s6_addr[idx])
+			return 1;
+		idx++;
+	}
+	return 0;
+}
+
+/* Returns 1 if nexthdr is one of the header types this file treats as an
+ * IPv6 extension header (including ESP and "no next header"), else 0. */
+int
+ip6t_ext_hdr(u8 nexthdr)
+{
+	switch (nexthdr) {
+	case IPPROTO_HOPOPTS:
+	case IPPROTO_ROUTING:
+	case IPPROTO_FRAGMENT:
+	case IPPROTO_ESP:
+	case IPPROTO_AH:
+	case IPPROTO_NONE:
+	case IPPROTO_DSTOPTS:
+		return 1;
+	default:
+		return 0;
+	}
+}
+
+/*
+ * Returns whether the packet matches the IPv6 part of a rule or not.
+ *
+ * skb:      packet; skb->nh.ipv6h must point at its IPv6 header
+ * indev:    name of the input device, as an IFNAMSIZ-byte buffer
+ * outdev:   name of the output device, as an IFNAMSIZ-byte buffer
+ * ip6info:  the rule's address/interface/protocol specification
+ * protoff:  out: offset in the skb of the header following the extension
+ *           headers that were walked
+ * fragoff:  out: fragment offset in bytes (0 for unfragmented packets and
+ *           first fragments)
+ *
+ * *protoff and *fragoff are written only when the rule carries
+ * IP6T_F_PROTO and the extension header walk completes.
+ *
+ * Returns 1 on match, 0 otherwise.
+ */
+static inline int
+ip6_packet_match(const struct sk_buff *skb,
+		 const char *indev,
+		 const char *outdev,
+		 const struct ip6t_ip6 *ip6info,
+		 unsigned int *protoff,
+		 int *fragoff)
+{
+	size_t i;
+	unsigned long ret;
+	const struct ipv6hdr *ipv6 = skb->nh.ipv6h;
+
+/* XOR the raw comparison result with the rule's inversion flag. */
+#define FWINV(bool,invflg) ((bool) ^ !!(ip6info->invflags & invflg))
+
+	if (FWINV(ip6_masked_addrcmp(ipv6->saddr,ip6info->smsk,ip6info->src),
+		  IP6T_INV_SRCIP)
+	    || FWINV(ip6_masked_addrcmp(ipv6->daddr,ip6info->dmsk,ip6info->dst),
+		     IP6T_INV_DSTIP)) {
+		dprintf("Source or dest mismatch.\n");
+/*
+		dprintf("SRC: %u. Mask: %u. Target: %u.%s\n", ip->saddr,
+			ipinfo->smsk.s_addr, ipinfo->src.s_addr,
+			ipinfo->invflags & IP6T_INV_SRCIP ? " (INV)" : "");
+		dprintf("DST: %u. Mask: %u. Target: %u.%s\n", ip->daddr,
+			ipinfo->dmsk.s_addr, ipinfo->dst.s_addr,
+			ipinfo->invflags & IP6T_INV_DSTIP ? " (INV)" : "");*/
+		return 0;
+	}
+
+	/* Look for ifname matches; this should unroll nicely. */
+	for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
+		ret |= (((const unsigned long *)indev)[i]
+			^ ((const unsigned long *)ip6info->iniface)[i])
+			& ((const unsigned long *)ip6info->iniface_mask)[i];
+	}
+
+	if (FWINV(ret != 0, IP6T_INV_VIA_IN)) {
+		dprintf("VIA in mismatch (%s vs %s).%s\n",
+			indev, ip6info->iniface,
+			ip6info->invflags&IP6T_INV_VIA_IN ?" (INV)":"");
+		return 0;
+	}
+
+	for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
+		ret |= (((const unsigned long *)outdev)[i]
+			^ ((const unsigned long *)ip6info->outiface)[i])
+			& ((const unsigned long *)ip6info->outiface_mask)[i];
+	}
+
+	if (FWINV(ret != 0, IP6T_INV_VIA_OUT)) {
+		dprintf("VIA out mismatch (%s vs %s).%s\n",
+			outdev, ip6info->outiface,
+			ip6info->invflags&IP6T_INV_VIA_OUT ?" (INV)":"");
+		return 0;
+	}
+
+/* ... might want to do something with class and flowlabel here ... */
+
+	/* look for the desired protocol header */
+	if((ip6info->flags & IP6T_F_PROTO)) {
+		u_int8_t currenthdr = ipv6->nexthdr;
+		struct ipv6_opt_hdr _hdr, *hp;
+		/* Both are full-width: a 16-bit offset could wrap around on
+		 * packets close to 64KB and slip past the bounds check. */
+		unsigned int ptr;		/* Header offset in skb */
+		unsigned int hdrlen;		/* Length of current header */
+		u_int16_t _fragoff = 0, *fp = NULL;
+
+		ptr = IPV6_HDR_LEN;
+
+		while (ip6t_ext_hdr(currenthdr)) {
+			/* Is there enough space for the next ext header? */
+			if (skb->len - ptr < IPV6_OPTHDR_LEN)
+				return 0;
+
+			/* NONE or ESP: there isn't protocol part */
+			/* If we want to count these packets in '-p all',
+			 * we will change the return 0 to 1*/
+			if ((currenthdr == IPPROTO_NONE) ||
+			    (currenthdr == IPPROTO_ESP))
+				break;
+
+			hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr);
+			/* Should not happen after the length check above, but
+			 * never crash the machine on packet contents: treat an
+			 * unreadable header as "no match". */
+			if (hp == NULL)
+				return 0;
+
+			/* Size calculation */
+			if (currenthdr == IPPROTO_FRAGMENT) {
+				fp = skb_header_pointer(skb,
+						   ptr+offsetof(struct frag_hdr,
+								frag_off),
+						   sizeof(_fragoff),
+						   &_fragoff);
+				if (fp == NULL)
+					return 0;
+
+				/* Keep only the offset bits (bytes). */
+				_fragoff = ntohs(*fp) & ~0x7;
+				hdrlen = 8;
+			} else if (currenthdr == IPPROTO_AH)
+				/* AH length is counted in 32-bit words. */
+				hdrlen = (hp->hdrlen+2)<<2;
+			else
+				hdrlen = ipv6_optlen(hp);
+
+			currenthdr = hp->nexthdr;
+			ptr += hdrlen;
+			/* ptr is too large */
+			if ( ptr > skb->len )
+				return 0;
+			if (_fragoff) {
+				/* Non-first fragment: what follows is payload,
+				 * not another header chain. */
+				if (ip6t_ext_hdr(currenthdr))
+					return 0;
+				break;
+			}
+		}
+
+		*protoff = ptr;
+		*fragoff = _fragoff;
+
+		/* currenthdr contains the protocol header */
+
+		dprintf("Packet protocol %hi ?= %s%hi.\n",
+			currenthdr,
+			ip6info->invflags & IP6T_INV_PROTO ? "!":"",
+			ip6info->proto);
+
+		if (ip6info->proto == currenthdr) {
+			if(ip6info->invflags & IP6T_INV_PROTO) {
+				return 0;
+			}
+			return 1;
+		}
+
+		/* We need match for the '-p all', too! */
+		if ((ip6info->proto != 0) &&
+		    !(ip6info->invflags & IP6T_INV_PROTO))
+			return 0;
+	}
+	return 1;
+}
+
+/* Sanity check of a rule's IPv6 part: returns 1 when neither flags nor
+ * invflags carry bits outside IP6T_F_MASK / IP6T_INV_MASK, 0 otherwise.
+ * should be ip6 safe */
+static inline int
+ip6_checkentry(const struct ip6t_ip6 *ipv6)
+{
+	int unknown_flags = ipv6->flags & ~IP6T_F_MASK;
+	int unknown_invflags = ipv6->invflags & ~IP6T_INV_MASK;
+
+	if (unknown_flags) {
+		duprintf("Unknown flag bits set: %08X\n",
+			 ipv6->flags & ~IP6T_F_MASK);
+		return 0;
+	}
+	if (unknown_invflags) {
+		duprintf("Unknown invflag bits set: %08X\n",
+			 ipv6->invflags & ~IP6T_INV_MASK);
+		return 0;
+	}
+	return 1;
+}
+
+/*
+ * Target function that always drops the packet. targinfo is printed as a
+ * string, rate-limited through net_ratelimit().
+ * Returns NF_DROP.
+ */
+static unsigned int
+ip6t_error(struct sk_buff **pskb,
+	   const struct net_device *in,
+	   const struct net_device *out,
+	   unsigned int hooknum,
+	   const void *targinfo,
+	   void *userinfo)
+{
+	if (!net_ratelimit())
+		return NF_DROP;
+
+	printk("ip6_tables: error: `%s'\n", (char *)targinfo);
+	return NF_DROP;
+}
+
+/*
+ * Iterator callback that runs one match extension against the packet.
+ * Returns 1 (which stops the iteration) when the extension does NOT
+ * match, and 0 when it matches so the next extension is tried.
+ */
+static inline
+int do_match(struct ip6t_entry_match *m,
+	     const struct sk_buff *skb,
+	     const struct net_device *in,
+	     const struct net_device *out,
+	     int offset,
+	     unsigned int protoff,
+	     int *hotdrop)
+{
+	return !m->u.kernel.match->match(skb, in, out, m->data,
+					 offset, protoff, hotdrop);
+}
+
+/* Returns the rule located offset bytes after base. */
+static inline struct ip6t_entry *
+get_entry(void *base, unsigned int offset)
+{
+	void *where = base + offset;
+
+	return (struct ip6t_entry *)where;
+}
+
+/*
+ * Run a packet through the rules of one table for one hook.
+ *
+ * pskb:     the packet (targets receive this pointer and may use it)
+ * hook:     hook number; selects the entry point and the underflow rule
+ * in, out:  input/output devices, either may be NULL
+ * table:    table to traverse; its lock is held for reading throughout
+ * userdata: passed through unchanged to target functions
+ *
+ * Returns one of the generic firewall policies, like NF_ACCEPT. A match
+ * that sets hotdrop forces NF_DROP.
+ */
+unsigned int
+ip6t_do_table(struct sk_buff **pskb,
+ unsigned int hook,
+ const struct net_device *in,
+ const struct net_device *out,
+ struct ip6t_table *table,
+ void *userdata)
+{
+ /* All-zero device name, used when in or out is NULL. */
+ static const char nulldevname[IFNAMSIZ];
+ int offset = 0;
+ unsigned int protoff = 0;
+ int hotdrop = 0;
+ /* Initializing verdict to NF_DROP keeps gcc happy. */
+ unsigned int verdict = NF_DROP;
+ const char *indev, *outdev;
+ void *table_base;
+ struct ip6t_entry *e, *back;
+
+ /* Initialization */
+ indev = in ? in->name : nulldevname;
+ outdev = out ? out->name : nulldevname;
+
+ /* We handle fragments by dealing with the first fragment as
+ * if it was a normal packet. All other fragments are treated
+ * normally, except that they will NEVER match rules that ask
+ * things we don't know, ie. tcp syn flag or ports). If the
+ * rule is also a fragment-specific rule, non-fragments won't
+ * match it. */
+
+ read_lock_bh(&table->lock);
+ IP_NF_ASSERT(table->valid_hooks & (1 << hook));
+ /* Use this CPU's private copy of the rules. */
+ table_base = (void *)table->private->entries
+ + TABLE_OFFSET(table->private, smp_processor_id());
+ e = get_entry(table_base, table->private->hook_entry[hook]);
+
+#ifdef CONFIG_NETFILTER_DEBUG
+ /* Check no one else is using our table */
+ if (((struct ip6t_entry *)table_base)->comefrom != 0xdead57ac
+ && ((struct ip6t_entry *)table_base)->comefrom != 0xeeeeeeec) {
+ printk("ASSERT: CPU #%u, %s comefrom(%p) = %X\n",
+ smp_processor_id(),
+ table->name,
+ &((struct ip6t_entry *)table_base)->comefrom,
+ ((struct ip6t_entry *)table_base)->comefrom);
+ }
+ ((struct ip6t_entry *)table_base)->comefrom = 0x57acc001;
+#endif
+
+ /* For return from builtin chain */
+ back = get_entry(table_base, table->private->underflow[hook]);
+
+ do {
+ IP_NF_ASSERT(e);
+ IP_NF_ASSERT(back);
+ (*pskb)->nfcache |= e->nfcache;
+ if (ip6_packet_match(*pskb, indev, outdev, &e->ipv6,
+ &protoff, &offset)) {
+ struct ip6t_entry_target *t;
+
+ /* do_match() returns non-zero on the first extension
+ * that fails to match. */
+ if (IP6T_MATCH_ITERATE(e, do_match,
+ *pskb, in, out,
+ offset, protoff, &hotdrop) != 0)
+ goto no_match;
+
+ /* Account the packet: payload length plus the fixed
+ * IPv6 header. */
+ ADD_COUNTER(e->counters,
+ ntohs((*pskb)->nh.ipv6h->payload_len)
+ + IPV6_HDR_LEN,
+ 1);
+
+ t = ip6t_get_target(e);
+ IP_NF_ASSERT(t->u.kernel.target);
+ /* Standard target? */
+ if (!t->u.kernel.target->target) {
+ int v;
+
+ v = ((struct ip6t_standard_target *)t)->verdict;
+ if (v < 0) {
+ /* Pop from stack? */
+ if (v != IP6T_RETURN) {
+ /* Negative verdicts other than
+ * IP6T_RETURN encode the final
+ * verdict as -(verdict) - 1. */
+ verdict = (unsigned)(-v) - 1;
+ break;
+ }
+ /* RETURN: continue at the saved rule; the
+ * previous back pointer was stored in its
+ * comefrom field. */
+ e = back;
+ back = get_entry(table_base,
+ back->comefrom);
+ continue;
+ }
+ /* Non-negative verdict: offset of the rule to
+ * continue at. It is a real jump unless it simply
+ * names the next rule. */
+ if (table_base + v
+ != (void *)e + e->next_offset) {
+ /* Save old back ptr in next entry */
+ struct ip6t_entry *next
+ = (void *)e + e->next_offset;
+ next->comefrom
+ = (void *)back - table_base;
+ /* set back pointer to next entry */
+ back = next;
+ }
+
+ e = get_entry(table_base, v);
+ } else {
+ /* Targets which reenter must return
+ abs. verdicts */
+#ifdef CONFIG_NETFILTER_DEBUG
+ ((struct ip6t_entry *)table_base)->comefrom
+ = 0xeeeeeeec;
+#endif
+ verdict = t->u.kernel.target->target(pskb,
+ in, out,
+ hook,
+ t->data,
+ userdata);
+
+#ifdef CONFIG_NETFILTER_DEBUG
+ if (((struct ip6t_entry *)table_base)->comefrom
+ != 0xeeeeeeec
+ && verdict == IP6T_CONTINUE) {
+ printk("Target %s reentered!\n",
+ t->u.kernel.target->name);
+ verdict = NF_DROP;
+ }
+ ((struct ip6t_entry *)table_base)->comefrom
+ = 0x57acc001;
+#endif
+ if (verdict == IP6T_CONTINUE)
+ e = (void *)e + e->next_offset;
+ else
+ /* Verdict */
+ break;
+ }
+ } else {
+
+ no_match:
+ /* Rule did not match: move on to the next one. */
+ e = (void *)e + e->next_offset;
+ }
+ } while (!hotdrop);
+
+#ifdef CONFIG_NETFILTER_DEBUG
+ ((struct ip6t_entry *)table_base)->comefrom = 0xdead57ac;
+#endif
+ read_unlock_bh(&table->lock);
+
+#ifdef DEBUG_ALLOW_ALL
+ return NF_ACCEPT;
+#else
+ if (hotdrop)
+ return NF_DROP;
+ else return verdict;
+#endif
+}
+
+/*
+ * Look up name in the list head while holding mutex.
+ *
+ * On success the element is returned and mutex is left locked for the
+ * caller. On failure NULL is returned, mutex is not held and *error is
+ * the non-zero result of down_interruptible() or -ENOENT when the name
+ * is not on the list.
+ */
+static inline void *
+find_inlist_lock_noload(struct list_head *head,
+			const char *name,
+			int *error,
+			struct semaphore *mutex)
+{
+	void *found;
+
+	duprintf("find_inlist: searching for `%s' in %s.\n",
+		 name, head == &ip6t_target ? "ip6t_target"
+		 : head == &ip6t_match ? "ip6t_match"
+		 : head == &ip6t_tables ? "ip6t_tables" : "UNKNOWN");
+
+	*error = down_interruptible(mutex);
+	if (*error != 0)
+		return NULL;
+
+	found = list_named_find(head, name);
+	if (found)
+		return found;
+
+	/* Nothing by that name: report it and drop the mutex again. */
+	*error = -ENOENT;
+	up(mutex);
+	return NULL;
+}
+
+#ifndef CONFIG_KMOD
+#define find_inlist_lock(h,n,p,e,m) find_inlist_lock_noload((h),(n),(e),(m))
+#else
+/*
+ * Like find_inlist_lock_noload(), but when the first lookup fails it
+ * requests the module named "<prefix><name>" and looks once more.
+ * Return value, *error and the state of mutex are those of the last
+ * lookup performed.
+ */
+static void *
+find_inlist_lock(struct list_head *head,
+		 const char *name,
+		 const char *prefix,
+		 int *error,
+		 struct semaphore *mutex)
+{
+	void *found = find_inlist_lock_noload(head, name, error, mutex);
+
+	if (found)
+		return found;
+
+	duprintf("find_inlist: loading `%s%s'.\n", prefix, name);
+	request_module("%s%s", prefix, name);
+
+	return find_inlist_lock_noload(head, name, error, mutex);
+}
+#endif
+
+/* Find a table by name (module prefix "ip6table_"); on success the table
+ * is returned with mutex held, otherwise NULL with *error set. */
+static inline struct ip6t_table *
+ip6t_find_table_lock(const char *name, int *error, struct semaphore *mutex)
+{
+	struct ip6t_table *table;
+
+	table = find_inlist_lock(&ip6t_tables, name, "ip6table_", error, mutex);
+	return table;
+}
+
+/* Find a match extension by name (module prefix "ip6t_"); on success it
+ * is returned with mutex held, otherwise NULL with *error set. */
+static inline struct ip6t_match *
+find_match_lock(const char *name, int *error, struct semaphore *mutex)
+{
+	struct ip6t_match *match;
+
+	match = find_inlist_lock(&ip6t_match, name, "ip6t_", error, mutex);
+	return match;
+}
+
+/* Find a target extension by name (module prefix "ip6t_"); on success it
+ * is returned with mutex held, otherwise NULL with *error set. */
+static struct ip6t_target *
+ip6t_find_target_lock(const char *name, int *error, struct semaphore *mutex)
+{
+	struct ip6t_target *target;
+
+	target = find_inlist_lock(&ip6t_target, name, "ip6t_", error, mutex);
+	return target;
+}
+
+/* A rule is unconditional when every byte of its IPv6 part is zero.
+ * Returns 1 in that case, 0 otherwise. */
+static inline int
+unconditional(const struct ip6t_ip6 *ipv6)
+{
+	const char *bytes = (const char *)ipv6;
+	unsigned int n;
+
+	for (n = 0; n < sizeof(*ipv6); n++) {
+		if (bytes[n])
+			return 0;
+	}
+
+	return 1;
+}
+
+/* Figures out from what hook each rule can be called: returns 0 if
+   there are loops. Puts hook bitmask in comefrom.
+
+   This runs before check_entry()/standard_check(), so jump offsets are
+   still raw user input here. Any position (jump destination,
+   fall-through or continuation after a return) at which a complete
+   struct ip6t_entry no longer fits inside the table is therefore
+   rejected, also with return value 0, instead of being dereferenced.
+
+   NOTE(review): a jump destination is only checked to lie inside the
+   table, not to coincide with the start of a rule. */
+static int
+mark_source_chains(struct ip6t_table_info *newinfo, unsigned int valid_hooks)
+{
+	unsigned int hook;
+	/* Largest offset at which an entry header still fits in the table. */
+	const unsigned int max_pos
+		= newinfo->size >= sizeof(struct ip6t_entry)
+		  ? newinfo->size - sizeof(struct ip6t_entry) : 0;
+
+	/* No recursion; use packet counter to save back ptrs (reset
+	   to 0 as we leave), and comefrom to save source hook bitmask */
+	for (hook = 0; hook < NF_IP6_NUMHOOKS; hook++) {
+		unsigned int pos = newinfo->hook_entry[hook];
+		struct ip6t_entry *e
+			= (struct ip6t_entry *)(newinfo->entries + pos);
+
+		if (!(valid_hooks & (1 << hook)))
+			continue;
+
+		/* Set initial back pointer. */
+		e->counters.pcnt = pos;
+
+		for (;;) {
+			struct ip6t_standard_target *t
+				= (void *)ip6t_get_target(e);
+
+			/* Bit NF_IP6_NUMHOOKS marks "on the current path";
+			   meeting it again means the chain loops. */
+			if (e->comefrom & (1 << NF_IP6_NUMHOOKS)) {
+				printk("iptables: loop hook %u pos %u %08X.\n",
+				       hook, pos, e->comefrom);
+				return 0;
+			}
+			e->comefrom
+				|= ((1 << hook) | (1 << NF_IP6_NUMHOOKS));
+
+			/* Unconditional return/END. */
+			if (e->target_offset == sizeof(struct ip6t_entry)
+			    && (strcmp(t->target.u.user.name,
+				       IP6T_STANDARD_TARGET) == 0)
+			    && t->verdict < 0
+			    && unconditional(&e->ipv6)) {
+				unsigned int oldpos, size;
+
+				/* Return: backtrack through the last
+				   big jump. */
+				do {
+					e->comefrom ^= (1<<NF_IP6_NUMHOOKS);
+#ifdef DEBUG_IP_FIREWALL_USER
+					if (e->comefrom
+					    & (1 << NF_IP6_NUMHOOKS)) {
+						duprintf("Back unset "
+							 "on hook %u "
+							 "rule %u\n",
+							 hook, pos);
+					}
+#endif
+					oldpos = pos;
+					pos = e->counters.pcnt;
+					e->counters.pcnt = 0;
+
+					/* We're at the start. */
+					if (pos == oldpos)
+						goto next;
+
+					e = (struct ip6t_entry *)
+						(newinfo->entries + pos);
+				} while (oldpos == pos + e->next_offset);
+
+				/* Move along one */
+				size = e->next_offset;
+				if (pos + size > max_pos) {
+					duprintf("mark_source_chains: no rule "
+						 "after %u\n", pos);
+					return 0;
+				}
+				e = (struct ip6t_entry *)
+					(newinfo->entries + pos + size);
+				e->counters.pcnt = pos;
+				pos += size;
+			} else {
+				int newpos = t->verdict;
+
+				if (strcmp(t->target.u.user.name,
+					   IP6T_STANDARD_TARGET) == 0
+				    && newpos >= 0) {
+					/* This a jump; chase it. */
+					duprintf("Jump rule %u -> %u\n",
+						 pos, newpos);
+				} else {
+					/* ... this is a fallthru */
+					newpos = pos + e->next_offset;
+				}
+				/* Never follow an offset that leaves the
+				   table. */
+				if ((unsigned int)newpos > max_pos) {
+					duprintf("mark_source_chains: "
+						 "bad offset %u\n",
+						 (unsigned int)newpos);
+					return 0;
+				}
+				e = (struct ip6t_entry *)
+					(newinfo->entries + newpos);
+				e->counters.pcnt = pos;
+				pos = newpos;
+			}
+		}
+		next:
+		duprintf("Finished chain %u\n", hook);
+	}
+	return 1;
+}
+
+/*
+ * Iterator callback releasing one match: calls the extension's destroy
+ * hook (if any) and drops the module reference.
+ *
+ * When i is non-NULL it is a countdown: *i is decremented on every call
+ * and the iteration is stopped (return 1) on the call that finds it at
+ * zero, so only the first *i matches are released. Returns 0 otherwise.
+ */
+static inline int
+cleanup_match(struct ip6t_entry_match *m, unsigned int *i)
+{
+	struct ip6t_match *match;
+
+	if (i != NULL) {
+		unsigned int remaining = *i;
+
+		*i = remaining - 1;
+		if (remaining == 0)
+			return 1;
+	}
+
+	match = m->u.kernel.match;
+	if (match->destroy)
+		match->destroy(m->data, m->u.match_size - sizeof(*m));
+	module_put(match->me);
+
+	return 0;
+}
+
+/*
+ * Validate a standard target.
+ *
+ * t:          the target, interpreted as struct ip6t_standard_target
+ * max_offset: size of the table; a non-negative verdict is an offset
+ *             into it
+ *
+ * Returns 1 when the target has exactly the aligned size of a standard
+ * target and its verdict is either an offset that leaves room for an
+ * entry header or a negative value not below -NF_MAX_VERDICT - 1;
+ * returns 0 otherwise.
+ */
+static inline int
+standard_check(const struct ip6t_entry_target *t,
+ unsigned int max_offset)
+{
+ struct ip6t_standard_target *targ = (void *)t;
+
+ /* Check standard info. */
+ if (t->u.target_size
+ != IP6T_ALIGN(sizeof(struct ip6t_standard_target))) {
+ duprintf("standard_check: target size %u != %u\n",
+ t->u.target_size,
+ IP6T_ALIGN(sizeof(struct ip6t_standard_target)));
+ return 0;
+ }
+
+ /* Jump offsets must leave room for at least an entry header. */
+ if (targ->verdict >= 0
+ && targ->verdict > max_offset - sizeof(struct ip6t_entry)) {
+ duprintf("ip6t_standard_check: bad verdict (%i)\n",
+ targ->verdict);
+ return 0;
+ }
+
+ /* Negative verdicts encode -(verdict) - 1; anything smaller than the
+ * encoding of NF_MAX_VERDICT is invalid. */
+ if (targ->verdict < -NF_MAX_VERDICT - 1) {
+ duprintf("ip6t_standard_check: bad negative verdict (%i)\n",
+ targ->verdict);
+ return 0;
+ }
+ return 1;
+}
+
+/*
+ * Iterator callback that binds one match of a rule to its kernel
+ * implementation and lets that implementation validate the match data.
+ *
+ * m:        the match; m->u.user.name selects the extension
+ * name:     table name, passed on to the extension's checkentry hook
+ * ipv6:     IPv6 part of the rule, passed on to checkentry
+ * hookmask: passed on to checkentry (check_entry() passes e->comefrom)
+ * i:        incremented for every match that was set up successfully
+ *
+ * Returns 0 on success; the lookup error, -ENOENT if no module reference
+ * could be taken, or -EINVAL if checkentry rejects the match. On success a
+ * module reference is held until cleanup_match() drops it.
+ */
+static inline int
+check_match(struct ip6t_entry_match *m,
+ const char *name,
+ const struct ip6t_ip6 *ipv6,
+ unsigned int hookmask,
+ unsigned int *i)
+{
+ int ret;
+ struct ip6t_match *match;
+
+ /* On success this returns with ip6t_mutex held. */
+ match = find_match_lock(m->u.user.name, &ret, &ip6t_mutex);
+ if (!match) {
+ // duprintf("check_match: `%s' not found\n", m->u.name);
+ return ret;
+ }
+ if (!try_module_get(match->me)) {
+ up(&ip6t_mutex);
+ return -ENOENT;
+ }
+ /* u is a union: storing the kernel pointer replaces the user-supplied
+ * name. */
+ m->u.kernel.match = match;
+ up(&ip6t_mutex);
+
+ if (m->u.kernel.match->checkentry
+ && !m->u.kernel.match->checkentry(name, ipv6, m->data,
+ m->u.match_size - sizeof(*m),
+ hookmask)) {
+ /* Rejected: give the module reference back. */
+ module_put(m->u.kernel.match->me);
+ duprintf("ip_tables: check failed for `%s'.\n",
+ m->u.kernel.match->name);
+ return -EINVAL;
+ }
+
+ (*i)++;
+ return 0;
+}
+
+static struct ip6t_target ip6t_standard_target;
+
+/*
+ * Iterator callback that validates one rule: checks the IPv6 part, sets up
+ * every match through check_match(), then resolves and validates the
+ * target.
+ *
+ * e:    the rule
+ * name: table name, passed on to the extensions' checkentry hooks
+ * size: size of the table, used to validate standard-target jump offsets
+ * i:    incremented when the rule was set up successfully
+ *
+ * Returns 0 on success or a negative error code. On failure the matches
+ * that were already set up for this rule are released again.
+ */
+static inline int
+check_entry(struct ip6t_entry *e, const char *name, unsigned int size,
+ unsigned int *i)
+{
+ struct ip6t_entry_target *t;
+ struct ip6t_target *target;
+ int ret;
+ unsigned int j;
+
+ if (!ip6_checkentry(&e->ipv6)) {
+ duprintf("ip_tables: ip check failed %p %s.\n", e, name);
+ return -EINVAL;
+ }
+
+ /* j counts the matches set up so far, so that exactly those can be
+ * undone on failure. */
+ j = 0;
+ ret = IP6T_MATCH_ITERATE(e, check_match, name, &e->ipv6, e->comefrom, &j);
+ if (ret != 0)
+ goto cleanup_matches;
+
+ t = ip6t_get_target(e);
+ /* On success this returns with ip6t_mutex held. */
+ target = ip6t_find_target_lock(t->u.user.name, &ret, &ip6t_mutex);
+ if (!target) {
+ duprintf("check_entry: `%s' not found\n", t->u.user.name);
+ goto cleanup_matches;
+ }
+ if (!try_module_get(target->me)) {
+ up(&ip6t_mutex);
+ ret = -ENOENT;
+ goto cleanup_matches;
+ }
+ t->u.kernel.target = target;
+ up(&ip6t_mutex);
+ /* NOTE(review): target was checked to be non-NULL above, so this branch
+ * looks unreachable. */
+ if (!t->u.kernel.target) {
+ ret = -EBUSY;
+ goto cleanup_matches;
+ }
+ if (t->u.kernel.target == &ip6t_standard_target) {
+ /* NOTE(review): unlike the checkentry failure below, this path does
+ * not call module_put() on the target -- presumably harmless because
+ * the built-in standard target has no owning module; confirm against
+ * its definition. */
+ if (!standard_check(t, size)) {
+ ret = -EINVAL;
+ goto cleanup_matches;
+ }
+ } else if (t->u.kernel.target->checkentry
+ && !t->u.kernel.target->checkentry(name, e, t->data,
+ t->u.target_size
+ - sizeof(*t),
+ e->comefrom)) {
+ module_put(t->u.kernel.target->me);
+ duprintf("ip_tables: check failed for `%s'.\n",
+ t->u.kernel.target->name);
+ ret = -EINVAL;
+ goto cleanup_matches;
+ }
+
+ (*i)++;
+ return 0;
+
+ cleanup_matches:
+ /* Release the first j matches only. */
+ IP6T_MATCH_ITERATE(e, cleanup_match, &j);
+ return ret;
+}
+
+/*
+ * Iterator callback for the first pass over a user-supplied table:
+ * checks one entry's alignment and size, records hook entry points and
+ * underflows that coincide with it, and resets its counters and comefrom.
+ *
+ * e:            entry being checked
+ * newinfo:      table under construction; hook_entry[]/underflow[] are
+ *               filled in here
+ * base:         start of the rule blob
+ * limit:        first byte after the rule blob
+ * hook_entries: user-supplied entry point offsets, one per hook
+ * underflows:   user-supplied underflow offsets, one per hook
+ * i:            incremented for every accepted entry
+ *
+ * Returns 0 on success, -EINVAL for a misaligned entry, an entry too
+ * small to hold a header plus a target, or an entry that does not lie
+ * completely inside the blob.
+ */
+static inline int
+check_entry_size_and_hooks(struct ip6t_entry *e,
+			   struct ip6t_table_info *newinfo,
+			   unsigned char *base,
+			   unsigned char *limit,
+			   const unsigned int *hook_entries,
+			   const unsigned int *underflows,
+			   unsigned int *i)
+{
+	unsigned int h;
+
+	if ((unsigned long)e % __alignof__(struct ip6t_entry) != 0
+	    || (unsigned char *)e + sizeof(struct ip6t_entry) >= limit) {
+		duprintf("Bad offset %p\n", e);
+		return -EINVAL;
+	}
+
+	if (e->next_offset
+	    < sizeof(struct ip6t_entry) + sizeof(struct ip6t_entry_target)) {
+		duprintf("checking: element %p size %u\n",
+			 e, e->next_offset);
+		return -EINVAL;
+	}
+
+	/* The whole entry, matches and target included, must end inside
+	 * the blob; otherwise later passes would read and write beyond it.
+	 * The first check guarantees limit lies above e. */
+	if (e->next_offset > (size_t)(limit - (unsigned char *)e)) {
+		duprintf("checking: element %p size %u exceeds table\n",
+			 e, e->next_offset);
+		return -EINVAL;
+	}
+
+	/* Check hooks & underflows */
+	for (h = 0; h < NF_IP6_NUMHOOKS; h++) {
+		if ((unsigned char *)e - base == hook_entries[h])
+			newinfo->hook_entry[h] = hook_entries[h];
+		if ((unsigned char *)e - base == underflows[h])
+			newinfo->underflow[h] = underflows[h];
+	}
+
+	/* FIXME: underflows must be unconditional, standard verdicts
+	   < 0 (not IP6T_RETURN). --RR */
+
+	/* Clear counters and comefrom */
+	e->counters = ((struct ip6t_counters) { 0, 0 });
+	e->comefrom = 0;
+
+	(*i)++;
+	return 0;
+}
+
+/*
+ * Iterator callback releasing one rule: all of its matches, then its
+ * target (destroy hook, if any, and module reference).
+ *
+ * When i is non-NULL it is a countdown: *i is decremented on every call
+ * and the iteration is stopped (return 1) on the call that finds it at
+ * zero, so only the first *i rules are released. Returns 0 otherwise.
+ */
+static inline int
+cleanup_entry(struct ip6t_entry *e, unsigned int *i)
+{
+	struct ip6t_entry_target *t;
+	struct ip6t_target *target;
+
+	if (i != NULL) {
+		unsigned int remaining = *i;
+
+		*i = remaining - 1;
+		if (remaining == 0)
+			return 1;
+	}
+
+	/* Cleanup all matches */
+	IP6T_MATCH_ITERATE(e, cleanup_match, NULL);
+
+	t = ip6t_get_target(e);
+	target = t->u.kernel.target;
+	if (target->destroy)
+		target->destroy(t->data, t->u.target_size - sizeof(*t));
+	module_put(target->me);
+
+	return 0;
+}
+
+/* Checks and translates the user-supplied table segment (held in
+ newinfo).
+
+ name:         table name, passed on to the extensions' checkentry hooks
+ valid_hooks:  bitmask of hooks that must have an entry point and an
+               underflow
+ newinfo:      table info whose entries[] already holds the rule blob
+ size:         size of the rule blob in bytes
+ number:       number of rules the blob is expected to contain
+ hook_entries: entry point offsets, one per hook
+ underflows:   underflow offsets, one per hook
+
+ Returns 0 on success; -EINVAL for malformed input, -ELOOP when
+ mark_source_chains() rejects the table, or the error of the first rule
+ that fails check_entry(). */
+static int
+translate_table(const char *name,
+ unsigned int valid_hooks,
+ struct ip6t_table_info *newinfo,
+ unsigned int size,
+ unsigned int number,
+ const unsigned int *hook_entries,
+ const unsigned int *underflows)
+{
+ unsigned int i;
+ int ret;
+
+ newinfo->size = size;
+ newinfo->number = number;
+
+ /* Init all hooks to impossible value. */
+ for (i = 0; i < NF_IP6_NUMHOOKS; i++) {
+ newinfo->hook_entry[i] = 0xFFFFFFFF;
+ newinfo->underflow[i] = 0xFFFFFFFF;
+ }
+
+ duprintf("translate_table: size %u\n", newinfo->size);
+ i = 0;
+ /* Walk through entries, checking offsets. */
+ ret = IP6T_ENTRY_ITERATE(newinfo->entries, newinfo->size,
+ check_entry_size_and_hooks,
+ newinfo,
+ newinfo->entries,
+ newinfo->entries + size,
+ hook_entries, underflows, &i);
+ if (ret != 0)
+ return ret;
+
+ /* The walk counted the rules in i; it must agree with the caller. */
+ if (i != number) {
+ duprintf("translate_table: %u not %u entries\n",
+ i, number);
+ return -EINVAL;
+ }
+
+ /* Check hooks all assigned */
+ for (i = 0; i < NF_IP6_NUMHOOKS; i++) {
+ /* Only hooks which are valid */
+ if (!(valid_hooks & (1 << i)))
+ continue;
+ if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
+ duprintf("Invalid hook entry %u %u\n",
+ i, hook_entries[i]);
+ return -EINVAL;
+ }
+ if (newinfo->underflow[i] == 0xFFFFFFFF) {
+ duprintf("Invalid underflow %u %u\n",
+ i, underflows[i]);
+ return -EINVAL;
+ }
+ }
+
+ if (!mark_source_chains(newinfo, valid_hooks))
+ return -ELOOP;
+
+ /* Finally, each sanity check must pass */
+ i = 0;
+ ret = IP6T_ENTRY_ITERATE(newinfo->entries, newinfo->size,
+ check_entry, name, size, &i);
+
+ if (ret != 0) {
+ /* i rules were set up before the failure: release exactly those. */
+ IP6T_ENTRY_ITERATE(newinfo->entries, newinfo->size,
+ cleanup_entry, &i);
+ return ret;
+ }
+
+ /* And one copy for every other CPU */
+ for (i = 1; i < num_possible_cpus(); i++) {
+ memcpy(newinfo->entries + SMP_ALIGN(newinfo->size)*i,
+ newinfo->entries,
+ SMP_ALIGN(newinfo->size));
+ }
+
+ return ret;
+}
+
+/*
+ * Swap a table's rule set for newinfo under the table's write lock.
+ *
+ * table:        table to update
+ * num_counters: number of rules the caller believes the current rule set
+ *               has; the swap is refused if this is wrong
+ * newinfo:      replacement rule set; inherits initial_entries from the
+ *               old one
+ * error:        set to -EAGAIN when the swap is refused
+ *
+ * Returns the previous rule set (which is not freed here), or NULL when
+ * the swap is refused.
+ */
+static struct ip6t_table_info *
+replace_table(struct ip6t_table *table,
+ unsigned int num_counters,
+ struct ip6t_table_info *newinfo,
+ int *error)
+{
+ struct ip6t_table_info *oldinfo;
+
+#ifdef CONFIG_NETFILTER_DEBUG
+ {
+ struct ip6t_entry *table_base;
+ unsigned int i;
+
+ /* Mark every per-CPU copy with the "not in use" value that
+ * ip6t_do_table() checks for. */
+ for (i = 0; i < num_possible_cpus(); i++) {
+ table_base =
+ (void *)newinfo->entries
+ + TABLE_OFFSET(newinfo, i);
+
+ table_base->comefrom = 0xdead57ac;
+ }
+ }
+#endif
+
+ /* Do the substitution. */
+ write_lock_bh(&table->lock);
+ /* Check inside lock: is the old number correct? */
+ if (num_counters != table->private->number) {
+ duprintf("num_counters != table->private->number (%u/%u)\n",
+ num_counters, table->private->number);
+ write_unlock_bh(&table->lock);
+ *error = -EAGAIN;
+ return NULL;
+ }
+ oldinfo = table->private;
+ table->private = newinfo;
+ newinfo->initial_entries = oldinfo->initial_entries;
+ write_unlock_bh(&table->lock);
+
+ return oldinfo;
+}
+
+/* Iterator callback: adds the byte and packet counters of rule e to
+ * total[*i] and advances *i to the next slot. Always returns 0 so the
+ * iteration continues. */
+static inline int
+add_entry_to_counter(const struct ip6t_entry *e,
+		     struct ip6t_counters total[],
+		     unsigned int *i)
+{
+	struct ip6t_counters *slot = &total[*i];
+
+	ADD_COUNTER(*slot, e->counters.bcnt, e->counters.pcnt);
+	*i += 1;
+
+	return 0;
+}
+
+/*
+ * Sum the per-CPU rule counters of table t into counters[], one slot
+ * per rule. The values are added to whatever counters[] already holds,
+ * so the caller is expected to have cleared it.
+ */
+static void
+get_counters(const struct ip6t_table_info *t,
+	     struct ip6t_counters counters[])
+{
+	unsigned int cpu = 0;
+
+	while (cpu < num_possible_cpus()) {
+		/* Restart at slot 0 for every CPU's copy of the rules. */
+		unsigned int slot = 0;
+
+		IP6T_ENTRY_ITERATE(t->entries + TABLE_OFFSET(t, cpu),
+				   t->size,
+				   add_entry_to_counter,
+				   counters,
+				   &slot);
+		cpu++;
+	}
+}
+
+/*
+ * Copy a table's rules to userspace, substituting summed counters and the
+ * names of the match/target extensions for the kernel-internal data.
+ *
+ * total_size: number of bytes of rule data to copy
+ * table:      table to dump
+ * userptr:    destination buffer in userspace
+ *
+ * Returns 0 on success, -ENOMEM if the temporary counter array cannot be
+ * allocated, -EFAULT if a copy to userspace fails.
+ */
+static int
+copy_entries_to_user(unsigned int total_size,
+ struct ip6t_table *table,
+ void __user *userptr)
+{
+ unsigned int off, num, countersize;
+ struct ip6t_entry *e;
+ struct ip6t_counters *counters;
+ int ret = 0;
+
+ /* We need atomic snapshot of counters: rest doesn't change
+ (other than comefrom, which userspace doesn't care
+ about). */
+ countersize = sizeof(struct ip6t_counters) * table->private->number;
+ counters = vmalloc(countersize);
+
+ if (counters == NULL)
+ return -ENOMEM;
+
+ /* First, sum counters... */
+ memset(counters, 0, countersize);
+ write_lock_bh(&table->lock);
+ get_counters(table->private, counters);
+ write_unlock_bh(&table->lock);
+
+ /* ... then copy entire thing from CPU 0... */
+ if (copy_to_user(userptr, table->private->entries, total_size) != 0) {
+ ret = -EFAULT;
+ goto free_counters;
+ }
+
+ /* FIXME: use iterator macros --RR */
+ /* ... then go back and fix counters and names */
+ for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
+ unsigned int i;
+ struct ip6t_entry_match *m;
+ struct ip6t_entry_target *t;
+
+ e = (struct ip6t_entry *)(table->private->entries + off);
+ /* Overwrite the counters of rule num with the summed values. */
+ if (copy_to_user(userptr + off
+ + offsetof(struct ip6t_entry, counters),
+ &counters[num],
+ sizeof(counters[num])) != 0) {
+ ret = -EFAULT;
+ goto free_counters;
+ }
+
+ /* The matches sit between the entry header and target_offset; put
+ * each extension's name where the kernel copy holds a pointer. */
+ for (i = sizeof(struct ip6t_entry);
+ i < e->target_offset;
+ i += m->u.match_size) {
+ m = (void *)e + i;
+
+ if (copy_to_user(userptr + off + i
+ + offsetof(struct ip6t_entry_match,
+ u.user.name),
+ m->u.kernel.match->name,
+ strlen(m->u.kernel.match->name)+1)
+ != 0) {
+ ret = -EFAULT;
+ goto free_counters;
+ }
+ }
+
+ /* Same substitution for the target's name. */
+ t = ip6t_get_target(e);
+ if (copy_to_user(userptr + off + e->target_offset
+ + offsetof(struct ip6t_entry_target,
+ u.user.name),
+ t->u.kernel.target->name,
+ strlen(t->u.kernel.target->name)+1) != 0) {
+ ret = -EFAULT;
+ goto free_counters;
+ }
+ }
+
+ free_counters:
+ vfree(counters);
+ return ret;
+}
+
+/*
+ * Look up the table named in @entries and copy its rules to userspace.
+ *
+ * @entries: kernel copy of the request header (table name and the size
+ *           the caller expects).
+ * @uptr:    the same request in userspace; the rules are written to its
+ *           entrytable member.
+ *
+ * The size supplied by the caller must equal the table's current size,
+ * otherwise -EINVAL is returned.  If the table cannot be found, the
+ * error stored by ip6t_find_table_lock() is returned.  Otherwise the
+ * result of copy_entries_to_user() is returned.  ip6t_mutex is released
+ * with up() before returning whenever the lookup succeeded.
+ */
+static int
+get_entries(const struct ip6t_get_entries *entries,
+	    struct ip6t_get_entries __user *uptr)
+{
+	struct ip6t_table *table;
+	int ret;
+
+	table = ip6t_find_table_lock(entries->name, &ret, &ip6t_mutex);
+	if (!table) {
+		duprintf("get_entries: Can't find %s!\n",
+			 entries->name);
+		return ret;
+	}
+
+	duprintf("t->private->number = %u\n",
+		 table->private->number);
+
+	if (entries->size != table->private->size) {
+		duprintf("get_entries: I've got %u not %u!\n",
+			 table->private->size,
+			 entries->size);
+		ret = -EINVAL;
+	} else {
+		ret = copy_entries_to_user(table->private->size,
+					   table, uptr->entrytable);
+	}
+
+	up(&ip6t_mutex);
+	return ret;
+}
+
+/*
+ * Replace the rule set of an existing table (IP6T_SO_SET_REPLACE).
+ *
+ * @user: userspace pointer to a struct ip6t_replace immediately followed
+ *        by tmp.size bytes of rule entries.
+ * @len:  length passed to setsockopt(); not consulted here.
+ *
+ * Steps:
+ *   - copy in the header, force-terminate the table name and reject
+ *     sizes whose allocation arithmetic would overflow;
+ *   - allocate the new table info (one SMP-aligned copy of the rules per
+ *     possible CPU) and a counters array for the old rules;
+ *   - translate_table() validates the new rules;
+ *   - under ip6t_mutex, replace_table() swaps the rule sets;
+ *   - the old rules' counters are collected and copied to tmp.counters,
+ *     then the old rules are cleaned up and freed.
+ *
+ * Returns 0 on success or a negative errno.  A failing copy of the old
+ * counters yields -EFAULT even though the replacement already happened.
+ */
+static int
+do_replace(void __user *user, unsigned int len)
+{
+	int ret;
+	struct ip6t_replace tmp;
+	struct ip6t_table *t;
+	struct ip6t_table_info *newinfo, *oldinfo;
+	struct ip6t_counters *counters;
+
+	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
+		return -EFAULT;
+
+	/* The name comes from userspace and is used as a C string below;
+	 * make sure it cannot run past the end of the array. */
+	tmp.name[sizeof(tmp.name) - 1] = '\0';
+
+	/* Overflow checks: both sizes feed multiplications in the
+	 * vmalloc() calls below; a wrapped product would give a buffer
+	 * smaller than the data later written into it. */
+	if (tmp.size >= (INT_MAX - sizeof(struct ip6t_table_info))
+			/ num_possible_cpus() - SMP_CACHE_BYTES)
+		return -ENOMEM;
+	if (tmp.num_counters >= INT_MAX / sizeof(struct ip6t_counters))
+		return -ENOMEM;
+
+	/* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */
+	if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages)
+		return -ENOMEM;
+
+	newinfo = vmalloc(sizeof(struct ip6t_table_info)
+			  + SMP_ALIGN(tmp.size) * num_possible_cpus());
+	if (!newinfo)
+		return -ENOMEM;
+
+	/* The rule entries follow the header directly in user memory. */
+	if (copy_from_user(newinfo->entries, user + sizeof(tmp),
+			   tmp.size) != 0) {
+		ret = -EFAULT;
+		goto free_newinfo;
+	}
+
+	counters = vmalloc(tmp.num_counters * sizeof(struct ip6t_counters));
+	if (!counters) {
+		ret = -ENOMEM;
+		goto free_newinfo;
+	}
+	memset(counters, 0, tmp.num_counters * sizeof(struct ip6t_counters));
+
+	ret = translate_table(tmp.name, tmp.valid_hooks,
+			      newinfo, tmp.size, tmp.num_entries,
+			      tmp.hook_entry, tmp.underflow);
+	if (ret != 0)
+		goto free_newinfo_counters;
+
+	duprintf("ip_tables: Translated table\n");
+
+	t = ip6t_find_table_lock(tmp.name, &ret, &ip6t_mutex);
+	if (!t)
+		goto free_newinfo_counters_untrans;
+
+	/* You lied! */
+	if (tmp.valid_hooks != t->valid_hooks) {
+		duprintf("Valid hook crap: %08X vs %08X\n",
+			 tmp.valid_hooks, t->valid_hooks);
+		ret = -EINVAL;
+		goto free_newinfo_counters_untrans_unlock;
+	}
+
+	/* Get a reference in advance, we're not allowed fail later */
+	if (!try_module_get(t->me)) {
+		ret = -EBUSY;
+		goto free_newinfo_counters_untrans_unlock;
+	}
+
+	oldinfo = replace_table(t, tmp.num_counters, newinfo, &ret);
+	if (!oldinfo)
+		goto put_module;
+
+	/* Update module usage count based on number of rules */
+	duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
+		oldinfo->number, oldinfo->initial_entries, newinfo->number);
+	if ((oldinfo->number > oldinfo->initial_entries) ||
+	    (newinfo->number <= oldinfo->initial_entries))
+		module_put(t->me);
+	if ((oldinfo->number > oldinfo->initial_entries) &&
+	    (newinfo->number <= oldinfo->initial_entries))
+		module_put(t->me);
+
+	/* Get the old counters. */
+	get_counters(oldinfo, counters);
+	/* Decrease module usage counts and free resource */
+	IP6T_ENTRY_ITERATE(oldinfo->entries, oldinfo->size, cleanup_entry,NULL);
+	vfree(oldinfo);
+	/* Silent error: too late now. */
+	if (copy_to_user(tmp.counters, counters,
+			 sizeof(struct ip6t_counters) * tmp.num_counters) != 0)
+		ret = -EFAULT;
+	vfree(counters);
+	up(&ip6t_mutex);
+	return ret;
+
+	/* Error unwinding: each label undoes one more acquisition. */
+ put_module:
+	module_put(t->me);
+ free_newinfo_counters_untrans_unlock:
+	up(&ip6t_mutex);
+ free_newinfo_counters_untrans:
+	IP6T_ENTRY_ITERATE(newinfo->entries, newinfo->size, cleanup_entry,NULL);
+ free_newinfo_counters:
+	vfree(counters);
+ free_newinfo:
+	vfree(newinfo);
+	return ret;
+}
+
+/*
+ * IP6T_ENTRY_ITERATE callback: add addme[*i] to the counters of entry
+ * @e and advance *i to the next slot.  Always returns 0.
+ *
+ * We're lazy, and add to the first CPU; overflow works its fey magic
+ * and everything is OK.
+ */
+static inline int
+add_counter_to_entry(struct ip6t_entry *e,
+		     const struct ip6t_counters addme[],
+		     unsigned int *i)
+{
+	const struct ip6t_counters *delta = &addme[*i];
+
+#if 0
+	duprintf("add_counter: Entry %u %lu/%lu + %lu/%lu\n",
+		 *i,
+		 (long unsigned int)e->counters.pcnt,
+		 (long unsigned int)e->counters.bcnt,
+		 (long unsigned int)addme[*i].pcnt,
+		 (long unsigned int)addme[*i].bcnt);
+#endif
+
+	ADD_COUNTER(e->counters, delta->bcnt, delta->pcnt);
+	*i += 1;
+
+	return 0;
+}
+
+/*
+ * Add user-supplied counter values to a table's rules
+ * (IP6T_SO_SET_ADD_COUNTERS).
+ *
+ * @user: userspace pointer to a struct ip6t_counters_info followed by
+ *        num_counters struct ip6t_counters.
+ * @len:  total length of that buffer; must be exactly the header plus
+ *        num_counters counters.
+ *
+ * Returns -EFAULT if user memory cannot be read, -EINVAL if @len is
+ * inconsistent with num_counters or num_counters differs from the
+ * table's number of entries, -ENOMEM if the kernel copy cannot be
+ * allocated, or the error stored by ip6t_find_table_lock() if the table
+ * is not found.  On success the value stored by ip6t_find_table_lock()
+ * is returned.
+ */
+static int
+do_add_counters(void __user *user, unsigned int len)
+{
+	unsigned int i;
+	struct ip6t_counters_info tmp, *paddc;
+	struct ip6t_table *t;
+	int ret;
+
+	if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
+		return -EFAULT;
+
+	/* Same condition as
+	 *   len == sizeof(tmp) + num_counters * sizeof(counter)
+	 * but in division form, so a huge num_counters cannot make the
+	 * product wrap around and match a short buffer. */
+	if (len < sizeof(tmp)
+	    || (len - sizeof(tmp)) % sizeof(struct ip6t_counters) != 0
+	    || (len - sizeof(tmp)) / sizeof(struct ip6t_counters)
+	       != tmp.num_counters)
+		return -EINVAL;
+
+	/* The name comes from userspace and is used as a C string below;
+	 * make sure it cannot run past the end of the array. */
+	tmp.name[sizeof(tmp.name) - 1] = '\0';
+
+	paddc = vmalloc(len);
+	if (!paddc)
+		return -ENOMEM;
+
+	if (copy_from_user(paddc, user, len) != 0) {
+		ret = -EFAULT;
+		goto free;
+	}
+
+	t = ip6t_find_table_lock(tmp.name, &ret, &ip6t_mutex);
+	if (!t)
+		goto free;
+
+	write_lock_bh(&t->lock);
+	/* Compare against tmp.num_counters, the value that was validated
+	 * against len.  paddc->num_counters comes from a second read of
+	 * user memory and may have been changed in between. */
+	if (t->private->number != tmp.num_counters) {
+		ret = -EINVAL;
+		goto unlock_up_free;
+	}
+
+	i = 0;
+	IP6T_ENTRY_ITERATE(t->private->entries,
+			  t->private->size,
+			  add_counter_to_entry,
+			  paddc->counters,
+			  &i);
+ unlock_up_free:
+	write_unlock_bh(&t->lock);
+	up(&ip6t_mutex);
+ free:
+	vfree(paddc);
+
+	return ret;
+}
+
+/*
+ * setsockopt() entry point: dispatch @cmd to the matching handler.
+ *
+ * The caller must have CAP_NET_ADMIN, otherwise -EPERM is returned.
+ * IP6T_SO_SET_REPLACE is handled by do_replace() and
+ * IP6T_SO_SET_ADD_COUNTERS by do_add_counters(); any other command
+ * returns -EINVAL.  @sk is not used.
+ */
+static int
+do_ip6t_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
+{
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	switch (cmd) {
+	case IP6T_SO_SET_REPLACE:
+		return do_replace(user, len);
+
+	case IP6T_SO_SET_ADD_COUNTERS:
+		return do_add_counters(user, len);
+
+	default:
+		break;
+	}
+
+	duprintf("do_ip6t_set_ctl: unknown request %i\n", cmd);
+	return -EINVAL;
+}
+
+/*
+ * getsockopt() entry point for table information and rule dumps.
+ *
+ * The caller must have CAP_NET_ADMIN, otherwise -EPERM is returned.
+ *
+ * IP6T_SO_GET_INFO: *len must equal sizeof(struct ip6t_getinfo).  The
+ * table name is read from the start of @user, the table is looked up,
+ * and hook entry points, underflows, entry count and size are written
+ * back to @user.
+ *
+ * IP6T_SO_GET_ENTRIES: *len must equal the request header plus the size
+ * announced in it; the work is done by get_entries().
+ *
+ * Unknown commands return -EINVAL.  @sk is not used.
+ */
+static int
+do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
+{
+	int ret;
+
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	switch (cmd) {
+	case IP6T_SO_GET_INFO: {
+		char name[IP6T_TABLE_MAXNAMELEN];
+		struct ip6t_table *t;
+
+		if (*len != sizeof(struct ip6t_getinfo)) {
+			duprintf("length %u != %u\n", *len,
+				 sizeof(struct ip6t_getinfo));
+			ret = -EINVAL;
+			break;
+		}
+
+		if (copy_from_user(name, user, sizeof(name)) != 0) {
+			ret = -EFAULT;
+			break;
+		}
+		/* User-supplied: force termination before string use. */
+		name[IP6T_TABLE_MAXNAMELEN-1] = '\0';
+		/* On lookup failure ret keeps the error stored by
+		 * ip6t_find_table_lock(). */
+		t = ip6t_find_table_lock(name, &ret, &ip6t_mutex);
+		if (t) {
+			struct ip6t_getinfo info;
+
+			info.valid_hooks = t->valid_hooks;
+			memcpy(info.hook_entry, t->private->hook_entry,
+			       sizeof(info.hook_entry));
+			memcpy(info.underflow, t->private->underflow,
+			       sizeof(info.underflow));
+			info.num_entries = t->private->number;
+			info.size = t->private->size;
+			memcpy(info.name, name, sizeof(info.name));
+
+			if (copy_to_user(user, &info, *len) != 0)
+				ret = -EFAULT;
+			else
+				ret = 0;
+
+			up(&ip6t_mutex);
+		}
+	}
+	break;
+
+	case IP6T_SO_GET_ENTRIES: {
+		struct ip6t_get_entries get;
+
+		if (*len < sizeof(get)) {
+			duprintf("get_entries: %u < %u\n", *len, sizeof(get));
+			ret = -EINVAL;
+		} else if (copy_from_user(&get, user, sizeof(get)) != 0) {
+			ret = -EFAULT;
+		} else if (*len != sizeof(struct ip6t_get_entries) + get.size) {
+			duprintf("get_entries: %u != %u\n", *len,
+				 sizeof(struct ip6t_get_entries) + get.size);
+			ret = -EINVAL;
+		} else {
+			/* get.name comes from userspace and get_entries()
+			 * uses it as a C string: terminate it first, as
+			 * the GET_INFO branch does for its name. */
+			get.name[sizeof(get.name) - 1] = '\0';
+			ret = get_entries(&get, user);
+		}
+		break;
+	}
+
+	default:
+		duprintf("do_ip6t_get_ctl: unknown request %i\n", cmd);
+		ret = -EINVAL;
+	}
+
+	return ret;
+}
+
+/* Registration hooks for targets. */
+
+/*
+ * Insert @target into the ip6t_target list while holding ip6t_mutex.
+ *
+ * Returns 0 on success, the non-zero result of down_interruptible() if
+ * the mutex could not be taken, or -EINVAL if list_named_insert()
+ * refuses the entry.
+ */
+int
+ip6t_register_target(struct ip6t_target *target)
+{
+	int err = down_interruptible(&ip6t_mutex);
+
+	if (err)
+		return err;
+
+	if (list_named_insert(&ip6t_target, target)) {
+		up(&ip6t_mutex);
+		return 0;
+	}
+
+	duprintf("ip6t_register_target: `%s' already in list!\n",
+		 target->name);
+	up(&ip6t_mutex);
+	return -EINVAL;
+}
+
+/*
+ * Remove @target from the ip6t_target list.  The list is modified with
+ * ip6t_mutex held; the wait for the mutex uses down(), not the
+ * interruptible variant.
+ */
+void ip6t_unregister_target(struct ip6t_target *target)
+{
+	down(&ip6t_mutex);
+	LIST_DELETE(&ip6t_target, target);
+	up(&ip6t_mutex);
+}
+
+/*
+ * Insert @match into the ip6t_match list while holding ip6t_mutex.
+ *
+ * Returns 0 on success, the non-zero result of down_interruptible() if
+ * the mutex could not be taken, or -EINVAL if list_named_insert()
+ * refuses the entry.
+ */
+int
+ip6t_register_match(struct ip6t_match *match)
+{
+	int err = down_interruptible(&ip6t_mutex);
+
+	if (err)
+		return err;
+
+	if (list_named_insert(&ip6t_match, match)) {
+		up(&ip6t_mutex);
+		return 0;
+	}
+
+	duprintf("ip6t_register_match: `%s' already in list!\n",
+		 match->name);
+	up(&ip6t_mutex);
+	return -EINVAL;
+}
+
+/*
+ * Remove @match from the ip6t_match list.  The list is modified with
+ * ip6t_mutex held; the wait for the mutex uses down(), not the
+ * interruptible variant.
+ */
+void ip6t_unregister_match(struct ip6t_match *match)
+{
+	down(&ip6t_mutex);
+	LIST_DELETE(&ip6t_match, match);
+	up(&ip6t_mutex);
+}
+
+/*
+ * Register a new table with its initial rule set.
+ *
+ * @table: table descriptor to add to the ip6t_tables list.
+ * @repl:  initial rules (entries, size, hook entry points, underflows).
+ *
+ * The rules are copied into a freshly vmalloc()ed table info, validated
+ * by translate_table() and installed with replace_table() over a static
+ * all-zero bootstrap info.  On success the number of initial entries is
+ * recorded, the table lock is initialised and the table is prepended to
+ * ip6t_tables.
+ *
+ * Returns 0 on success, -ENOMEM on allocation failure, -EEXIST if a
+ * table of that name is already registered, or the error reported by
+ * translate_table(), down_interruptible() or replace_table().
+ *
+ * Every failure after translate_table() has succeeded runs the same
+ * cleanup_entry pass that do_replace() and ip6t_unregister_table() use
+ * before freeing a translated rule set, so whatever translation
+ * acquired for the entries is released again.
+ */
+int ip6t_register_table(struct ip6t_table *table,
+			const struct ip6t_replace *repl)
+{
+	int ret;
+	struct ip6t_table_info *newinfo;
+	static struct ip6t_table_info bootstrap
+		= { 0, 0, 0, { 0 }, { 0 }, { } };
+
+	newinfo = vmalloc(sizeof(struct ip6t_table_info)
+			  + SMP_ALIGN(repl->size) * num_possible_cpus());
+	if (!newinfo)
+		return -ENOMEM;
+
+	memcpy(newinfo->entries, repl->entries, repl->size);
+
+	ret = translate_table(table->name, table->valid_hooks,
+			      newinfo, repl->size,
+			      repl->num_entries,
+			      repl->hook_entry,
+			      repl->underflow);
+	if (ret != 0) {
+		/* Nothing to undo beyond the allocation here; do_replace()
+		 * treats a failed translate_table() the same way. */
+		vfree(newinfo);
+		return ret;
+	}
+
+	ret = down_interruptible(&ip6t_mutex);
+	if (ret != 0)
+		goto untranslate;
+
+	/* Don't autoload: we'd eat our tail... */
+	if (list_named_find(&ip6t_tables, table->name)) {
+		ret = -EEXIST;
+		goto unlock_untranslate;
+	}
+
+	/* Simplifies replace_table code. */
+	table->private = &bootstrap;
+	if (!replace_table(table, 0, newinfo, &ret))
+		goto unlock_untranslate;
+
+	duprintf("table->private->number = %u\n",
+		 table->private->number);
+
+	/* save number of initial entries */
+	table->private->initial_entries = table->private->number;
+
+	rwlock_init(&table->lock);
+	list_prepend(&ip6t_tables, table);
+
+	up(&ip6t_mutex);
+	return ret;
+
+ unlock_untranslate:
+	up(&ip6t_mutex);
+ untranslate:
+	/* Undo translate_table() before dropping the rule blob. */
+	IP6T_ENTRY_ITERATE(newinfo->entries, newinfo->size,
+			  cleanup_entry, NULL);
+	vfree(newinfo);
+	return ret;
+}
+
+/*
+ * Unregister @table: unlink it from ip6t_tables under ip6t_mutex, then
+ * run cleanup_entry over every rule and free the table info.
+ */
+void ip6t_unregister_table(struct ip6t_table *table)
+{
+	struct ip6t_table_info *info;
+
+	down(&ip6t_mutex);
+	LIST_DELETE(&ip6t_tables, table);
+	up(&ip6t_mutex);
+
+	/* Decrease module usage counts and free resources */
+	info = table->private;
+	IP6T_ENTRY_ITERATE(info->entries, info->size, cleanup_entry, NULL);
+	vfree(info);
+}
+
+/*
+ * Test @port against the inclusive range [@min, @max].
+ *
+ * Returns the range test result (1 inside, 0 outside) XORed with
+ * @invert, so callers passing 0 or 1 for @invert get 1 on a match and
+ * 0 otherwise.
+ */
+static inline int
+port_match(u_int16_t min, u_int16_t max, u_int16_t port, int invert)
+{
+	int in_range = (min <= port) && (port <= max);
+
+	return in_range ^ invert;
+}
+
+/*
+ * Search the TCP options of a packet for option kind @option.
+ *
+ * @option:  option kind to look for.
+ * @skb:     packet being examined.
+ * @tcpoff:  offset of the TCP header within @skb.
+ * @optlen:  number of option bytes following the fixed TCP header.
+ * @invert:  non-zero to invert the result.
+ * @hotdrop: set to 1 if the option bytes cannot be read from @skb.
+ *
+ * Returns !@invert if the option is present, @invert if it is absent
+ * (including when @optlen is 0), and 0 with *@hotdrop set when the
+ * options cannot be fetched.
+ */
+static int
+tcp_find_option(u_int8_t option,
+		const struct sk_buff *skb,
+		unsigned int tcpoff,
+		unsigned int optlen,
+		int invert,
+		int *hotdrop)
+{
+	/* tcp.doff is only 4 bits, ie. max 15 * 4 bytes */
+	u_int8_t _opt[60 - sizeof(struct tcphdr)], *op;
+	unsigned int i;
+
+	duprintf("tcp_match: finding option\n");
+	if (!optlen)
+		return invert;
+	/* If we don't have the whole header, drop packet. */
+	op = skb_header_pointer(skb, tcpoff + sizeof(struct tcphdr), optlen,
+				_opt);
+	if (op == NULL) {
+		*hotdrop = 1;
+		return 0;
+	}
+
+	for (i = 0; i < optlen; ) {
+		if (op[i] == option)
+			return !invert;
+
+		/* Kinds 0 and 1 are stepped over as single bytes. */
+		if (op[i] < 2) {
+			i++;
+			continue;
+		}
+
+		/* Any other kind is followed by a length byte.  If the
+		 * option area ends right after the kind byte, stop here
+		 * instead of reading one byte past the fetched options. */
+		if (i + 1 >= optlen)
+			break;
+
+		/* A zero length would never advance; step by one then. */
+		i += op[i + 1] ? op[i + 1] : 1;
+	}
+
+	return invert;
+}
+
+/*
+ * Match callback for TCP rules.
+ *
+ * @skb:       packet being examined.
+ * @in, @out:  devices; not used here.
+ * @matchinfo: the rule's struct ip6t_tcp (port ranges, flag mask and
+ *             compare value, option, inversion flags).
+ * @offset:    fragment offset; non-zero means no match.
+ * @protoff:   offset of the TCP header within @skb.
+ * @hotdrop:   set to 1 when the packet should be dropped outright.
+ *
+ * Returns 1 when source port, destination port, flags and (if one is
+ * requested) the TCP option all match, honouring the inversion flags;
+ * 0 otherwise.  *@hotdrop is set for a fragment with offset 1, for a
+ * packet whose TCP header cannot be read, and for a data offset smaller
+ * than the fixed header when an option match is requested.
+ */
+static int
+tcp_match(const struct sk_buff *skb,
+	  const struct net_device *in,
+	  const struct net_device *out,
+	  const void *matchinfo,
+	  int offset,
+	  unsigned int protoff,
+	  int *hotdrop)
+{
+	const struct ip6t_tcp *tcpinfo = matchinfo;
+	struct tcphdr _tcph, *th;
+	unsigned char flags;
+
+	/* To quote Alan:
+
+	   Don't allow a fragment of TCP 8 bytes in. Nobody normal
+	   causes this. Its a cracker trying to break in by doing a
+	   flag overwrite to pass the direction checks.
+	*/
+	if (offset == 1) {
+		duprintf("Dropping evil TCP offset=1 frag.\n");
+		*hotdrop = 1;
+		return 0;
+	}
+	/* Must not be a fragment. */
+	if (offset != 0)
+		return 0;
+
+#define FWINVTCP(bool,invflg) ((bool) ^ !!(tcpinfo->invflags & invflg))
+
+	th = skb_header_pointer(skb, protoff, sizeof(_tcph), &_tcph);
+	if (!th) {
+		/* We've been asked to examine this packet, and we
+		   can't.  Hence, no choice but to drop. */
+		duprintf("Dropping evil TCP offset=0 tinygram.\n");
+		*hotdrop = 1;
+		return 0;
+	}
+
+	/* Source port range. */
+	if (!port_match(tcpinfo->spts[0], tcpinfo->spts[1],
+			ntohs(th->source),
+			!!(tcpinfo->invflags & IP6T_TCP_INV_SRCPT)))
+		return 0;
+
+	/* Destination port range. */
+	if (!port_match(tcpinfo->dpts[0], tcpinfo->dpts[1],
+			ntohs(th->dest),
+			!!(tcpinfo->invflags & IP6T_TCP_INV_DSTPT)))
+		return 0;
+
+	/* Byte 13 of the TCP header is compared under the rule's mask. */
+	flags = ((unsigned char *)th)[13];
+	if (!FWINVTCP((flags & tcpinfo->flg_mask) == tcpinfo->flg_cmp,
+		      IP6T_TCP_INV_FLAGS))
+		return 0;
+
+	/* No option requested: everything matched. */
+	if (!tcpinfo->option)
+		return 1;
+
+	/* A data offset shorter than the fixed header is bogus. */
+	if (th->doff * 4 < sizeof(_tcph)) {
+		*hotdrop = 1;
+		return 0;
+	}
+
+	if (!tcp_find_option(tcpinfo->option, skb, protoff,
+			     th->doff*4 - sizeof(*th),
+			     tcpinfo->invflags & IP6T_TCP_INV_OPTION,
+			     hotdrop))
+		return 0;
+
+	return 1;
+}
+
+/*
+ * Called when user tries to insert an entry of this type.
+ *
+ * Returns 1 if the rule is acceptable, 0 otherwise.  A rule is accepted
+ * only if it specifies proto == TCP without protocol inversion, the
+ * match data has exactly the aligned size of struct ip6t_tcp, and no
+ * inversion flag outside IP6T_TCP_INV_MASK is set.  @tablename and
+ * @hook_mask are not consulted.
+ */
+static int
+tcp_checkentry(const char *tablename,
+	       const struct ip6t_ip6 *ipv6,
+	       void *matchinfo,
+	       unsigned int matchsize,
+	       unsigned int hook_mask)
+{
+	const struct ip6t_tcp *tcpinfo = matchinfo;
+
+	/* Must specify proto == TCP ... */
+	if (ipv6->proto != IPPROTO_TCP)
+		return 0;
+	/* ... and must not negate it. */
+	if (ipv6->invflags & IP6T_INV_PROTO)
+		return 0;
+	if (matchsize != IP6T_ALIGN(sizeof(struct ip6t_tcp)))
+		return 0;
+	/* No unknown invflags. */
+	if (tcpinfo->invflags & ~IP6T_TCP_INV_MASK)
+		return 0;
+
+	return 1;
+}
+
+static int
+udp_match(const struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ const void *matchinfo,
+ int offset,
+ unsigned int protoff,
+ int *hotdrop)
+{
+ struct udphdr _udph, *uh;
+ const struct ip6t_udp *udpinfo = matchinfo;
+
+ /* Must not be a fragment. */
+ if (offset)
+ return 0;
+
+ uh = skb_header_pointer(skb, protoff, sizeof(_udph), &_udph);
+ if (uh == NULL) {
+ /* We've been asked to examine this packet, and