diff options
author | 2008-02-05 22:57:30 +0000 | |
---|---|---|
committer | 2008-02-05 22:57:30 +0000 | |
commit | 633baea01c6bbe9e8ec7aeca9cd689d1f6a91b12 (patch) | |
tree | 9b94c9c81588c831211eee86082db4f72cb3788c | |
parent | ACPI 3.0 seems to work just fine so lets enable it. (diff) | |
download | wireguard-openbsd-633baea01c6bbe9e8ec7aeca9cd689d1f6a91b12.tar.xz wireguard-openbsd-633baea01c6bbe9e8ec7aeca9cd689d1f6a91b12.zip |
Move carp load balancing (ARP/IP) to a simpler configuration scheme.
Instead of using the same IP on multiple interfaces, carp has to be
configured with the new "carpnodes" and "balancing" options.
# ifconfig carp0 carpnodes 1:0,2:100,3:100 balancing ip carpdev sis0 192.168.5.50
Please note that this is a flag day for anyone using carp balancing.
You'll need to adjust your configuration accordingly.
Additionally, this diff adds IPv6 NDP balancing support.
Tested and OK mcbride@, reyk@.
Manpage help by jmc@.
-rw-r--r-- | lib/libc/gen/sysctl.3 | 14 | ||||
-rw-r--r-- | sbin/ifconfig/ifconfig.8 | 24 | ||||
-rw-r--r-- | sbin/ifconfig/ifconfig.c | 48 | ||||
-rw-r--r-- | sbin/sysctl/sysctl.8 | 5 | ||||
-rw-r--r-- | share/man/man4/carp.4 | 176 | ||||
-rw-r--r-- | sys/net/if_ethersubr.c | 8 | ||||
-rw-r--r-- | sys/netinet/if_ether.c | 22 | ||||
-rw-r--r-- | sys/netinet/ip_carp.c | 378 | ||||
-rw-r--r-- | sys/netinet/ip_carp.h | 23 | ||||
-rw-r--r-- | sys/netinet/ip_icmp.c | 5 | ||||
-rw-r--r-- | sys/netinet/ip_input.c | 14 | ||||
-rw-r--r-- | sys/netinet6/icmp6.c | 3 | ||||
-rw-r--r-- | sys/netinet6/ip6_input.c | 4 | ||||
-rw-r--r-- | sys/netinet6/nd6_nbr.c | 4 |
14 files changed, 360 insertions, 368 deletions
diff --git a/lib/libc/gen/sysctl.3 b/lib/libc/gen/sysctl.3 index 125814d387a..d5c5de04db5 100644 --- a/lib/libc/gen/sysctl.3 +++ b/lib/libc/gen/sysctl.3 @@ -1,4 +1,4 @@ -.\" $OpenBSD: sysctl.3,v 1.176 2007/12/13 20:00:53 reyk Exp $ +.\" $OpenBSD: sysctl.3,v 1.177 2008/02/05 22:57:31 mpf Exp $ .\" .\" Copyright (c) 1993 .\" The Regents of the University of California. All rights reserved. @@ -27,7 +27,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd $Mdocdate: December 13 2007 $ +.Dd $Mdocdate: February 5 2008 $ .Dt SYSCTL 3 .Os .Sh NAME @@ -1081,7 +1081,6 @@ The currently defined protocols and names are: .It bpf bufsize integer yes .It bpf maxbufsize integer yes .It carp allow integer yes -.It carp arpbalance integer yes .It carp log integer yes .It carp preempt integer yes .It esp enable integer yes @@ -1180,15 +1179,6 @@ If set to 0, incoming packets will not be processed. If set to any other value, processing will occur. Enabled by default. -.It Li carp.arpbalance -If set to any value other than 0, the ARP balancing functionality of -.Xr carp 4 -is enabled. -When ARP requests are received for an IP address which is part of any virtual -host, carp will hash the source IP in the ARP request to select one of the -virtual hosts from the set of all the virtual hosts which have that IP address. -The master of that host will respond with the correct virtual MAC address. -Disabled by default. 
.It Li carp.log If set to any value other than 0, .Xr carp 4 diff --git a/sbin/ifconfig/ifconfig.8 b/sbin/ifconfig/ifconfig.8 index dbc0621e741..43565c49db2 100644 --- a/sbin/ifconfig/ifconfig.8 +++ b/sbin/ifconfig/ifconfig.8 @@ -1,4 +1,4 @@ -.\" $OpenBSD: ifconfig.8,v 1.151 2007/11/09 15:07:08 jmc Exp $ +.\" $OpenBSD: ifconfig.8,v 1.152 2008/02/05 22:57:30 mpf Exp $ .\" $NetBSD: ifconfig.8,v 1.11 1996/01/04 21:27:29 pk Exp $ .\" $FreeBSD: ifconfig.8,v 1.16 1998/02/01 07:03:29 steve Exp $ .\" @@ -31,7 +31,7 @@ .\" .\" @(#)ifconfig.8 8.4 (Berkeley) 6/1/94 .\" -.Dd $Mdocdate: November 9 2007 $ +.Dd $Mdocdate: February 5 2008 $ .Dt IFCONFIG 8 .Os .Sh NAME @@ -445,6 +445,8 @@ and .Ar carp-interface .Op Cm advbase Ar n .Op Cm advskew Ar n +.Op Cm balancing Ar mode +.Op Cm carpnodes Ar vhid:advskew,vhid:advskew,... .Op Cm carpdev Ar iface .Op Cm pass Ar passphrase .Op Cm state Ar state @@ -466,6 +468,24 @@ If the driver is a pseudo-device, skew the advertisement interval by .Ar n . This is an 8-bit number; the default value is 0. +.It Cm balancing Ar mode +If the driver is a +.Xr carp 4 +pseudo-device, set the load balancing mode to +.Ar mode . +Valid modes are +.Ar arp , +.Ar ip , +.Ar ip-stealth , +and +.Ar ip-unicast . +.It Cm carpnodes Ar vhid:advskew,vhid:advskew,... +If the driver is a +.Xr carp 4 +pseudo-device, create a load balancing group consisting of up to 32 nodes. +Each node is specified as a +.Ar vhid:advskew +tuple in a comma separated list. 
.It Cm carpdev Ar iface If the driver is a .Xr carp 4 diff --git a/sbin/ifconfig/ifconfig.c b/sbin/ifconfig/ifconfig.c index 6efed484d53..addce6ad612 100644 --- a/sbin/ifconfig/ifconfig.c +++ b/sbin/ifconfig/ifconfig.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ifconfig.c,v 1.192 2007/11/27 16:21:02 chl Exp $ */ +/* $OpenBSD: ifconfig.c,v 1.193 2008/02/05 22:57:30 mpf Exp $ */ /* $NetBSD: ifconfig.c,v 1.40 1997/10/01 02:19:43 enami Exp $ */ /* @@ -196,7 +196,8 @@ void setcarp_vhid(const char *, int); void setcarp_state(const char *, int); void setcarpdev(const char *, int); void unsetcarpdev(const char *, int); -void setcarpnodes(const char *, int); +void setcarp_nodes(const char *, int); +void setcarp_balancing(const char *, int); void setpfsync_syncdev(const char *, int); void setpfsync_maxupd(const char *, int); void unsetpfsync_syncdev(const char *, int); @@ -314,7 +315,8 @@ const struct cmd { { "vhid", NEXTARG, 0, setcarp_vhid }, { "state", NEXTARG, 0, setcarp_state }, { "carpdev", NEXTARG, 0, setcarpdev }, - { "carpnodes", NEXTARG, 0, setcarpnodes }, + { "carpnodes", NEXTARG, 0, setcarp_nodes }, + { "balancing", NEXTARG, 0, setcarp_balancing }, { "-carpdev", 1, 0, unsetcarpdev }, { "syncdev", NEXTARG, 0, setpfsync_syncdev }, { "-syncdev", 1, 0, unsetpfsync_syncdev }, @@ -2885,11 +2887,12 @@ unsetvlandev(const char *val, int d) } static const char *carp_states[] = { CARP_STATES }; +static const char *carp_bal_modes[] = { CARP_BAL_MODES }; void carp_status(void) { - const char *state; + const char *state, *balmode; struct carpreq carpr; int i; @@ -2902,6 +2905,11 @@ carp_status(void) if (carpr.carpr_vhids[0] == 0) return; + if (carpr.carpr_balancing > CARP_BAL_MAXID) + balmode = "<UNKNOWN>"; + else + balmode = carp_bal_modes[carpr.carpr_balancing]; + for (i = 0; carpr.carpr_vhids[i]; i++) { if (carpr.carpr_states[i] > CARP_MAXSTATE) state = "<UNKNOWN>"; @@ -2915,10 +2923,11 @@ carp_status(void) carpr.carpr_advbase, carpr.carpr_advskews[0]); } else { if (i == 0) { - 
printf("\tcarp: carpdev %s advbase %d\n", + printf("\tcarp: carpdev %s advbase %d" + " balancing %s\n", carpr.carpr_carpdev[0] != '\0' ? carpr.carpr_carpdev : "none", - carpr.carpr_advbase); + carpr.carpr_advbase, balmode); } printf("\t\tstate %s vhid %u advskew %u\n", state, carpr.carpr_vhids[i], carpr.carpr_advskews[i]); @@ -3078,7 +3087,7 @@ unsetcarpdev(const char *val, int d) } void -setcarpnodes(const char *val, int d) +setcarp_nodes(const char *val, int d) { char *str; int i; @@ -3120,6 +3129,31 @@ setcarpnodes(const char *val, int d) } void +setcarp_balancing(const char *val, int d) +{ + int i; + struct carpreq carpr; + + bzero((char *)&carpr, sizeof(struct carpreq)); + ifr.ifr_data = (caddr_t)&carpr; + + if (ioctl(s, SIOCGVH, (caddr_t)&ifr) == -1) + err(1, "SIOCGVH"); + + for (i = 0; i <= CARP_BAL_MAXID; i++) + if (!strcasecmp(val, carp_bal_modes[i])) + break; + + if (i > CARP_BAL_MAXID) + errx(1, "balancing %s: unknown mode", val); + + carpr.carpr_balancing = i; + + if (ioctl(s, SIOCSVH, (caddr_t)&ifr) == -1) + err(1, "SIOCSVH"); +} + +void setpfsync_syncdev(const char *val, int d) { struct pfsyncreq preq; diff --git a/sbin/sysctl/sysctl.8 b/sbin/sysctl/sysctl.8 index f94db1845d9..4dbf1da49e0 100644 --- a/sbin/sysctl/sysctl.8 +++ b/sbin/sysctl/sysctl.8 @@ -1,4 +1,4 @@ -.\" $OpenBSD: sysctl.8,v 1.142 2007/11/11 15:48:35 jmc Exp $ +.\" $OpenBSD: sysctl.8,v 1.143 2008/02/05 22:57:31 mpf Exp $ .\" $NetBSD: sysctl.8,v 1.4 1995/09/30 07:12:49 thorpej Exp $ .\" .\" Copyright (c) 1993 @@ -30,7 +30,7 @@ .\" .\" @(#)sysctl.8 8.2 (Berkeley) 5/9/95 .\" -.Dd $Mdocdate: November 11 2007 $ +.Dd $Mdocdate: February 5 2008 $ .Dt SYSCTL 8 .Os .Sh NAME @@ -277,7 +277,6 @@ not all of the variables are relevant to all architectures. 
.It net.inet.carp.allow integer yes .It net.inet.carp.preempt integer yes .It net.inet.carp.log integer yes -.It net.inet.carp.arpbalance integer yes .It net.inet6.ip6.forwarding integer yes .It net.inet6.ip6.redirect integer yes .It net.inet6.ip6.hlim integer yes diff --git a/share/man/man4/carp.4 b/share/man/man4/carp.4 index 2952c3e9850..427edf33504 100644 --- a/share/man/man4/carp.4 +++ b/share/man/man4/carp.4 @@ -1,4 +1,4 @@ -.\" $OpenBSD: carp.4,v 1.28 2007/09/18 09:18:04 mpf Exp $ +.\" $OpenBSD: carp.4,v 1.29 2008/02/05 22:57:31 mpf Exp $ .\" .\" Copyright (c) 2003, Ryan McBride. All rights reserved. .\" @@ -23,7 +23,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd $Mdocdate: September 18 2007 $ +.Dd $Mdocdate: February 5 2008 $ .Dt CARP 4 .Os .Sh NAME @@ -116,9 +116,6 @@ Log bad .Nm packets. Disabled by default. -.It net.inet.carp.arpbalance -Balance local traffic using ARP. -Disabled by default. .El .Sh LOAD BALANCING .Nm @@ -156,36 +153,55 @@ A rule of thumb might be to use ARP balancing if there are many hosts on the same network segment and to use IP balancing for all other cases. .Pp -The configuration of ARP and IP load balancing is quite similar: -a load balancing group is created out of multiple -.Nm -interfaces by configuring them with the same IP addresses, -but to different VHIDs. -All +To configure load balancing one has to specify multiple +carp nodes using the +.Cm carpnodes +option. +Each node in a load balancing cluster is represented +by at least one +.Qq Cm vhid : Ns Cm advskew +pair in a comma separated list. .Nm -nodes in the cluster are configured identically, except +tries to distribute the incoming network load over all configured carpnodes. 
+The following example +creates a load balancing group consisting of three nodes, +using vhids 3, 4 and 6: +.Bd -literal -offset indent +# ifconfig carp0 carpnodes 3:0,4:0,6:100 +.Ed +.Pp +The advskew value of the last node is set to 100, +so that this node is designated to the BACKUP state. +It will only become MASTER if all nodes with a lower advskew value have failed. +By varying this value throughout the machines in the cluster +it is possible to decide which share of the network load each node receives. +Therefore, all carp interfaces in the cluster are configured identically, except for a different .Cm advskew -to control which interfaces on a host will be the designated master. +value within the carpnodes specification. +.Pp See the .Sx EXAMPLES section for a practical example of load balancing. .Ss ARP BALANCING -For load balancing, several -.Nm -interfaces are configured to the same IP address, but to different VHIDs. +For ARP balancing, one has to configure multiple +.Cm carpnodes +and choose the +.Cm balancing +mode +.Ar arp . +.Pp Once an ARP request is received, the CARP protocol will use a hashing function against the source MAC address in the ARP request to determine -which VHID the request belongs to. +which carpnode the request belongs to. If the corresponding -.Nm -interface is in master state, the ARP request will be answered, otherwise +carpnode is in master state, the ARP request will be answered, otherwise it will be ignored. .Pp The ARP load balancing has some limitations. Firstly, ARP balancing only works on the local network segment. It cannot balance traffic that crosses a router, because the -router itself will always be balanced to the same virtual host. +router itself will always be balanced to the same carpnode. 
Secondly, ARP load balancing can lead to asymmetric routing of incoming and outgoing traffic, thus combining it with .Xr pfsync 4 @@ -199,6 +215,9 @@ This requires multiple CARP groups with .Em different IP addresses on the outgoing interface, configured so that each host is the master of one group. +.Pp +ARP balancing also works for IPv6, but instead of ARP the Neighbor Discovery +Protocol (NDP) is used. .Ss IP BALANCING IP load balancing works by utilizing the network itself to distribute incoming traffic to all @@ -211,51 +230,51 @@ packet. All the other nodes will just silently drop it. The filtering function uses a hash over the source and destination address of the IPv4 or IPv6 packet and compares the result against the -state of the -.Nm -load balancing group. +state of the carpnode. .Pp -A load balancing group consists of two or more -.Nm -interfaces per host which are configured with common IP addresses -but different VHIDs. IP balancing is activated by setting the -.Cm link0 -flag on the first interface of the group. -In most cases it is recommended to also enable the -.Cm link1 -flag. -This flag enables the stealth mode on the interface. +.Cm balancing +mode to +.Ar ip . +This is the recommended default setting. +In this mode, carp uses a multicast MAC address, so that a switch +sends incoming traffic towards all nodes. +.Pp +However, there are a few OS and routers that do not accept a multicast +MAC address being mapped to a unicast IP. +This can be resolved by using one of the following unicast options. +For scenarios where a hub is used it is not necessary to use a multicast MAC +and it is safe to use the +.Ar ip-unicast +mode. +Managable switches can usually be tricked into forwarding unicast +traffic to all cluster nodes ports by configuring them into some +sort of monitoring mode. +If this is not possible, using the +.Ar ip-stealth +mode is another option, which should work on most switches. 
In this mode .Nm never sends packets with its virtual MAC address as source. -This is necessary to receive incoming traffic on all hosts in switched networks. Stealth mode prevents a switch from learning the virtual MAC address, so that it has to flood the traffic to all its ports. -The -.Cm link1 -flag can be avoided -only if using a hub or if the switch ports that are connected -to the cluster nodes can be configured into some sort of monitoring mode. Please note that activating stealth mode on a .Nm interface that has already been running might not work instantly. -As a workaround the VHID can be changed to a previously unused -one, or just wait until the MAC table entry in the switch times out. -.Pp +As a workaround the VHID of the first carpnode can be changed to a +previously unused one, or just wait until the MAC table entry in the +switch times out. Some Layer-3 switches do port learning based on ARP packets. Therefore the stealth mode cannot hide the virtual MAC address from these kind of devices. -In such cases, -.Nm -can be told to use a multicast MAC address by additionally enabling the -.Cm link2 -flag. .Pp If IP balancing is being used on a firewall, it is recommended to -configure the load balancing group in a symmetrical manner. -This is achieved by prioritizing the interfaces in the same order -(ascending by VHID) on both sides of the firewall. +configure the +.Cm carpnodes +in a symmetrical manner. +This is achieved by simply using the same +.Cm carpnodes +list on all sides of the firewall. This ensures that packets of one connection will pass in and out on the same host and are not routed asymmetrically. .Sh EXAMPLES @@ -292,46 +311,32 @@ interfaces. This will cause host B to preempt on both interfaces instead of just the failed one. .Ss LOAD BALANCING -In order to set up an load balanced virtual host, it is necessary to configure -one virtual host for each physical host. 
-In the following example, two virtual hosts are configured on two hosts to +In order to set up a load balanced virtual host, it is necessary to configure +one +.Cm carpnodes +entry for each physical host. +In the following example, two physical hosts are configured to provide balancing and failover for the IP address 192.168.1.10. .Pp First the .Nm -interfaces on Host A are configured. +interface on Host A is configured. The .Cm advskew -of 100 on the second virtual host means that its advertisements will be sent +of 100 on the second carpnode entry means that its advertisements will be sent out slightly less frequently and will therefore become the designated backup. -.Bd -literal -offset indent -# ifconfig carp0 192.168.1.10 vhid 1 -# ifconfig carp1 192.168.1.10 vhid 2 advskew 100 -.Ed -.Pp -The configuration for host B is identical, except the skew is on -virtual host 1 rather than virtual host 2. -.Bd -literal -offset indent -# ifconfig carp0 192.168.1.10 vhid 1 advskew 100 -# ifconfig carp1 192.168.1.10 vhid 2 -.Ed .Pp -If ARP balancing is being used, it must be enabled on both hosts: +.Dl # ifconfig carp0 192.168.1.10 carpnodes 1:0,2:100 balancing ip .Pp -.Dl # sysctl net.inet.carp.arpbalance=1 +The configuration for host B is identical, except the skew is on +the carpnode entry with virtual host 1 rather than virtual host 2. .Pp -If IP balancing is being used, instead enable the -.Cm link0 -and -.Cm link1 -flags on the first interface of the load balancing group on both hosts: -.Bd -literal -offset indent -A# ifconfig carp0 192.168.1.10 vhid 1 link0 link1 -A# ifconfig carp1 192.168.1.10 vhid 2 advskew 100 +.Dl # ifconfig carp0 192.168.1.10 carpnodes 1:100,2:0 balancing ip .Pp -B# ifconfig carp0 192.168.1.10 vhid 1 advskew 100 link0 link1 -B# ifconfig carp1 192.168.1.10 vhid 2 -.Ed +If ARP balancing or a different mode of IP balancing is desired +the +.Cm balancing +mode can be adjusted accordingly. 
.Sh SEE ALSO .Xr sysctl 3 , .Xr inet 4 , @@ -346,3 +351,16 @@ The .Nm device first appeared in .Ox 3.5 . +.Sh BUGS +If load balancing is used in setups where the carpdev does not share +an IP in the same subnet as +.Nm , +it is not possible to use the IP of the +.Nm +interface for self originated traffic. +This is because the return packets are also subject to load balancing +and might end up on any other node in the cluster. +.Pp +If an IPv6 load balanced carp interface is taken down manually, +it will accept all incoming packets for its address. +This will lead to duplicated packets. diff --git a/sys/net/if_ethersubr.c b/sys/net/if_ethersubr.c index 7ea08f926be..26e2b69bd29 100644 --- a/sys/net/if_ethersubr.c +++ b/sys/net/if_ethersubr.c @@ -1,4 +1,4 @@ -/* $OpenBSD: if_ethersubr.c,v 1.113 2008/01/17 17:50:59 bluhm Exp $ */ +/* $OpenBSD: if_ethersubr.c,v 1.114 2008/02/05 22:57:30 mpf Exp $ */ /* $NetBSD: if_ethersubr.c,v 1.19 1996/05/07 02:40:30 thorpej Exp $ */ /* @@ -564,12 +564,10 @@ ether_input(ifp, eh, m) (carp_input(m, (u_int8_t *)&eh->ether_shost, (u_int8_t *)&eh->ether_dhost, eh->ether_type) == 0)) return; - /* Always clear multicast flags if received on a carp address */ + /* clear mcast if received on a carp IP balanced address */ else if (ifp->if_type == IFT_CARP && - ifp->if_flags & IFF_LINK2 && m->m_flags & (M_BCAST|M_MCAST) && - !bcmp(((struct arpcom *)ifp)->ac_enaddr, - (caddr_t)eh->ether_dhost, ETHER_ADDR_LEN)) + carp_our_mcastaddr(ifp, (u_int8_t *)&eh->ether_dhost)) m->m_flags &= ~(M_BCAST|M_MCAST); } #endif /* NCARP > 0 */ diff --git a/sys/netinet/if_ether.c b/sys/netinet/if_ether.c index fa9e562331e..4704fed67cc 100644 --- a/sys/netinet/if_ether.c +++ b/sys/netinet/if_ether.c @@ -1,4 +1,4 @@ -/* $OpenBSD: if_ether.c,v 1.69 2007/11/06 21:52:00 miod Exp $ */ +/* $OpenBSD: if_ether.c,v 1.70 2008/02/05 22:57:30 mpf Exp $ */ /* $NetBSD: if_ether.c,v 1.31 1996/05/11 12:59:58 mycroft Exp $ */ /* @@ -531,13 +531,13 @@ in_arpinput(m) #if NBRIDGE > 0 
struct in_ifaddr *bridge_ia = NULL; #endif -#if NCARP > 0 - u_int32_t count = 0, index = 0; -#endif struct sockaddr_dl *sdl; struct sockaddr sa; struct in_addr isaddr, itaddr, myaddr; u_int8_t *enaddr = NULL; +#if NCARP > 0 + u_int8_t *ether_shost = NULL; +#endif int op; ea = mtod(m, struct ether_arp *); @@ -563,10 +563,10 @@ in_arpinput(m) if (ia->ia_ifp->if_type == IFT_CARP && ((ia->ia_ifp->if_flags & (IFF_UP|IFF_RUNNING)) == (IFF_UP|IFF_RUNNING))) { - index++; if (ia->ia_ifp == m->m_pkthdr.rcvif && + (op == ARPOP_REPLY || carp_iamatch(ia, ea->arp_sha, - &count, index)) + &enaddr, ðer_shost))) break; } else #endif @@ -589,7 +589,7 @@ in_arpinput(m) m->m_pkthdr.rcvif->if_bridge == ia->ia_ifp->if_carpdev->if_bridge && carp_iamatch(ia, ea->arp_sha, - &count, index)) + &enaddr, ðer_shost)) bridge_ia = ia; #endif } @@ -731,12 +731,10 @@ reply: eh = (struct ether_header *)sa.sa_data; bcopy(ea->arp_tha, eh->ether_dhost, sizeof(eh->ether_dhost)); #if NCARP > 0 - if (ac->ac_if.if_type == IFT_CARP && ac->ac_if.if_flags & IFF_LINK1) - bcopy(((struct arpcom *)ac->ac_if.if_carpdev)->ac_enaddr, - eh->ether_shost, sizeof(eh->ether_shost)); - else + if (ether_shost) + enaddr = ether_shost; #endif - bcopy(enaddr, eh->ether_shost, sizeof(eh->ether_shost)); + bcopy(enaddr, eh->ether_shost, sizeof(eh->ether_shost)); eh->ether_type = htons(ETHERTYPE_ARP); sa.sa_family = pseudo_AF_HDRCMPLT; diff --git a/sys/netinet/ip_carp.c b/sys/netinet/ip_carp.c index 597809f0e5b..82f9ba870e7 100644 --- a/sys/netinet/ip_carp.c +++ b/sys/netinet/ip_carp.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ip_carp.c,v 1.160 2008/01/02 03:33:18 brad Exp $ */ +/* $OpenBSD: ip_carp.c,v 1.161 2008/02/05 22:57:30 mpf Exp $ */ /* * Copyright (c) 2002 Michael Shalayeff. All rights reserved. 
@@ -107,7 +107,7 @@ enum { HMAC_ORIG=0, HMAC_NOV6LL=1, HMAC_MAX=2 }; struct carp_vhost_entry { LIST_ENTRY(carp_vhost_entry) vhost_entries; struct carp_softc *parent_sc; - int master; + int vhe_leader; int vhid; int advskew; enum { INIT = 0, BACKUP, MASTER } state; @@ -123,6 +123,7 @@ struct carp_vhost_entry { SHA1_CTX vhe_sha1[HMAC_MAX]; u_int8_t vhe_enaddr[ETHER_ADDR_LEN]; + struct sockaddr_dl vhe_sdl; /* for IPv6 ndp balancing */ }; struct carp_softc { @@ -151,6 +152,7 @@ struct carp_softc { int sc_vhe_count; u_int8_t sc_vhids[CARP_MAXNODES]; u_int8_t sc_advskews[CARP_MAXNODES]; + u_int8_t sc_balancing; int sc_naddrs; int sc_naddrs6; @@ -162,14 +164,13 @@ struct carp_softc { u_int32_t sc_hashkey[2]; u_int32_t sc_lsmask; /* load sharing mask */ int sc_lscount; /* # load sharing interfaces (max 32) */ - int sc_delayed_arp; /* delayed ARP request countdown */ LIST_HEAD(__carp_mchead, carp_mc_entry) carp_mc_listhead; struct carp_vhost_entry *cur_vhe; /* current active vhe */ }; -int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, 0, 0 }; /* XXX for now */ +int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, 0 }; /* XXX for now */ struct carpstats carpstats; struct carp_if { @@ -208,7 +209,7 @@ void carp_send_ad(void *); void carp_send_arp(struct carp_softc *); void carp_master_down(void *); int carp_ioctl(struct ifnet *, u_long, caddr_t); -int carp_vhids_ioctl(struct carp_softc *, struct carpreq); +int carp_vhids_ioctl(struct carp_softc *, struct carpreq *); int carp_check_dup_vhids(struct carp_softc *, struct carp_if *, struct carpreq *); void carp_ifgroup_ioctl(struct ifnet *, u_long, caddr_t); @@ -218,8 +219,6 @@ void carp_setrun_all(struct carp_softc *, sa_family_t); void carp_setrun(struct carp_vhost_entry *, sa_family_t); void carp_set_state_all(struct carp_softc *, int); void carp_set_state(struct carp_vhost_entry *, int); -int carp_addrcount(struct carp_if *, struct ifaddr *, int); -enum { CARP_COUNT_MASTER, CARP_COUNT_RUNNING, CARP_COUNT_LINK0 }; void 
carp_multicast_cleanup(struct carp_softc *); int carp_set_ifp(struct carp_softc *, struct ifnet *); void carp_set_enaddr(struct carp_softc *); @@ -291,7 +290,7 @@ carp_hmac_prepare_ctx(struct carp_vhost_entry *vhe, u_int8_t ctx) SHA1Update(&vhe->vhe_sha1[ctx], (void *)&type, sizeof(type)); /* generate a key for the arpbalance hash, before the vhid is hashed */ - if (vhe->master) { + if (vhe->vhe_leader) { bcopy(&vhe->vhe_sha1[ctx], &sha1ctx, sizeof(sha1ctx)); SHA1Final((unsigned char *)kmd, &sha1ctx); sc->sc_hashkey[0] = kmd[0] ^ kmd[1]; @@ -299,7 +298,7 @@ carp_hmac_prepare_ctx(struct carp_vhost_entry *vhe, u_int8_t ctx) } /* the rest of the precomputation */ - if (vhe->master && bcmp(sc->sc_ac.ac_enaddr, vhe->vhe_enaddr, + if (vhe->vhe_leader && bcmp(sc->sc_ac.ac_enaddr, vhe->vhe_enaddr, ETHER_ADDR_LEN) != 0) SHA1Update(&vhe->vhe_sha1[ctx], sc->sc_ac.ac_enaddr, ETHER_ADDR_LEN); @@ -404,7 +403,7 @@ carp_setroute(struct carp_softc *sc, int cmd) TAILQ_FOREACH(ifa, &sc->sc_if.if_addrlist, ifa_list) { switch (ifa->ifa_addr->sa_family) { case AF_INET: { - int count = 0, error; + int error; struct sockaddr sa; struct rtentry *rt; struct radix_node_head *rnh; @@ -414,21 +413,6 @@ carp_setroute(struct carp_softc *sc, int cmd) struct sockaddr_rtlabel sa_rl; const char *label; - /* - * Avoid screwing with the routes if there are other - * carp interfaces which are master and have the same - * address. 
- */ - if (sc->sc_carpdev != NULL && - sc->sc_carpdev->if_carp != NULL) { - count = carp_addrcount( - (struct carp_if *)sc->sc_carpdev->if_carp, - ifa, CARP_COUNT_MASTER); - if ((cmd == RTM_ADD && count != 1) || - (cmd == RTM_DELETE && count != 0)) - continue; - } - /* Remove the existing host route, if any */ bzero(&info, sizeof(info)); info.rti_info[RTAX_DST] = ifa->ifa_addr; @@ -515,6 +499,8 @@ carp_setroute(struct carp_softc *sc, int cmd) #ifdef INET6 case AF_INET6: + if (sc->sc_balancing >= CARP_BAL_IP) + continue; if (cmd == RTM_ADD) in6_ifaddloop(ifa); else @@ -748,7 +734,7 @@ carp_proto_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af) timeout_del(&vhe->ad_tmo); carp_set_state(vhe, BACKUP); carp_setrun(vhe, 0); - if (vhe->master) + if (vhe->vhe_leader) carp_setroute(sc, RTM_DELETE); } break; @@ -903,9 +889,9 @@ carp_new_vhost(struct carp_softc *sc, int vhid, int advskew) timeout_set(&vhe->md_tmo, carp_master_down, vhe); timeout_set(&vhe->md6_tmo, carp_master_down, vhe); - /* mark the first vhe as master */ + /* mark the first vhe as leader */ if (LIST_EMPTY(&sc->carp_vhosts)) { - vhe->master = 1; + vhe->vhe_leader = 1; LIST_INSERT_HEAD(&sc->carp_vhosts, vhe, vhost_entries); sc->sc_vhe_count = 1; return (0); @@ -1187,7 +1173,7 @@ carp_send_ad(void *v) } else sc->sc_sendad_errors = 0; } - if (vhe->master) { + if (vhe->vhe_leader) { if (sc->sc_delayed_arp > 0) sc->sc_delayed_arp--; if (sc->sc_delayed_arp == 0) { @@ -1298,10 +1284,6 @@ carp_send_arp(struct carp_softc *sc) if (ifa->ifa_addr->sa_family != AF_INET) continue; - if (carp_addrcount((struct carp_if *)sc->sc_carpdev->if_carp, - ifa, CARP_COUNT_LINK0)) - continue; - in = ifatoia(ifa)->ia_addr.sin_addr.s_addr; arprequest(sc->sc_carpdev, &in, &in, sc->sc_ac.ac_enaddr); DELAY(1000); /* XXX */ @@ -1368,153 +1350,36 @@ carp_hash(struct carp_softc *sc, u_char *src) return (c); } -int -carp_addrcount(struct carp_if *cif, struct ifaddr *ifa0, int type) -{ - struct carp_softc *vh; - struct ifaddr 
*ifa; - int count = 0; - - TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { - switch (type) { - case CARP_COUNT_RUNNING: - if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) != - (IFF_UP|IFF_RUNNING)) - continue; - break; - case CARP_COUNT_MASTER: - if (LIST_FIRST(&vh->carp_vhosts)->state != MASTER) - continue; - break; - case CARP_COUNT_LINK0: - if (!(vh->sc_if.if_flags & IFF_LINK0) || - (vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) != - (IFF_UP|IFF_RUNNING)) - continue; - break; - } - TAILQ_FOREACH(ifa, &vh->sc_if.if_addrlist, ifa_list) { - if (ifa->ifa_addr->sa_family == AF_INET && - ifa0->ifa_addr->sa_family == AF_INET && - ifatoia(ifa0)->ia_addr.sin_addr.s_addr == - ifatoia(ifa)->ia_addr.sin_addr.s_addr) - count++; -#ifdef INET6 - if (ifa->ifa_addr->sa_family == AF_INET6 && - ifa0->ifa_addr->sa_family == AF_INET6 && - IN6_ARE_ADDR_EQUAL(IFA_IN6(ifa0), IFA_IN6(ifa))) - count++; -#endif - } - } - return (count); -} - void carp_update_lsmask(struct carp_softc *sc) { - struct carp_softc *curvh, *vh, *sc0 = NULL; - struct carp_if *cif; struct carp_vhost_entry *vhe; - struct ifaddr *ifa, *ifa0 = NULL; - int cur, last, count, found; - - if (!sc->sc_carpdev) - return; - cif = (struct carp_if *)sc->sc_carpdev->if_carp; + int count; - /* - * Take the first IPv4 address from the LINK0 carp interface - * to determine the load sharing group. - * Fallback on the first IPv6 address. - */ - TAILQ_FOREACH(sc0, &cif->vhif_vrs, sc_list) - if (sc0->sc_if.if_flags & IFF_LINK0) - break; - if (sc0 == NULL) + if (!sc->sc_balancing) return; - TAILQ_FOREACH(ifa0, &sc0->sc_if.if_addrlist, ifa_list) - if (ifa0->ifa_addr->sa_family == AF_INET) - break; -#ifdef INET6 - if (ifa0 == NULL) - TAILQ_FOREACH(ifa0, &sc0->sc_if.if_addrlist, ifa_list) - if (ifa0->ifa_addr->sa_family == AF_INET6 && - !IN6_IS_ADDR_LINKLOCAL(IFA_IN6(ifa0))) - break; -#endif - if (ifa0 == NULL) - return; - /* - * Calculate the load sharing mask w/ all carp interfaces - * that share the first address of the LINK0 interface. 
- * Sort by virtual host ID. - */ - sc0->sc_lsmask = 0; - cur = 0; - curvh = NULL; + sc->sc_lsmask = 0; count = 0; - do { - found = 0; - last = cur; - cur = 255; - TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { - vhe = LIST_FIRST(&vh->carp_vhosts); - if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) != - (IFF_UP|IFF_RUNNING)) - continue; - TAILQ_FOREACH(ifa, &vh->sc_if.if_addrlist, ifa_list) { - if (ifa->ifa_addr->sa_family == AF_INET && - ifa0->ifa_addr->sa_family == AF_INET && - ifatoia(ifa0)->ia_addr.sin_addr.s_addr == - ifatoia(ifa)->ia_addr.sin_addr.s_addr) - break; -#ifdef INET6 - if (ifa->ifa_addr->sa_family == AF_INET6 && - ifa0->ifa_addr->sa_family == AF_INET6 && - IN6_ARE_ADDR_EQUAL(IFA_IN6(ifa0), IFA_IN6(ifa))) - break; -#endif - } - if (ifa && vhe->vhid > last && vhe->vhid < cur) { - cur = vhe->vhid; - curvh = vh; - found++; - } - } - if (found) { - if (LIST_FIRST(&curvh->carp_vhosts)->state == MASTER && - count < sizeof(sc0->sc_lsmask) * 8) - sc0->sc_lsmask |= 1 << count; - count++; - } - } while (found); - - sc0->sc_lscount = count; - if (count == 0) - return; - CARP_LOG(sc, ("carp_update_lsmask: %x", sc0->sc_lsmask)) + LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) { + if (vhe->state == MASTER && count < sizeof(sc->sc_lsmask) * 8) + sc->sc_lsmask |= 1 << count; + count++; + } + sc->sc_lscount = count; + CARP_LOG(sc, ("carp_update_lsmask: %x", sc->sc_lsmask)); } int -carp_iamatch(struct in_ifaddr *ia, u_char *src, - u_int32_t *count, u_int32_t index) +carp_iamatch(struct in_ifaddr *ia, u_char *src, u_int8_t **sha, + u_int8_t **ether_shost) { struct carp_softc *sc = ia->ia_ifp->if_softc; struct carp_vhost_entry *vhe = LIST_FIRST(&sc->carp_vhosts); - /* - * If the asked address is found on a LINK0 interface - * don't answer the arp reply unless we are MASTER on it. 
- */ - if (!(sc->sc_if.if_flags & IFF_LINK0) && sc->sc_carpdev && - carp_addrcount((struct carp_if *)sc->sc_carpdev->if_carp, - (struct ifaddr *)ia, CARP_COUNT_LINK0)) - return (0); - - if (carp_opts[CARPCTL_ARPBALANCE]) { + if (sc->sc_balancing == CARP_BAL_ARP) { + int lshash; /* * We use the source MAC address to decide which virtual host * should handle the request. If we're master of that virtual @@ -1522,18 +1387,25 @@ carp_iamatch(struct in_ifaddr *ia, u_char *src, * on the floor. */ - /* Count the eligible carp interfaces with this address */ - if (*count == 0) - *count = carp_addrcount( - (struct carp_if *)ia->ia_ifp->if_carpdev->if_carp, - (struct ifaddr *)ia, CARP_COUNT_RUNNING); - - /* This should never happen, but... */ - if (*count == 0) + if (sc->sc_lscount == 0) /* just to be safe */ return (0); - - if (carp_hash(sc, src) % *count == index - 1 && - LIST_FIRST(&sc->carp_vhosts)->state == MASTER) { + lshash = carp_hash(sc, src) % sc->sc_lscount; + if ((1 << lshash) & sc->sc_lsmask) { + int i = 0; + LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) { + if (i++ == lshash) + break; + } + if (vhe == NULL) + return (0); + *sha = vhe->vhe_enaddr; + return (1); + } + } else if (sc->sc_balancing == CARP_BAL_IPSTEALTH || + sc->sc_balancing == CARP_BAL_IP) { + if (vhe->state == MASTER) { + *ether_shost = ((struct arpcom *)sc->sc_carpdev)-> + ac_enaddr; return (1); } } else { @@ -1546,21 +1418,41 @@ carp_iamatch(struct in_ifaddr *ia, u_char *src, #ifdef INET6 int -carp_iamatch6(struct ifnet *ifp, struct ifaddr *ifa) +carp_iamatch6(struct ifnet *ifp, u_char *src, struct sockaddr_dl **sdl) { struct carp_softc *sc = ifp->if_softc; + struct carp_vhost_entry *vhe = LIST_FIRST(&sc->carp_vhosts); - /* - * If the asked address is found on a LINK0 interface - * don't answer the arp request unless we are MASTER on it. 
- */ - if (!(sc->sc_if.if_flags & IFF_LINK0) && sc->sc_carpdev && - carp_addrcount((struct carp_if *)sc->sc_carpdev->if_carp, - ifa, CARP_COUNT_LINK0)) - return (0); + if (sc->sc_balancing == CARP_BAL_ARP) { + int lshash; + /* + * We use the source MAC address to decide which virtual host + * should handle the request. If we're master of that virtual + * host, then we respond, otherwise, just drop the ndp packet + * on the floor. + */ - if (LIST_FIRST(&sc->carp_vhosts)->state == MASTER) - return (1); + /* can happen if optional src lladdr is not provided */ + if (src == NULL) + return (0); + if (sc->sc_lscount == 0) /* just to be safe */ + return (0); + lshash = carp_hash(sc, src) % sc->sc_lscount; + if ((1 << lshash) & sc->sc_lsmask) { + int i = 0; + LIST_FOREACH(vhe, &sc->carp_vhosts, vhost_entries) { + if (i++ == lshash) + break; + } + if (vhe == NULL) + return (0); + *sdl = &vhe->vhe_sdl; + return (1); + } + } else { + if (vhe->state == MASTER) + return (1); + } return (0); } @@ -1579,14 +1471,22 @@ carp_ourether(void *v, struct ether_header *eh, u_char iftype, int src) ena = (u_int8_t *)&eh->ether_dhost; TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) { - /* XXX need to check all vhes */ - struct carp_vhost_entry *vhe = LIST_FIRST(&vh->carp_vhosts); + struct carp_vhost_entry *vhe; if ((vh->sc_if.if_flags & (IFF_UP|IFF_RUNNING)) != (IFF_UP|IFF_RUNNING)) continue; - if ((vhe->state == MASTER || vh->sc_if.if_flags & IFF_LINK0) - && !bcmp(ena, vh->sc_ac.ac_enaddr, ETHER_ADDR_LEN)) - return (&vh->sc_if); + if (vh->sc_balancing == CARP_BAL_ARP) { + LIST_FOREACH(vhe, &vh->carp_vhosts, vhost_entries) + if (vhe->state == MASTER && + !bcmp(ena, vhe->vhe_enaddr, ETHER_ADDR_LEN)) + return (&vh->sc_if); + } else { + vhe = LIST_FIRST(&vh->carp_vhosts); + if ((vhe->state == MASTER || + vh->sc_balancing >= CARP_BAL_IP) && + !bcmp(ena, vh->sc_ac.ac_enaddr, ETHER_ADDR_LEN)) + return (&vh->sc_if); + } } return (NULL); } @@ -1596,8 +1496,9 @@ carp_rewrite_lladdr(struct ifnet *ifp, 
u_int8_t *s_enaddr) { struct carp_softc *sc = ifp->if_softc; - if (!(ifp->if_flags & IFF_LINK1) && sc->cur_vhe) { - if (sc->cur_vhe->master) + if (sc->sc_balancing != CARP_BAL_IPSTEALTH && + sc->sc_balancing != CARP_BAL_IP && sc->cur_vhe) { + if (sc->cur_vhe->vhe_leader) bcopy((caddr_t)sc->sc_ac.ac_enaddr, (caddr_t)s_enaddr, ETHER_ADDR_LEN); else @@ -1607,6 +1508,18 @@ carp_rewrite_lladdr(struct ifnet *ifp, u_int8_t *s_enaddr) } int +carp_our_mcastaddr(struct ifnet *ifp, u_int8_t *d_enaddr) +{ + struct carp_softc *sc = ifp->if_softc; + + if (sc->sc_balancing != CARP_BAL_IP) + return (0); + + return(!bcmp(sc->sc_ac.ac_enaddr, d_enaddr, ETHER_ADDR_LEN)); +} + + +int carp_input(struct mbuf *m, u_int8_t *shost, u_int8_t *dhost, u_int16_t etype) { struct ether_header eh; @@ -1660,6 +1573,8 @@ carp_lsdrop(struct mbuf *m, sa_family_t af, u_int32_t *src, u_int32_t *dst) int match; u_int32_t fold; + if (sc->sc_balancing < CARP_BAL_IP) + return (0); /* * Never drop carp advertisements. * XXX Bad idea to pass all broadcast / multicast traffic? 
@@ -1698,7 +1613,7 @@ carp_master_down(void *v) case BACKUP: carp_set_state(vhe, MASTER); carp_send_ad(vhe); - if (vhe->master) { + if (sc->sc_balancing == CARP_BAL_NONE && vhe->vhe_leader) { carp_send_arp(sc); /* Schedule a delayed ARP to deal w/ some L3 switches */ sc->sc_delayed_arp = 2; @@ -1707,7 +1622,7 @@ carp_master_down(void *v) #endif /* INET6 */ } carp_setrun(vhe, 0); - if (vhe->master) + if (vhe->vhe_leader) carp_setroute(sc, RTM_ADD); carpstats.carps_preempt++; break; @@ -1744,7 +1659,7 @@ carp_setrun(struct carp_vhost_entry *vhe, sa_family_t af) sc->sc_if.if_flags |= IFF_RUNNING; } else { sc->sc_if.if_flags &= ~IFF_RUNNING; - if (vhe->master) + if (vhe->vhe_leader) carp_setroute(sc, RTM_DELETE); return; } @@ -1752,7 +1667,7 @@ carp_setrun(struct carp_vhost_entry *vhe, sa_family_t af) switch (vhe->state) { case INIT: carp_set_state(vhe, BACKUP); - if (vhe->master) + if (vhe->vhe_leader) carp_setroute(sc, RTM_DELETE); carp_setrun(vhe, 0); break; @@ -1760,7 +1675,7 @@ carp_setrun(struct carp_vhost_entry *vhe, sa_family_t af) timeout_del(&vhe->ad_tmo); tv.tv_sec = 3 * sc->sc_advbase; tv.tv_usec = vhe->advskew * 1000000 / 256; - if (vhe->master) + if (vhe->vhe_leader) sc->sc_delayed_arp = -1; switch (af) { #ifdef INET @@ -1924,7 +1839,7 @@ carp_set_vhe_enaddr(struct carp_vhost_entry *vhe) struct carp_softc *sc = vhe->parent_sc; if (vhe->vhid != 0 && sc->sc_carpdev) { - if (vhe->master && sc->sc_if.if_flags & IFF_LINK2) + if (vhe->vhe_leader && sc->sc_balancing == CARP_BAL_IP) vhe->vhe_enaddr[0] = 1; else vhe->vhe_enaddr[0] = 0; @@ -1933,6 +1848,10 @@ carp_set_vhe_enaddr(struct carp_vhost_entry *vhe) vhe->vhe_enaddr[3] = 0; vhe->vhe_enaddr[4] = 1; vhe->vhe_enaddr[5] = vhe->vhid; + + vhe->vhe_sdl.sdl_family = AF_LINK; + vhe->vhe_sdl.sdl_alen = ETHER_ADDR_LEN; + bcopy(vhe->vhe_enaddr, vhe->vhe_sdl.sdl_data, ETHER_ADDR_LEN); } else bzero(vhe->vhe_enaddr, ETHER_ADDR_LEN); } @@ -2284,9 +2203,6 @@ carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr) 
sc->sc_if.if_flags |= IFF_UP; carp_setrun_all(sc, 0); } - carp_set_enaddr(sc); /* for changes on LINK2 */ - if (ifr->ifr_flags & IFF_LINK0) - carp_update_lsmask(sc); break; case SIOCSVH: @@ -2318,7 +2234,7 @@ carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr) break; } } - if ((error = carp_vhids_ioctl(sc, carpr))) + if ((error = carp_vhids_ioctl(sc, &carpr))) return (error); if (carpr.carpr_advbase > 0) { if (carpr.carpr_advbase > 255) { @@ -2336,6 +2252,15 @@ carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr) bcopy(carpr.carpr_advskews, sc->sc_advskews, sizeof(sc->sc_advskews)); } + if (sc->sc_balancing != carpr.carpr_balancing) { + if (carpr.carpr_balancing > CARP_BAL_MAXID) { + error = EINVAL; + break; + } + sc->sc_balancing = carpr.carpr_balancing; + carp_set_enaddr(sc); + carp_update_lsmask(sc); + } bcopy(carpr.carpr_key, sc->sc_key, sizeof(sc->sc_key)); if (error > 0) error = EINVAL; @@ -2358,6 +2283,7 @@ carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr) i++; } carpr.carpr_advbase = sc->sc_advbase; + carpr.carpr_balancing = sc->sc_balancing; if (suser(p, p->p_acflag) == 0) bcopy(sc->sc_key, carpr.carpr_key, sizeof(carpr.carpr_key)); @@ -2417,42 +2343,53 @@ carp_check_dup_vhids(struct carp_softc *sc, struct carp_if *cif, } int -carp_vhids_ioctl(struct carp_softc *sc, struct carpreq carpr) +carp_vhids_ioctl(struct carp_softc *sc, struct carpreq *carpr) { - int i; + int i, j; u_int8_t taken_vhids[256]; - if (carpr.carpr_vhids[0] == 0 || - !bcmp(sc->sc_vhids, carpr.carpr_vhids, sizeof(sc->sc_vhids))) + if (carpr->carpr_vhids[0] == 0 || + !bcmp(sc->sc_vhids, carpr->carpr_vhids, sizeof(sc->sc_vhids))) return (0); bzero(taken_vhids, sizeof(taken_vhids)); - for (i = 0; carpr.carpr_vhids[i]; i++) { - if (taken_vhids[carpr.carpr_vhids[i]]) + for (i = 0; carpr->carpr_vhids[i]; i++) { + if (taken_vhids[carpr->carpr_vhids[i]]) return (EINVAL); - taken_vhids[carpr.carpr_vhids[i]] = 1; + taken_vhids[carpr->carpr_vhids[i]] = 1; if (sc->sc_carpdev) { struct 
carp_if *cif; cif = (struct carp_if *)sc->sc_carpdev->if_carp; - if (carp_check_dup_vhids(sc, cif, &carpr)) + if (carp_check_dup_vhids(sc, cif, carpr)) return (EINVAL); } - if (carpr.carpr_advskews[i] >= 255) + if (carpr->carpr_advskews[i] >= 255) return (EINVAL); } + /* set sane balancing defaults */ + if (i <= 1) + carpr->carpr_balancing = CARP_BAL_NONE; + else if (carpr->carpr_balancing == CARP_BAL_NONE && + sc->sc_balancing == CARP_BAL_NONE) + carpr->carpr_balancing = CARP_BAL_IP; /* destroy all */ carp_del_all_timeouts(sc); carp_destroy_vhosts(sc); bzero(sc->sc_vhids, sizeof(sc->sc_vhids)); - for (i = 0; carpr.carpr_vhids[i]; i++) { - if (carp_new_vhost(sc, carpr.carpr_vhids[i], - carpr.carpr_advskews[i])) - return (ENOMEM); - sc->sc_vhids[i] = carpr.carpr_vhids[i]; - sc->sc_advskews[i] = carpr.carpr_advskews[i]; + /* sort vhosts list by vhid */ + for (j = 1; j <= 255; j++) { + for (i = 0; carpr->carpr_vhids[i]; i++) { + if (carpr->carpr_vhids[i] != j) + continue; + if (carp_new_vhost(sc, carpr->carpr_vhids[i], + carpr->carpr_advskews[i])) + return (ENOMEM); + sc->sc_vhids[i] = carpr->carpr_vhids[i]; + sc->sc_advskews[i] = carpr->carpr_advskews[i]; + } } carp_set_enaddr(sc); carp_set_state_all(sc, INIT); @@ -2508,7 +2445,8 @@ carp_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa, vhe = sc->cur_vhe ? 
sc->cur_vhe : LIST_FIRST(&sc->carp_vhosts); - if (sc->sc_carpdev != NULL && vhe->state == MASTER) + if (sc->sc_carpdev != NULL && + (sc->sc_balancing || vhe->state == MASTER)) return (sc->sc_carpdev->if_output(ifp, m, sa, rt)); else { m_freem(m); @@ -2537,7 +2475,7 @@ carp_set_state(struct carp_vhost_entry *vhe, int state) carp_update_lsmask(sc); /* only the master vhe creates link state messages */ - if (!vhe->master) + if (!vhe->vhe_leader) return; switch (state) { diff --git a/sys/netinet/ip_carp.h b/sys/netinet/ip_carp.h index f2d9bf7b046..857d8563c2b 100644 --- a/sys/netinet/ip_carp.h +++ b/sys/netinet/ip_carp.h @@ -1,4 +1,4 @@ -/* $OpenBSD: ip_carp.h,v 1.25 2007/12/14 18:33:40 deraadt Exp $ */ +/* $OpenBSD: ip_carp.h,v 1.26 2008/02/05 22:57:31 mpf Exp $ */ /* * Copyright (c) 2002 Michael Shalayeff. All rights reserved. @@ -132,6 +132,14 @@ struct carpreq { u_int8_t carpr_vhids[CARP_MAXNODES]; u_int8_t carpr_advskews[CARP_MAXNODES]; u_int8_t carpr_states[CARP_MAXNODES]; +#define CARP_BAL_MODES "none", "arp", "ip", "ip-stealth", "ip-unicast" +#define CARP_BAL_NONE 0 +#define CARP_BAL_ARP 1 +#define CARP_BAL_IP 2 +#define CARP_BAL_IPSTEALTH 3 +#define CARP_BAL_IPUNICAST 4 +#define CARP_BAL_MAXID 4 + u_int8_t carpr_balancing; int carpr_advbase; unsigned char carpr_key[CARP_KEY_LEN]; }; @@ -142,16 +150,14 @@ struct carpreq { #define CARPCTL_ALLOW 1 /* accept incoming CARP packets */ #define CARPCTL_PREEMPT 2 /* high-pri backup preemption mode */ #define CARPCTL_LOG 3 /* log bad packets */ -#define CARPCTL_ARPBALANCE 4 /* balance arp responses */ -#define CARPCTL_STATS 5 /* CARP stats */ -#define CARPCTL_MAXID 6 +#define CARPCTL_STATS 4 /* CARP stats */ +#define CARPCTL_MAXID 5 #define CARPCTL_NAMES { \ { 0, 0 }, \ { "allow", CTLTYPE_INT }, \ { "preempt", CTLTYPE_INT }, \ { "log", CTLTYPE_INT }, \ - { "arpbalance", CTLTYPE_INT }, \ { "stats", CTLTYPE_STRUCT }, \ } @@ -161,9 +167,9 @@ void carp_proto_input (struct mbuf *, ...); void carp_carpdev_state(void *); void 
carp_group_demote_adj(struct ifnet *, int); int carp6_proto_input(struct mbuf **, int *, int); -int carp_iamatch(struct in_ifaddr *, u_char *, - u_int32_t *, u_int32_t); -int carp_iamatch6(struct ifnet *, struct ifaddr *); +int carp_iamatch(struct in_ifaddr *, u_char *, u_int8_t **, + u_int8_t **); +int carp_iamatch6(struct ifnet *, u_char *, struct sockaddr_dl **); struct ifnet *carp_ourether(void *, struct ether_header *, u_char, int); int carp_input(struct mbuf *, u_int8_t *, u_int8_t *, u_int16_t); int carp_output(struct ifnet *, struct mbuf *, struct sockaddr *, @@ -171,5 +177,6 @@ int carp_output(struct ifnet *, struct mbuf *, struct sockaddr *, int carp_sysctl(int *, u_int, void *, size_t *, void *, size_t); int carp_lsdrop(struct mbuf *, sa_family_t, u_int32_t *, u_int32_t *); void carp_rewrite_lladdr(struct ifnet *, u_int8_t *); +int carp_our_mcastaddr(struct ifnet *, u_int8_t *); #endif /* _KERNEL */ #endif /* _NETINET_IP_CARP_H_ */ diff --git a/sys/netinet/ip_icmp.c b/sys/netinet/ip_icmp.c index bd727af3b58..664a14cd0a6 100644 --- a/sys/netinet/ip_icmp.c +++ b/sys/netinet/ip_icmp.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ip_icmp.c,v 1.78 2007/12/13 20:00:53 reyk Exp $ */ +/* $OpenBSD: ip_icmp.c,v 1.79 2008/02/05 22:57:31 mpf Exp $ */ /* $NetBSD: ip_icmp.c,v 1.19 1996/02/13 23:42:22 christos Exp $ */ /* @@ -452,7 +452,6 @@ icmp_input(struct mbuf *m, ...) icmpsrc.sin_addr = icp->icmp_ip.ip_dst; #if NCARP > 0 if (m->m_pkthdr.rcvif->if_type == IFT_CARP && - m->m_pkthdr.rcvif->if_flags & IFF_LINK0 && carp_lsdrop(m, AF_INET, &icmpsrc.sin_addr.s_addr, &ip->ip_dst.s_addr)) goto freeit; @@ -530,7 +529,6 @@ icmp_input(struct mbuf *m, ...) 
reflect: #if NCARP > 0 if (m->m_pkthdr.rcvif->if_type == IFT_CARP && - m->m_pkthdr.rcvif->if_flags & IFF_LINK0 && carp_lsdrop(m, AF_INET, &ip->ip_src.s_addr, &ip->ip_dst.s_addr)) goto freeit; @@ -579,7 +577,6 @@ reflect: icmpsrc.sin_addr = icp->icmp_ip.ip_dst; #if NCARP > 0 if (m->m_pkthdr.rcvif->if_type == IFT_CARP && - m->m_pkthdr.rcvif->if_flags & IFF_LINK0 && carp_lsdrop(m, AF_INET, &icmpsrc.sin_addr.s_addr, &ip->ip_dst.s_addr)) goto freeit; diff --git a/sys/netinet/ip_input.c b/sys/netinet/ip_input.c index d8d3c01a4b4..2b691ffa1ec 100644 --- a/sys/netinet/ip_input.c +++ b/sys/netinet/ip_input.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ip_input.c,v 1.156 2007/12/14 18:33:41 deraadt Exp $ */ +/* $OpenBSD: ip_input.c,v 1.157 2008/02/05 22:57:31 mpf Exp $ */ /* $NetBSD: ip_input.c,v 1.30 1996/03/16 23:53:58 christos Exp $ */ /* @@ -351,10 +351,8 @@ ipv4_input(m) #if NCARP > 0 if (m->m_pkthdr.rcvif->if_type == IFT_CARP && - m->m_pkthdr.rcvif->if_flags & IFF_LINK0 && - ip->ip_p != IPPROTO_ICMP && - carp_lsdrop(m, AF_INET, &ip->ip_src.s_addr, - &ip->ip_dst.s_addr)) + ip->ip_p != IPPROTO_ICMP && carp_lsdrop(m, AF_INET, + &ip->ip_src.s_addr, &ip->ip_dst.s_addr)) goto bad; #endif @@ -452,10 +450,8 @@ ipv4_input(m) #if NCARP > 0 if (m->m_pkthdr.rcvif->if_type == IFT_CARP && - m->m_pkthdr.rcvif->if_flags & IFF_LINK0 && - ip->ip_p == IPPROTO_ICMP && - carp_lsdrop(m, AF_INET, &ip->ip_src.s_addr, - &ip->ip_dst.s_addr)) + ip->ip_p == IPPROTO_ICMP && carp_lsdrop(m, AF_INET, + &ip->ip_src.s_addr, &ip->ip_dst.s_addr)) goto bad; #endif /* diff --git a/sys/netinet6/icmp6.c b/sys/netinet6/icmp6.c index 4a0e360a936..8800a65f449 100644 --- a/sys/netinet6/icmp6.c +++ b/sys/netinet6/icmp6.c @@ -1,4 +1,4 @@ -/* $OpenBSD: icmp6.c,v 1.94 2007/06/01 00:52:38 henning Exp $ */ +/* $OpenBSD: icmp6.c,v 1.95 2008/02/05 22:57:31 mpf Exp $ */ /* $KAME: icmp6.c,v 1.217 2001/06/20 15:03:29 jinmei Exp $ */ /* @@ -481,7 +481,6 @@ icmp6_input(mp, offp, proto) #if NCARP > 0 if (m->m_pkthdr.rcvif->if_type == 
IFT_CARP && - m->m_pkthdr.rcvif->if_flags & IFF_LINK0 && icmp6->icmp6_type == ICMP6_ECHO_REQUEST && carp_lsdrop(m, AF_INET6, ip6->ip6_src.s6_addr32, ip6->ip6_dst.s6_addr32)) diff --git a/sys/netinet6/ip6_input.c b/sys/netinet6/ip6_input.c index 2459d5d2688..f101308eb9d 100644 --- a/sys/netinet6/ip6_input.c +++ b/sys/netinet6/ip6_input.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ip6_input.c,v 1.80 2007/12/14 18:33:41 deraadt Exp $ */ +/* $OpenBSD: ip6_input.c,v 1.81 2008/02/05 22:57:31 mpf Exp $ */ /* $KAME: ip6_input.c,v 1.188 2001/03/29 05:34:31 itojun Exp $ */ /* @@ -258,7 +258,6 @@ ip6_input(m) #if NCARP > 0 if (m->m_pkthdr.rcvif->if_type == IFT_CARP && - m->m_pkthdr.rcvif->if_flags & IFF_LINK0 && ip6->ip6_nxt != IPPROTO_ICMPV6 && carp_lsdrop(m, AF_INET6, ip6->ip6_src.s6_addr32, ip6->ip6_dst.s6_addr32)) @@ -552,7 +551,6 @@ ip6_input(m) #if NCARP > 0 if (m->m_pkthdr.rcvif->if_type == IFT_CARP && - m->m_pkthdr.rcvif->if_flags & IFF_LINK0 && ip6->ip6_nxt == IPPROTO_ICMPV6 && carp_lsdrop(m, AF_INET6, ip6->ip6_src.s6_addr32, ip6->ip6_dst.s6_addr32)) diff --git a/sys/netinet6/nd6_nbr.c b/sys/netinet6/nd6_nbr.c index 6168ed52a84..aaa5ceccab4 100644 --- a/sys/netinet6/nd6_nbr.c +++ b/sys/netinet6/nd6_nbr.c @@ -1,4 +1,4 @@ -/* $OpenBSD: nd6_nbr.c,v 1.46 2007/12/09 21:24:58 hshoexer Exp $ */ +/* $OpenBSD: nd6_nbr.c,v 1.47 2008/02/05 22:57:31 mpf Exp $ */ /* $KAME: nd6_nbr.c,v 1.61 2001/02/10 16:06:14 jinmei Exp $ */ /* @@ -195,7 +195,7 @@ nd6_ns_input(m, off, icmp6len) #if NCARP > 0 if (ifp->if_type == IFT_CARP) { ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp, &taddr6); - if (ifa && !carp_iamatch6(ifp, ifa)) + if (ifa && !carp_iamatch6(ifp, lladdr, &proxydl)) ifa = NULL; } else { ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp, &taddr6); |