DragonFly submit List (threaded) for 2007-05
[
Date Prev][
Date Next]
[
Thread Prev][
Thread Next]
[
Date Index][
Thread Index]
Second patch for CARP
Hi all,
Here is an update of the patch that Jonathan Buschmann sent to the kernel
mailing list (on March 14).
Differences between this patch and the first one submitted to the kernel
mailing list:
- The announced IPv6 bug is corrected (the CARP interface did not reply
to Neighbor Solicitations)
- Some man pages were imported/updated
- LINT config file updated
- Code updated for "syncing" with CVS.
- Removal of the code for "Report link state changes", which was
in "conflict"
- The patch was generated (and tested) against the RSYNC/Devel snapshot
from May 20. (We had some difficulties accessing the CVS
server to keep our sources up to date. :-( )
- Code cleanup (nuke some commented-out code)
Notes :
- As Sacha Wildner suggested to Jonathan, we should add IFM_CARP to the
ifmedia(4) man page. But the man pages of the other BSDs do not
document it. Should it be added or not, and if so, what should be added?
(Bearing in mind that IFM_CARP is a "virtual" media.)
- English is not our native language, and the PFSYNC man page in
DragonFly BSD is not the same as in the other BSDs. So I added a section
about CARP (imported from the FreeBSD man page). It is just an example "in
the middle of nowhere in the man page"; I find that a little bit
strange, maybe there is a better choice...
- A lot of other things (I don't remember what...)
Regards,
--
Baptiste Ritter
diff --git a/etc/protocols b/etc/protocols
index 4399da5..fc04f48 100644
--- a/etc/protocols
+++ b/etc/protocols
@@ -120,7 +120,7 @@ ipcomp 108 IPComp # IP Payload Compress
snp 109 SNP # Sitara Networks Protocol
compaq-peer 110 Compaq-Peer # Compaq Peer Protocol
ipx-in-ip 111 IPX-in-IP # IPX in IP
-vrrp 112 VRRP # Virtual Router Redundancy Protocol
+carp 112 CARP vrrp # Common Address Redundancy Protocol
pgm 113 PGM # PGM Reliable Transport Protocol
# 114 # any 0-hop protocol
l2tp 115 L2TP # Layer Two Tunneling Protocol
diff --git a/sbin/ifconfig/Makefile b/sbin/ifconfig/Makefile
index c6556e6..f15aaa7 100644
--- a/sbin/ifconfig/Makefile
+++ b/sbin/ifconfig/Makefile
@@ -24,7 +24,7 @@ SRCS+= ifmedia.c # SIOC[GS]IFMEDIA supp
SRCS+= ifvlan.c # SIOC[GS]ETVLAN support
SRCS+= ifieee80211.c # SIOC[GS]IEEE80211 support
-#SRCS+= ifcarp.c # SIOC[GS]VH support
+SRCS+= ifcarp.c # SIOC[GS]VH support
#SRCS+= ifpfsync.c # pfsync(4) support
SRCS+= ifbridge.c # bridge support
diff --git a/sbin/ifconfig/ifcarp.c b/sbin/ifconfig/ifcarp.c
new file mode 100644
index 0000000..2ab73f3
--- /dev/null
+++ b/sbin/ifconfig/ifcarp.c
@@ -0,0 +1,200 @@
+/* $Id$ */
+/* from $FreeBSD: src/sbin/ifconfig/ifcarp.c,v 1.2 2005/02/22 14:07:47 glebius Exp $ */
+/* from $OpenBSD: ifconfig.c,v 1.82 2003/10/19 05:43:35 mcbride Exp $ */
+
+/*
+ * Copyright (c) 2002 Michael Shalayeff. All rights reserved.
+ * Copyright (c) 2003 Ryan McBride. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/sockio.h>
+
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <netinet/ip_carp.h>
+#include <net/route.h>
+
+#include <ctype.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <err.h>
+#include <errno.h>
+
+#include "ifconfig.h"
+
+static const char *carp_states[] = { CARP_STATES };
+
+void carp_status(int s);
+void setcarp_advbase(const char *,int, int, const struct afswtch *rafp);
+void setcarp_advskew(const char *, int, int, const struct afswtch *rafp);
+void setcarp_passwd(const char *, int, int, const struct afswtch *rafp);
+void setcarp_vhid(const char *, int, int, const struct afswtch *rafp);
+
+/*
+ * Print the "carp:" status line for the interface request held in the
+ * global ifr.  Nothing is printed if SIOCGVH fails or the vhid is not > 0.
+ */
+void
+carp_status(int s)
+{
+	struct carpreq carpr;
+	const char *state;
+
+	memset(&carpr, 0, sizeof(carpr));
+	ifr.ifr_data = (caddr_t)&carpr;
+
+	if (ioctl(s, SIOCGVH, (caddr_t)&ifr) == -1)
+		return;
+	if (carpr.carpr_vhid <= 0)
+		return;
+
+	/* Values above CARP_MAXSTATE are not looked up in carp_states[]. */
+	state = (carpr.carpr_state > CARP_MAXSTATE) ?
+	    "<UNKNOWN>" : carp_states[carpr.carpr_state];
+
+	printf("\tcarp: %s vhid %d advbase %d advskew %d\n",
+	    state, carpr.carpr_vhid, carpr.carpr_advbase,
+	    carpr.carpr_advskew);
+}
+
+/*
+ * Set the CARP key; clear it first so a shorter password leaves no old bytes.
+ */
+void
+setcarp_passwd(const char *val, int d, int s, const struct afswtch *afp)
+{
+	struct carpreq carpr;
+
+	memset(&carpr, 0, sizeof(carpr));
+	ifr.ifr_data = (caddr_t)&carpr;
+	if (ioctl(s, SIOCGVH, (caddr_t)&ifr) == -1)
+		err(1, "SIOCGVH");
+
+	/* XXX Should hash the password into the key here, perhaps? */
+	memset(carpr.carpr_key, 0, CARP_KEY_LEN);
+	strlcpy(carpr.carpr_key, val, CARP_KEY_LEN);
+	if (ioctl(s, SIOCSVH, (caddr_t)&ifr) == -1)
+		err(1, "SIOCSVH");
+}
+
+/*
+ * Handler for the "vhid" keyword: parse val with atoi(), insist on a
+ * positive number, then update carpr_vhid through the SIOCGVH/SIOCSVH
+ * ioctl pair on socket s.
+ */
+void
+setcarp_vhid(const char *val, int d, int s, const struct afswtch *afp)
+{
+	struct carpreq carpr;
+	const int vhid = atoi(val);
+
+	if (vhid <= 0)
+		errx(1, "vhid must be greater than 0");
+
+	memset(&carpr, 0, sizeof(carpr));
+	ifr.ifr_data = (caddr_t)&carpr;
+
+	/* Read the request first, then change only carpr_vhid. */
+	if (ioctl(s, SIOCGVH, (caddr_t)&ifr) == -1)
+		err(1, "SIOCGVH");
+	carpr.carpr_vhid = vhid;
+	if (ioctl(s, SIOCSVH, (caddr_t)&ifr) == -1)
+		err(1, "SIOCSVH");
+}
+
+/*
+ * Handler for the "advskew" keyword: store atoi(val) in carpr_advskew
+ * between a SIOCGVH read and a SIOCSVH write.  No range check is done here.
+ */
+void
+setcarp_advskew(const char *val, int d, int s, const struct afswtch *afp)
+{
+	struct carpreq carpr;
+	const int advskew = atoi(val);
+
+	memset(&carpr, 0, sizeof(carpr));
+	ifr.ifr_data = (caddr_t)&carpr;
+
+	if (ioctl(s, SIOCGVH, (caddr_t)&ifr) == -1)
+		err(1, "SIOCGVH");
+
+	carpr.carpr_advskew = advskew;
+
+	if (ioctl(s, SIOCSVH, (caddr_t)&ifr) == -1)
+		err(1, "SIOCSVH");
+}
+
+/*
+ * Handler for the "advbase" keyword: fetch the carpreq with SIOCGVH,
+ * replace carpr_advbase with atoi(val) and write it back with SIOCSVH.
+ */
+void
+setcarp_advbase(const char *val, int d, int s, const struct afswtch *afp)
+{
+	struct carpreq carpr;
+	int advbase;
+
+	memset(&carpr, 0, sizeof(carpr));
+	ifr.ifr_data = (caddr_t)&carpr;
+	advbase = atoi(val);
+
+	if (ioctl(s, SIOCGVH, (caddr_t)&ifr) == -1)
+		err(1, "SIOCGVH");
+
+	carpr.carpr_advbase = advbase;
+	if (ioctl(s, SIOCSVH, (caddr_t)&ifr) == -1)
+		err(1, "SIOCSVH");
+}
+
+static struct cmd carp_cmds[] = {	/* keyword -> handler; registered by carp_ctor() */
+ DEF_CMD_ARG("advbase", setcarp_advbase),
+ DEF_CMD_ARG("advskew", setcarp_advskew),
+ DEF_CMD_ARG("pass", setcarp_passwd),
+ DEF_CMD_ARG("vhid", setcarp_vhid),
+};
+static struct afswtch af_carp = {	/* passed to af_register() by carp_ctor() */
+ .af_name = "af_carp",
+ .af_af = AF_UNSPEC,
+ .af_other_status = carp_status,	/* prints the "carp:" status line */
+};
+
+/* Register every carp_cmds[] entry, then the af_carp switch entry. */
+static __constructor void
+carp_ctor(void)
+{
+	struct cmd *cmd, *const end =
+	    carp_cmds + sizeof(carp_cmds) / sizeof(carp_cmds[0]);
+
+	for (cmd = carp_cmds; cmd < end; cmd++)
+		cmd_register(cmd);
+	af_register(&af_carp);
+}
diff --git a/sbin/ifconfig/ifconfig.8 b/sbin/ifconfig/ifconfig.8
index c832cc1..e9820fb 100644
--- a/sbin/ifconfig/ifconfig.8
+++ b/sbin/ifconfig/ifconfig.8
@@ -28,8 +28,9 @@
.\" From: @(#)ifconfig.8 8.3 (Berkeley) 1/5/94
.\" $FreeBSD: src/sbin/ifconfig/ifconfig.8,v 1.124 2006/10/10 09:44:08 ru Exp $
.\" $DragonFly: src/sbin/ifconfig/ifconfig.8,v 1.19 2007/04/09 21:20:37 swildner Exp $
+
.\"
-.Dd November 19, 2006
+.Dd April 8, 2007
.Dt IFCONFIG 8
.Os
.Sh NAME
@@ -1388,6 +1389,31 @@ The
argument is useless and hence deprecated.
.El
.Pp
+The following parameters are specific to
+.Xr carp 4
+interfaces:
+.Bl -tag -width indent
+.It Cm advbase Ar seconds
+Specifies the base of the advertisement interval in seconds.
+The acceptable values are 1 to 255.
+The default value is 1.
+.\" The default value is
+.\" .Dv CARP_DFLTINTV .
+.It Cm advskew Ar interval
+Specifies the skew to add to the base advertisement interval to
+make one host advertise slower than another host.
+It is specified in 1/256 of seconds.
+The acceptable values are 0 to 254.
+The default value is 0.
+.It Cm pass Ar phrase
+Set the authentication key to
+.Ar phrase .
+.It Cm vhid Ar n
+Set the virtual host ID.
+This is a required setting.
+Acceptable values are 1 to 255.
+.El
+.Pp
The
.Nm
utility displays the current configuration for a network interface
@@ -1460,6 +1486,7 @@ requested address is unknown, or the use
tried to alter an interface's configuration.
.Sh SEE ALSO
.Xr netstat 1 ,
+.Xr carp 4 ,
.Xr ifmedia 4 ,
.Xr netintro 4 ,
.Xr polling 4 ,
diff --git a/sbin/ifconfig/ifmedia.c b/sbin/ifconfig/ifmedia.c
index 978c79a..7876715 100644
--- a/sbin/ifconfig/ifmedia.c
+++ b/sbin/ifconfig/ifmedia.c
@@ -175,6 +175,12 @@ media_status(int s)
else
printf("no carrier");
break;
+ case IFM_CARP:
+ if (ifmr.ifm_status & IFM_ACTIVE)
+ printf("master");
+ else
+ printf("backup");
+ break;
}
putchar('\n');
}
diff --git a/share/man/man4/Makefile b/share/man/man4/Makefile
index 4b9fcba..53cf3c8 100644
--- a/share/man/man4/Makefile
+++ b/share/man/man4/Makefile
@@ -34,6 +34,7 @@ MAN= aac.4 \
bridge.4 \
bt.4 \
cardbus.4 \
+ carp.4 \
ccd.4 \
cd.4 \
ch.4 \
diff --git a/share/man/man4/carp.4 b/share/man/man4/carp.4
new file mode 100644
index 0000000..1a458a8
--- /dev/null
+++ b/share/man/man4/carp.4
@@ -0,0 +1,263 @@
+.\" $OpenBSD: carp.4,v 1.16 2004/12/07 23:41:35 jmc Exp $
+.\" $FreeBSD: src/share/man/man4/carp.4,v 1.10 2006/06/07 10:26:51 glebius Exp $
+.\" $Id$
+.\"
+.\" Copyright (c) 2003, Ryan McBride. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\"
+.Dd April 9, 2007
+.Dt CARP 4
+.Os
+.Sh NAME
+.Nm carp
+.Nd Common Address Redundancy Protocol
+.Sh SYNOPSIS
+.Cd "device carp"
+.Sh DESCRIPTION
+The
+.Nm
+interface is a pseudo-device that implements and controls the
+CARP protocol.
+CARP allows multiple hosts on the same local network to share a set of IP addresses.
+Its primary purpose is to ensure that these
+addresses are always available, but in some configurations
+.Nm
+can also provide load balancing functionality.
+.Pp
+A
+.Nm
+interface can be created at runtime using the
+.Nm ifconfig Li carp Ns Ar N Cm create
+command or by configuring
+it via
+.Va cloned_interfaces
+in the
+.Pa /etc/rc.conf
+file.
+.Pp
+To use
+.Nm ,
+the administrator needs to configure at minimum a common virtual host ID (VHID)
+and virtual host IP address on each machine which is to take part in the virtual
+group.
+Additional parameters can also be set on a per-interface basis:
+.Cm advbase
+and
+.Cm advskew ,
+which are used to control how frequently the host sends advertisements when it
+is the master for a virtual host, and
+.Cm pass
+which is used to authenticate
+.Nm
+advertisements.
+The
+.Cm advbase
+parameter stands for
+.Dq "advertisement base" .
+It is measured in seconds and specifies the base of the advertisement interval.
+The
+.Cm advskew
+parameter stands for
+.Dq "advertisement skew" .
+It is measured in 1/256 of seconds.
+It is added to the base advertisement interval to make one host advertise
+a bit slower than the other does.
+Both
+.Cm advbase
+and
+.Cm advskew
+are put inside CARP advertisements.
+These configurations can be done using
+.Xr ifconfig 8 ,
+or through the
+.Dv SIOCSVH
+.Xr ioctl 2 .
+.Pp
+Additionally, there are a number of global parameters which can be set using
+.Xr sysctl 8 :
+.Bl -tag -width ".Va net.inet.carp.arpbalance"
+.It Va net.inet.carp.allow
+Accept incoming
+.Nm
+packets.
+Enabled by default.
+.It Va net.inet.carp.preempt
+Allow virtual hosts to preempt each other.
+It is also used to failover
+.Nm
+interfaces as a group.
+When the option is enabled and one of the
+.Nm
+enabled physical interfaces
+goes down,
+.Cm advskew
+is changed to 240 on all
+.Nm
+interfaces.
+See also the first example.
+Disabled by default.
+.It Va net.inet.carp.log
+Value of 0 disables any logging.
+Value of 1 enables logging of bad
+.Nm
+packets.
+Values above 1 enable logging state changes of
+.Nm
+interfaces.
+Default value is 1.
+.It Va net.inet.carp.arpbalance
+Balance local traffic using ARP (see below).
+Disabled by default.
+.It Va net.inet.carp.suppress_preempt
+A read only value showing the status of preemption suppression.
+Preemption can be suppressed if link on an interface is down
+or when
+.Xr pfsync 4
+interface is not synchronized.
+Value of 0 means that preemption is not suppressed, since no
+problems are detected.
+Every problem increments suppression counter.
+.El
+.Sh ARP level load balancing
+The
+.Nm
+has limited abilities for load balancing the incoming connections
+between hosts in Ethernet network.
+For load balancing operation, one needs several CARP interfaces that
+are configured to the same IP address, but to different VHIDs.
+Once an ARP request is received, the CARP protocol will use a hashing
+function against the source IP address in the ARP request to determine
+which VHID should this request belong to.
+If the corresponding CARP interface is in master state, the ARP request
+will be replied, otherwise it will be ignored.
+See the
+.Sx EXAMPLES
+section for a practical example of load balancing.
+.Pp
+The ARP load balancing has some limitations.
+First, ARP balancing only works on the local network segment.
+It cannot balance traffic that crosses a router, because the
+router itself will always be balanced to the same virtual host.
+Second, ARP load balancing can lead to asymmetric routing
+of incoming and outgoing traffic, and thus combining it with
+.Xr pfsync 4
+is dangerous, because this creates a race condition between
+balanced routers and a host they are serving.
+Imagine an incoming packet creating state on the first router, being
+forwarded to its destination, and destination replying faster
+than the state information is packed and synced with the second router.
+If the reply would be load balanced to second router, it will be
+dropped due to no state.
+.Sh EXAMPLES
+For firewalls and routers with multiple interfaces, it is desirable to
+failover all of the
+.Nm
+interfaces together, when one of the physical interfaces goes down.
+This is achieved by the preempt option.
+Enable it on both host A and B:
+.Pp
+.Dl sysctl net.inet.carp.preempt=1
+.Pp
+Assume that host A is the preferred master and 192.168.1.x/24 is
+configured on one physical interface and 192.168.2.y/24 on another.
+This is the setup for host A:
+.Bd -literal -offset indent
+ifconfig carp0 create
+ifconfig carp0 vhid 1 pass mekmitasdigoat 192.168.1.1/24
+ifconfig carp1 create
+ifconfig carp1 vhid 2 pass mekmitasdigoat 192.168.2.1/24
+.Ed
+.Pp
+The setup for host B is identical, but it has a higher
+.Cm advskew :
+.Bd -literal -offset indent
+ifconfig carp0 create
+ifconfig carp0 vhid 1 advskew 100 pass mekmitasdigoat 192.168.1.1/24
+ifconfig carp1 create
+ifconfig carp1 vhid 2 advskew 100 pass mekmitasdigoat 192.168.2.1/24
+.Ed
+.Pp
+Because of the preempt option, when one of the physical interfaces of
+host A fails,
+.Cm advskew
+is adjusted to 240 on all its
+.Nm
+interfaces.
+This will cause host B to preempt on both interfaces instead of
+just the failed one.
+.Pp
+In order to set up an ARP balanced virtual host, it is necessary to configure
+one virtual host for each physical host which would respond to ARP requests
+and thus handle the traffic.
+In the following example, two virtual hosts are configured on two hosts to
+provide balancing and failover for the IP address 192.168.1.10.
+.Pp
+First the
+.Nm
+interfaces on host A are configured.
+The
+.Cm advskew
+of 100 on the second virtual host means that its advertisements will be sent
+out slightly less frequently.
+.Bd -literal -offset indent
+ifconfig carp0 create
+ifconfig carp0 vhid 1 pass mekmitasdigoat 192.168.1.10/24
+ifconfig carp1 create
+ifconfig carp1 vhid 2 advskew 100 pass mekmitasdigoat 192.168.1.10/24
+.Ed
+.Pp
+The configuration for host B is identical, except the
+.Cm advskew
+is on virtual host 1 rather than virtual host 2.
+.Bd -literal -offset indent
+ifconfig carp0 create
+ifconfig carp0 vhid 1 advskew 100 pass mekmitasdigoat 192.168.1.10/24
+ifconfig carp1 create
+ifconfig carp1 vhid 2 pass mekmitasdigoat 192.168.1.10/24
+.Ed
+.Pp
+Finally, the ARP balancing feature must be enabled on both hosts:
+.Pp
+.Dl sysctl net.inet.carp.arpbalance=1
+.Pp
+When the hosts receive an ARP request for 192.168.1.10, the source IP address
+of the request is used to compute which virtual host should answer the request.
+The host which is master of the selected virtual host will reply to the
+request, the other(s) will ignore it.
+.Pp
+This way, locally connected systems will receive different ARP replies and
+subsequent IP traffic will be balanced among the hosts.
+If one of the hosts fails, the other will take over the virtual MAC address,
+and begin answering ARP requests on its behalf.
+.Sh SEE ALSO
+.Xr inet 4 ,
+.Xr pfsync 4 ,
+.Xr rc.conf 5 ,
+.Xr ifconfig 8 ,
+.Xr sysctl 8
+.Sh HISTORY
+The
+.Nm
+device first appeared in
+.Ox 3.5 .
diff --git a/share/man/man4/pfsync.4 b/share/man/man4/pfsync.4
index 349e56e..f518690 100644
--- a/share/man/man4/pfsync.4
+++ b/share/man/man4/pfsync.4
@@ -24,7 +24,7 @@
.\" (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
.\" THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.\"
-.Dd November 29, 2002
+.Dd April 9, 2007
.Dt PFSYNC 4
.Os
.Sh NAME
@@ -129,8 +129,76 @@ only the necessary information.
# ifconfig pfsync0 up syncif fxp0 maxupd 64
# tcpdump -s1500 -evtni pfsync0
.Ed
+.Sh USING PFSYNC WITH CARP
+.Nm
+and
+.Xr carp 4
+can be used together to provide automatic failover of a pair of firewalls
+configured in parallel.
+One firewall handles all traffic \- if it dies or
+is shut down, the second firewall takes over automatically.
+.Pp
+Both firewalls in this example have three
+.Xr sis 4
+interfaces.
+sis0 is the external interface, on the 10.0.0.0/24 subnet; sis1 is the
+internal interface, on the 192.168.0.0/24 subnet; and sis2 is the
+.Nm
+interface, using the 192.168.254.0/24 subnet.
+A crossover cable connects the two firewalls via their sis2 interfaces.
+On all three interfaces, firewall A uses the .254 address, while firewall B
+uses .253.
+The interfaces are configured as follows (firewall A unless otherwise
+indicated):
+.Pp
+Interfaces configuration in
+.Pa /etc/rc.conf :
+.Bd -literal -offset indent
+network_interfaces="lo0 sis0 sis1 sis2"
+cloned_interfaces="carp0 carp1"
+ifconfig_sis0="10.0.0.254/24"
+ifconfig_sis1="192.168.0.254/24"
+ifconfig_sis2="192.168.254.254/24"
+ifconfig_carp0="vhid 1 pass foo 10.0.0.1/24"
+ifconfig_carp1="vhid 2 pass bar 192.168.0.1/24"
+pfsync_enable="YES"
+pfsync_syncdev="sis2"
+.Ed
+.Pp
+.Xr pf 4
+must also be configured to allow
+.Nm
+and
+.Xr carp 4
+traffic through.
+The following should be added to the top of
+.Pa /etc/pf.conf :
+.Bd -literal -offset indent
+pass quick on { sis2 } proto pfsync
+pass quick on { sis0 sis1 } proto carp keep state
+.Ed
+.Pp
+If it is preferable that one firewall handle the traffic,
+the
+.Ar advskew
+on the backup firewall's
+.Xr carp 4
+interfaces should be set to something higher than
+the primary's.
+For example, if firewall B is the backup, its
+carp1 configuration would look like this:
+.Bd -literal -offset indent
+ifconfig_carp1="vhid 2 pass bar advskew 100 192.168.0.1/24"
+.Ed
+.Pp
+The following must also be added to
+.Pa /etc/sysctl.conf :
+.Bd -literal -offset indent
+net.inet.carp.preempt=1
+.Ed
.Sh SEE ALSO
.Xr tcpdump 1 ,
+.Xr carp 4 ,
.Xr bpf 4 ,
.Xr inet 4 ,
.Xr inet6 4 ,
diff --git a/sys/conf/files b/sys/conf/files
index 92cd5b7..5222fe3 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -79,6 +79,7 @@ crypto/des/des_setkey.c optional ipsec
crypto/rijndael/rijndael-alg-fst.c optional ipsec ipsec_esp
crypto/rijndael/rijndael-api-fst.c optional ipsec ipsec_esp
crypto/sha1.c optional ipsec
+crypto/sha1.c optional carp
crypto/sha2/sha2.c optional ipsec
ddb/db_access.c optional ddb
ddb/db_kld.c optional ddb
@@ -887,6 +888,7 @@ netinet/in_gif.c optional gif inet
netinet/igmp.c optional inet
netinet/in.c optional inet
netinet/in_cksum.c optional inet
+netinet/ip_carp.c optional carp
netinet/ip_gre.c optional gre inet
netinet/ip_id.c optional inet
netinet/in_pcb.c optional inet
diff --git a/sys/conf/options b/sys/conf/options
index 184ec98..bfeaaef 100644
--- a/sys/conf/options
+++ b/sys/conf/options
@@ -264,6 +264,7 @@ BOOTP_COMPAT opt_bootp.h
BOOTP_NFSROOT opt_bootp.h
BOOTP_NFSV3 opt_bootp.h
BOOTP_WIRED_TO opt_bootp.h
+CARP opt_carp.h
ETHER_II opt_ef.h
ETHER_8023 opt_ef.h
ETHER_8022 opt_ef.h
diff --git a/sys/config/LINT b/sys/config/LINT
index 58721c7..92bc5ad 100644
--- a/sys/config/LINT
+++ b/sys/config/LINT
@@ -633,6 +633,10 @@ device pf
device pfsync
device pflog
+#CARP
+pseudo-device carp
+options CARP
+
# The MBUF_STRESS_TEST option enables options which create
# various random failures / extreme cases related to mbuf
# functions. See the mbuf(9) manpage for a list of available
diff --git a/sys/config/VKERNEL b/sys/config/VKERNEL
index 6f2cfb6..151f2c8 100644
--- a/sys/config/VKERNEL
+++ b/sys/config/VKERNEL
@@ -73,6 +73,8 @@ options DDB
options DDB_TRACE
options INVARIANTS
+options CARP
+
# Floating point support - do not disable.
device npx0 at nexus?
@@ -87,6 +89,8 @@ pseudo-device md # Memory "disks"
pseudo-device gif # IPv6 and IPv4 tunneling
pseudo-device faith 1 # IPv6-to-IPv4 relaying (translation)
+pseudo-device carp
+
# The `bpf' pseudo-device enables the Berkeley Packet Filter.
# Be aware of the administrative consequences of enabling this!
pseudo-device bpf #Berkeley packet filter
@@ -96,3 +100,4 @@ pseudo-device bpf #Berkeley packet filt
device vn
device vkd
device vke
+
diff --git a/sys/net/if_ethersubr.c b/sys/net/if_ethersubr.c
index d08ae24..7ed3ca4 100644
--- a/sys/net/if_ethersubr.c
+++ b/sys/net/if_ethersubr.c
@@ -40,6 +40,7 @@
#include "opt_inet6.h"
#include "opt_ipx.h"
#include "opt_netgraph.h"
+#include "opt_carp.h"
#include <sys/param.h>
#include <sys/systm.h>
@@ -71,6 +72,10 @@
#include <netinet6/nd6.h>
#endif
+#ifdef CARP
+#include <netinet/ip_carp.h>
+#endif
+
#ifdef IPX
#include <netproto/ipx/ipx.h>
#include <netproto/ipx/ipx_if.h>
@@ -346,6 +351,12 @@ ether_output(struct ifnet *ifp, struct m
}
}
+#ifdef CARP
+ if (ifp->if_carp && (error = carp_output(ifp, m, dst, NULL)))
+ goto bad;
+#endif
+
+
/* Handle ng_ether(4) processing, if any */
if (ng_ether_output_p != NULL) {
if ((error = (*ng_ether_output_p)(ifp, &m)) != 0)
@@ -654,6 +665,20 @@ ether_demux(struct ifnet *ifp, struct et
if (rule) /* packet is passing the second time */
goto post_stats;
+#ifdef CARP
+ /*
+ * XXX: Okay, we need to call carp_forus() and - if it is for
+ * us jump over code that does the normal check
+ * "ac_enaddr == ether_dhost". The check sequence is a bit
+ * different from OpenBSD, so we jump over as little code as
+ * possible, to catch _all_ sanity checks. This needs
+ * evaluation, to see if the carp ether_dhost values break any
+ * of these checks!
+ */
+ if (ifp->if_carp && carp_forus(ifp->if_carp, eh->ether_dhost))
+ goto pre_stats;
+#endif
+
/*
* Discard packet if upper layers shouldn't see it because
* it was unicast to a different Ethernet address. If the
@@ -666,6 +691,11 @@ ether_demux(struct ifnet *ifp, struct et
m_freem(m);
return;
}
+
+#ifdef CARP
+pre_stats:
+#endif
+
/* Discard packet if interface is not up */
if (!(ifp->if_flags & IFF_UP)) {
m_freem(m);
diff --git a/sys/net/if_media.h b/sys/net/if_media.h
index cac9959..1dec5f6 100644
--- a/sys/net/if_media.h
+++ b/sys/net/if_media.h
@@ -239,6 +239,11 @@ int ifmedia_baudrate(int);
#define IFM_ATM_UNASSIGNED 0x00000400 /* unassigned cells */
/*
+ * CARP Common Address Redundancy Protocol
+ */
+#define IFM_CARP 0x000000c0
+
+/*
* Shared media sub-types
*/
#define IFM_AUTO 0 /* Autoselect best media */
@@ -316,6 +321,7 @@ struct ifmedia_description {
{ IFM_TOKEN, "Token ring" }, \
{ IFM_FDDI, "FDDI" }, \
{ IFM_IEEE80211, "IEEE 802.11 Wireless Ethernet" }, \
+ { IFM_CARP, "Common Address Redundancy Protocol" }, \
{ 0, NULL }, \
}
diff --git a/sys/net/if_types.h b/sys/net/if_types.h
index d2e6742..4039b0d 100644
--- a/sys/net/if_types.h
+++ b/sys/net/if_types.h
@@ -252,4 +252,5 @@
#define IFT_STF 0xf3
#define IFT_PFLOG 0xf5 /* Packet filter logging */
#define IFT_PFSYNC 0xf6 /* Packet filter state syncing */
+#define IFT_CARP 0xf8 /* Common Address Redundancy Protocol */
#endif /* !_NET_IF_TYPES_H_ */
diff --git a/sys/net/if_var.h b/sys/net/if_var.h
index 8dc1e40..1436a72 100644
--- a/sys/net/if_var.h
+++ b/sys/net/if_var.h
@@ -81,6 +81,7 @@ struct rtentry;
struct rt_addrinfo;
struct socket;
struct ether_header;
+struct carp_if;
struct ucred;
struct lwkt_serialize;
@@ -178,6 +179,7 @@ struct ifnet {
int if_dunit; /* unit or IF_DUNIT_NONE */
struct ifaddrhead if_addrhead; /* linked list of addresses per if */
int if_pcount; /* number of promiscuous listeners */
+ struct carp_if *if_carp; /* carp interface structure */
struct bpf_if *if_bpf; /* packet filter structure */
u_short if_index; /* numeric abbreviation for this if */
short if_timer; /* time 'til if_watchdog called */
diff --git a/sys/netinet/if_ether.c b/sys/netinet/if_ether.c
index b5c3410..1b7fc53 100644
--- a/sys/netinet/if_ether.c
+++ b/sys/netinet/if_ether.c
@@ -74,6 +74,7 @@
*/
#include "opt_inet.h"
+#include "opt_carp.h"
#include <sys/param.h>
#include <sys/kernel.h>
@@ -102,6 +103,10 @@
#include <net/if_arc.h>
#include <net/iso88025.h>
+#ifdef CARP
+#include <netinet/ip_carp.h>
+#endif
+
#define SIN(s) ((struct sockaddr_in *)s)
#define SDL(s) ((struct sockaddr_dl *)s)
@@ -143,6 +148,7 @@ SYSCTL_INT(_net_link_ether_inet, OID_AUT
SYSCTL_INT(_net_link_ether_inet, OID_AUTO, proxyall, CTLFLAG_RW,
&arp_proxyall, 0, "");
+void arprequest_acces(struct ifnet *ifp, struct in_addr *sip, struct in_addr *tip, u_char *enaddr);
static void arp_rtrequest (int, struct rtentry *, struct rt_addrinfo *);
static void arprequest (struct ifnet *,
struct in_addr *, struct in_addr *, u_char *);
@@ -736,6 +742,7 @@ in_arpinput(struct mbuf *m)
#ifdef SMP
struct netmsg_arp_update msg;
#endif
+ u_int8_t *enaddr = NULL;
int op;
int req_len;
@@ -770,6 +777,19 @@ in_arpinput(struct mbuf *m)
if (ifp->if_bridge && ia->ia_ifp &&
ifp->if_bridge == ia->ia_ifp->if_bridge)
+
+#ifdef CARP
+ /*
+ * If the interface does not match, but the receiving interface
+ * is part of carp, we call carp_iamatch to see if this is a
+ * request for the virtual host ip.
+ * XXX: This is really ugly!
+ */
+ if (ifp->if_carp != NULL &&
+ carp_iamatch(ifp->if_carp, ia, &isaddr, &enaddr) &&
+ itaddr.s_addr == ia->ia_addr.sin_addr.s_addr)
+ goto match;
+#endif
}
LIST_FOREACH(ia, INADDR_HASH(isaddr.s_addr), ia_hash) {
/* Skip all ia's which don't match */
@@ -801,8 +821,10 @@ in_arpinput(struct mbuf *m)
return;
match:
+ if (!enaddr)
+ enaddr = (u_int8_t *)IF_LLADDR(ifp);
myaddr = ia->ia_addr.sin_addr;
- if (!bcmp(ar_sha(ah), IF_LLADDR(ifp), ifp->if_addrlen)) {
+ if (!bcmp(ar_sha(ah), enaddr, ifp->if_addrlen)) {
m_freem(m); /* it's from me, ignore it. */
return;
}
@@ -839,7 +861,7 @@ reply:
if (itaddr.s_addr == myaddr.s_addr) {
/* I am the target */
memcpy(ar_tha(ah), ar_sha(ah), ah->ar_hln);
- memcpy(ar_sha(ah), IF_LLADDR(ifp), ah->ar_hln);
+ memcpy(ar_sha(ah), enaddr, ah->ar_hln);
} else {
struct llinfo_arp *la;
@@ -873,7 +895,7 @@ reply:
return;
}
memcpy(ar_tha(ah), ar_sha(ah), ah->ar_hln);
- memcpy(ar_sha(ah), IF_LLADDR(ifp), ah->ar_hln);
+ memcpy(ar_sha(ah), enaddr, ah->ar_hln);
#ifdef DEBUG_PROXY
kprintf("arp: proxying for %s\n", inet_ntoa(itaddr));
#endif
@@ -1039,6 +1061,16 @@ arp_ifinit(struct ifnet *ifp, struct ifa
ifa->ifa_flags |= RTF_CLONING;
}
+void
+arp_ifinit2(struct ifnet *ifp, struct ifaddr *ifa, u_char *enaddr)
+{
+	struct in_addr *ip = &IA_SIN(ifa)->sin_addr;
+	/* The same address is passed as both address arguments of arprequest(). */
+	if (ip->s_addr != INADDR_ANY)
+		arprequest(ifp, ip, ip, enaddr);
+	ifa->ifa_flags |= RTF_CLONING, ifa->ifa_rtrequest = arp_rtrequest;
+}
+
static void
arp_init(void)
{
diff --git a/sys/netinet/if_ether.h b/sys/netinet/if_ether.h
index 6a2a65a..97dce14 100644
--- a/sys/netinet/if_ether.h
+++ b/sys/netinet/if_ether.h
@@ -124,6 +124,7 @@ extern u_char ether_ipmulticast_max[ETHE
int arpresolve (struct ifnet *, struct rtentry *, struct mbuf *,
struct sockaddr *, u_char *);
void arp_ifinit (struct ifnet *, struct ifaddr *);
+void arp_ifinit2 (struct ifnet *, struct ifaddr *, u_char *);
#endif
#endif
diff --git a/sys/netinet/in.h b/sys/netinet/in.h
index c867488..5cdedfb 100644
--- a/sys/netinet/in.h
+++ b/sys/netinet/in.h
@@ -169,6 +169,7 @@
#define IPPROTO_IPCOMP 108 /* payload compression (IPComp) */
/* 101-254: Partly Unassigned */
#define IPPROTO_PIM 103 /* Protocol Independent Mcast */
+#define IPPROTO_CARP 112 /* CARP */
#define IPPROTO_PGM 113 /* PGM */
#define IPPROTO_SCTP 132 /* SCTP */
#define IPPROTO_PFSYNC 240 /* PFSYNC */
@@ -297,6 +298,7 @@ struct in_addr {
#define INADDR_UNSPEC_GROUP (u_int32_t)0xe0000000 /* 224.0.0.0 */
#define INADDR_ALLHOSTS_GROUP (u_int32_t)0xe0000001 /* 224.0.0.1 */
#define INADDR_ALLRTRS_GROUP (u_int32_t)0xe0000002 /* 224.0.0.2 */
+#define INADDR_CARP_GROUP (u_int32_t)0xe0000012 /* 224.0.0.18 */
#define INADDR_PFSYNC_GROUP (u_int32_t)0xe00000f0 /* 224.0.0.240 */
#define INADDR_MAX_LOCAL_GROUP (u_int32_t)0xe00000ff /* 224.0.0.255 */
diff --git a/sys/netinet/in_proto.c b/sys/netinet/in_proto.c
index ab5bfca..5babd61 100644
--- a/sys/netinet/in_proto.c
+++ b/sys/netinet/in_proto.c
@@ -41,6 +41,7 @@
#include "opt_ipsec.h"
#include "opt_inet6.h"
#include "opt_sctp.h"
+#include "opt_carp.h"
#include <sys/param.h>
#include <sys/kernel.h>
@@ -105,6 +106,10 @@
#include <net/netisr.h> /* for cpu0_soport */
+#ifdef CARP
+#include <netinet/ip_carp.h>
+#endif
+
extern struct domain inetdomain;
static struct pr_usrreqs nousrreqs;
@@ -294,6 +299,16 @@ struct protosw inetsw[] = {
rip_init, 0, 0, 0,
&rip_usrreqs
},
+
+#ifdef CARP
+ { SOCK_RAW, &inetdomain, IPPROTO_CARP, PR_ATOMIC|PR_ADDR,
+ carp_input, rip_output, 0, rip_ctloutput,
+ 0,
+ 0, 0, 0, 0,
+ &rip_usrreqs
+},
+
+#endif
};
struct domain inetdomain = {
@@ -332,3 +347,6 @@ SYSCTL_NODE(_net_inet, IPPROTO_DIVERT, d
#ifdef PIM
SYSCTL_NODE(_net_inet, IPPROTO_PIM, pim, CTLFLAG_RW, 0, "PIM");
#endif
+#ifdef CARP
+SYSCTL_NODE(_net_inet, IPPROTO_CARP, carp, CTLFLAG_RW, 0, "CARP");
+#endif
diff --git a/sys/netinet/ip_carp.c b/sys/netinet/ip_carp.c
new file mode 100644
index 0000000..00a40c4
--- /dev/null
+++ b/sys/netinet/ip_carp.c
@@ -0,0 +1,2215 @@
+/* $Id$ */
+/* from $FreeBSD: src/sys/netinet/ip_carp.c,v 1.48 2007/02/02 09:39:09 glebius Exp $ */
+
+/*
+ * Copyright (c) 2002 Michael Shalayeff. All rights reserved.
+ * Copyright (c) 2003 Ryan McBride. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "opt_carp.h"
+/*#include "opt_bpf.h"*/
+#include "opt_inet.h"
+#include "opt_inet6.h"
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/conf.h>
+#include <sys/kernel.h>
+#include <machine/limits.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/module.h>
+#include <sys/time.h>
+#include <sys/proc.h>
+#include <sys/sysctl.h>
+#include <sys/syslog.h>
+#include <sys/signalvar.h>
+#include <sys/filio.h>
+#include <sys/sockio.h>
+#include <sys/in_cksum.h>
+#include <sys/socket.h>
+#include <sys/vnode.h>
+
+#include <machine/stdarg.h>
+
+#include <net/bpf.h>
+#include <net/ethernet.h>
+#include <net/iso88025.h>
+#include <net/if.h>
+#include <net/if_dl.h>
+#include <net/if_types.h>
+#include <net/route.h>
+
+#ifdef INET
+#include <netinet/in.h>
+#include <netinet/in_var.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#include <netinet/if_ether.h>
+#include <netinet/if_fddi.h>
+#endif
+
+#ifdef INET6
+#include <netinet/icmp6.h>
+#include <netinet/ip6.h>
+#include <netinet6/ip6_var.h>
+#include <netinet6/scope6_var.h>
+#include <netinet6/nd6.h>
+#endif
+
+#include <crypto/sha1.h>
+#include <netinet/ip_carp.h>
+#include <sys/lock.h>
+
+#define CARP_IFNAME "carp"
+static MALLOC_DEFINE(M_CARP, "CARP", "CARP interfaces");
+static MALLOC_DEFINE(M_IFNET, "IFNET", "IFNET CARP?");
+SYSCTL_DECL(_net_inet_carp);
+
+/*
+ * Per-instance state of one carp pseudo-interface.  An instance is
+ * allocated by carp_clone_create() and freed by carp_clone_destroy().
+ */
+struct carp_softc {
+ struct ifnet *sc_ifp; /* the carp interface itself */
+ struct ifnet *sc_carpdev; /* Pointer to parent interface */
+ struct in_ifaddr *sc_ia; /* primary iface address */
+ struct ip_moptions sc_imo; /* multicast options handed to ip_output() */
+#ifdef INET6
+ struct in6_ifaddr *sc_ia6; /* primary iface address v6 */
+ struct ip6_moptions sc_im6o; /* multicast options handed to ip6_output() */
+#endif /* INET6 */
+ TAILQ_ENTRY(carp_softc) sc_list; /* link on the parent's carp_if.vhif_vrs */
+
+ enum { INIT = 0, BACKUP, MASTER } sc_state;
+
+ int sc_flags_backup;
+ int sc_suppress; /* non-zero: counted in carp_suppress_preempt */
+
+ int sc_sendad_errors; /* failed advertisement sends */
+#define CARP_SENDAD_MAX_ERRORS 3
+ int sc_sendad_success; /* successful sends since the error limit was hit */
+#define CARP_SENDAD_MIN_SUCCESS 3
+
+ int sc_vhid; /* matched against carp_vhid on input; -1 until set */
+ int sc_advskew; /* scaled by 1000000 / 256 into microseconds */
+ int sc_naddrs;
+ int sc_naddrs6;
+ int sc_advbase; /* seconds */
+ int sc_init_counter; /* non-zero: sc_counter is seeded randomly on next send */
+ u_int64_t sc_counter; /* sent as two 32-bit words in carp_counter[] */
+
+ /* authentication */
+#define CARP_HMAC_PAD 64
+ unsigned char sc_key[CARP_KEY_LEN];
+ unsigned char sc_pad[CARP_HMAC_PAD]; /* HMAC pad derived from sc_key */
+ SHA1_CTX sc_sha1; /* precomputed first part of the inner hash */
+
+ struct callout sc_ad_tmo; /* advertisement timeout */
+ struct callout sc_md_tmo; /* master down timeout */
+ struct callout sc_md6_tmo; /* master down timeout (presumably IPv6 - confirm) */
+
+ LIST_ENTRY(carp_softc) sc_next; /* link on the global carpif_list */
+};
+#define SC2IFP(sc) ((sc)->sc_ifp)
+
+int carp_suppress_preempt = 0;
+int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, 1, 0, 0 }; /* XXX for now */
+SYSCTL_INT(_net_inet_carp, CARPCTL_ALLOW, allow, CTLFLAG_RW,
+ &carp_opts[CARPCTL_ALLOW], 0, "Accept incoming CARP packets");
+SYSCTL_INT(_net_inet_carp, CARPCTL_PREEMPT, preempt, CTLFLAG_RW,
+ &carp_opts[CARPCTL_PREEMPT], 0, "high-priority backup preemption mode");
+SYSCTL_INT(_net_inet_carp, CARPCTL_LOG, log, CTLFLAG_RW,
+ &carp_opts[CARPCTL_LOG], 0, "log bad carp packets");
+SYSCTL_INT(_net_inet_carp, CARPCTL_ARPBALANCE, arpbalance, CTLFLAG_RW,
+ &carp_opts[CARPCTL_ARPBALANCE], 0, "balance arp responses");
+SYSCTL_INT(_net_inet_carp, OID_AUTO, suppress_preempt, CTLFLAG_RD,
+ &carp_suppress_preempt, 0, "Preemption is suppressed");
+
+struct carpstats carpstats;
+SYSCTL_STRUCT(_net_inet_carp, CARPCTL_STATS, stats, CTLFLAG_RW,
+ &carpstats, carpstats,
+ "CARP statistics (struct carpstats, netinet/ip_carp.h)");
+
+/*
+ * Per-parent-interface state, reached through the parent's if_carp
+ * pointer.  It is freed by carpdetach() when the last instance leaves.
+ */
+struct carp_if {
+ TAILQ_HEAD(, carp_softc) vhif_vrs; /* carp instances on this parent */
+ int vhif_nvrs; /* decremented as instances are detached */
+
+ struct ifnet *vhif_ifp;
+ struct lock vhif_lock; /* taken through the CARP_LOCK macros */
+};
+
+/* Get carp_if from softc. Valid after carp_set_addr{,6}. */
+#define SC2CIF(sc) ((struct carp_if *)(sc)->sc_carpdev->if_carp)
+
+/*
+ * Locking wrappers around the per-parent lockmgr lock.  The *_ASSERT and
+ * *_DESTROY variants expand to an empty statement.
+ *
+ * Every expansion already ends in ';', so "CARP_LOCK(cif);" yields an
+ * extra empty statement; do not use these as the body of an if that is
+ * followed by an else.
+ *
+ * NOTE(review): LK_NOWAIT is passed as the lockinit() flags while the
+ * lockmgr() return values are never checked - confirm this is intended.
+ */
+#define CARP_LOCK_INIT(cif) lockinit(&(cif)->vhif_lock, "carp_if", 0, LK_NOWAIT);
+#define CARP_LOCK_DESTROY(cif) ;
+#define CARP_LOCK_ASSERT(cif) ;
+#define CARP_LOCK(cif) lockmgr(&(cif)->vhif_lock, LK_EXCLUSIVE);
+#define CARP_UNLOCK(cif) lockmgr(&(cif)->vhif_lock, LK_RELEASE);
+
+/* Same operations, reaching the lock through a softc (needs sc_carpdev). */
+#define CARP_SCLOCK(sc) lockmgr(&SC2CIF(sc)->vhif_lock, LK_EXCLUSIVE);
+#define CARP_SCUNLOCK(sc) lockmgr(&SC2CIF(sc)->vhif_lock, LK_RELEASE);
+#define CARP_SCLOCK_ASSERT(sc) ;
+
+/* Log at LOG_INFO when the carp "log" option is greater than 0. */
+#define CARP_LOG(...) do { \
+ if (carp_opts[CARPCTL_LOG] > 0) \
+ log(LOG_INFO, __VA_ARGS__); \
+} while (0)
+
+/* Log at LOG_DEBUG when the carp "log" option is greater than 1. */
+#define CARP_DEBUG(...) do { \
+ if (carp_opts[CARPCTL_LOG] > 1) \
+ log(LOG_DEBUG, __VA_ARGS__); \
+} while (0)
+
+static void carp_hmac_prepare(struct carp_softc *);
+static void carp_hmac_generate(struct carp_softc *, u_int32_t *,
+ unsigned char *);
+static int carp_hmac_verify(struct carp_softc *, u_int32_t *,
+ unsigned char *);
+static void carp_setroute(struct carp_softc *, int);
+static void carp_input_c(struct mbuf *, struct carp_header *, sa_family_t);
+static int carp_clone_create(struct if_clone *, int);
+static void carp_clone_destroy(struct ifnet *);
+static void carpdetach(struct carp_softc *, int);
+static int carp_prepare_ad(struct mbuf *, struct carp_softc *,
+ struct carp_header *);
+static void carp_send_ad_all(void);
+static void carp_send_ad(void *);
+static void carp_send_ad_locked(struct carp_softc *);
+static void carp_send_arp(struct carp_softc *);
+static void carp_master_down(void *);
+static void carp_master_down_locked(struct carp_softc *);
+static int carp_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
+static int carp_looutput(struct ifnet *, struct mbuf *, struct sockaddr *,
+ struct rtentry *);
+static void carp_start(struct ifnet *);
+static void carp_setrun(struct carp_softc *, sa_family_t);
+static void carp_set_state(struct carp_softc *, int);
+static int carp_addrcount(struct carp_if *, struct in_ifaddr *, int);
+enum { CARP_COUNT_MASTER, CARP_COUNT_RUNNING };
+
+static void carp_multicast_cleanup(struct carp_softc *);
+static int carp_set_addr(struct carp_softc *, struct sockaddr_in *);
+static int carp_del_addr(struct carp_softc *, struct sockaddr_in *);
+static void carp_carpdev_state_locked(struct carp_if *);
+static void carp_sc_state_locked(struct carp_softc *);
+#ifdef INET6
+static void carp_send_na(struct carp_softc *);
+static int carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *);
+static int carp_del_addr6(struct carp_softc *, struct sockaddr_in6 *);
+static void carp_multicast6_cleanup(struct carp_softc *);
+#endif
+
+static LIST_HEAD(, carp_softc) carpif_list;
+
+struct if_clone carp_cloner = IF_CLONE_INITIALIZER(CARP_IFNAME, carp_clone_create, carp_clone_destroy, 0, IF_MAXUNIT);
+
+static eventhandler_tag if_detach_event_tag;
+
+/*
+ * Checksum the first len bytes of the mbuf chain m using in_cksum().
+ */
+static __inline u_int16_t
+carp_cksum(struct mbuf *m, int len)
+{
+	u_int16_t sum = in_cksum(m, len);
+
+	return (sum);
+}
+
+/*
+ * Rebuild the cached HMAC-SHA1 state of a carp instance.
+ *
+ * The key is expanded into the inner pad and the inner hash is run over
+ * the pad, the protocol version, the advertisement type, the vhid and
+ * every IPv4 and IPv6 address configured on the carp interface.  The
+ * partial hash is left in sc_sha1 and sc_pad ends up holding the outer
+ * pad, which is what carp_hmac_generate() consumes.  The parent's lock
+ * is taken only when the instance has a parent interface.
+ */
+static void
+carp_hmac_prepare(struct carp_softc *sc)
+{
+	u_int8_t hdr_version = CARP_VERSION;
+	u_int8_t hdr_type = CARP_ADVERTISEMENT;
+	u_int8_t hdr_vhid = sc->sc_vhid & 0xff;
+	struct ifaddr *addr;
+	int n;
+
+	if (sc->sc_carpdev != NULL)
+		CARP_SCLOCK(sc);
+
+	/* XXX: possible race here */
+
+	/* inner pad: zero-extended key XORed with 0x36 */
+	bzero(sc->sc_pad, sizeof(sc->sc_pad));
+	bcopy(sc->sc_key, sc->sc_pad, sizeof(sc->sc_key));
+	for (n = 0; n < sizeof(sc->sc_pad); n++) {
+		sc->sc_pad[n] ^= 0x36;
+	}
+
+	/* hash everything that stays constant between advertisements */
+	SHA1Init(&sc->sc_sha1);
+	SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad));
+	SHA1Update(&sc->sc_sha1, (void *)&hdr_version, sizeof(hdr_version));
+	SHA1Update(&sc->sc_sha1, (void *)&hdr_type, sizeof(hdr_type));
+	SHA1Update(&sc->sc_sha1, (void *)&hdr_vhid, sizeof(hdr_vhid));
+#ifdef INET
+	TAILQ_FOREACH(addr, &SC2IFP(sc)->if_addrlist, ifa_list) {
+		if (addr->ifa_addr->sa_family != AF_INET)
+			continue;
+		SHA1Update(&sc->sc_sha1,
+		    (void *)&ifatoia(addr)->ia_addr.sin_addr.s_addr,
+		    sizeof(struct in_addr));
+	}
+#endif /* INET */
+#ifdef INET6
+	TAILQ_FOREACH(addr, &SC2IFP(sc)->if_addrlist, ifa_list) {
+		struct in6_addr unscoped;
+
+		if (addr->ifa_addr->sa_family != AF_INET6)
+			continue;
+		/* hash a copy with the scope cleared */
+		unscoped = ifatoia6(addr)->ia_addr.sin6_addr;
+		in6_clearscope(&unscoped);
+		SHA1Update(&sc->sc_sha1, (void *)&unscoped, sizeof(unscoped));
+	}
+#endif /* INET6 */
+
+	/* flip the inner pad into the outer pad in place */
+	for (n = 0; n < sizeof(sc->sc_pad); n++) {
+		sc->sc_pad[n] ^= 0x36 ^ 0x5c;
+	}
+
+	if (sc->sc_carpdev != NULL)
+		CARP_SCUNLOCK(sc);
+}
+
+/*
+ * Compute the HMAC-SHA1 of an advertisement counter into md.
+ *
+ * The inner hash resumes from the state cached in sc_sha1 and adds the
+ * counter; the outer hash covers sc_pad followed by the inner digest.
+ * md receives the 20-byte result.
+ */
+static void
+carp_hmac_generate(struct carp_softc *sc, u_int32_t counter[2],
+    unsigned char md[20])
+{
+	SHA1_CTX ctx;
+
+	/* inner hash: work on a copy so the cached state is not modified */
+	bcopy(&sc->sc_sha1, &ctx, sizeof(ctx));
+	SHA1Update(&ctx, (void *)counter, sizeof(sc->sc_counter));
+	SHA1Final(md, &ctx);
+
+	/* outer hash over the pad and the inner digest */
+	SHA1Init(&ctx);
+	SHA1Update(&ctx, sc->sc_pad, sizeof(sc->sc_pad));
+	SHA1Update(&ctx, md, 20);
+	SHA1Final(md, &ctx);
+}
+
+/*
+ * Recompute the HMAC for counter and compare it with the received md.
+ * Returns 0 when the digests match and non-zero otherwise.
+ */
+static int
+carp_hmac_verify(struct carp_softc *sc, u_int32_t counter[2],
+    unsigned char md[20])
+{
+	unsigned char expected[20];
+	int differs;
+
+	CARP_SCLOCK_ASSERT(sc);
+
+	carp_hmac_generate(sc, counter, expected);
+	differs = bcmp(md, expected, sizeof(expected));
+
+	return (differs);
+}
+
+/*
+ * Add or delete (cmd is RTM_ADD or RTM_DELETE) the routes for every
+ * address configured on the carp interface.
+ *
+ * IPv4 host routes are touched only when the instance has a parent, and
+ * only when the number of MASTER instances holding the same address makes
+ * this the first one (add) or leaves none (delete).  IPv6 addresses use
+ * the in6 loopback route helpers.  The walk runs in a critical section.
+ */
+static void
+carp_setroute(struct carp_softc *sc, int cmd)
+{
+	struct ifaddr *addr;
+	int masters;
+
+	if (sc->sc_carpdev)
+		CARP_SCLOCK_ASSERT(sc);
+
+	crit_enter();
+	TAILQ_FOREACH(addr, &SC2IFP(sc)->if_addrlist, ifa_list) {
+		switch (addr->ifa_addr->sa_family) {
+		case AF_INET:
+			if (sc->sc_carpdev == NULL)
+				break;
+			masters = carp_addrcount(
+			    (struct carp_if *)sc->sc_carpdev->if_carp,
+			    ifatoia(addr), CARP_COUNT_MASTER);
+			if ((cmd == RTM_ADD && masters == 1) ||
+			    (cmd == RTM_DELETE && masters == 0))
+				rtinit(addr, cmd, RTF_UP | RTF_HOST);
+			break;
+#ifdef INET6
+		case AF_INET6:
+			if (cmd == RTM_ADD)
+				in6_ifaddloop(addr);
+			else
+				in6_ifremloop(addr);
+			break;
+#endif /* INET6 */
+		default:
+			break;
+		}
+	}
+	crit_exit();
+}
+
+/*
+ * if_clone create handler: allocate a carp softc together with its
+ * ifnet, give it default settings, attach it to the network stack and
+ * bpf, and link it on the global carpif_list.
+ *
+ * Returns 0 on success, or ENOSPC if the ifnet allocation yields NULL.
+ */
+static int
+carp_clone_create(struct if_clone *ifc, int unit)
+{
+	struct carp_softc *softc;
+	struct ifnet *cifp;
+
+	MALLOC(softc, struct carp_softc *, sizeof(*softc), M_CARP,
+	    M_WAITOK|M_ZERO);
+	SC2IFP(softc) = kmalloc(sizeof(struct ifnet), M_IFNET,
+	    M_WAITOK|M_ZERO);
+	cifp = SC2IFP(softc);
+	if (cifp == NULL) {
+		FREE(softc, M_CARP);
+		return (ENOSPC);
+	}
+
+	/* protocol defaults; the vhid must be configured before use */
+	softc->sc_vhid = -1;
+	softc->sc_advbase = CARP_DFLTINTV;
+	softc->sc_advskew = 0;
+	softc->sc_init_counter = 1;
+	softc->sc_flags_backup = 0;
+	softc->sc_suppress = 0;
+	softc->sc_naddrs = 0;	/* M_ZERO? */
+	softc->sc_naddrs6 = 0;
+
+#ifdef INET6
+	softc->sc_im6o.im6o_multicast_hlim = CARP_DFLTTL;
+#endif
+
+	/*
+	 * The IPv4 multicast option setup (imo_membership,
+	 * imo_max_memberships, imo_multicast_vif) is not done here.
+	 */
+	callout_init(&softc->sc_ad_tmo);
+	callout_init(&softc->sc_md_tmo);
+	callout_init(&softc->sc_md6_tmo);
+
+	/* describe the pseudo-interface and attach it */
+	cifp->if_softc = softc;
+	if_initname(cifp, CARP_IFNAME, unit);
+	cifp->if_mtu = ETHERMTU;
+	cifp->if_flags = IFF_LOOPBACK;
+	cifp->if_ioctl = carp_ioctl;
+	cifp->if_output = carp_looutput;
+	cifp->if_start = carp_start;
+	cifp->if_type = IFT_CARP;
+	cifp->if_snd.ifq_maxlen = ifqmaxlen;
+	cifp->if_hdrlen = 0;
+	if_attach(cifp, NULL);
+	bpfattach(cifp, DLT_NULL, sizeof(u_int));
+
+	crit_enter();
+	LIST_INSERT_HEAD(&carpif_list, softc, sc_next);
+	crit_exit();
+
+	return (0);
+}
+
+/*
+ * if_clone destroy handler: detach the instance from its parent (if
+ * any), unlink it from carpif_list, detach it from bpf and the network
+ * stack, and release its memory.
+ *
+ * The ifnet is a separate allocation made with M_IFNET in
+ * carp_clone_create(), so it has to be freed here as well as the softc;
+ * otherwise every destroyed carp interface leaks one struct ifnet.
+ */
+static void
+carp_clone_destroy(struct ifnet *ifp)
+{
+	struct carp_softc *sc = ifp->if_softc;
+
+	if (sc->sc_carpdev)
+		CARP_SCLOCK(sc);
+	carpdetach(sc, 1);	/* Returns unlocked. */
+
+	crit_enter();
+	LIST_REMOVE(sc, sc_next);
+	crit_exit();
+	bpfdetach(ifp);
+	if_detach(ifp);
+
+	/* free the ifnet only after it has been detached from the stack */
+	sc->sc_ifp = NULL;
+	kfree(ifp, M_IFNET);
+	kfree(sc, M_CARP);
+}
+
+/*
+ * This function can be called on CARP interface destroy path,
+ * and in case of the removal of the underlying interface as
+ * well. We differentiate these two cases. In the latter case
+ * we do not cleanup our multicast memberships, since they
+ * are already freed. Also, in the latter case we do not
+ * release the lock on return, because the function will be
+ * called once more, for another CARP instance on the same
+ * interface.
+ *
+ * sc is the instance to detach.  unlock is 1 on the destroy path
+ * (carp_clone_destroy) and 0 when called from carp_ifdetach.
+ *
+ * When the instance removed is the last one on its parent, the parent
+ * leaves promiscuous mode, its if_carp pointer is cleared and the
+ * carp_if is freed; the caller must not touch that carp_if afterwards.
+ *
+ * NOTE(review): carp_multicast6_cleanup() runs regardless of unlock,
+ * unlike the IPv4 cleanup - confirm this matches the intent above.
+ */
+static void
+carpdetach(struct carp_softc *sc, int unlock)
+{
+ struct carp_if *cif;
+
+ callout_stop(&sc->sc_ad_tmo);
+ callout_stop(&sc->sc_md_tmo);
+ callout_stop(&sc->sc_md6_tmo);
+
+ /* give back whatever this instance added to the global suppress count */
+ if (sc->sc_suppress)
+ carp_suppress_preempt--;
+ sc->sc_suppress = 0;
+
+ if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS)
+ carp_suppress_preempt--;
+ sc->sc_sendad_errors = 0;
+
+ carp_set_state(sc, INIT);
+ SC2IFP(sc)->if_flags &= ~IFF_UP;
+ carp_setrun(sc, 0);
+ if (unlock)
+ carp_multicast_cleanup(sc);
+#ifdef INET6
+ carp_multicast6_cleanup(sc);
+#endif
+
+ if (sc->sc_carpdev != NULL) {
+ cif = (struct carp_if *)sc->sc_carpdev->if_carp;
+ CARP_LOCK_ASSERT(cif);
+ TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
+ if (!--cif->vhif_nvrs) {
+ /* last instance on this parent: tear the carp_if down */
+ ifpromisc(sc->sc_carpdev, 0);
+ sc->sc_carpdev->if_carp = NULL;
+ CARP_LOCK_DESTROY(cif);
+ FREE(cif, M_IFADDR);
+ } else if (unlock)
+ CARP_UNLOCK(cif);
+ sc->sc_carpdev = NULL;
+ }
+}
+
+/*
+ * Interface-detach event handler: detach every carp instance that uses
+ * ifp as its parent.  Does nothing when ifp carries no carp state.
+ *
+ * carpdetach() frees the carp_if, and the lock inside it, when it
+ * removes the last instance, clearing the parent's if_carp as it does.
+ * The lock may therefore only be released afterwards if the carp_if is
+ * still attached; unlocking unconditionally would touch freed memory.
+ */
+static void
+carp_ifdetach(void *arg __unused, struct ifnet *ifp)
+{
+	struct carp_if *cif = (struct carp_if *)ifp->if_carp;
+	struct carp_softc *sc, *nextsc;
+
+	if (cif == NULL)
+		return;
+
+	CARP_LOCK(cif);
+	for (sc = TAILQ_FIRST(&cif->vhif_vrs); sc; sc = nextsc) {
+		/* fetch the successor first: carpdetach() unlinks sc */
+		nextsc = TAILQ_NEXT(sc, sc_list);
+		carpdetach(sc, 0);
+	}
+
+	/* cif is gone once if_carp has been cleared */
+	if (ifp->if_carp != NULL)
+		CARP_UNLOCK(cif);
+}
+
+/*
+ * process input packet.
+ * we have rearranged checks order compared to the rfc,
+ * but it seems more efficient this way or not possible otherwise.
+ *
+ * IPv4 entry point for IPPROTO_CARP.  m holds the packet starting at the
+ * IP header; hlen is not used.  The packet is dropped (and counted in
+ * carpstats) when carp input is disabled, the receiving interface has no
+ * carp state, the TTL is not CARP_DFLTTL, the packet is too short or the
+ * CARP checksum fails.  Accepted packets go to carp_input_c().
+ */
+void
+carp_input(struct mbuf *m, int hlen)
+{
+	struct ip *ip = mtod(m, struct ip *);
+	struct carp_header *ch;
+	int iplen, len;
+
+	carpstats.carps_ipackets++;
+
+	if (!carp_opts[CARPCTL_ALLOW]) {
+		m_freem(m);
+		return;
+	}
+
+	/* check if received on a valid carp interface */
+	if (m->m_pkthdr.rcvif->if_carp == NULL) {
+		carpstats.carps_badif++;
+		CARP_LOG("carp_input: packet received on non-carp "
+		    "interface: %s\n",
+		    m->m_pkthdr.rcvif->if_xname);
+		m_freem(m);
+		return;
+	}
+
+	/* verify that the IP TTL is 255. */
+	if (ip->ip_ttl != CARP_DFLTTL) {
+		carpstats.carps_badttl++;
+		CARP_LOG("carp_input: received ttl %d != 255 on %s\n",
+		    ip->ip_ttl,
+		    m->m_pkthdr.rcvif->if_xname);
+		m_freem(m);
+		return;
+	}
+
+	iplen = ip->ip_hl << 2;
+
+	if (m->m_pkthdr.len < iplen + sizeof(*ch)) {
+		carpstats.carps_badlen++;
+		CARP_LOG("carp_input: received len %zd < "
+		    "sizeof(struct carp_header)\n",
+		    m->m_len - sizeof(struct ip));
+		m_freem(m);
+		return;
+	}
+
+	/*
+	 * Pull the headers into the first mbuf when they are not already
+	 * contiguous there, i.e. when the first mbuf is shorter than the
+	 * IP header plus the CARP header.
+	 */
+	if (m->m_len < iplen + sizeof(*ch)) {
+		if ((m = m_pullup(m, iplen + sizeof(*ch))) == NULL) {
+			carpstats.carps_hdrops++;
+			CARP_LOG("carp_input: pullup failed\n");
+			return;
+		}
+		ip = mtod(m, struct ip *);
+	}
+	ch = (struct carp_header *)((char *)ip + iplen);
+
+	/*
+	 * verify that the received packet length is
+	 * equal to the CARP header
+	 */
+	len = iplen + sizeof(*ch);
+	if (len > m->m_pkthdr.len) {
+		carpstats.carps_badlen++;
+		CARP_LOG("carp_input: packet too short %d on %s\n",
+		    m->m_pkthdr.len,
+		    m->m_pkthdr.rcvif->if_xname);
+		m_freem(m);
+		return;
+	}
+
+	if ((m = m_pullup(m, len)) == NULL) {
+		carpstats.carps_hdrops++;
+		return;
+	}
+	ip = mtod(m, struct ip *);
+	ch = (struct carp_header *)((char *)ip + iplen);
+
+	/* verify the CARP checksum; m_data is advanced past the IP header */
+	m->m_data += iplen;
+	if (carp_cksum(m, len - iplen)) {
+		carpstats.carps_badsum++;
+		CARP_LOG("carp_input: checksum failed on %s\n",
+		    m->m_pkthdr.rcvif->if_xname);
+		m_freem(m);
+		return;
+	}
+	m->m_data -= iplen;
+
+	carp_input_c(m, ch, AF_INET);
+}
+
+#ifdef INET6
+/*
+ * IPv6 entry point for CARP packets.  *mp is the packet and *offp the
+ * offset of the CARP header within it; proto is not used.
+ *
+ * The packet is dropped (and counted in carpstats) when carp input is
+ * disabled, the receiving interface has no carp state, the hop limit is
+ * not CARP_DFLTTL, the CARP header cannot be obtained or the checksum
+ * fails.  Accepted packets go to carp_input_c().  Always returns
+ * IPPROTO_DONE.
+ */
+int
+carp6_input(struct mbuf **mp, int *offp, int proto)
+{
+ struct mbuf *m = *mp;
+ struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
+ struct carp_header *ch;
+ u_int len;
+
+ carpstats.carps_ipackets6++;
+
+ if (!carp_opts[CARPCTL_ALLOW]) {
+ m_freem(m);
+ return (IPPROTO_DONE);
+ }
+
+ /* check if received on a valid carp interface */
+ if (m->m_pkthdr.rcvif->if_carp == NULL) {
+ carpstats.carps_badif++;
+ CARP_LOG("carp6_input: packet received on non-carp "
+ "interface: %s\n",
+ m->m_pkthdr.rcvif->if_xname);
+ m_freem(m);
+ return (IPPROTO_DONE);
+ }
+
+ /* verify that the IP TTL is 255 */
+ if (ip6->ip6_hlim != CARP_DFLTTL) {
+ carpstats.carps_badttl++;
+ CARP_LOG("carp6_input: received ttl %d != 255 on %s\n",
+ ip6->ip6_hlim,
+ m->m_pkthdr.rcvif->if_xname);
+ m_freem(m);
+ return (IPPROTO_DONE);
+ }
+
+ /* verify that we have a complete carp packet */
+ len = m->m_len; /* saved only for the log message below */
+ IP6_EXTHDR_GET(ch, struct carp_header *, m, *offp, sizeof(*ch));
+ if (ch == NULL) {
+ /*
+ * m is not freed on this path; presumably IP6_EXTHDR_GET
+ * already disposed of it - TODO confirm.
+ */
+ carpstats.carps_badlen++;
+ CARP_LOG("carp6_input: packet size %u too small\n", len);
+ return (IPPROTO_DONE);
+ }
+
+
+ /* verify the CARP checksum; m_data is advanced to the CARP header */
+ m->m_data += *offp;
+ if (carp_cksum(m, sizeof(*ch))) {
+ carpstats.carps_badsum++;
+ CARP_LOG("carp6_input: checksum failed, on %s\n",
+ m->m_pkthdr.rcvif->if_xname);
+ m_freem(m);
+ return (IPPROTO_DONE);
+ }
+ m->m_data -= *offp;
+
+ carp_input_c(m, ch, AF_INET6);
+ return (IPPROTO_DONE);
+}
+#endif /* INET6 */
+
+/*
+ * Common part of CARP input for both address families.
+ *
+ * m is the received packet, ch points at its CARP header and af is the
+ * address family it arrived on (AF_INET or AF_INET6).  The instance is
+ * looked up by vhid on the receiving interface, the version and HMAC are
+ * verified, and the advertised interval is compared with our own to
+ * drive the MASTER/BACKUP state machine.  The parent's carp lock is held
+ * from the lookup until just before return.  m is always freed.
+ */
+static void
+carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af)
+{
+ struct ifnet *ifp = m->m_pkthdr.rcvif;
+ struct carp_softc *sc;
+ u_int64_t tmp_counter;
+ struct timeval sc_tv, ch_tv;
+
+ /* verify that the VHID is valid on the receiving interface */
+ CARP_LOCK(ifp->if_carp);
+ TAILQ_FOREACH(sc, &((struct carp_if *)ifp->if_carp)->vhif_vrs, sc_list)
+ if (sc->sc_vhid == ch->carp_vhid)
+ break;
+
+ /* no such vhid, or the matching instance is not up and running */
+ if (!sc || !((SC2IFP(sc)->if_flags & IFF_UP) && (SC2IFP(sc)->if_flags & IFF_RUNNING))) {
+ carpstats.carps_badvhid++;
+ CARP_UNLOCK(ifp->if_carp);
+ m_freem(m);
+ return;
+ }
+
+ getmicrotime(&SC2IFP(sc)->if_lastchange);
+ SC2IFP(sc)->if_ipackets++;
+ SC2IFP(sc)->if_ibytes += m->m_pkthdr.len;
+
+ if (SC2IFP(sc)->if_bpf) {
+ /*
+ * NOTE(review): the mbuf is treated as starting with an IPv4
+ * header here even when af is AF_INET6 - confirm.
+ */
+ struct ip *ip = mtod(m, struct ip *);
+
+ /* BPF wants net byte order */
+ ip->ip_len = htons(ip->ip_len + (ip->ip_hl << 2));
+ ip->ip_off = htons(ip->ip_off);
+ bpf_mtap(SC2IFP(sc)->if_bpf, m);
+ }
+
+ /* verify the CARP version. */
+ if (ch->carp_version != CARP_VERSION) {
+ carpstats.carps_badver++;
+ SC2IFP(sc)->if_ierrors++;
+ CARP_UNLOCK(ifp->if_carp);
+ CARP_LOG("%s; invalid version %d\n",
+ SC2IFP(sc)->if_xname,
+ ch->carp_version);
+ m_freem(m);
+ return;
+ }
+
+ /* verify the hash */
+ if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) {
+ carpstats.carps_badauth++;
+ SC2IFP(sc)->if_ierrors++;
+ CARP_UNLOCK(ifp->if_carp);
+ CARP_LOG("%s: incorrect hash\n", SC2IFP(sc)->if_xname);
+ m_freem(m);
+ return;
+ }
+
+ /* reassemble the 64-bit counter from its two network-order words */
+ tmp_counter = ntohl(ch->carp_counter[0]);
+ tmp_counter = tmp_counter<<32;
+ tmp_counter += ntohl(ch->carp_counter[1]);
+
+ /* XXX Replay protection goes here */
+
+ sc->sc_init_counter = 0;
+ sc->sc_counter = tmp_counter;
+
+ /*
+ * Express our interval (sc_tv) and the sender's (ch_tv) as timevals:
+ * advbase is seconds, advskew is in 1/256ths of a second.  While
+ * preemption is suppressed our skew is treated as at least 240.
+ */
+ sc_tv.tv_sec = sc->sc_advbase;
+ if (carp_suppress_preempt && sc->sc_advskew < 240)
+ sc_tv.tv_usec = 240 * 1000000 / 256;
+ else
+ sc_tv.tv_usec = sc->sc_advskew * 1000000 / 256;
+ ch_tv.tv_sec = ch->carp_advbase;
+ ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256;
+
+ switch (sc->sc_state) {
+ case INIT:
+ break;
+ case MASTER:
+ /*
+ * If we receive an advertisement from a master who's going to
+ * be more frequent than us, go into BACKUP state.
+ * An equal interval also makes us step down.
+ */
+ if (timevalcmp(&sc_tv, &ch_tv, >) ||
+ timevalcmp(&sc_tv, &ch_tv, ==)) {
+ callout_stop(&sc->sc_ad_tmo);
+ CARP_DEBUG("%s: MASTER -> BACKUP "
+ "(more frequent advertisement received)\n",
+ SC2IFP(sc)->if_xname);
+ carp_set_state(sc, BACKUP);
+ carp_setrun(sc, 0);
+ carp_setroute(sc, RTM_DELETE);
+ }
+ break;
+ case BACKUP:
+ /*
+ * If we're pre-empting masters who advertise slower than us,
+ * and this one claims to be slower, treat him as down.
+ */
+ if (carp_opts[CARPCTL_PREEMPT] &&
+ timevalcmp(&sc_tv, &ch_tv, <)) {
+ CARP_DEBUG("%s: BACKUP -> MASTER "
+ "(preempting a slower master)\n",
+ SC2IFP(sc)->if_xname);
+ carp_master_down_locked(sc);
+ break;
+ }
+
+ /*
+ * If the master is going to advertise at such a low frequency
+ * that he's guaranteed to time out, we'd might as well just
+ * treat him as timed out now.
+ */
+ sc_tv.tv_sec = sc->sc_advbase * 3;
+ if (timevalcmp(&sc_tv, &ch_tv, <)) {
+ CARP_DEBUG("%s: BACKUP -> MASTER "
+ "(master timed out)\n",
+ SC2IFP(sc)->if_xname);
+ carp_master_down_locked(sc);
+ break;
+ }
+
+ /*
+ * Otherwise, we reset the counter and wait for the next
+ * advertisement.
+ */
+ carp_setrun(sc, af);
+ break;
+ }
+
+ CARP_UNLOCK(ifp->if_carp);
+
+ m_freem(m);
+ return;
+}
+
+/*
+ * Fill in the counter and HMAC of an outgoing advertisement header and
+ * tag the mbuf with the carp interface it is sent for.
+ *
+ * The counter is seeded from two random words while sc_init_counter is
+ * set, and incremented otherwise.  Returns 0 on success.  If the tag
+ * cannot be allocated, m is freed, the output error counter is bumped
+ * and ENOMEM is returned.
+ */
+static int
+carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch)
+{
+	struct ifnet *carpifp = SC2IFP(sc);
+	struct m_tag *tag;
+
+	if (!sc->sc_init_counter) {
+		sc->sc_counter++;
+	} else {
+		/* this could also be seconds since unix epoch */
+		u_int64_t high = karc4random();
+		u_int64_t low = karc4random();
+
+		sc->sc_counter = (high << 32) + low;
+	}
+
+	/* the counter is sent most-significant word first */
+	ch->carp_counter[0] = htonl((sc->sc_counter >> 32) & 0xffffffff);
+	ch->carp_counter[1] = htonl(sc->sc_counter & 0xffffffff);
+
+	carp_hmac_generate(sc, ch->carp_counter, ch->carp_md);
+
+	/* Tag packet for carp_output */
+	tag = m_tag_get(PACKET_TAG_CARP, sizeof(struct ifnet *), M_NOWAIT);
+	if (tag == NULL) {
+		m_freem(m);
+		SC2IFP(sc)->if_oerrors++;
+		return (ENOMEM);
+	}
+	/* the tag payload is the ifnet pointer itself */
+	bcopy(&carpifp, (caddr_t)(tag + 1), sizeof(struct ifnet *));
+	m_tag_prepend(m, tag);
+
+	return (0);
+}
+
+/*
+ * Send an advertisement for every carp instance that has a parent
+ * interface, is up and running, and is currently MASTER.  Each instance
+ * is handled under its own parent's lock.
+ */
+static void
+carp_send_ad_all(void)
+{
+	const int need = IFF_UP | IFF_RUNNING;
+	struct carp_softc *cur;
+
+	LIST_FOREACH(cur, &carpif_list, sc_next) {
+		if (cur->sc_carpdev != NULL) {
+			CARP_SCLOCK(cur);
+			if ((SC2IFP(cur)->if_flags & need) == need &&
+			    cur->sc_state == MASTER) {
+				carp_send_ad_locked(cur);
+			}
+			CARP_SCUNLOCK(cur);
+		}
+	}
+}
+
+/*
+ * Callout entry point: v is the carp_softc whose advertisement timer
+ * fired.  Takes the parent's lock around carp_send_ad_locked().
+ */
+static void
+carp_send_ad(void *v)
+{
+	struct carp_softc *softc = (struct carp_softc *)v;
+
+	CARP_SCLOCK(softc);
+	carp_send_ad_locked(softc);
+	CARP_SCUNLOCK(softc);
+}
+
+/*
+ * Build and transmit one CARP advertisement for sc, over IPv4 when
+ * sc_ia is set and over IPv6 when sc_ia6 is set, then re-arm the
+ * advertisement callout.
+ *
+ * The caller holds the parent's carp lock (see carp_send_ad); it is
+ * dropped only around carp_send_ad_all() when send failures first turn
+ * on preemption suppression.
+ *
+ * If the interface is no longer up and running, the advertisement
+ * carries advbase = advskew = 255 and the callout is not re-armed.
+ * tv is initialised only in the other case, which is the only case in
+ * which it is read.
+ */
+static void
+carp_send_ad_locked(struct carp_softc *sc)
+{
+	struct carp_header ch;
+	struct timeval tv;
+	struct carp_header *ch_ptr;
+	struct mbuf *m;
+	int len, advbase, advskew;
+
+	/* bow out if we've lost our UPness or RUNNINGuiness */
+	if (!((SC2IFP(sc)->if_flags & IFF_UP) &&
+	    (SC2IFP(sc)->if_flags & IFF_RUNNING))) {
+		advbase = 255;
+		advskew = 255;
+	} else {
+		advbase = sc->sc_advbase;
+		if (!carp_suppress_preempt || sc->sc_advskew > 240)
+			advskew = sc->sc_advskew;
+		else
+			advskew = 240;
+		tv.tv_sec = advbase;
+		tv.tv_usec = advskew * 1000000 / 256;
+	}
+
+	/* header template shared by the IPv4 and IPv6 packets */
+	ch.carp_version = CARP_VERSION;
+	ch.carp_type = CARP_ADVERTISEMENT;
+	ch.carp_vhid = sc->sc_vhid;
+	ch.carp_advbase = advbase;
+	ch.carp_advskew = advskew;
+	ch.carp_authlen = 7;	/* XXX DEFINE */
+	ch.carp_pad1 = 0;	/* must be zero */
+	ch.carp_cksum = 0;
+
+#ifdef INET
+	if (sc->sc_ia) {
+		struct ip *ip;
+
+		MGETHDR(m, M_NOWAIT, MT_HEADER);
+		if (m == NULL) {
+			SC2IFP(sc)->if_oerrors++;
+			carpstats.carps_onomem++;
+			/* XXX maybe less ? */
+			if (advbase != 255 || advskew != 255)
+				callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv),
+				    carp_send_ad, sc);
+			return;
+		}
+		len = sizeof(*ip) + sizeof(ch);
+		m->m_pkthdr.len = len;
+		m->m_pkthdr.rcvif = NULL;
+		m->m_len = len;
+		MH_ALIGN(m, m->m_len);
+		m->m_flags |= M_MCAST;
+		ip = mtod(m, struct ip *);
+		ip->ip_v = IPVERSION;
+		ip->ip_hl = sizeof(*ip) >> 2;
+		ip->ip_tos = IPTOS_LOWDELAY;
+		ip->ip_len = len;
+		ip->ip_id = ip_newid();
+		ip->ip_off = IP_DF;
+		ip->ip_ttl = CARP_DFLTTL;
+		ip->ip_p = IPPROTO_CARP;
+		ip->ip_sum = 0;
+		ip->ip_src.s_addr = sc->sc_ia->ia_addr.sin_addr.s_addr;
+		ip->ip_dst.s_addr = htonl(INADDR_CARP_GROUP);
+
+		ch_ptr = (struct carp_header *)(&ip[1]);
+		bcopy(&ch, ch_ptr, sizeof(ch));
+		/* carp_prepare_ad() frees m on failure */
+		if (carp_prepare_ad(m, sc, ch_ptr))
+			return;
+
+		/* checksum the CARP header only */
+		m->m_data += sizeof(*ip);
+		ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip));
+		m->m_data -= sizeof(*ip);
+
+		getmicrotime(&SC2IFP(sc)->if_lastchange);
+		SC2IFP(sc)->if_opackets++;
+		SC2IFP(sc)->if_obytes += len;
+		carpstats.carps_opackets++;
+
+		if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL)) {
+			SC2IFP(sc)->if_oerrors++;
+			if (sc->sc_sendad_errors < INT_MAX)
+				sc->sc_sendad_errors++;
+			if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
+				carp_suppress_preempt++;
+				if (carp_suppress_preempt == 1) {
+					CARP_SCUNLOCK(sc);
+					carp_send_ad_all();
+					CARP_SCLOCK(sc);
+				}
+			}
+			sc->sc_sendad_success = 0;
+		} else {
+			if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
+				if (++sc->sc_sendad_success >=
+				    CARP_SENDAD_MIN_SUCCESS) {
+					carp_suppress_preempt--;
+					sc->sc_sendad_errors = 0;
+				}
+			} else
+				sc->sc_sendad_errors = 0;
+		}
+	}
+#endif /* INET */
+#ifdef INET6
+	if (sc->sc_ia6) {
+		struct ip6_hdr *ip6;
+
+		MGETHDR(m, M_NOWAIT, MT_HEADER);
+		if (m == NULL) {
+			SC2IFP(sc)->if_oerrors++;
+			carpstats.carps_onomem++;
+			/* XXX maybe less ? */
+			if (advbase != 255 || advskew != 255)
+				callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv),
+				    carp_send_ad, sc);
+			return;
+		}
+		len = sizeof(*ip6) + sizeof(ch);
+		m->m_pkthdr.len = len;
+		m->m_pkthdr.rcvif = NULL;
+		m->m_len = len;
+		MH_ALIGN(m, m->m_len);
+		m->m_flags |= M_MCAST;
+		ip6 = mtod(m, struct ip6_hdr *);
+		bzero(ip6, sizeof(*ip6));
+		ip6->ip6_vfc |= IPV6_VERSION;
+		ip6->ip6_hlim = CARP_DFLTTL;
+		ip6->ip6_nxt = IPPROTO_CARP;
+		bcopy(&sc->sc_ia6->ia_addr.sin6_addr, &ip6->ip6_src,
+		    sizeof(struct in6_addr));
+		/* set the multicast destination */
+
+		ip6->ip6_dst.s6_addr16[0] = htons(0xff02);
+		ip6->ip6_dst.s6_addr8[15] = 0x12;
+		if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) {
+			SC2IFP(sc)->if_oerrors++;
+			m_freem(m);
+			CARP_LOG("%s: in6_setscope failed\n", __func__);
+			return;
+		}
+
+		ch_ptr = (struct carp_header *)(&ip6[1]);
+		bcopy(&ch, ch_ptr, sizeof(ch));
+		/* carp_prepare_ad() frees m on failure */
+		if (carp_prepare_ad(m, sc, ch_ptr))
+			return;
+
+		/* checksum the CARP header only */
+		m->m_data += sizeof(*ip6);
+		ch_ptr->carp_cksum = carp_cksum(m, len - sizeof(*ip6));
+		m->m_data -= sizeof(*ip6);
+
+		getmicrotime(&SC2IFP(sc)->if_lastchange);
+		SC2IFP(sc)->if_opackets++;
+		SC2IFP(sc)->if_obytes += len;
+		carpstats.carps_opackets6++;
+
+		if (ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL, NULL)) {
+			SC2IFP(sc)->if_oerrors++;
+			if (sc->sc_sendad_errors < INT_MAX)
+				sc->sc_sendad_errors++;
+			if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
+				carp_suppress_preempt++;
+				if (carp_suppress_preempt == 1) {
+					CARP_SCUNLOCK(sc);
+					carp_send_ad_all();
+					CARP_SCLOCK(sc);
+				}
+			}
+			sc->sc_sendad_success = 0;
+		} else {
+			if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
+				if (++sc->sc_sendad_success >=
+				    CARP_SENDAD_MIN_SUCCESS) {
+					carp_suppress_preempt--;
+					sc->sc_sendad_errors = 0;
+				}
+			} else
+				sc->sc_sendad_errors = 0;
+		}
+	}
+#endif /* INET6 */
+
+	/* schedule the next advertisement unless we are bowing out */
+	if (advbase != 255 || advskew != 255)
+		callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv),
+		    carp_send_ad, sc);
+
+}
+
+/*
+ * Announce every IPv4 address of the virtual router on the parent
+ * interface by calling arp_ifinit2() with the carp interface's link-level
+ * address.  Each call is made under the parent's serializer and is
+ * followed by a 1000-unit DELAY().
+ */
+static void
+carp_send_arp(struct carp_softc *sc)
+{
+	struct ifaddr *addr;
+
+	TAILQ_FOREACH(addr, &SC2IFP(sc)->if_addrlist, ifa_list) {
+		if (addr->ifa_addr->sa_family == AF_INET) {
+			lwkt_serialize_enter(sc->sc_carpdev->if_serializer);
+			arp_ifinit2(sc->sc_carpdev, addr,
+			    IF_LLADDR(sc->sc_ifp));
+			lwkt_serialize_exit(sc->sc_carpdev->if_serializer);
+
+			DELAY(1000);	/* XXX */
+		}
+	}
+}
+
+#ifdef INET6
+/*
+ * Send a neighbor advertisement with the override flag to the link-local
+ * all-nodes group for every IPv6 address of the virtual router, out of
+ * the parent interface.  Each one is followed by a 1000-unit DELAY().
+ */
+static void
+carp_send_na(struct carp_softc *sc)
+{
+	static struct in6_addr allnodes = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
+	struct ifaddr *addr;
+	struct in6_addr *target;
+
+	TAILQ_FOREACH(addr, &SC2IFP(sc)->if_addrlist, ifa_list) {
+		if (addr->ifa_addr->sa_family == AF_INET6) {
+			target = &ifatoia6(addr)->ia_addr.sin6_addr;
+			nd6_na_output(sc->sc_carpdev, &allnodes, target,
+			    ND_NA_FLAG_OVERRIDE, 1, NULL);
+			DELAY(1000);	/* XXX */
+		}
+	}
+}
+#endif /* INET6 */
+
+/*
+ * Count how many times the IPv4 address of ia is configured on the carp
+ * instances of cif that qualify for the given type:
+ * CARP_COUNT_RUNNING selects instances that are up and running,
+ * CARP_COUNT_MASTER selects instances in MASTER state.  Any other type
+ * selects nothing.
+ */
+static int
+carp_addrcount(struct carp_if *cif, struct in_ifaddr *ia, int type)
+{
+	struct carp_softc *inst;
+	struct ifaddr *addr;
+	int matches = 0;
+	int eligible;
+
+	CARP_LOCK_ASSERT(cif);
+
+	TAILQ_FOREACH(inst, &cif->vhif_vrs, sc_list) {
+		if (type == CARP_COUNT_RUNNING) {
+			eligible = (SC2IFP(inst)->if_flags & IFF_UP) &&
+			    (SC2IFP(inst)->if_flags & IFF_RUNNING);
+		} else if (type == CARP_COUNT_MASTER) {
+			eligible = (inst->sc_state == MASTER);
+		} else {
+			eligible = 0;
+		}
+		if (!eligible)
+			continue;
+
+		TAILQ_FOREACH(addr, &SC2IFP(inst)->if_addrlist, ifa_list) {
+			if (addr->ifa_addr->sa_family != AF_INET)
+				continue;
+			if (ia->ia_addr.sin_addr.s_addr ==
+			    ifatoia(addr)->ia_addr.sin_addr.s_addr)
+				matches++;
+		}
+	}
+	return (matches);
+}
+
+/*
+ * Decide whether this host should answer for the carp address ia.
+ *
+ * v is the parent's carp_if, isaddr the source address of the request.
+ * Returns 1 and stores the link-level address of the answering carp
+ * interface in *enaddr when a MASTER instance is selected; returns 0
+ * otherwise.  The carp_if lock is taken on entry and released on every
+ * return path.
+ *
+ * Without arpbalance, the first up-and-running MASTER instance on the
+ * parent is used; ia is not consulted in that branch.
+ */
+int
+carp_iamatch(void *v, struct in_ifaddr *ia,
+ struct in_addr *isaddr, u_int8_t **enaddr)
+{
+ struct carp_if *cif = v;
+ struct carp_softc *vh;
+ int index, count = 0;
+ struct ifaddr *ifa;
+
+ CARP_LOCK(cif);
+
+ if (carp_opts[CARPCTL_ARPBALANCE]) {
+ /*
+ * XXX proof of concept implementation.
+ * We use the source ip to decide which virtual host should
+ * handle the request. If we're master of that virtual host,
+ * then we respond, otherwise, just drop the arp packet on
+ * the floor.
+ */
+ count = carp_addrcount(cif, ia, CARP_COUNT_RUNNING);
+ if (count == 0) {
+ /* should never reach this */
+ CARP_UNLOCK(cif);
+ return (0);
+ }
+
+ /* this should be a hash, like pf_hash() */
+ index = ntohl(isaddr->s_addr) % count;
+ count = 0;
+
+ /*
+ * Walk the running instances holding this address in list order
+ * and stop at the index-th one; only that one may answer.
+ */
+ TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
+ if ((SC2IFP(vh)->if_flags & IFF_UP) && (SC2IFP(vh)->if_flags & IFF_RUNNING)) {
+ TAILQ_FOREACH(ifa, &SC2IFP(vh)->if_addrlist,
+ ifa_list) {
+ if (ifa->ifa_addr->sa_family ==
+ AF_INET &&
+ ia->ia_addr.sin_addr.s_addr ==
+ ifatoia(ifa)->ia_addr.sin_addr.s_addr) {
+ if (count == index) {
+ if (vh->sc_state ==
+ MASTER) {
+ *enaddr = IF_LLADDR(vh->sc_ifp);
+ CARP_UNLOCK(cif);
+ return (1);
+ } else {
+ CARP_UNLOCK(cif);
+ return (0);
+ }
+ }
+ count++;
+ }
+ }
+ }
+ }
+ } else {
+ TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
+ if ((SC2IFP(vh)->if_flags & IFF_UP) && (SC2IFP(vh)->if_flags & IFF_RUNNING) &&
+ vh->sc_state == MASTER) {
+ *enaddr = IF_LLADDR(vh->sc_ifp);
+ CARP_UNLOCK(cif);
+ return (1);
+ }
+ }
+ }
+ CARP_UNLOCK(cif);
+ return(0);
+}
+
+#ifdef INET6
+/*
+ * Find the carp-owned ifaddr that matches the IPv6 target taddr.
+ *
+ * v is the parent's carp_if.  Only instances that are up, running and
+ * MASTER are considered.  Returns the matching ifaddr, or NULL when no
+ * such instance holds the address.
+ */
+struct ifaddr *
+carp_iamatch6(void *v, struct in6_addr *taddr)
+{
+	const int need = IFF_UP | IFF_RUNNING;
+	struct carp_if *cif = v;
+	struct carp_softc *inst;
+	struct ifaddr *addr;
+
+	CARP_LOCK(cif);
+	TAILQ_FOREACH(inst, &cif->vhif_vrs, sc_list) {
+		/* instances that could not answer need no address scan */
+		if ((SC2IFP(inst)->if_flags & need) != need ||
+		    inst->sc_state != MASTER)
+			continue;
+
+		TAILQ_FOREACH(addr, &SC2IFP(inst)->if_addrlist, ifa_list) {
+			if (IN6_ARE_ADDR_EQUAL(taddr,
+			    &ifatoia6(addr)->ia_addr.sin6_addr)) {
+				CARP_UNLOCK(cif);
+				return (addr);
+			}
+		}
+	}
+	CARP_UNLOCK(cif);
+
+	return (NULL);
+}
+
+void *
+carp_macmatch6(void *v, struct mbuf *m, const struct in6_addr *taddr)
+{
+	struct m_tag *mtag;
+	struct carp_if *cif = v;
+	struct carp_softc *sc;
+	struct ifaddr *ifa;
+	/* Return the lladdr of the UP+RUNNING vhost owning taddr (tagging m with it), or NULL. */
+	CARP_LOCK(cif);
+	TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) {
+		TAILQ_FOREACH(ifa, &SC2IFP(sc)->if_addrlist, ifa_list) {
+			if (ifa->ifa_addr->sa_family == AF_INET6 &&	/* ifatoia6() is only valid for IPv6 entries */
+			    IN6_ARE_ADDR_EQUAL(taddr, &ifatoia6(ifa)->ia_addr.sin6_addr) &&
+			    (SC2IFP(sc)->if_flags & IFF_UP) && (SC2IFP(sc)->if_flags & IFF_RUNNING)) {
+				struct ifnet *ifp = SC2IFP(sc);
+				mtag = m_tag_get(PACKET_TAG_CARP,
+				    sizeof(struct ifnet *), M_NOWAIT);
+				if (mtag == NULL) {
+					/* better a bit than nothing */
+					CARP_UNLOCK(cif);
+					return (IF_LLADDR(sc->sc_ifp));
+				}
+				bcopy(&ifp, (caddr_t)(mtag + 1),	/* tag payload is the carp ifnet pointer */
+				    sizeof(struct ifnet *));
+				m_tag_prepend(m, mtag);
+
+				CARP_UNLOCK(cif);
+				return (IF_LLADDR(sc->sc_ifp));
+			}
+		}
+	}
+	CARP_UNLOCK(cif);
+
+	return (NULL);
+}
+#endif
+
+struct ifnet *
+carp_forus(void *v, void *dhost)
+{
+	struct carp_if *cif = v;
+	struct carp_softc *vh;
+	struct ifnet *match = NULL;
+	u_int8_t *ena = dhost;
+	/*
+	 * Only destinations of the form 00:00:5e:00:01:xx are looked up;
+	 * anything else is rejected before the lock is taken.
+	 */
+	if (ena[0] != 0 || ena[1] != 0 || ena[2] != 0x5e ||
+	    ena[3] != 0 || ena[4] != 1)
+		return (NULL);
+
+	CARP_LOCK(cif);
+	TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
+		/* The vhost must be UP, RUNNING and MASTER to claim the frame. */
+		if (!(SC2IFP(vh)->if_flags & IFF_UP) ||
+		    !(SC2IFP(vh)->if_flags & IFF_RUNNING) || vh->sc_state != MASTER)
+			continue;
+		if (bcmp(dhost, IF_LLADDR(vh->sc_ifp), ETHER_ADDR_LEN) == 0) {
+			match = SC2IFP(vh);
+			break;
+		}
+	}
+	CARP_UNLOCK(cif);
+	return (match);
+}
+
+static void
+carp_master_down(void *v)
+{
+ struct carp_softc *sc = v;
+ /* Callout handler armed by carp_setrun(): run the transition under the interface serializer. */
+ lwkt_serialize_enter(sc->sc_ifp->if_serializer);
+ carp_master_down_locked(sc);
+ lwkt_serialize_exit(sc->sc_ifp->if_serializer);
+}
+
+static void
+carp_master_down_locked(struct carp_softc *sc)
+{
+ if (sc->sc_carpdev)
+ CARP_SCLOCK_ASSERT(sc);
+ /* Only a BACKUP host reacts: it promotes itself to MASTER and announces it. */
+ switch (sc->sc_state) {
+ case INIT:
+ kprintf("%s: master_down event in INIT state\n",
+ SC2IFP(sc)->if_xname);
+ break;
+ case MASTER:
+ break;
+ case BACKUP:
+ carp_set_state(sc, MASTER);
+ carp_send_ad_locked(sc);
+ carp_send_arp(sc);
+#ifdef INET6
+ carp_send_na(sc);
+#endif /* INET6 */
+ carp_setrun(sc, 0); /* state is MASTER now; carp_setrun() schedules carp_send_ad if the interface is runnable */
+ carp_setroute(sc, RTM_ADD);
+ break;
+ }
+}
+
+/*
+ * When in backup state, af indicates whether to reset the master down timer
+ * for v4 or v6. If it's set to zero, reset the ones which are already pending.
+ */
+static void
+carp_setrun(struct carp_softc *sc, sa_family_t af)
+{
+ struct timeval tv;
+
+ if (sc->sc_carpdev == NULL) { /* no parent interface: clear RUNNING and fall back to INIT */
+ SC2IFP(sc)->if_flags &= ~IFF_RUNNING;
+ carp_set_state(sc, INIT);
+ return;
+ }
+
+ if (SC2IFP(sc)->if_flags & IFF_UP &&
+ sc->sc_vhid > 0 && (sc->sc_naddrs || sc->sc_naddrs6))
+ SC2IFP(sc)->if_flags |= IFF_RUNNING;
+ else { /* not UP, no vhid, or no addresses: not runnable */
+ SC2IFP(sc)->if_flags &= ~IFF_RUNNING;
+ carp_setroute(sc, RTM_DELETE);
+ return;
+ }
+
+ switch (sc->sc_state) {
+ case INIT:
+ if (carp_opts[CARPCTL_PREEMPT] && !carp_suppress_preempt) {
+ carp_send_ad_locked(sc);
+ carp_send_arp(sc);
+#ifdef INET6
+ carp_send_na(sc);
+#endif /* INET6 */
+ CARP_DEBUG("%s: INIT -> MASTER (preempting)\n",
+ SC2IFP(sc)->if_xname);
+ carp_set_state(sc, MASTER);
+ carp_setroute(sc, RTM_ADD);
+ } else {
+ CARP_DEBUG("%s: INIT -> BACKUP\n", SC2IFP(sc)->if_xname);
+ carp_set_state(sc, BACKUP);
+ carp_setroute(sc, RTM_DELETE);
+ carp_setrun(sc, 0); /* recurse once: now in BACKUP, so the master-down timers get armed */
+ }
+ break;
+ case BACKUP:
+ callout_stop(&sc->sc_ad_tmo);
+ tv.tv_sec = 3 * sc->sc_advbase; /* timeout: 3 * advbase seconds ... */
+ tv.tv_usec = sc->sc_advskew * 1000000 / 256; /* ... plus advskew/256 of a second */
+ switch (af) {
+#ifdef INET
+ case AF_INET:
+ callout_reset(&sc->sc_md_tmo, tvtohz_high(&tv),
+ carp_master_down, sc);
+ break;
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
+ callout_reset(&sc->sc_md6_tmo, tvtohz_high(&tv),
+ carp_master_down, sc);
+ break;
+#endif /* INET6 */
+ default: /* any other af: arm the timer of each family that has addresses configured */
+ if (sc->sc_naddrs)
+ callout_reset(&sc->sc_md_tmo, tvtohz_high(&tv),
+ carp_master_down, sc);
+ if (sc->sc_naddrs6)
+ callout_reset(&sc->sc_md6_tmo, tvtohz_high(&tv),
+ carp_master_down, sc);
+ break;
+ }
+ break;
+ case MASTER:
+ tv.tv_sec = sc->sc_advbase; /* next advertisement in advbase seconds ... */
+ tv.tv_usec = sc->sc_advskew * 1000000 / 256; /* ... plus advskew/256 of a second */
+ callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv),
+ carp_send_ad, sc);
+ break;
+ }
+}
+
+static void
+carp_multicast_cleanup(struct carp_softc *sc)
+{
+	struct ip_moptions *mopts = &sc->sc_imo;
+	u_int16_t slot;
+
+	/* Release every membership still held, highest slot first. */
+	for (slot = mopts->imo_num_memberships; slot > 0; slot--) {
+		if (mopts->imo_membership[slot - 1] == NULL)
+			continue;
+		in_delmulti(mopts->imo_membership[slot - 1]);
+		mopts->imo_membership[slot - 1] = NULL;
+	}
+	mopts->imo_num_memberships = 0;
+	mopts->imo_multicast_ifp = NULL;
+}
+
+#ifdef INET6
+static void
+carp_multicast6_cleanup(struct carp_softc *sc)
+{
+	struct ip6_moptions *mopts6 = &sc->sc_im6o;
+	struct in6_multi_mship *head;
+
+	/* Pop and leave memberships until the list is empty. */
+	while ((head = LIST_FIRST(&mopts6->im6o_memberships)) != NULL) {
+		LIST_REMOVE(head, i6mm_chain);
+		in6_leavegroup(head);
+	}
+
+	mopts6->im6o_multicast_ifp = NULL;
+}
+#endif
+
+static int
+carp_set_addr(struct carp_softc *sc, struct sockaddr_in *sin)
+{
+	struct ifnet *ifp;
+	struct carp_if *cif;
+	struct in_ifaddr *ia, *ia_if;
+	struct ip_moptions *imo = &sc->sc_imo;
+	struct in_addr addr;
+	u_long iaddr = ntohl(sin->sin_addr.s_addr);	/* network-to-host; same bits as the former htonl() */
+	int own, error, joined = 0;	/* joined: the CARP group was joined by this call */
+	/* Attach sc to the multicast-capable interface whose subnet contains sin; 0 or an errno. */
+	if (sin->sin_addr.s_addr == 0)
+	{
+		if (!(SC2IFP(sc)->if_flags & IFF_UP))
+		{
+			carp_set_state(sc, INIT);
+		}
+		if (sc->sc_naddrs)
+		{
+			SC2IFP(sc)->if_flags |= IFF_UP;
+		}
+		carp_setrun(sc, 0);
+		return (0);
+	}
+	/* we have to do it by hands to check we won't match on us */
+	ia_if = NULL; own = 0;
+	TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) {
+		/* and, yeah, we need a multicast-capable iface too */
+		if (ia->ia_ifp != SC2IFP(sc) &&
+		    (ia->ia_ifp->if_flags & IFF_MULTICAST) &&
+		    (iaddr & ia->ia_subnetmask) == ia->ia_subnet) {
+			if (!ia_if)
+				ia_if = ia;
+			if (sin->sin_addr.s_addr ==
+			    ia->ia_addr.sin_addr.s_addr)
+				own++;
+		}
+	}
+
+	if (!ia_if)
+		return (EADDRNOTAVAIL);
+
+	ia = ia_if;
+	ifp = ia->ia_ifp;
+
+	if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0 ||
+	    (imo->imo_multicast_ifp && imo->imo_multicast_ifp != ifp))
+		return (EADDRNOTAVAIL);
+
+	if (imo->imo_num_memberships == 0) {
+		addr.s_addr = htonl(INADDR_CARP_GROUP);
+		if ((imo->imo_membership[0] = in_addmulti(&addr, ifp)) == NULL)
+			return (ENOBUFS);
+		imo->imo_num_memberships++;
+		joined = 1;	/* only this membership may be undone at cleanup */
+		imo->imo_multicast_ifp = ifp;
+		imo->imo_multicast_ttl = CARP_DFLTTL;
+		imo->imo_multicast_loop = 0;
+	}
+
+	if (!ifp->if_carp) {
+		MALLOC(cif, struct carp_if *, sizeof(*cif), M_CARP,
+		    M_WAITOK|M_ZERO);
+		if (!cif) {
+			error = ENOBUFS;
+			goto cleanup;
+		}
+		if ((error = ifpromisc(ifp, 1))) {
+			FREE(cif, M_CARP);
+			goto cleanup;
+		}
+
+		CARP_LOCK_INIT(cif);
+		CARP_LOCK(cif);
+		cif->vhif_ifp = ifp;
+		TAILQ_INIT(&cif->vhif_vrs);
+		ifp->if_carp = cif;
+
+	} else {
+		struct carp_softc *vr;
+
+		cif = (struct carp_if *)ifp->if_carp;
+		CARP_LOCK(cif);
+		TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list)
+			if (vr != sc && vr->sc_vhid == sc->sc_vhid) {
+				CARP_UNLOCK(cif);
+				error = EINVAL;
+				goto cleanup;
+			}
+	}
+	sc->sc_ia = ia;
+	sc->sc_carpdev = ifp;
+
+	{ /* XXX prevent endless loop if already in queue */
+		struct carp_softc *vr, *after = NULL;
+		int myself = 0;
+		cif = (struct carp_if *)ifp->if_carp;
+
+		/* XXX: cif should not change, right? So we still hold the lock */
+		CARP_LOCK_ASSERT(cif);
+
+		TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
+			if (vr == sc)
+				myself = 1;
+			if (vr->sc_vhid < sc->sc_vhid)
+				after = vr;
+		}
+
+		if (!myself) {
+			/* We're trying to keep things in order */
+			if (after == NULL) {
+				TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list);
+			} else {
+				TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list);
+			}
+			cif->vhif_nvrs++;
+		}
+	}
+
+	sc->sc_naddrs++;
+	SC2IFP(sc)->if_flags |= IFF_UP;
+	if (own)
+		sc->sc_advskew = 0;
+
+	carp_sc_state_locked(sc);
+	carp_setrun(sc, 0);
+
+	CARP_UNLOCK(cif);
+
+	return (0);
+
+cleanup:
+	if (joined) {	/* never drop a membership that an earlier address still relies on */
+		in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
+		imo->imo_multicast_ifp = NULL;
+	}
+	return (error);
+}
+
+static int
+carp_del_addr(struct carp_softc *sc, struct sockaddr_in *sin)
+{
+	int error = 0;
+	/* Drop one IPv4 address; when it was the last, detach sc from its parent's carp_if. */
+	if (!--sc->sc_naddrs) {
+		struct carp_if *cif = (struct carp_if *)sc->sc_carpdev->if_carp;
+		struct ip_moptions *imo = &sc->sc_imo;
+
+		CARP_LOCK(cif);
+		callout_stop(&sc->sc_ad_tmo);
+		SC2IFP(sc)->if_flags &= ~IFF_UP;
+		SC2IFP(sc)->if_flags &= ~IFF_RUNNING;
+		sc->sc_vhid = -1;
+		in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
+		imo->imo_multicast_ifp = NULL;
+		TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
+		if (!--cif->vhif_nvrs) {	/* last vhost on this parent: release the carp_if */
+			sc->sc_carpdev->if_carp = NULL;
+			CARP_LOCK_DESTROY(cif);
+			FREE(cif, M_CARP);	/* must match the M_CARP type used by MALLOC() in carp_set_addr() */
+		} else {
+			CARP_UNLOCK(cif);
+		}
+	}
+
+	return (error);
+}
+
+#ifdef INET6
+static int
+carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6)
+{
+	struct ifnet *ifp;
+	struct carp_if *cif;
+	struct in6_ifaddr *ia, *ia_if;
+	struct ip6_moptions *im6o = &sc->sc_im6o;
+	struct in6_multi_mship *imm;
+	struct in6_addr in6;
+	int own, error;
+	/* Attach sc to the multicast-capable interface whose prefix contains sin6; 0 or an errno. */
+	if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
+		if (!(SC2IFP(sc)->if_flags & IFF_UP))
+			carp_set_state(sc, INIT);
+		if (sc->sc_naddrs6)
+			SC2IFP(sc)->if_flags |= IFF_UP;
+		carp_setrun(sc, 0);
+		return (0);
+	}
+
+	/* we have to do it by hands to check we won't match on us */
+	ia_if = NULL; own = 0;
+	for (ia = in6_ifaddr; ia; ia = ia->ia_next) {
+		int i;
+
+		for (i = 0; i < 4; i++) {
+			if ((sin6->sin6_addr.s6_addr32[i] &
+			    ia->ia_prefixmask.sin6_addr.s6_addr32[i]) !=
+			    (ia->ia_addr.sin6_addr.s6_addr32[i] &
+			    ia->ia_prefixmask.sin6_addr.s6_addr32[i]))
+				break;
+		}
+		/* and, yeah, we need a multicast-capable iface too */
+		if (ia->ia_ifp != SC2IFP(sc) &&
+		    (ia->ia_ifp->if_flags & IFF_MULTICAST) &&
+		    (i == 4)) {
+			if (!ia_if)
+				ia_if = ia;
+			if (IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
+			    &ia->ia_addr.sin6_addr))
+				own++;
+		}
+	}
+
+	if (!ia_if)
+		return (EADDRNOTAVAIL);
+	ia = ia_if;
+	ifp = ia->ia_ifp;
+
+	if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0 ||
+	    (im6o->im6o_multicast_ifp && im6o->im6o_multicast_ifp != ifp))
+		return (EADDRNOTAVAIL);
+
+	if (!sc->sc_naddrs6) {
+		im6o->im6o_multicast_ifp = ifp;
+
+		/* join CARP multicast address */
+		bzero(&in6, sizeof(in6));
+		in6.s6_addr16[0] = htons(0xff02);
+		in6.s6_addr8[15] = 0x12;
+		if ((error = in6_setscope(&in6, ifp, NULL)) != 0)	/* capture the errno; error is otherwise unset here */
+			goto cleanup;
+		if ((imm = in6_joingroup(ifp, &in6, &error)) == NULL)
+			goto cleanup;
+		LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain);
+
+		/* join solicited multicast address */
+		bzero(&in6, sizeof(in6));
+		in6.s6_addr16[0] = htons(0xff02);
+		in6.s6_addr32[1] = 0;
+		in6.s6_addr32[2] = htonl(1);
+		in6.s6_addr32[3] = sin6->sin6_addr.s6_addr32[3];
+		in6.s6_addr8[12] = 0xff;
+		if ((error = in6_setscope(&in6, ifp, NULL)) != 0)	/* capture the errno; error is otherwise unset here */
+			goto cleanup;
+		if ((imm = in6_joingroup(ifp, &in6, &error)) == NULL)
+			goto cleanup;
+		LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain);
+	}
+
+	if (!ifp->if_carp) {
+		MALLOC(cif, struct carp_if *, sizeof(*cif), M_CARP,
+		    M_WAITOK|M_ZERO);
+		if (!cif) {
+			error = ENOBUFS;
+			goto cleanup;
+		}
+		if ((error = ifpromisc(ifp, 1))) {
+			FREE(cif, M_CARP);
+			goto cleanup;
+		}
+
+		CARP_LOCK_INIT(cif);
+		CARP_LOCK(cif);
+		cif->vhif_ifp = ifp;
+		TAILQ_INIT(&cif->vhif_vrs);
+		ifp->if_carp = cif;
+
+	} else {
+		struct carp_softc *vr;
+
+		cif = (struct carp_if *)ifp->if_carp;
+		CARP_LOCK(cif);
+		TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list)
+			if (vr != sc && vr->sc_vhid == sc->sc_vhid) {
+				CARP_UNLOCK(cif);
+				error = EINVAL;
+				goto cleanup;
+			}
+	}
+	sc->sc_ia6 = ia;
+	sc->sc_carpdev = ifp;
+
+	{ /* XXX prevent endless loop if already in queue */
+		struct carp_softc *vr, *after = NULL;
+		int myself = 0;
+		cif = (struct carp_if *)ifp->if_carp;
+		CARP_LOCK_ASSERT(cif);
+
+		TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
+			if (vr == sc)
+				myself = 1;
+			if (vr->sc_vhid < sc->sc_vhid)
+				after = vr;
+		}
+
+		if (!myself) {
+			/* We're trying to keep things in order */
+			if (after == NULL) {
+				TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list);
+			} else {
+				TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list);
+			}
+			cif->vhif_nvrs++;
+		}
+	}
+
+	sc->sc_naddrs6++;
+	SC2IFP(sc)->if_flags |= IFF_UP;
+	if (own)
+		sc->sc_advskew = 0;
+	carp_sc_state_locked(sc);
+	carp_setrun(sc, 0);
+
+	CARP_UNLOCK(cif);
+
+	return (0);
+
+cleanup:
+	/* clean up multicast memberships */
+	if (!sc->sc_naddrs6) {
+		while (!LIST_EMPTY(&im6o->im6o_memberships)) {
+			imm = LIST_FIRST(&im6o->im6o_memberships);
+			LIST_REMOVE(imm, i6mm_chain);
+			in6_leavegroup(imm);
+		}
+	}
+	return (error);
+}
+
+static int
+carp_del_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6)
+{
+	int error = 0;
+	/* Drop one IPv6 address; when it was the last, detach sc from its parent's carp_if. */
+	if (!--sc->sc_naddrs6) {
+		struct carp_if *cif = (struct carp_if *)sc->sc_carpdev->if_carp;
+		struct ip6_moptions *im6o = &sc->sc_im6o;
+
+		CARP_LOCK(cif);
+		callout_stop(&sc->sc_ad_tmo);
+		SC2IFP(sc)->if_flags &= ~IFF_UP;
+		SC2IFP(sc)->if_flags &= ~IFF_RUNNING;
+		sc->sc_vhid = -1;
+		while (!LIST_EMPTY(&im6o->im6o_memberships)) {
+			struct in6_multi_mship *imm =
+			    LIST_FIRST(&im6o->im6o_memberships);
+
+			LIST_REMOVE(imm, i6mm_chain);
+			in6_leavegroup(imm);
+		}
+		im6o->im6o_multicast_ifp = NULL;
+		TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
+		if (!--cif->vhif_nvrs) {	/* last vhost on this parent: release the carp_if */
+			CARP_LOCK_DESTROY(cif);
+			sc->sc_carpdev->if_carp = NULL;
+			FREE(cif, M_CARP);	/* must match the M_CARP type used by MALLOC() in carp_set_addr6() */
+		} else
+			CARP_UNLOCK(cif);
+	}
+
+	return (error);
+}
+#endif /* INET6 */
+
+static int
+carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr, struct ucred *creds)
+{
+	struct carp_softc *sc = ifp->if_softc, *vr;
+	struct carpreq carpr;
+	struct ifaddr *ifa;
+	struct ifreq *ifr;
+	struct ifaliasreq *ifra;
+	int locked = 0, error = 0;
+	/* ioctl entry point: address add/delete, flag changes and SIOCSVH/SIOCGVH; returns 0 or an errno. */
+	ifa = (struct ifaddr *)addr;
+	ifra = (struct ifaliasreq *)addr;
+	ifr = (struct ifreq *)addr;
+	switch (cmd) {
+	case SIOCSIFADDR:
+		switch (ifa->ifa_addr->sa_family) {
+#ifdef INET
+		case AF_INET:
+			SC2IFP(sc)->if_flags |= IFF_UP;
+			bcopy(ifa->ifa_addr, ifa->ifa_dstaddr,
+			    sizeof(struct sockaddr));
+			error = carp_set_addr(sc, satosin(ifa->ifa_addr));
+			break;
+#endif /* INET */
+#ifdef INET6
+		case AF_INET6:
+			SC2IFP(sc)->if_flags |= IFF_UP;
+			error = carp_set_addr6(sc, satosin6(ifa->ifa_addr));
+			break;
+#endif /* INET6 */
+		default:
+			error = EAFNOSUPPORT;
+			break;
+		}
+		break;
+
+	case SIOCAIFADDR:
+		switch (ifa->ifa_addr->sa_family) {
+#ifdef INET
+		case AF_INET:
+			SC2IFP(sc)->if_flags |= IFF_UP;
+			bcopy(ifa->ifa_addr, ifa->ifa_dstaddr,
+			    sizeof(struct sockaddr));
+			error = carp_set_addr(sc, satosin(&ifra->ifra_addr));
+			break;
+#endif /* INET */
+#ifdef INET6
+		case AF_INET6:
+			SC2IFP(sc)->if_flags |= IFF_UP;
+			error = carp_set_addr6(sc, satosin6(&ifra->ifra_addr));
+			break;
+#endif /* INET6 */
+		default:
+			error = EAFNOSUPPORT;
+			break;
+		}
+		break;
+
+	case SIOCDIFADDR:
+		switch (ifa->ifa_addr->sa_family) {
+#ifdef INET
+		case AF_INET:
+			error = carp_del_addr(sc, satosin(&ifra->ifra_addr));
+			break;
+#endif /* INET */
+#ifdef INET6
+		case AF_INET6:
+			error = carp_del_addr6(sc, satosin6(&ifra->ifra_addr));
+			break;
+#endif /* INET6 */
+		default:	/* label was missing, leaving the EAFNOSUPPORT assignment unreachable */
+			error = EAFNOSUPPORT;
+			break;
+		}
+		break;
+
+	case SIOCSIFFLAGS:
+		if (sc->sc_carpdev) {
+			locked = 1;
+			CARP_SCLOCK(sc);
+		}
+		if (sc->sc_state != INIT && !(ifr->ifr_flags & IFF_UP)) {
+			callout_stop(&sc->sc_ad_tmo);
+			callout_stop(&sc->sc_md_tmo);
+			callout_stop(&sc->sc_md6_tmo);
+			if (sc->sc_state == MASTER)
+				carp_send_ad_locked(sc);
+			carp_set_state(sc, INIT);
+			carp_setrun(sc, 0);
+		} else if (sc->sc_state == INIT && (ifr->ifr_flags & IFF_UP)) {
+			SC2IFP(sc)->if_flags |= IFF_UP;
+			carp_setrun(sc, 0);
+		}
+		break;
+
+	case SIOCSVH:
+		error = suser(curthread);
+		if (error)
+			break;
+		if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr)))
+			break;
+		error = 1;	/* decremented per accepted parameter group; still > 0 at the end means EINVAL */
+		if (sc->sc_carpdev) {
+			locked = 1;
+			CARP_SCLOCK(sc);
+		}
+		if (sc->sc_state != INIT && carpr.carpr_state != sc->sc_state) {
+			switch (carpr.carpr_state) {
+			case BACKUP:
+				callout_stop(&sc->sc_ad_tmo);
+				carp_set_state(sc, BACKUP);
+				carp_setrun(sc, 0);
+				carp_setroute(sc, RTM_DELETE);
+				break;
+			case MASTER:
+				carp_master_down_locked(sc);
+				break;
+			default:
+				break;
+			}
+		}
+		if (carpr.carpr_vhid > 0) {
+			if (carpr.carpr_vhid > 255) {
+				error = EINVAL;
+				break;
+			}
+			if (sc->sc_carpdev) {
+				struct carp_if *cif;
+				cif = (struct carp_if *)sc->sc_carpdev->if_carp;
+				TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list)
+					if (vr != sc && vr->sc_vhid == carpr.carpr_vhid)
+						break;
+				if (vr != NULL) {	/* vhid already used by another vhost on this parent */
+					error = EEXIST;	/* leave via the common exit so the lock is released */
+					break;
+				}
+			}
+			sc->sc_vhid = carpr.carpr_vhid;
+			IF_LLADDR(sc->sc_ifp)[0] = 0;
+			IF_LLADDR(sc->sc_ifp)[1] = 0;
+			IF_LLADDR(sc->sc_ifp)[2] = 0x5e;
+			IF_LLADDR(sc->sc_ifp)[3] = 0;
+			IF_LLADDR(sc->sc_ifp)[4] = 1;
+			IF_LLADDR(sc->sc_ifp)[5] = sc->sc_vhid;
+			error--;
+		}
+		if (carpr.carpr_advbase > 0 || carpr.carpr_advskew > 0) {
+			if (carpr.carpr_advskew >= 255) {
+				error = EINVAL;
+				break;
+			}
+			if (carpr.carpr_advbase > 255) {
+				error = EINVAL;
+				break;
+			}
+			sc->sc_advbase = carpr.carpr_advbase;
+			sc->sc_advskew = carpr.carpr_advskew;
+			error--;
+		}
+		bcopy(carpr.carpr_key, sc->sc_key, sizeof(sc->sc_key));
+		if (error > 0)
+			error = EINVAL;
+		else {
+			error = 0;
+			carp_setrun(sc, 0);
+		}
+		break;
+
+	case SIOCGVH:
+		/* XXX: lockless read */
+		bzero(&carpr, sizeof(carpr));
+		carpr.carpr_state = sc->sc_state;
+		carpr.carpr_vhid = sc->sc_vhid;
+		carpr.carpr_advbase = sc->sc_advbase;
+		carpr.carpr_advskew = sc->sc_advskew;
+		error = suser(curthread);
+		if (error == 0)	/* the key is copied out only when suser() succeeds */
+			bcopy(sc->sc_key, carpr.carpr_key,
+			    sizeof(carpr.carpr_key));
+		error = copyout(&carpr, ifr->ifr_data, sizeof(carpr));
+		break;
+
+	default:
+		error = EINVAL;
+	}
+
+	if (locked)
+		CARP_SCUNLOCK(sc);
+	carp_hmac_prepare(sc);
+	return (error);
+}
+
+/*
+ * XXX: this is looutput. We should eventually use it from there.
+ */
+static int
+carp_looutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
+ struct rtentry *rt)
+{
+ u_int32_t af;
+
+ M_ASSERTPKTHDR(m); /* check if we have the packet header */
+
+ if (rt && rt->rt_flags & (RTF_REJECT|RTF_BLACKHOLE)) { /* drop; a blackhole route reports success */
+ m_freem(m);
+ return (rt->rt_flags & RTF_BLACKHOLE ? 0 :
+ rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH);
+ }
+
+ ifp->if_opackets++;
+ ifp->if_obytes += m->m_pkthdr.len;
+
+ /* BPF writes need to be handled specially. */
+ if (dst->sa_family == AF_UNSPEC) {
+ bcopy(dst->sa_data, &af, sizeof(af)); /* the family is read from the first bytes of sa_data */
+ dst->sa_family = af;
+ }
+
+#if 1 /* XXX */
+ switch (dst->sa_family) {
+ case AF_INET:
+ case AF_INET6:
+ case AF_IPX:
+ case AF_APPLETALK:
+ break;
+ default: /* any other family: free the mbuf and fail */
+ m_freem(m);
+ return (EAFNOSUPPORT);
+ }
+#endif
+ return(if_simloop(ifp, m, dst->sa_family, 0));
+}
+
+/*
+ * Start output on carp interface. This function should never be called.
+ */
+static void
+carp_start(struct ifnet *ifp)
+{
+#ifdef DEBUG /* the only effect is a diagnostic, and only when built with DEBUG */
+ kprintf("%s: start called\n", ifp->if_xname);
+#endif
+}
+
+int
+carp_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *sa,
+ struct rtentry *rt)
+{
+ struct m_tag *mtag;
+ struct carp_softc *sc;
+ struct ifnet *carp_ifp;
+ /* Rewrite the frame's source MAC to the virtual one when m carries a PACKET_TAG_CARP tag. */
+ if (!sa)
+ return (0);
+
+ switch (sa->sa_family) {
+#ifdef INET
+ case AF_INET:
+ break;
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
+ break;
+#endif /* INET6 */
+ default: /* other families are left untouched */
+ return (0);
+ }
+
+ mtag = m_tag_find(m, PACKET_TAG_CARP, NULL);
+ if (mtag == NULL) /* untagged packet: nothing to rewrite */
+ return (0);
+
+ bcopy(mtag + 1, &carp_ifp, sizeof(struct ifnet *)); /* tag payload is an ifnet pointer (see carp_macmatch6) */
+ sc = carp_ifp->if_softc;
+
+ /* Set the source MAC address to Virtual Router MAC Address */
+ switch (ifp->if_type) {
+ case IFT_ETHER:
+ case IFT_L2VLAN: {
+ struct ether_header *eh;
+
+ eh = mtod(m, struct ether_header *);
+ eh->ether_shost[0] = 0;
+ eh->ether_shost[1] = 0;
+ eh->ether_shost[2] = 0x5e;
+ eh->ether_shost[3] = 0;
+ eh->ether_shost[4] = 1;
+ eh->ether_shost[5] = sc->sc_vhid;
+ }
+ break;
+ case IFT_FDDI: {
+ struct fddi_header *fh;
+
+ fh = mtod(m, struct fddi_header *);
+ fh->fddi_shost[0] = 0;
+ fh->fddi_shost[1] = 0;
+ fh->fddi_shost[2] = 0x5e;
+ fh->fddi_shost[3] = 0;
+ fh->fddi_shost[4] = 1;
+ fh->fddi_shost[5] = sc->sc_vhid;
+ }
+ break;
+ case IFT_ISO88025: {
+ struct iso88025_header *th;
+ th = mtod(m, struct iso88025_header *);
+ th->iso88025_shost[0] = 3;
+ th->iso88025_shost[1] = 0;
+ th->iso88025_shost[2] = 0x40 >> (sc->sc_vhid - 1); /* NOTE(review): shift count is negative for vhid <= 0 and too large for high vhids -- confirm the valid range */
+ th->iso88025_shost[3] = 0x40000 >> (sc->sc_vhid - 1); /* NOTE(review): result is stored into one byte of shost -- confirm truncation is intended */
+ th->iso88025_shost[4] = 0;
+ th->iso88025_shost[5] = 0;
+ }
+ break;
+ default:
+ kprintf("%s: carp is not supported for this interface type\n",
+ ifp->if_xname);
+ return (EOPNOTSUPP);
+ }
+
+ return (0);
+
+}
+
+static void
+carp_set_state(struct carp_softc *sc, int state)
+{
+ /* Record the new state, mirror it into if_link_state and emit a routing-socket message. */
+ if (sc->sc_carpdev)
+ CARP_SCLOCK_ASSERT(sc);
+
+ if (sc->sc_state == state) /* no change: no message is sent */
+ return;
+
+ sc->sc_state = state;
+ switch (state) {
+ case BACKUP:
+ SC2IFP(sc)->if_link_state = LINK_STATE_DOWN;
+ break;
+ case MASTER:
+ SC2IFP(sc)->if_link_state = LINK_STATE_UP;
+ break;
+ default: /* INIT or any other value */
+ SC2IFP(sc)->if_link_state = LINK_STATE_UNKNOWN;
+ break;
+ }
+ rt_ifmsg(SC2IFP(sc));
+}
+
+void
+carp_carpdev_state(void *v)
+{
+ struct carp_if *cif = v;
+ /* Locked wrapper: re-evaluate every vhost attached to this carp_if. */
+ CARP_LOCK(cif);
+ carp_carpdev_state_locked(cif);
+ CARP_UNLOCK(cif);
+}
+
+static void
+carp_carpdev_state_locked(struct carp_if *cif)
+{
+ struct carp_softc *sc;
+ /* Apply carp_sc_state_locked() to each vhost on the list. */
+ TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list)
+ carp_sc_state_locked(sc);
+}
+
+static void
+carp_sc_state_locked(struct carp_softc *sc)
+{
+ CARP_SCLOCK_ASSERT(sc);
+ /* Follow the parent's IFF_UP flag: stop when it is down, restart from INIT when it is up. */
+ if ( !(sc->sc_carpdev->if_flags & IFF_UP)) {
+ sc->sc_flags_backup = SC2IFP(sc)->if_flags; /* saved so the else branch can OR the flags back in */
+ SC2IFP(sc)->if_flags &= ~IFF_UP;
+ SC2IFP(sc)->if_flags &= ~IFF_RUNNING;
+ callout_stop(&sc->sc_ad_tmo);
+ callout_stop(&sc->sc_md_tmo);
+ callout_stop(&sc->sc_md6_tmo);
+ carp_set_state(sc, INIT);
+ carp_setrun(sc, 0);
+ if (!sc->sc_suppress) { /* count this vhost once in the global suppression counter */
+ carp_suppress_preempt++;
+ if (carp_suppress_preempt == 1) { /* first suppressor: advertise on all vhosts */
+ CARP_SCUNLOCK(sc); /* the lock is dropped around carp_send_ad_all() and retaken */
+ carp_send_ad_all();
+ CARP_SCLOCK(sc);
+ }
+ }
+ sc->sc_suppress = 1;
+ } else {
+ SC2IFP(sc)->if_flags |= sc->sc_flags_backup;
+ carp_set_state(sc, INIT);
+ carp_setrun(sc, 0);
+ if (sc->sc_suppress)
+ carp_suppress_preempt--;
+ sc->sc_suppress = 0;
+ }
+
+ return;
+}
+
+static int
+carp_modevent(module_t mod, int type, void *data)
+{
+ switch (type) { /* module event handler: returns 0, ENOMEM or EINVAL */
+ case MOD_LOAD:
+ if_detach_event_tag = EVENTHANDLER_REGISTER(ifnet_departure_event,
+ carp_ifdetach, NULL, EVENTHANDLER_PRI_ANY);
+ if (if_detach_event_tag == NULL)
+ return (ENOMEM);
+
+ LIST_INIT(&carpif_list);
+ if_clone_attach(&carp_cloner);
+ break;
+
+ case MOD_UNLOAD: /* undo both registrations made at load time */
+ EVENTHANDLER_DEREGISTER(ifnet_departure_event, if_detach_event_tag);
+ if_clone_detach(&carp_cloner);
+ break;
+
+ default: /* every other event type is rejected */
+ return (EINVAL);
+ }
+
+ return (0);
+}
+
+static moduledata_t carp_mod = {
+ "carp",
+ carp_modevent,
+ 0
+};
+
+DECLARE_MODULE(carp, carp_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
diff --git a/sys/netinet/ip_carp.h b/sys/netinet/ip_carp.h
new file mode 100644
index 0000000..5d3b6d1
--- /dev/null
+++ b/sys/netinet/ip_carp.h
@@ -0,0 +1,167 @@
+/* $Id$ */
+/* from $FreeBSD: src/sys/netinet/ip_carp.h,v 1.3 2006/12/01 18:37:41 imp Exp $ */
+/* from $OpenBSD: ip_carp.h,v 1.8 2004/07/29 22:12:15 mcbride Exp $ */
+
+/*
+ * Copyright (c) 2002 Michael Shalayeff. All rights reserved.
+ * Copyright (c) 2003 Ryan McBride. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _IP_CARP_H
+#define _IP_CARP_H
+
+/*
+ * The CARP header layout is as follows:
+ *
+ * 0 1 2 3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |Version| Type | VirtualHostID | AdvSkew | Auth Len |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Reserved | AdvBase | Checksum |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Counter (1) |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Counter (2) |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | SHA-1 HMAC (1) |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | SHA-1 HMAC (2) |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | SHA-1 HMAC (3) |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | SHA-1 HMAC (4) |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | SHA-1 HMAC (5) |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ */
+
+struct carp_header {
+#if BYTE_ORDER == LITTLE_ENDIAN
+ u_int8_t carp_type:4,
+ carp_version:4;
+#endif
+#if BYTE_ORDER == BIG_ENDIAN
+ u_int8_t carp_version:4,
+ carp_type:4;
+#endif
+ u_int8_t carp_vhid; /* virtual host id */
+ u_int8_t carp_advskew; /* advertisement skew */
+ u_int8_t carp_authlen; /* size of counter+md, 32bit chunks */
+ u_int8_t carp_pad1; /* reserved */
+ u_int8_t carp_advbase; /* advertisement interval */
+ u_int16_t carp_cksum;
+ u_int32_t carp_counter[2];
+ unsigned char carp_md[20]; /* SHA1 HMAC */
+} __packed;
+
+#ifdef CTASSERT
+CTASSERT(sizeof(struct carp_header) == 36);
+#endif
+
+#define CARP_DFLTTL 255
+
+/* carp_version */
+#define CARP_VERSION 2
+
+/* carp_type */
+#define CARP_ADVERTISEMENT 0x01
+
+#define CARP_KEY_LEN 20 /* a sha1 hash of a passphrase */
+
+/* carp_advbase */
+#define CARP_DFLTINTV 1
+
+/*
+ * Statistics.
+ */
+struct carpstats {
+ uint64_t carps_ipackets; /* total input packets, IPv4 */
+ uint64_t carps_ipackets6; /* total input packets, IPv6 */
+ uint64_t carps_badif; /* wrong interface */
+ uint64_t carps_badttl; /* TTL is not CARP_DFLTTL */
+ uint64_t carps_hdrops; /* packets shorter than hdr */
+ uint64_t carps_badsum; /* bad checksum */
+ uint64_t carps_badver; /* bad (incl unsupp) version */
+ uint64_t carps_badlen; /* data length does not match */
+ uint64_t carps_badauth; /* bad authentication */
+ uint64_t carps_badvhid; /* bad VHID */
+ uint64_t carps_badaddrs; /* bad address list */
+
+ uint64_t carps_opackets; /* total output packets, IPv4 */
+ uint64_t carps_opackets6; /* total output packets, IPv6 */
+ uint64_t carps_onomem; /* no memory for an mbuf */
+ uint64_t carps_ostates; /* total state updates sent */
+
+ uint64_t carps_preempt; /* if enabled, preemptions */
+};
+
+/*
+ * Configuration structure for SIOCSVH SIOCGVH
+ */
+struct carpreq {
+ int carpr_state; /* SIOCSVH acts on BACKUP and MASTER requests only */
+#define CARP_STATES "INIT", "BACKUP", "MASTER"
+#define CARP_MAXSTATE 2
+ int carpr_vhid; /* SIOCSVH: applied when > 0, EINVAL above 255 */
+ int carpr_advskew; /* SIOCSVH: EINVAL when >= 255 */
+ int carpr_advbase; /* SIOCSVH: EINVAL when > 255 */
+ unsigned char carpr_key[CARP_KEY_LEN]; /* SIOCGVH fills this only when suser() succeeds */
+};
+#define SIOCSVH _IOWR('i', 245, struct ifreq)
+#define SIOCGVH _IOWR('i', 246, struct ifreq)
+
+/*
+ * Names for CARP sysctl objects
+ */
+#define CARPCTL_ALLOW 1 /* accept incoming CARP packets */
+#define CARPCTL_PREEMPT 2 /* high-pri backup preemption mode */
+#define CARPCTL_LOG 3 /* log bad packets */
+#define CARPCTL_STATS 4 /* statistics (read-only) */
+#define CARPCTL_ARPBALANCE 5 /* balance arp responses */
+#define CARPCTL_MAXID 6
+
+#define CARPCTL_NAMES { \
+ { 0, 0 }, \
+ { "allow", CTLTYPE_INT }, \
+ { "preempt", CTLTYPE_INT }, \
+ { "log", CTLTYPE_INT }, \
+ { "stats", CTLTYPE_STRUCT }, \
+ { "arpbalance", CTLTYPE_INT }, \
+}
+
+#ifdef _KERNEL
+void carp_carpdev_state(void *);
+void carp_input (struct mbuf *, int);
+int carp6_input (struct mbuf **, int *, int);
+int carp_output (struct ifnet *, struct mbuf *, struct sockaddr *,
+ struct rtentry *);
+int carp_iamatch (void *, struct in_ifaddr *, struct in_addr *,
+ u_int8_t **);
+struct ifaddr *carp_iamatch6(void *, struct in6_addr *);
+void *carp_macmatch6(void *, struct mbuf *, const struct in6_addr *);
+struct ifnet *carp_forus (void *, void *);
+#endif
+#endif /* _IP_CARP_H */
diff --git a/sys/netinet/ip_var.h b/sys/netinet/ip_var.h
index 6c2809f..31da470 100644
--- a/sys/netinet/ip_var.h
+++ b/sys/netinet/ip_var.h
@@ -115,6 +115,7 @@ struct ip_moptions {
u_char imo_multicast_ttl; /* TTL for outgoing multicasts */
u_char imo_multicast_loop; /* 1 => hear sends if a member */
u_short imo_num_memberships; /* no. memberships this socket */
+ u_short imo_max_memberships; /* max memberships this socket */
struct in_multi *imo_membership[IP_MAX_MEMBERSHIPS];
u_long imo_multicast_vif; /* vif num outgoing multicasts */
};
diff --git a/sys/netinet6/in6.c b/sys/netinet6/in6.c
index ae76439..f84087b 100644
--- a/sys/netinet6/in6.c
+++ b/sys/netinet6/in6.c
@@ -231,7 +231,7 @@ done:
* rely on the cloning mechanism from the corresponding interface route
* any more.
*/
-static void
+void
in6_ifaddloop(struct ifaddr *ifa)
{
struct rtentry *rt;
@@ -249,7 +249,7 @@ in6_ifaddloop(struct ifaddr *ifa)
* Remove loopback rtentry of ownaddr generated by in6_ifaddloop(),
* if it exists.
*/
-static void
+void
in6_ifremloop(struct ifaddr *ifa)
{
struct in6_ifaddr *ia;
@@ -1614,6 +1614,39 @@ in6_ifinit(struct ifnet *ifp, struct in6
return (error);
}
+struct in6_multi_mship *
+in6_joingroup(ifp, addr, errorp)
+ struct ifnet *ifp;
+ struct in6_addr *addr;
+ int *errorp;
+{
+ struct in6_multi_mship *imm;
+ /* Allocate a membership record wrapping in6_addmulti(); NULL with *errorp set on failure. */
+ imm = kmalloc(sizeof(*imm), M_IPMADDR, M_NOWAIT);
+ if (!imm) {
+ *errorp = ENOBUFS;
+ return NULL;
+ }
+ imm->i6mm_maddr = in6_addmulti(addr, ifp, errorp);
+ if (!imm->i6mm_maddr) {
+ /* *errorp is already set */
+ kfree(imm, M_IPMADDR);
+ return NULL;
+ }
+ return imm; /* i6mm_chain is not linked here; the callers in ip_carp.c insert it themselves */
+}
+
+int
+in6_leavegroup(imm)
+ struct in6_multi_mship *imm;
+{
+ /* Release the multicast address (if any) and free the record; always returns 0. */
+ if (imm->i6mm_maddr)
+ in6_delmulti(imm->i6mm_maddr);
+ kfree(imm, M_IPMADDR);
+ return 0;
+}
+
/*
* Add an address to the list of IP6 multicast addresses for a
* given interface.
diff --git a/sys/netinet6/in6_ifattach.c b/sys/netinet6/in6_ifattach.c
index 1e0bd15..94833c6 100644
--- a/sys/netinet6/in6_ifattach.c
+++ b/sys/netinet6/in6_ifattach.c
@@ -714,6 +714,7 @@ in6_ifattach(struct ifnet *ifp,
#endif
case IFT_PFLOG:
case IFT_PFSYNC:
+ case IFT_CARP:
return;
}
diff --git a/sys/netinet6/in6_proto.c b/sys/netinet6/in6_proto.c
index 1434cca..f2b430b 100644
--- a/sys/netinet6/in6_proto.c
+++ b/sys/netinet6/in6_proto.c
@@ -69,6 +69,7 @@
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_ipsec.h"
+#include "opt_carp.h"
#include <sys/param.h>
#include <sys/socket.h>
@@ -140,6 +141,11 @@
#include <net/net_osdep.h>
+#ifdef CARP
+#include <netinet/ip_carp.h>
+#endif
+
+
/*
* TCP/IP protocol family: IP6, ICMP6, UDP, TCP.
*/
@@ -247,6 +253,15 @@ struct ip6protosw inet6sw[] = {
0, 0, 0, 0,
&rip6_usrreqs
},
+#ifdef CARP
+{ SOCK_RAW, &inet6domain, IPPROTO_CARP, PR_ATOMIC|PR_ADDR,
+ carp6_input, rip6_output, 0, rip6_ctloutput,
+ 0,
+ 0, 0, 0, 0,
+ &rip6_usrreqs
+},
+#endif /* CARP */
+
/* raw wildcard */
{ SOCK_RAW, &inet6domain, 0, PR_ATOMIC|PR_ADDR,
rip6_input, rip6_output, 0, rip6_ctloutput,
diff --git a/sys/netinet6/in6_var.h b/sys/netinet6/in6_var.h
index 6420634..cbfe2cb 100644
--- a/sys/netinet6/in6_var.h
+++ b/sys/netinet6/in6_var.h
@@ -585,6 +585,9 @@ do { \
struct in6_multi *in6_addmulti (struct in6_addr *, struct ifnet *,
int *);
void in6_delmulti (struct in6_multi *);
+struct in6_multi_mship *in6_joingroup(struct ifnet *, struct in6_addr *, int *);
+int in6_leavegroup(struct in6_multi_mship *);
+
extern int in6_ifindex2scopeid (int);
extern int in6_mask2len (struct in6_addr *, u_char *);
extern void in6_len2mask (struct in6_addr *, int);
@@ -615,6 +618,8 @@ int in6_prefix_ioctl (struct socket *so,
int in6_prefix_add_ifid (int iilen, struct in6_ifaddr *ia);
void in6_prefix_remove_ifid (int iilen, struct in6_ifaddr *ia);
void in6_purgeprefix (struct ifnet *);
+void in6_ifremloop(struct ifaddr *);
+void in6_ifaddloop(struct ifaddr *);
int in6_is_addr_deprecated (struct sockaddr_in6 *);
struct inpcb;
diff --git a/sys/netinet6/nd6.c b/sys/netinet6/nd6.c
index 801ec0c..e295fa5 100644
--- a/sys/netinet6/nd6.c
+++ b/sys/netinet6/nd6.c
@@ -1994,6 +1994,9 @@ nd6_need_cache(struct ifnet *ifp)
#ifdef IFT_IEEE80211
case IFT_IEEE80211:
#endif
+#ifdef IFT_CARP
+ case IFT_CARP:
+#endif
case IFT_GIF: /* XXX need more cases? */
return (1);
default:
diff --git a/sys/netinet6/nd6_nbr.c b/sys/netinet6/nd6_nbr.c
index 04f051b..4c70301 100644
--- a/sys/netinet6/nd6_nbr.c
+++ b/sys/netinet6/nd6_nbr.c
@@ -34,6 +34,7 @@
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_ipsec.h"
+#include "opt_carp.h"
#include <sys/param.h>
#include <sys/systm.h>
@@ -71,6 +72,11 @@
#include <net/net_osdep.h>
+#ifdef CARP
+#include <netinet/ip_carp.h>
+#endif
+
+
#define SDL(s) ((struct sockaddr_dl *)s)
struct dadq;
@@ -102,7 +108,7 @@ nd6_ns_input(struct mbuf *m, int off, in
struct in6_addr taddr6;
struct in6_addr myaddr6;
char *lladdr = NULL;
- struct ifaddr *ifa;
+ struct ifaddr *ifa = NULL;
int lladdrlen = 0;
int anycast = 0, proxy = 0, tentative = 0;
int tlladdr;
@@ -201,7 +207,14 @@ nd6_ns_input(struct mbuf *m, int off, in
* (3) "tentative" address on which DAD is being performed.
*/
/* (1) and (3) check. */
+#ifdef CARP
+ if (ifp->if_carp)
+ ifa = carp_iamatch6(ifp->if_carp, &taddr6);
+ if (!ifa)
+ ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp, &taddr6);
+#else
ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp, &taddr6);
+#endif
/* (2) check. */
if (!ifa) {
@@ -895,9 +908,16 @@ nd6_na_output(struct ifnet *ifp, const s
* lladdr in sdl0. If we are not proxying (sending NA for
* my address) use lladdr configured for the interface.
*/
- if (sdl0 == NULL)
+ if (sdl0 == NULL) {
+#ifdef CARP
+ if (ifp->if_carp)
+ mac = carp_macmatch6(ifp->if_carp, m, taddr6);
+ if (mac == NULL)
+ mac = nd6_ifptomac(ifp);
+#else
mac = nd6_ifptomac(ifp);
- else if (sdl0->sa_family == AF_LINK) {
+#endif
+ } else if (sdl0->sa_family == AF_LINK) {
struct sockaddr_dl *sdl;
sdl = (struct sockaddr_dl *)sdl0;
if (sdl->sdl_alen == ifp->if_addrlen)
@@ -949,6 +969,9 @@ nd6_ifptomac(struct ifnet *ifp)
#ifdef IFT_IEEE80211
case IFT_IEEE80211:
#endif
+#ifdef IFT_CARP
+ case IFT_CARP:
+#endif
return ((caddr_t)(ifp + 1));
break;
default:
diff --git a/sys/netinet6/scope6.c b/sys/netinet6/scope6.c
index 96b0891..2957c58 100644
--- a/sys/netinet6/scope6.c
+++ b/sys/netinet6/scope6.c
@@ -296,3 +296,76 @@ scope6_addr2default(struct in6_addr *add
return (sid_default.s6id_list[in6_addrscope(addr)]);
}
+
+/*
+ * Look up the scope zone ID that applies to address `in6' on interface
+ * `ifp'.
+ *
+ * When ret_id is not NULL the zone ID is stored through it.  For
+ * link-local addresses and node-local multicast addresses the low 16
+ * bits of the zone ID are also written into the second 16-bit word of
+ * *in6, so the caller's address may be modified.
+ *
+ * Returns 0 on success, or EINVAL when the loopback address is used
+ * with an interface that does not have IFF_LOOPBACK set.
+ */
+int
+in6_setscope(struct in6_addr *in6, struct ifnet *ifp, u_int32_t *ret_id)
+{
+	struct scope6_id *ids;
+	u_int32_t zone;
+	int level;
+	int error;
+
+	/* The interface serializer is held while the zone table is read. */
+	lwkt_serialize_enter(ifp->if_serializer);
+
+	ids = SID(ifp);
+
+#ifdef DIAGNOSTIC
+	/* should not happen */
+	if (ids == NULL)
+		panic("in6_setscope: scope array is NULL");
+#endif
+
+	/*
+	 * The loopback address is only acceptable on a loopback
+	 * interface, where its zone is unambiguous (0).
+	 */
+	if (IN6_IS_ADDR_LOOPBACK(in6)) {
+		error = EINVAL;
+		if (ifp->if_flags & IFF_LOOPBACK) {
+			if (ret_id != NULL)
+				*ret_id = 0;
+			error = 0;
+		}
+		lwkt_serialize_exit(ifp->if_serializer);
+		return (error);
+	}
+
+	level = in6_addrscope(in6);
+	switch (level) {
+	case IPV6_ADDR_SCOPE_NODELOCAL:	/* should be interface index */
+	case IPV6_ADDR_SCOPE_LINKLOCAL:
+	case IPV6_ADDR_SCOPE_SITELOCAL:
+	case IPV6_ADDR_SCOPE_ORGLOCAL:
+		/* The per-interface table is indexed by the scope value. */
+		zone = ids->s6id_list[level];
+		break;
+	default:
+		/* XXX: treat as global. */
+		zone = 0;
+		break;
+	}
+
+	lwkt_serialize_exit(ifp->if_serializer);
+
+	if (ret_id != NULL)
+		*ret_id = zone;
+
+	/* XXX: embed the zone ID in the address itself */
+	if (IN6_IS_SCOPE_LINKLOCAL(in6) || IN6_IS_ADDR_MC_NODELOCAL(in6))
+		in6->s6_addr16[1] = htons(zone & 0xffff);
+
+	return (0);
+}
diff --git a/sys/netinet6/scope6_var.h b/sys/netinet6/scope6_var.h
index 57c84c4..62427e3 100644
--- a/sys/netinet6/scope6_var.h
+++ b/sys/netinet6/scope6_var.h
@@ -65,6 +65,7 @@ void scope6_setdefault (struct ifnet *);
int scope6_get_default (struct scope6_id *);
u_int32_t scope6_in6_addrscope (struct in6_addr *);
u_int32_t scope6_addr2default (struct in6_addr *);
+int in6_setscope __P((struct in6_addr *, struct ifnet *, u_int32_t *));
#endif /* _KERNEL */
diff --git a/sys/sys/mbuf.h b/sys/sys/mbuf.h
index 6391553..0774545 100644
--- a/sys/sys/mbuf.h
+++ b/sys/sys/mbuf.h
@@ -560,6 +560,7 @@ m_getb(int len, int how, int type, int f
/* struct ip6aux */
#define PACKET_TAG_IPFW_DIVERT 9 /* divert info */
/* uint16_t */
+#define PACKET_TAG_CARP 28 /* CARP info */
/*
* As a temporary and low impact solution to replace the even uglier
diff --git a/usr.bin/netstat/inet.c b/usr.bin/netstat/inet.c
index 1158082..47e76c7 100644
--- a/usr.bin/netstat/inet.c
+++ b/usr.bin/netstat/inet.c
@@ -47,6 +47,7 @@
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
+#include <netinet/ip_carp.h>
#ifdef INET6
#include <netinet/ip6.h>
#endif /* INET6 */
@@ -584,6 +585,50 @@ udp_stats(u_long off __unused, char *nam
#undef p1a
}
+/*
+ * Dump CARP statistics structure.
+ *
+ * The counters are read with the "net.inet.carp.stats" sysctl; `off' is
+ * not used.  When zflag is set, a zero-filled structure is passed as
+ * the new value in the same call.  `name' is printed as the section
+ * heading.  Each counter line is printed when the counter is non-zero
+ * or when sflag <= 1.
+ */
+void
+carp_stats(u_long off __unused, const char *name, int af1 __unused)
+{
+	struct carpstats carpstat, zerostat;
+	size_t len = sizeof(struct carpstats);
+
+	if (zflag)
+		memset(&zerostat, 0, len);
+	if (sysctlbyname("net.inet.carp.stats", &carpstat, &len,
+	    zflag ? &zerostat : NULL, zflag ? len : 0) < 0) {
+		warn("sysctl: net.inet.carp.stats");
+		return;
+	}
+
+	printf("%s:\n", name);
+
+/* p: counter plus plural suffix; p2: counter only. */
+#define p(f, m) if (carpstat.f || sflag <= 1) \
+	printf(m, (unsigned long long)carpstat.f, plural((int)carpstat.f))
+#define p2(f, m) if (carpstat.f || sflag <= 1) \
+	printf(m, (unsigned long long)carpstat.f)
+
+	p(carps_ipackets, "\t%llu packet%s received (IPv4)\n");
+	p(carps_ipackets6, "\t%llu packet%s received (IPv6)\n");
+	p(carps_badttl, "\t\t%llu packet%s discarded for wrong TTL\n");
+	p(carps_hdrops, "\t\t%llu packet%s shorter than header\n");
+	p(carps_badsum, "\t\t%llu discarded for bad checksum%s\n");
+	p(carps_badver, "\t\t%llu discarded packet%s with a bad version\n");
+	p2(carps_badlen, "\t\t%llu discarded because packet too short\n");
+	p2(carps_badauth, "\t\t%llu discarded for bad authentication\n");
+	p2(carps_badvhid, "\t\t%llu discarded for bad vhid\n");
+	p2(carps_badaddrs, "\t\t%llu discarded because of a bad address list\n");
+	p(carps_opackets, "\t%llu packet%s sent (IPv4)\n");
+	p(carps_opackets6, "\t%llu packet%s sent (IPv6)\n");
+	p2(carps_onomem, "\t\t%llu send failed due to mbuf memory error\n");
+#if notyet
+	/* p() passes an unsigned long long first, so the format must be %llu. */
+	p(carps_ostates, "\t\t%llu state update%s sent\n");
+#endif
+#undef p
+#undef p2
+}
+
/*
* Dump IP statistics structure.
*/
diff --git a/usr.bin/netstat/main.c b/usr.bin/netstat/main.c
index b487811..2004d93 100644
--- a/usr.bin/netstat/main.c
+++ b/usr.bin/netstat/main.c
@@ -148,6 +148,8 @@ static struct nlist nl[] = {
{ "_rttrash" },
#define N_NCPUS 43
{ "_ncpus" },
+#define N_CARPSTAT 44
+ { "_carpstats" },
{ "" },
};
@@ -179,6 +181,8 @@ struct protox {
{ -1, N_IPSECSTAT, 1, 0,
ipsec_stats, NULL, "ipsec", 0},
#endif
+ { -1, N_CARPSTAT, 1, 0,
+ carp_stats, NULL, "carp", 0},
{ -1, -1, 0, 0,
0, NULL, 0 }
};
diff --git a/usr.bin/netstat/netstat.h b/usr.bin/netstat/netstat.h
index f0329cd..b7cd79e 100644
--- a/usr.bin/netstat/netstat.h
+++ b/usr.bin/netstat/netstat.h
@@ -73,6 +73,7 @@ void ip_stats (u_long, char *, int);
void icmp_stats (u_long, char *, int);
void igmp_stats (u_long, char *, int);
void pim_stats (u_long, char *, int);
+void carp_stats (u_long, const char *, int);
#ifdef IPSEC
void ipsec_stats (u_long, char *, int);
#endif
[
Date Prev][
Date Next]
[
Thread Prev][
Thread Next]
[
Date Index][
Thread Index]