
Autosizing TCP buffers


From: Peter Avalos <pavalos@xxxxxxxxxxxx>
Date: Sat, 25 Jul 2009 14:14:33 -1000

Here's a patch that autosizes TCP buffers, which increases network
performance over high-latency links.  Please review.  I'm leaving the
country for a couple of months, so I may push this in a couple of days
if I don't get any problem reports.

Thanks,
Peter
From 10f5d0715d8103d96a43ff223d33ec7e74ba2a00 Mon Sep 17 00:00:00 2001
From: Peter Avalos <pavalos@theshell.com>
Date: Sun, 19 Jul 2009 15:50:00 -1000
Subject: Implement autosizing TCP socket buffers.

Normally the socket buffers are static (either derived from global
defaults or set with setsockopt) and do not adapt to real network
conditions. One of two things happens: a) your socket buffers are too small
and you can't reach the full potential of the network between both
hosts; b) your socket buffers are too big and you waste a lot of
kernel memory for data just sitting around.

With automatic TCP send and receive socket buffers we can start with a
small buffer and quickly grow it in parallel with the TCP congestion
window to match real network conditions.

New sysctls are:
  net.inet.tcp.sendbuf_auto=1 (enabled)
  net.inet.tcp.sendbuf_inc=8192 (8K, step size)
  net.inet.tcp.sendbuf_max=16777216 (16M, growth limit)
  net.inet.tcp.recvbuf_auto=1 (enabled)
  net.inet.tcp.recvbuf_inc=16384 (16K, step size)
  net.inet.tcp.recvbuf_max=16777216 (16M, growth limit)
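
For illustration only -- this sketch is not part of the patch -- the new
knobs can be read and tuned from userland with sysctlbyname(3), assuming
the patch is applied:

    #include <sys/types.h>
    #include <sys/sysctl.h>
    #include <stdio.h>

    int
    main(void)
    {
            int val;
            size_t len = sizeof(val);

            /* Read the current growth limit of the receive buffer. */
            if (sysctlbyname("net.inet.tcp.recvbuf_max", &val, &len,
                NULL, 0) == 0)
                    printf("recvbuf_max: %d\n", val);

            /* Raise the send buffer growth limit to 32MB (needs root). */
            val = 32 * 1024 * 1024;
            if (sysctlbyname("net.inet.tcp.sendbuf_max", NULL, NULL,
                &val, sizeof(val)) == -1)
                    perror("sysctlbyname");
            return (0);
    }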

Additionally, add limiters and sanity checks for the TCP MSS (maximum
segment size) to guard against resource exhaustion attacks.
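
In outline -- condensed here for review, see the tcp_input.c hunk in the
patch for the verbatim code -- the receive-side check counts data
segments and bytes over one-second windows and resets the connection
when the average segment size for a busy second falls below tcp_minmss:

    /* Simplified from the patch below; not the exact kernel code. */
    if (tp->rcv_second > ticks) {           /* same 1-second window */
            tp->rcv_pps++;                  /* count this data segment */
            tp->rcv_byps += tlen + off;     /* and its bytes */
            if (tp->rcv_pps > tcp_minmssoverload &&
                tp->rcv_byps / tp->rcv_pps < tcp_minmss) {
                    /* e.g. >1000 pkt/s averaging <256 bytes: drop */
                    tp = tcp_drop(tp, ECONNRESET);
            }
    } else {                                /* window expired: restart */
            tp->rcv_second = ticks + hz;
            tp->rcv_pps = 1;
            tp->rcv_byps = tlen + off;
    }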

Obtained-from: FreeBSD
---
 sys/kern/uipc_socket.c   |    6 ++
 sys/kern/uipc_socket2.c  |    8 ++-
 sys/kern/uipc_usrreq.c   |    2 +-
 sys/netinet/tcp.h        |   19 ++++++
 sys/netinet/tcp_input.c  |  145 +++++++++++++++++++++++++++++++++++++++++++++-
 sys/netinet/tcp_output.c |   69 +++++++++++++++++++++-
 sys/netinet/tcp_subr.c   |   24 ++++++++
 sys/netinet/tcp_usrreq.c |    5 +-
 sys/netinet/tcp_var.h    |   11 ++++
 sys/sys/socketvar.h      |    1 +
 10 files changed, 281 insertions(+), 9 deletions(-)

diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c
index 76bfdc6..ab41e2d 100644
--- a/sys/kern/uipc_socket.c
+++ b/sys/kern/uipc_socket.c
@@ -197,6 +197,10 @@ socreate(int dom, struct socket **aso, int type,
 	ai.sb_rlimit = &p->p_rlimit[RLIMIT_SBSIZE];
 	ai.p_ucred = p->p_ucred;
 	ai.fd_rdir = p->p_fd->fd_rdir;
+	/*
+	 * Auto-sizing of socket buffers is managed by the protocols and
+	 * the appropriate flags must be set in the pru_attach function.
+	 */
 	error = so_pru_attach(so, proto, &ai);
 	if (error) {
 		so->so_state |= SS_NOFDREF;
@@ -1371,6 +1375,8 @@ sosetopt(struct socket *so, struct sockopt *sopt)
 					error = ENOBUFS;
 					goto bad;
 				}
+				(sopt->sopt_name == SO_SNDBUF ? &so->so_snd :
+				    &so->so_rcv)->ssb_flags &= ~SSB_AUTOSIZE;
 				break;
 
 			/*
diff --git a/sys/kern/uipc_socket2.c b/sys/kern/uipc_socket2.c
index f52b21e..094e693 100644
--- a/sys/kern/uipc_socket2.c
+++ b/sys/kern/uipc_socket2.c
@@ -245,7 +245,6 @@ sonewconn(struct socket *head, int connstatus)
 	so->so_linger = head->so_linger;
 	so->so_state = head->so_state | SS_NOFDREF;
 	so->so_proto = head->so_proto;
-	so->so_timeo = head->so_timeo;
 	so->so_cred = crhold(head->so_cred);
 	ai.sb_rlimit = NULL;
 	ai.p_ucred = NULL;
@@ -256,7 +255,12 @@ sonewconn(struct socket *head, int connstatus)
 		sodealloc(so);
 		return (NULL);
 	}
-
+	so->so_rcv.ssb_lowat = head->so_rcv.ssb_lowat;
+	so->so_snd.ssb_lowat = head->so_snd.ssb_lowat;
+	so->so_rcv.ssb_timeo = head->so_rcv.ssb_timeo;
+	so->so_snd.ssb_timeo = head->so_snd.ssb_timeo;
+	so->so_rcv.ssb_flags |= head->so_rcv.ssb_flags & SSB_AUTOSIZE;
+	so->so_snd.ssb_flags |= head->so_snd.ssb_flags & SSB_AUTOSIZE;
 	if (connstatus) {
 		TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
 		so->so_state |= SS_COMP;
diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c
index 7d7b095..ed9d450 100644
--- a/sys/kern/uipc_usrreq.c
+++ b/sys/kern/uipc_usrreq.c
@@ -1353,7 +1353,7 @@ unp_gc_checkmarks(struct file *fp, void *data)
 	    !(so->so_proto->pr_flags & PR_RIGHTS))
 		return(0);
 #ifdef notdef
-	if (so->so_rcv.sb_flags & SB_LOCK) {
+	if (so->so_rcv.ssb_flags & SSB_LOCK) {
 		/*
 		 * This is problematical; it's not clear
 		 * we need to wait for the sockbuf to be
diff --git a/sys/netinet/tcp.h b/sys/netinet/tcp.h
index a534ef9..a8b7586 100644
--- a/sys/netinet/tcp.h
+++ b/sys/netinet/tcp.h
@@ -126,6 +126,25 @@ struct tcphdr {
 #define	TCP_MSS	1460
 
 /*
+ * TCP_MINMSS is defined to be 256, which is fine for the smallest
+ * link MTU (296 bytes, SLIP interface) in the Internet.
+ * However it is very unlikely to come across such low MTU interfaces
+ * these days (anno dato 2003).
+ * Probably it can be set to 512 without ill effects. But we play safe.
+ * See tcp_subr.c tcp_minmss SYSCTL declaration for more comments.
+ * Setting this to "0" disables the minmss check.
+ */
+#define	TCP_MINMSS 256
+/*
+ * TCP_MINMSSOVERLOAD is defined to be 1000, which should cover any type
+ * of interactive TCP session.
+ * See tcp_subr.c tcp_minmssoverload SYSCTL declaration and tcp_input.c
+ * for more comments.
+ * Setting this to "0" disables the minmssoverload check.
+ */
+#define	TCP_MINMSSOVERLOAD 1000
+
+/*
  * Default maximum segment size for TCP6.
  * With an IP6 MSS of 1280, this is 1220,
  * but 1024 is probably more convenient. (xxx kazu in doubt)
diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c
index e5c759a..bf8b3d1 100644
--- a/sys/netinet/tcp_input.c
+++ b/sys/netinet/tcp_input.c
@@ -212,6 +212,20 @@ SYSCTL_INT(_net_inet_tcp_reass, OID_AUTO, overflows, CTLFLAG_RD,
     &tcp_reass_overflows, 0,
     "Global number of TCP Segment Reassembly Queue Overflows");
 
+int tcp_do_autorcvbuf = 1;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, recvbuf_auto, CTLFLAG_RW,
+    &tcp_do_autorcvbuf, 0, "Enable automatic receive buffer sizing");
+
+int tcp_autorcvbuf_inc = 16*1024;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, recvbuf_inc, CTLFLAG_RW,
+    &tcp_autorcvbuf_inc, 0,
+    "Incrementor step size of automatic receive buffer");
+
+int tcp_autorcvbuf_max = 16*1024*1024;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, recvbuf_max, CTLFLAG_RW,
+    &tcp_autorcvbuf_max, 0, "Max size of automatic receive buffer");
+
+
 static void	 tcp_dooptions(struct tcpopt *, u_char *, int, boolean_t);
 static void	 tcp_pulloutofband(struct socket *,
 		     struct tcphdr *, struct mbuf *, int);
@@ -1057,6 +1071,61 @@ after_listen:
 	KASSERT(tp->t_state != TCPS_LISTEN, ("tcp_input: TCPS_LISTEN state"));
 
 	/*
+	 * This is the second part of the MSS DoS prevention code (after
+	 * minmss on the sending side) and it deals with too many too-small
+	 * TCP packets in too short a timeframe (1 second).
+	 *
+	 * For every full second we count the number of received packets
+	 * and bytes. If we get a lot of packets per second for this connection
+	 * (tcp_minmssoverload) we take a closer look at it and compute the
+	 * average packet size for the past second. If that is less than
+	 * tcp_minmss we get too many packets with very small payload which
+	 * is not good and burdens our system (and every packet generates
+	 * a wakeup to the process connected to our socket). We can reasonably
+	 * expect this to be a small-packet DoS attack meant to exhaust our
+	 * CPU cycles.
+	 *
+	 * Care has to be taken with the minimum packet overload value. This
+	 * value defines the minimum number of packets per second before we
+	 * start to worry. It must not be set too low, to avoid killing,
+	 * for example, interactive connections with many small packets,
+	 * such as telnet or SSH.
+	 *
+	 * Setting either tcp_minmssoverload or tcp_minmss to "0" disables
+	 * this check.
+	 *
+	 * Account for packet if payload packet, skip over ACK, etc.
+	 */
+	if (tcp_minmss && tcp_minmssoverload &&
+	    tp->t_state == TCPS_ESTABLISHED && tlen > 0) {
+		if (tp->rcv_second > ticks) {
+			tp->rcv_pps++;
+			tp->rcv_byps += tlen + off;
+			if (tp->rcv_pps > tcp_minmssoverload) {
+				if ((tp->rcv_byps / tp->rcv_pps) < tcp_minmss) {
+					kprintf("too many small tcp packets from "
+					       "%s:%u, av. %lubyte/packet, "
+					       "dropping connection\n",
+#ifdef INET6
+						isipv6 ?
+						ip6_sprintf(&inp->inp_inc.inc6_faddr) :
+#endif
+						inet_ntoa(inp->inp_inc.inc_faddr),
+						inp->inp_inc.inc_fport,
+						tp->rcv_byps / tp->rcv_pps);
+					tp = tcp_drop(tp, ECONNRESET);
+					tcpstat.tcps_minmssdrops++;
+					goto drop;
+				}
+			}
+		} else {
+			tp->rcv_second = ticks + hz;
+			tp->rcv_pps = 1;
+			tp->rcv_byps = tlen + off;
+		}
+	}
+
+	/*
 	 * Segment received on connection.
 	 * Reset idle time and keep-alive timer.
 	 */
@@ -1235,6 +1304,7 @@ after_listen:
 		    th->th_ack == tp->snd_una &&
 		    LIST_EMPTY(&tp->t_segq) &&
 		    tlen <= ssb_space(&so->so_rcv)) {
+			int newsize = 0;	/* automatic sockbuf scaling */
 			/*
 			 * This is a pure, in-sequence data packet
 			 * with nothing on the reassembly queue and
@@ -1245,12 +1315,73 @@ after_listen:
 			tcpstat.tcps_rcvpack++;
 			tcpstat.tcps_rcvbyte += tlen;
 			ND6_HINT(tp);	/* some progress has been done */
+		/*
+		 * Automatic sizing of receive socket buffer.  Often the receive
+		 * buffer size is not optimally adjusted to the actual network
+		 * conditions at hand (delay bandwidth product).  Setting the
+		 * buffer size too small limits throughput on links with high
+		 * bandwidth and high delay (e.g. trans-continental/oceanic links).
+		 *
+		 * On the receive side the socket buffer memory is only rarely
+		 * used to any significant extent.  This allows us to be much
+		 * more aggressive in scaling the receive socket buffer.  For
+		 * the case that the buffer space is actually used to a large
+		 * extent and we run out of kernel memory we can simply drop
+		 * the new segments; TCP on the sender will just retransmit them
+		 * later.  Setting the buffer size too big may only consume too
+		 * much kernel memory if the application doesn't read() from
+		 * the socket or packet loss or reordering makes use of the
+		 * reassembly queue.
+		 *
+		 * The criteria to step up the receive buffer one notch are:
+		 *  1. the number of bytes received during the time it takes
+		 *     one timestamp to be reflected back to us (the RTT);
+		 *  2. received bytes per RTT are within seven-eighths of the
+		 *     current socket buffer size;
+		 *  3. receive buffer size has not hit maximal automatic size;
+		 *
+		 * This algorithm does one step per RTT at most and only if
+		 * we receive a bulk stream w/o packet losses or reorderings.
+		 * Shrinking the buffer during idle times is not necessary as
+		 * it doesn't consume any memory when idle.
+		 *
+		 * TODO: Only step up if the application is actually serving
+		 * the buffer to better manage the socket buffer resources.
+		 */
+			if (tcp_do_autorcvbuf &&
+			    to.to_tsecr &&
+			    (so->so_rcv.ssb_flags & SSB_AUTOSIZE)) {
+				if (to.to_tsecr > tp->rfbuf_ts &&
+				    to.to_tsecr - tp->rfbuf_ts < hz) {
+					if (tp->rfbuf_cnt >
+					    (so->so_rcv.ssb_hiwat / 8 * 7) &&
+					    so->so_rcv.ssb_hiwat <
+					    tcp_autorcvbuf_max) {
+						newsize =
+						    min(so->so_rcv.ssb_hiwat +
+						    tcp_autorcvbuf_inc,
+						    tcp_autorcvbuf_max);
+					}
+					/* Start over with next RTT. */
+					tp->rfbuf_ts = 0;
+					tp->rfbuf_cnt = 0;
+				} else
+					tp->rfbuf_cnt += tlen;	/* add up */
+			}
 			/*
 			 * Add data to socket buffer.
 			 */
 			if (so->so_state & SS_CANTRCVMORE) {
 				m_freem(m);
 			} else {
+				/*
+				 * Set new socket buffer size.
+				 * Give up when limit is reached.
+				 */
+				if (newsize)
+					if (!ssb_reserve(&so->so_rcv, newsize,
+					    so, NULL))
+						so->so_rcv.ssb_flags &= ~SSB_AUTOSIZE;
 				m_adj(m, drop_hdrlen); /* delayed header drop */
 				ssb_appendstream(&so->so_rcv, m);
 			}
@@ -1309,6 +1440,10 @@ after_listen:
 		recvwin = 0;
 	tp->rcv_wnd = imax(recvwin, (int)(tp->rcv_adv - tp->rcv_nxt));
 
+	/* Reset receive buffer auto scaling when not in bulk receive mode. */
+	tp->rfbuf_ts = 0;
+	tp->rfbuf_cnt = 0;
+
 	switch (tp->t_state) {
 	/*
 	 * If the state is SYN_RECEIVED:
@@ -2943,9 +3078,14 @@ tcp_mss(struct tcpcb *tp, int offer)
 	 * Offer == 0 means that there was no MSS on the SYN segment,
 	 * in this case we use tcp_mssdflt.
 	 */
-	if (offer == 0)
+	if (offer == 0) {
 		offer = (isipv6 ? tcp_v6mssdflt : tcp_mssdflt);
-	else
+	} else {
+		/*
+		 * Prevent DoS attacks with a too-small MSS.  Round up
+		 * to at least minmss.
+		 */
+		offer = max(offer, tcp_minmss);
 		/*
 		 * Sanity check: make sure that maxopd will be large
 		 * enough to allow some data on segments even is the
@@ -2953,6 +3093,7 @@ tcp_mss(struct tcpcb *tp, int offer)
 		 * funny things may happen in tcp_output.
 		 */
 		offer = max(offer, 64);
+	}
 	taop->tao_mssopt = offer;
 
 	/*
diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c
index d60f90e..bed538a 100644
--- a/sys/netinet/tcp_output.c
+++ b/sys/netinet/tcp_output.c
@@ -128,6 +128,18 @@ static int avoid_pure_win_update = 1;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, avoid_pure_win_update, CTLFLAG_RW,
 	&avoid_pure_win_update, 1, "Avoid pure window updates when possible");
 
+int tcp_do_autosndbuf = 1;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, sendbuf_auto, CTLFLAG_RW,
+    &tcp_do_autosndbuf, 0, "Enable automatic send buffer sizing");
+
+int tcp_autosndbuf_inc = 8*1024;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, sendbuf_inc, CTLFLAG_RW,
+    &tcp_autosndbuf_inc, 0, "Incrementor step size of automatic send buffer");
+
+int tcp_autosndbuf_max = 16*1024*1024;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, sendbuf_max, CTLFLAG_RW,
+    &tcp_autosndbuf_max, 0, "Max size of automatic send buffer");
+
 /*
  * Tcp output routine: figure out what should be sent and send it.
  */
@@ -315,10 +327,57 @@ again:
 		}
 	}
 
+	KASSERT(len >= 0, ("%s: len < 0", __func__));
 	/*
-	 * len will be >= 0 after this point.  Truncate to the maximum
-	 * segment length and ensure that FIN is removed if the length
-	 * no longer contains the last data byte.
+	 * Automatic sizing of send socket buffer.  Often the send buffer
+	 * size is not optimally adjusted to the actual network conditions
+	 * at hand (delay bandwidth product).  Setting the buffer size too
+	 * small limits throughput on links with high bandwidth and high
+	 * delay (e.g. trans-continental/oceanic links).  Setting the
+	 * buffer size too big consumes too much real kernel memory,
+	 * especially with many connections on busy servers.
+	 *
+	 * The criteria to step up the send buffer one notch are:
+	 *  1. receive window of remote host is larger than send buffer
+	 *     (with a fudge factor of 5/4th);
+	 *  2. send buffer is filled to 7/8th with data (so we actually
+	 *     have data to make use of it);
+	 *  3. send buffer fill has not hit maximal automatic size;
+	 *  4. our send window (slow start and congestion controlled) is
+	 *     larger than sent but unacknowledged data in send buffer.
+	 *
+	 * The remote host receive window scaling factor may limit the
+	 * growing of the send buffer before it reaches its allowed
+	 * maximum.
+	 *
+	 * It scales directly with slow start or congestion window
+	 * and does at most one step per received ACK.  This fast
+	 * scaling has the drawback of growing the send buffer beyond
+	 * what is strictly necessary to make full use of a given
+	 * delay*bandwidth product.  However testing has shown this not
+	 * to be much of a problem.  At worst we are trading wasting
+	 * of available bandwidth (the non-use of it) for wasting some
+	 * socket buffer memory.
+	 *
+	 * TODO: Shrink send buffer during idle periods together
+	 * with congestion window.  Requires another timer.  Has to
+	 * wait for upcoming tcp timer rewrite.
+	 */
+	if (tcp_do_autosndbuf && so->so_snd.ssb_flags & SSB_AUTOSIZE) {
+		if ((tp->snd_wnd / 4 * 5) >= so->so_snd.ssb_hiwat &&
+		    so->so_snd.ssb_cc >= (so->so_snd.ssb_hiwat / 8 * 7) &&
+		    so->so_snd.ssb_cc < tcp_autosndbuf_max &&
+		    sendwin >= (so->so_snd.ssb_cc - (tp->snd_nxt - tp->snd_una))) {
+			if (!ssb_reserve(&so->so_snd,
+			    min(so->so_snd.ssb_hiwat + tcp_autosndbuf_inc,
+			     tcp_autosndbuf_max), so, NULL))
+				so->so_snd.ssb_flags &= ~SSB_AUTOSIZE;
+		}
+	}
+
+	/*
+	 * Truncate to the maximum segment length and ensure that FIN is
+	 * removed if the length no longer contains the last data byte.
 	 */
 	if (len > tp->t_maxseg) {
 		len = tp->t_maxseg;
@@ -520,6 +579,10 @@ send:
 		optlen += TCPOLEN_TSTAMP_APPA;
 	}
 
+	/* Set receive buffer autosizing timestamp. */
+	if (tp->rfbuf_ts == 0 && (so->so_rcv.ssb_flags & SSB_AUTOSIZE))
+		tp->rfbuf_ts = ticks;
+
 	/*
 	 * Send `CC-family' options if our side wants to use them (TF_REQ_CC),
 	 * options are allowed (!TF_NOOPT) and it's not a RST.
diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c
index 817c258..2dc92af 100644
--- a/sys/netinet/tcp_subr.c
+++ b/sys/netinet/tcp_subr.c
@@ -178,6 +178,30 @@ SYSCTL_INT(_net_inet_tcp, TCPCTL_V6MSSDFLT, v6mssdflt, CTLFLAG_RW,
     &tcp_v6mssdflt, 0, "Default TCP Maximum Segment Size for IPv6");
 #endif
 
+/*
+ * Minimum MSS we accept and use. This prevents DoS attacks where
+ * we are forced to a ridiculously low MSS like 20 and send hundreds
+ * of packets instead of one. The effect scales with the available
+ * bandwidth and quickly saturates the CPU and network interface
+ * with packet generation and sending. Set to zero to disable MINMSS
+ * checking. This setting prevents us from sending too small packets.
+ */
+int tcp_minmss = TCP_MINMSS;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, minmss, CTLFLAG_RW,
+    &tcp_minmss, 0, "Minimum TCP Maximum Segment Size");
+/*
+ * Number of TCP segments per second we accept from remote host
+ * before we start to calculate average segment size. If average
+ * segment size drops below the minimum TCP MSS we assume a DoS
+ * attack and reset+drop the connection. Care has to be taken not to
+ * set this value too small, so as not to kill interactive-type
+ * connections (telnet, SSH) which send many small packets.
+ */
+int tcp_minmssoverload = TCP_MINMSSOVERLOAD;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, minmssoverload, CTLFLAG_RW,
+    &tcp_minmssoverload, 0, "Number of TCP Segments per Second allowed to"
+    " be under the MINMSS Size");
+
 #if 0
 static int tcp_rttdflt = TCPTV_SRTTDFLT / PR_SLOWHZ;
 SYSCTL_INT(_net_inet_tcp, TCPCTL_RTTDFLT, rttdflt, CTLFLAG_RW,
diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c
index d0b27a2..4e92941 100644
--- a/sys/netinet/tcp_usrreq.c
+++ b/sys/netinet/tcp_usrreq.c
@@ -1239,7 +1239,8 @@ tcp_ctloutput(struct socket *so, struct sockopt *sopt)
 			break;
 
 		case TCP_MAXSEG:
-			if (optval > 0 && optval <= tp->t_maxseg)
+			if (optval > 0 && optval <= tp->t_maxseg &&
+			    optval + 40 >= tcp_minmss)
 				tp->t_maxseg = optval;
 			else
 				error = EINVAL;
@@ -1314,6 +1315,8 @@ tcp_attach(struct socket *so, struct pru_attach_info *ai)
 		if (error)
 			return (error);
 	}
+	so->so_rcv.ssb_flags |= SSB_AUTOSIZE;
+	so->so_snd.ssb_flags |= SSB_AUTOSIZE;
 	cpu = mycpu->gd_cpuid;
 	error = in_pcballoc(so, &tcbinfo[cpu]);
 	if (error)
diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h
index 4cb7894..9f6c34f 100644
--- a/sys/netinet/tcp_var.h
+++ b/sys/netinet/tcp_var.h
@@ -264,6 +264,14 @@ struct tcpcb {
 	u_long	snd_bwnd;		/* bandwidth-controlled window */
 	int	t_bw_rtttime;		/* used for bandwidth calculation */
 	tcp_seq	t_bw_rtseq;		/* used for bandwidth calculation */
+
+/* anti DoS counters */
+	u_long	rcv_second;		/* start of interval second */
+	u_long	rcv_pps;		/* received packets per second */
+	u_long	rcv_byps;		/* received bytes per second */
+
+	u_int32_t	rfbuf_ts;	/* recv buffer autoscaling timestamp */
+	int	rfbuf_cnt;		/* recv buffer autoscaling byte count */
 };
 
 #define	IN_FASTRECOVERY(tp)	(tp->t_flags & TF_FASTRECOVERY)
@@ -279,6 +287,7 @@ struct tcp_stats {
 	u_long	tcps_connects;		/* connections established */
 	u_long	tcps_drops;		/* connections dropped */
 	u_long	tcps_conndrops;		/* embryonic connections dropped */
+	u_long	tcps_minmssdrops;	/* average minmss too low drops */
 	u_long	tcps_closed;		/* conn. closed (includes drops) */
 	u_long	tcps_segstimed;		/* segs where we tried to get rtt */
 	u_long	tcps_rttupdated;	/* times we succeeded */
@@ -564,6 +573,8 @@ extern	struct inpcbinfo tcbinfo[];
 extern	struct tcpcbackqhead tcpcbackq[];
 
 extern	int tcp_mssdflt;	/* XXX */
+extern	int tcp_minmss;
+extern	int tcp_minmssoverload;
 extern	int tcp_delack_enabled;
 extern	int path_mtu_discovery;
 
diff --git a/sys/sys/socketvar.h b/sys/sys/socketvar.h
index 4300a06..49354f7 100644
--- a/sys/sys/socketvar.h
+++ b/sys/sys/socketvar.h
@@ -84,6 +84,7 @@ struct signalsockbuf {
 #define SSB_KNOTE	0x100		/* kernel note attached */
 #define SSB_MEVENT	0x200		/* need message event notification */
 #define SSB_STOP	0x400		/* backpressure indicator */
+#define	SSB_AUTOSIZE	0x800		/* automatically size socket buffer */
 
 /*
  * Per-socket kernel structure.  Contains universal send and receive queues,
-- 
1.6.2.5
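
Two illustrative notes, outside the patch proper.  First, a
back-of-the-envelope sketch (illustrative numbers of mine, not from the
patch) of why stepping the receive buffer up by recvbuf_inc at most once
per RTT, as the autoscaling comments above describe, still converges
quickly on a fast, high-latency path:

    #include <stdio.h>

    int
    main(void)
    {
            double bw = 100e6 / 8;  /* 100 Mbit/s path, in bytes/s */
            double rtt = 0.100;     /* 100 ms round-trip time */
            double bdp = bw * rtt;  /* bytes in flight to fill the pipe */
            int inc = 16 * 1024;    /* net.inet.tcp.recvbuf_inc default */

            /* ~1.25MB BDP / 16K steps =~ 76 RTTs, about 7.6 seconds. */
            printf("BDP %.0f bytes; %.0f steps, about %.1f seconds\n",
                bdp, bdp / inc, bdp / inc * rtt);
            return (0);
    }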

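Second, per the sosetopt() hunk above, an application that sets an
explicit buffer size opts that direction out of autosizing; a minimal
userland sketch, assuming the patch is applied:

    #include <sys/types.h>
    #include <sys/socket.h>
    #include <stdio.h>
    #include <unistd.h>

    int
    main(void)
    {
            int s, sz;
            socklen_t len = sizeof(sz);

            if ((s = socket(AF_INET, SOCK_STREAM, 0)) == -1) {
                    perror("socket");
                    return (1);
            }
            /* New TCP sockets start small; autosizing grows them. */
            if (getsockopt(s, SOL_SOCKET, SO_RCVBUF, &sz, &len) == 0)
                    printf("initial SO_RCVBUF: %d\n", sz);

            /* An explicit size clears SSB_AUTOSIZE for the send side. */
            sz = 256 * 1024;
            if (setsockopt(s, SOL_SOCKET, SO_SNDBUF, &sz,
                sizeof(sz)) == -1)
                    perror("setsockopt");
            close(s);
            return (0);
    }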