Discussion: keepalive per socket settings patch
Christos Zoulas
2007-06-20 01:28:10 UTC
Hi,

This is pretty straightforward... The question is: do we let setsockopt
specify arbitrary values, or do we cap them to the global settings like
I do now? Comments? If there are no disagreements I will commit this after
I write the documentation. If there are, speak now and I won't bother
with the docs.

christos

Index: tcp.h
===================================================================
RCS file: /cvsroot/src/sys/netinet/tcp.h,v
retrieving revision 1.25
diff -u -u -r1.25 tcp.h
--- tcp.h 9 Oct 2006 16:27:07 -0000 1.25
+++ tcp.h 20 Jun 2007 01:16:48 -0000
@@ -112,10 +112,19 @@
/*
* User-settable options (used with setsockopt).
*/
-#define TCP_NODELAY 0x01 /* don't delay send to coalesce packets */
-#define TCP_MAXSEG 0x02 /* set maximum segment size */
-/* Bits 0x04, 0x08 reserved for FreeBSD compatibility: TCP_NOPUSH, TCP_NOOPT */
-#define TCP_MD5SIG 0x10 /* use MD5 digests (RFC2385) */
+#define TCP_NODELAY 1 /* don't delay send to coalesce packets */
+#define TCP_MAXSEG 2 /* set maximum segment size */
+#define TCP_KEEPIDLE 3
+#ifdef notyet
+#define TCP_NOPUSH 4 /* reserved for FreeBSD compat */
+#endif
+#define TCP_KEEPINTVL 5
+#define TCP_KEEPCNT 6
+#define TCP_KEEPINIT 7
+#ifdef notyet
+#define TCP_NOOPT 8 /* reserved for FreeBSD compat */
+#endif
+#define TCP_MD5SIG 0x10 /* use MD5 digests (RFC2385) */
#define TCP_CONGCTL 0x20 /* selected congestion control */

#endif /* !_NETINET_TCP_H_ */
Index: tcp_input.c
===================================================================
RCS file: /cvsroot/src/sys/netinet/tcp_input.c,v
retrieving revision 1.266
diff -u -u -r1.266 tcp_input.c
--- tcp_input.c 18 May 2007 21:48:43 -0000 1.266
+++ tcp_input.c 20 Jun 2007 01:16:51 -0000
@@ -1606,7 +1606,7 @@
*/
tp->t_rcvtime = tcp_now;
if (TCPS_HAVEESTABLISHED(tp->t_state))
- TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepidle);
+ TCP_TIMER_ARM(tp, TCPT_KEEP, tp->t_keepidle);

/*
* Process options.
@@ -2366,9 +2366,9 @@
*/
if (so->so_state & SS_CANTRCVMORE) {
soisdisconnected(so);
- if (tcp_maxidle > 0)
+ if (tp->t_maxidle > 0)
TCP_TIMER_ARM(tp, TCPT_2MSL,
- tcp_maxidle);
+ tp->t_maxidle);
}
tp->t_state = TCPS_FIN_WAIT_2;
}
@@ -3377,7 +3377,7 @@
* than the keep alive timer would allow, expire it.
*/
sc->sc_rxttot += sc->sc_rxtcur;
- if (sc->sc_rxttot >= TCPTV_KEEP_INIT)
+ if (sc->sc_rxttot >= tcp_keepinit)
goto dropit;

tcpstat.tcps_sc_retransmitted++;
@@ -3713,7 +3713,7 @@
tcp_sendseqinit(tp);
tcp_rcvseqinit(tp);
tp->t_state = TCPS_SYN_RECEIVED;
- TCP_TIMER_ARM(tp, TCPT_KEEP, TCPTV_KEEP_INIT);
+ TCP_TIMER_ARM(tp, TCPT_KEEP, tp->t_keepinit);
tcpstat.tcps_accepts++;

if ((sc->sc_flags & SCF_SACK_PERMIT) && tcp_do_sack)
Index: tcp_subr.c
===================================================================
RCS file: /cvsroot/src/sys/netinet/tcp_subr.c,v
retrieving revision 1.214
diff -u -u -r1.214 tcp_subr.c
--- tcp_subr.c 2 May 2007 20:40:25 -0000 1.214
+++ tcp_subr.c 20 Jun 2007 01:16:52 -0000
@@ -379,9 +379,6 @@
{
int hlen;

- /* Initialize the TCPCB template. */
- tcp_tcpcb_template();
-
in_pcbinit(&tcbtable, tcbhashsize, tcbhashsize);

hlen = sizeof(struct ip) + sizeof(struct tcphdr);
@@ -410,6 +407,9 @@
/* Initialize the congestion control algorithms. */
tcp_congctl_init();

+ /* Initialize the TCPCB template. */
+ tcp_tcpcb_template();
+
MOWNER_ATTACH(&tcp_tx_mowner);
MOWNER_ATTACH(&tcp_rx_mowner);
MOWNER_ATTACH(&tcp_reass_mowner);
@@ -976,6 +976,13 @@
tp->t_rttvar = tcp_rttdflt * PR_SLOWHZ << (TCP_RTTVAR_SHIFT + 2 - 1);
TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp),
TCPTV_MIN, TCPTV_REXMTMAX);
+
+ /* Keep Alive */
+ tp->t_keepinit = tcp_keepinit;
+ tp->t_keepidle = tcp_keepidle;
+ tp->t_keepintvl = tcp_keepintvl;
+ tp->t_keepcnt = tcp_keepcnt;
+ tp->t_maxidle = tp->t_keepcnt * tp->t_keepintvl;
}

/*
@@ -1049,7 +1056,7 @@

tp->t_congctl = tcp_congctl_global;
tp->t_congctl->refcnt++;
-
+
return (tp);
}

@@ -2016,7 +2023,7 @@
#endif

tp->t_state = TCPS_ESTABLISHED;
- TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepidle);
+ TCP_TIMER_ARM(tp, TCPT_KEEP, tp->t_keepidle);

#ifdef RTV_RPIPE
if (rt != NULL && rt->rt_rmx.rmx_recvpipe != 0)
Index: tcp_timer.c
===================================================================
RCS file: /cvsroot/src/sys/netinet/tcp_timer.c,v
retrieving revision 1.76
diff -u -u -r1.76 tcp_timer.c
--- tcp_timer.c 9 Oct 2006 16:27:07 -0000 1.76
+++ tcp_timer.c 20 Jun 2007 01:16:52 -0000
@@ -148,11 +148,12 @@
* Various tunable timer parameters. These are initialized in tcp_init(),
* unless they are patched.
*/
-int tcp_keepidle = 0;
-int tcp_keepintvl = 0;
-int tcp_keepcnt = 0; /* max idle probes */
+u_int tcp_keepinit = 0;
+u_int tcp_keepidle = 0;
+u_int tcp_keepintvl = 0;
+u_int tcp_keepcnt = 0; /* max idle probes */
+
int tcp_maxpersistidle = 0; /* max idle time in persist */
-int tcp_maxidle; /* computed in tcp_slowtimo() */

/*
* Time to delay the ACK. This is initialized in tcp_init(), unless
@@ -179,6 +180,9 @@
tcp_timer_init(void)
{

+ if (tcp_keepinit == 0)
+ tcp_keepinit = TCPTV_KEEP_INIT;
+
if (tcp_keepidle == 0)
tcp_keepidle = TCPTV_KEEP_IDLE;

@@ -251,7 +255,6 @@
int s;

s = splsoftnet();
- tcp_maxidle = tcp_keepcnt * tcp_keepintvl;
tcp_iss_seq += TCP_ISSINCR; /* increment iss */
tcp_now++; /* for timestamps */
splx(s);
@@ -542,9 +545,9 @@
KASSERT(so != NULL);
if (so->so_options & SO_KEEPALIVE &&
tp->t_state <= TCPS_CLOSE_WAIT) {
- if ((tcp_maxidle > 0) &&
+ if ((tp->t_maxidle > 0) &&
((tcp_now - tp->t_rcvtime) >=
- tcp_keepidle + tcp_maxidle))
+ tp->t_keepidle + tp->t_maxidle))
goto dropit;
/*
* Send a packet designed to force a response
@@ -572,9 +575,9 @@
(struct mbuf *)NULL, NULL, tp->rcv_nxt,
tp->snd_una - 1, 0);
}
- TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepintvl);
+ TCP_TIMER_ARM(tp, TCPT_KEEP, tp->t_keepintvl);
} else
- TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepidle);
+ TCP_TIMER_ARM(tp, TCPT_KEEP, tp->t_keepidle);

#ifdef TCP_DEBUG
if (tp && so->so_options & SO_DEBUG)
@@ -634,8 +637,9 @@
* control block. Otherwise, check again in a bit.
*/
if (tp->t_state != TCPS_TIME_WAIT &&
- ((tcp_maxidle == 0) || ((tcp_now - tp->t_rcvtime) <= tcp_maxidle)))
- TCP_TIMER_ARM(tp, TCPT_2MSL, tcp_keepintvl);
+ ((tp->t_maxidle == 0) ||
+ ((tcp_now - tp->t_rcvtime) <= tp->t_maxidle)))
+ TCP_TIMER_ARM(tp, TCPT_2MSL, tp->t_keepintvl);
else
tp = tcp_close(tp);

Index: tcp_timer.h
===================================================================
RCS file: /cvsroot/src/sys/netinet/tcp_timer.h,v
retrieving revision 1.24
diff -u -u -r1.24 tcp_timer.h
--- tcp_timer.h 26 Sep 2006 06:39:22 -0000 1.24
+++ tcp_timer.h 20 Jun 2007 01:16:52 -0000
@@ -182,11 +182,11 @@

extern const tcp_timer_func_t tcp_timer_funcs[TCPT_NTIMERS];

-extern int tcp_keepidle; /* time before keepalive probes begin */
-extern int tcp_keepintvl; /* time between keepalive probes */
-extern int tcp_keepcnt; /* number of keepalives, 0=infty */
+extern u_int tcp_keepinit; /* time before initial connection times out */
+extern u_int tcp_keepidle; /* time before keepalive probes begin */
+extern u_int tcp_keepintvl; /* time between keepalive probes */
+extern u_int tcp_keepcnt; /* number of keepalives, 0=infty */
extern int tcp_maxpersistidle; /* max idle time in persist */
-extern int tcp_maxidle; /* time to drop after starting probes */
extern int tcp_ttl; /* time to live for TCP segs */
extern const int tcp_backoff[];

Index: tcp_usrreq.c
===================================================================
RCS file: /cvsroot/src/sys/netinet/tcp_usrreq.c,v
retrieving revision 1.131
diff -u -u -r1.131 tcp_usrreq.c
--- tcp_usrreq.c 4 Mar 2007 06:03:22 -0000 1.131
+++ tcp_usrreq.c 20 Jun 2007 01:16:53 -0000
@@ -436,7 +436,7 @@
soisconnecting(so);
tcpstat.tcps_connattempt++;
tp->t_state = TCPS_SYN_SENT;
- TCP_TIMER_ARM(tp, TCPT_KEEP, TCPTV_KEEP_INIT);
+ TCP_TIMER_ARM(tp, TCPT_KEEP, tp->t_keepinit);
tp->iss = tcp_new_iss(tp, 0);
tcp_sendseqinit(tp);
error = tcp_output(tp);
@@ -614,6 +614,28 @@
return (error);
}

+static void
+change_keepalive(struct socket *so, struct tcpcb *tp)
+{
+ tp->t_maxidle = tp->t_keepcnt * tp->t_keepintvl;
+ TCP_TIMER_DISARM(tp, TCPT_KEEP);
+ TCP_TIMER_DISARM(tp, TCPT_2MSL);
+
+ if (tp->t_state == TCPS_SYN_RECEIVED ||
+ tp->t_state == TCPS_SYN_SENT) {
+ TCP_TIMER_ARM(tp, TCPT_KEEP, tp->t_keepinit);
+ } else if (so->so_options & SO_KEEPALIVE &&
+ tp->t_state <= TCPS_CLOSE_WAIT) {
+ TCP_TIMER_ARM(tp, TCPT_KEEP, tp->t_keepintvl);
+ } else {
+ TCP_TIMER_ARM(tp, TCPT_KEEP, tp->t_keepidle);
+ }
+
+ if ((tp->t_state == TCPS_FIN_WAIT_2) && (tp->t_maxidle > 0))
+ TCP_TIMER_ARM(tp, TCPT_2MSL, tp->t_maxidle);
+}
+
+
int
tcp_ctloutput(int op, struct socket *so, int level, int optname,
struct mbuf **mp)
@@ -715,7 +737,8 @@
break;

case TCP_MAXSEG:
- if (m && (i = *mtod(m, int *)) > 0 &&
+ if (m && m->m_len >= sizeof(int) &&
+ (i = *mtod(m, int *)) > 0 &&
i <= tp->t_peermss)
tp->t_peermss = i; /* limit on send size */
else
@@ -729,6 +752,46 @@
#endif
break;

+ case TCP_KEEPIDLE:
+ if (m && m->m_len >= sizeof(int) &&
+ (i = *mtod(m, int *)) >= 0 &&
+ i <= tcp_keepidle) {
+ tp->t_keepidle = i;
+ change_keepalive(so, tp);
+ } else
+ error = EINVAL;
+ break;
+
+ case TCP_KEEPINTVL:
+ if (m && m->m_len >= sizeof(int) &&
+ (i = *mtod(m, int *)) >= 0 &&
+ i <= tcp_keepintvl) {
+ tp->t_keepintvl = i;
+ change_keepalive(so, tp);
+ } else
+ error = EINVAL;
+ break;
+
+ case TCP_KEEPCNT:
+ if (m && m->m_len >= sizeof(int) &&
+ (i = *mtod(m, int *)) >= 0 &&
+ i <= tcp_keepcnt) {
+ tp->t_keepcnt = i;
+ change_keepalive(so, tp);
+ } else
+ error = EINVAL;
+ break;
+
+ case TCP_KEEPINIT:
+ if (m && m->m_len >= sizeof(int) &&
+ (i = *mtod(m, int *)) >= 0 &&
+ i <= tcp_keepinit) {
+ tp->t_keepinit = i;
+ change_keepalive(so, tp);
+ } else
+ error = EINVAL;
+ break;
+
default:
error = ENOPROTOOPT;
break;
@@ -944,8 +1007,8 @@
* a full close, we start a timer to make sure sockets are
* not left in FIN_WAIT_2 forever.
*/
- if ((tp->t_state == TCPS_FIN_WAIT_2) && (tcp_maxidle > 0))
- TCP_TIMER_ARM(tp, TCPT_2MSL, tcp_maxidle);
+ if ((tp->t_state == TCPS_FIN_WAIT_2) && (tp->t_maxidle > 0))
+ TCP_TIMER_ARM(tp, TCPT_2MSL, tp->t_maxidle);
}
return (tp);
}
@@ -1418,6 +1481,27 @@
return error;
}

+static int
+sysctl_tcp_keep(SYSCTLFN_ARGS)
+{
+ int error;
+ u_int tmp;
+ struct sysctlnode node;
+
+ node = *rnode;
+ tmp = *(u_int *)rnode->sysctl_data;
+ node.sysctl_data = &tmp;
+
+ error = sysctl_lookup(SYSCTLFN_CALL(&node));
+ if (error || newp == NULL)
+ return error;
+
+ *(u_int *)rnode->sysctl_data = tmp;
+ tcp_tcpcb_template(); /* update the template */
+ return 0;
+}
+
+
/*
* this (second stage) setup routine is a replacement for tcp_sysctl()
* (which is currently used for ipv4 and ipv6)
@@ -1585,19 +1669,19 @@
CTLTYPE_INT, "keepidle",
SYSCTL_DESCR("Allowed connection idle ticks before a "
"keepalive probe is sent"),
- NULL, 0, &tcp_keepidle, 0,
+ sysctl_tcp_keep, 0, &tcp_keepidle, 0,
CTL_NET, pf, IPPROTO_TCP, TCPCTL_KEEPIDLE, CTL_EOL);
sysctl_createv(clog, 0, NULL, NULL,
CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
CTLTYPE_INT, "keepintvl",
SYSCTL_DESCR("Ticks before next keepalive probe is sent"),
- NULL, 0, &tcp_keepintvl, 0,
+ sysctl_tcp_keep, 0, &tcp_keepintvl, 0,
CTL_NET, pf, IPPROTO_TCP, TCPCTL_KEEPINTVL, CTL_EOL);
sysctl_createv(clog, 0, NULL, NULL,
CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
CTLTYPE_INT, "keepcnt",
SYSCTL_DESCR("Number of keepalive probes to send"),
- NULL, 0, &tcp_keepcnt, 0,
+ sysctl_tcp_keep, 0, &tcp_keepcnt, 0,
CTL_NET, pf, IPPROTO_TCP, TCPCTL_KEEPCNT, CTL_EOL);
sysctl_createv(clog, 0, NULL, NULL,
CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE,
@@ -1658,6 +1742,12 @@
sysctl_inpcblist, 0, &tcbtable, 0,
CTL_NET, pf, IPPROTO_TCP, CTL_CREATE,
CTL_EOL);
+ sysctl_createv(clog, 0, NULL, NULL,
+ CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
+ CTLTYPE_INT, "keepinit",
+ SYSCTL_DESCR("Ticks before initial tcp connection times out"),
+ sysctl_tcp_keep, 0, &tcp_keepinit, 0,
+ CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL);

/* ECN subtree */
sysctl_createv(clog, 0, NULL, &ecn_node,
Index: tcp_var.h
===================================================================
RCS file: /cvsroot/src/sys/netinet/tcp_var.h,v
retrieving revision 1.146
diff -u -u -r1.146 tcp_var.h
--- tcp_var.h 2 May 2007 20:40:25 -0000 1.146
+++ tcp_var.h 20 Jun 2007 01:16:54 -0000
@@ -331,6 +331,14 @@
uint8_t t_ecn_retries; /* # of ECN setup retries */

struct tcp_congctl *t_congctl; /* per TCB congctl algorithm */
+
+ /* Keepalive per socket */
+ u_int t_keepinit;
+ u_int t_keepidle;
+ u_int t_keepintvl;
+ u_int t_keepcnt;
+ u_int t_maxidle; /* t_keepcnt * t_keepintvl */
+
};

/*

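For illustration, an application would end up using the new options
roughly like this (a sketch only, error handling trimmed; the values are
in the same units as the corresponding net.inet.tcp sysctls and, with the
patch as posted, are capped to the current sysctl settings):

#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <err.h>

int
main(void)
{
        int s, on = 1;
        int idle = 120, intvl = 15, cnt = 4;    /* example values only */

        if ((s = socket(AF_INET, SOCK_STREAM, 0)) == -1)
                err(1, "socket");

        /* keepalive probes are still gated by SO_KEEPALIVE */
        if (setsockopt(s, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on)) == -1)
                err(1, "SO_KEEPALIVE");

        /* per-socket overrides; EINVAL if negative or above the globals */
        if (setsockopt(s, IPPROTO_TCP, TCP_KEEPIDLE, &idle, sizeof(idle)) == -1)
                warn("TCP_KEEPIDLE");
        if (setsockopt(s, IPPROTO_TCP, TCP_KEEPINTVL, &intvl, sizeof(intvl)) == -1)
                warn("TCP_KEEPINTVL");
        if (setsockopt(s, IPPROTO_TCP, TCP_KEEPCNT, &cnt, sizeof(cnt)) == -1)
                warn("TCP_KEEPCNT");

        /* ... connect() and use the socket as usual ... */
        return 0;
}
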
Darren Reed
2007-06-20 05:01:12 UTC
Post by Christos Zoulas
Hi,
This is pretty straight forward... The question is do we let setsockopt
specify arbitrary values, or do we cap them to the global settings like
I do now? Comments? If there are no disagreements I will commit this after
I write the documentation. If there are, speak now and I won't bother
with the docs.
The most common criticism I've discussed with people about the
keepalive packets is the problem that you're addressing: different
applications want different keepalive values.

I don't think there should be an upper bound, since allowing larger
values only reduces the amount of extra traffic/work generated in a
given period.

With IPFilter, the largest TCP idle timeout default is 5 days,
aimed at the ssh/telnet/etc window that you look at "every now
and then". I don't know if that is a good suggestion for where
to start with an upper limit or not.

The bigger problem is the lower bound....
Is it too much to allow an application to specify it as being 500ms?
And what if apache does this for all n000 connections?
Should that just be a configuration error or ...?

Darren




Christos Zoulas
2007-06-20 05:10:22 UTC
On Jun 19, 10:01pm, ***@netbsd.org (Darren Reed) wrote:
-- Subject: Re: keepalive per socket settings patch

| Christos Zoulas wrote:
| > Hi,
| >
| > This is pretty straight forward... The question is do we let setsockopt
| > specify arbitrary values, or do we cap them to the global settings like
| > I do now? Comments? If there are no disagreements I will commit this after
| > I write the documentation. If there are, speak now and I won't bother
| > with the docs.
| >
|
| The most common criticism I've discussed with people about the
| keep alive packets is the problem that you're addressing: different
| applications want different keep alive values.
|
| I don't think there should be an upper bound, as this reduces the
| amount of extra traffic/work generated in a given period.
|
| With IPFilter, the largest TCP idle timeout default is 5 days,
| aimed at the ssh/telnet/etc window that you look at "every now
| and then". I don't know if that is a good suggestion for where
| to start with an upper limit or not.
|
| The bigger problem is the lower bound....
| Is it too much to allow an application to specify it as being 500ms?
| And what if apache does this for all n000 connections?
| Should that just be a configuration error or ...?

All good questions. The idea for the upper limit being the sysctl
value seemed reasonable at the time. After I implemented it, I
looked at Linux and it does almost the same (for some of them it
defines max = tcp_keepfoo / PR_SLOWHZ). Can large values be used
as a DoS attack? What impact do small values have on the system?
The other thing that we can do, and that I have not done yet, is to
define 0 as infinity (i.e. no timeout). Linux disallows 0; we accept
it and time out immediately.
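
If we went the 0-as-infinity route, the per-option check would boil down
to something like the following sketch (keep_validate is made up for
illustration, not code in the patch above; the timer side would also have
to skip arming TCPT_KEEP when the per-socket value is 0):

#include <sys/types.h>
#include <errno.h>

/*
 * Sketch of the validation policy under discussion: values are still
 * capped to the corresponding global (sysctl) setting, but 0 means
 * "no timeout" instead of "time out immediately".
 */
int
keep_validate(int req, u_int global, u_int *resultp)
{
        if (req < 0)
                return EINVAL;
        if (req == 0) {                 /* 0 == infinity */
                *resultp = 0;
                return 0;
        }
        if ((u_int)req > global)        /* still capped to the sysctl */
                return EINVAL;
        *resultp = (u_int)req;
        return 0;
}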

christos

Darren Reed
2007-06-20 06:06:09 UTC
Post by Christos Zoulas
-- Subject: Re: keepalive per socket settings patch
| > Hi,
| >
| > This is pretty straight forward... The question is do we let setsockopt
| > specify arbitrary values, or do we cap them to the global settings like
| > I do now? Comments? If there are no disagreements I will commit this after
| > I write the documentation. If there are, speak now and I won't bother
| > with the docs.
| >
|
| The most common criticism I've discussed with people about the
| keep alive packets is the problem that you're addressing: different
| applications want different keep alive values.
|
| I don't think there should be an upper bound, as this reduces the
| amount of extra traffic/work generated in a given period.
|
| With IPFilter, the largest TCP idle timeout default is 5 days,
| aimed at the ssh/telnet/etc window that you look at "every now
| and then". I don't know if that is a good suggestion for where
| to start with an upper limit or not.
|
| The bigger problem is the lower bound....
| Is it too much to allow an application to specify it as being 500ms?
| And what if apache does this for all n000 connections?
| Should that just be a configuration error or ...?
All good questions. The idea for the upper limit being the sysctl
value seemed reasonable at the time. After I implemented it, I
looked at linux and it does almost the same (for some of them it
defines max = tcp_keepfoo / PR_SLOWHZ). Can large values be used
as a DOS attack? What impact do small values have on the system?
The other thing that we can do, and I have not done yet, is to
define 0 as infinity (i.e. no timeout). Linux disallows 0; we accept
it and timeout immediately.
Can we make it behave differently if a NULL pointer is passed vs a
value of 0, and mimic the behaviour of select? However, I don't know
if getsockopt() could return the difference; could it?

Given that you can simply turn off keepalives for a connection, I
can't see how large values could somehow introduce a new DoS vector
that doesn't already exist today.

Experimenting with small values and seeing what impact it has on both the
network and CPU when there are 1000s of connections could be of some
interest.

Darren



Greg Troxel
2007-06-20 13:58:23 UTC
Post by Darren Reed
Post by Christos Zoulas
Hi,
This is pretty straight forward... The question is do we let setsockopt
specify arbitrary values, or do we cap them to the global settings like
I do now? Comments? If there are no disagreements I will commit this after
I write the documentation. If there are, speak now and I won't bother
with the docs.
I think that a program should be able to ask for values both higher and
lower than the global default setting.
Post by Darren Reed
The bigger problem is the lower bound....
Is it too much to allow an application to specify it as being 500ms?
And what if apache does this for all n000 connections?
Should that just be a configuration error or ...?
I think 500 ms is crazy; I bet if you went to IETF everyone would say
that's abusive. Thoughts:

keepalive interval should be well greater than RTT

keepalive interval should only be short enough to keep broken NAT
boxes working

if someone has truly odd needs they can rebuild and we can discuss

I would suggest 30 seconds as the minimum settable value, or maybe 15.

I suggest not limiting the high value. If an admin does want to force
keepalive on apps that don't want it, that's another sysctl to override
policy, but it seems unwarranted.


Joerg Sonnenberger
2007-06-20 14:16:00 UTC
Post by Greg Troxel
keepalive interval should be well greater than RTT
In practice, the lower bound must be > RTT x max_retries. Using that
should be good enough, though.
Post by Greg Troxel
keepalive interval should only be short enough to keep broken NAT
boxes working
A low setting can also be used to detect dead machines, so I can
understand why the above could be a good idea.
Post by Greg Troxel
I suggest not limiting the high value. If an admin does want to force
apps that don't want keepalive, then that's another sysctl to override
policy but it seems unwarranted.
Agreed.

Joerg

der Mouse
2007-06-20 14:27:58 UTC
Post by Greg Troxel
I think 500 ms is crazy; I bet if you went to IETF everyone would say
that's abusive.
For general-purpose use on the open Internet, I agree.

But for special-purpose applications like detecting dead machines on
small closed private networks...well, "Unix does not prevent you from
doing stupid things because that would also prevent you from doing
clever things".

This is not to say that I would consider it a bad thing if getting
below, say, 15 seconds required a kernel rebuild.
Post by Greg Troxel
If an admin does want to force apps that don't want keepalive, then
that's another sysctl to override policy but it seems unwarranted.
I'm not so sure. Back around the turn of the millennium, I hacked my
kernel to force keepalives on for all TCP connections, no matter what
the application specified, because I was stuck dealing with a NAT box
with an insanely low state timeout (something like 45 seconds) that
kept killing my connections. If I'd had a sysctl switch I could have
flipped instead of having to hack the source, I would have.

/~\ The ASCII der Mouse
\ / Ribbon Campaign
X Against HTML ***@rodents.montreal.qc.ca
/ \ Email! 7D C8 61 52 5D E7 2D 39 4E F1 31 3E E8 B3 27 4B

Christos Zoulas
2007-06-20 14:34:48 UTC
Post by Greg Troxel
Post by Darren Reed
Post by Christos Zoulas
Hi,
This is pretty straight forward... The question is do we let setsockopt
specify arbitrary values, or do we cap them to the global settings like
I do now? Comments? If there are no disagreements I will commit this after
I write the documentation. If there are, speak now and I won't bother
with the docs.
I think that a program should be able to ask for values both higher and
lower than the global default setting.
Post by Darren Reed
The bigger problem is the lower bound....
Is it too much to allow an application to specify it as being 500ms?
And what if apache does this for all n000 connections?
Should that just be a configuration error or ...?
I think 500 ms is crazy; I bet if you went to IETF everyone would say
The granularity is 1sec. Perhaps 0 should be disallowed for the interval.
Post by Greg Troxel
keepalive interval should be well greater than RTT
But you don't always know the RTT at the point you set it.
Post by Greg Troxel
keepalive interval should only be short enough to keep broken NAT
boxes working
if someone has truly odd needs they can rebuild and we can discuss
I would suggest 30 seconds as the minimum settable value, or maybe 15.
I would think that even 5 seconds is acceptable.
Post by Greg Troxel
I suggest not limiting the high value. If an admin does want to force
apps that don't want keepalive, then that's another sysctl to override
policy but it seems unwarranted.
I don't know. Linux does it. I don't see the downside of letting it
become bigger than the limits, so I will change it. I will also change
the minimum interval to 1 second.

christos


john heasley
2007-06-20 15:29:39 UTC
Post by Christos Zoulas
Post by Greg Troxel
I would suggest 30 seconds as the minimum settable value, or maybe 15.
I would think that even 5 seconds is acceptable.
If it were being used by something like a routing protocol, in lieu of a
home-grown keep-alive implementation, even 5 seconds (40s for the timeout
with default keepcnt) is rather long.

If there is concern about user-provoked DoS, apply a sysctl-able floor on
the interval.
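
Something along those lines could look like the following sketch;
tcp_keepintvl_min and the "keepintvl_min" node are made up for
illustration and are not part of the posted patch:

u_int tcp_keepintvl_min = 0;    /* hypothetical tunable; 0 == no floor */

        /* in the sysctl setup routine, next to the other keepalive nodes */
        sysctl_createv(clog, 0, NULL, NULL,
                       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
                       CTLTYPE_INT, "keepintvl_min",
                       SYSCTL_DESCR("Minimum keepalive interval a socket may set"),
                       NULL, 0, &tcp_keepintvl_min, 0,
                       CTL_NET, pf, IPPROTO_TCP, CTL_CREATE, CTL_EOL);

        /* and in tcp_ctloutput(), enforcing the floor instead of the cap */
        case TCP_KEEPINTVL:
                if (m && m->m_len >= sizeof(int) &&
                    (i = *mtod(m, int *)) >= 0 &&
                    (u_int)i >= tcp_keepintvl_min) {
                        tp->t_keepintvl = i;
                        change_keepalive(so, tp);
                } else
                        error = EINVAL;
                break;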

Christos Zoulas
2007-06-20 15:38:23 UTC
Post by john heasley
Post by Christos Zoulas
Post by Greg Troxel
I would suggest 30 seconds as the minimum settable value, or maybe 15.
I would think that even 5 seconds is acceptable.
If it were being used by something like a routing protocol, in lieu of a
home-grown keep-alive implementation, even 5 seconds (40s for the timeout
with default keepcnt) is rather long.
If there is concern about user-provoked DOS, apply a sysctl-able floor on
the interval.
Thanks for all the feedback. I just committed the code and I allow
everything > 0.
christos


David Laight
2007-06-20 20:05:35 UTC
Post by Joerg Sonnenberger
Post by Greg Troxel
keepalive interval should be well greater than RTT
In practise, the lower bound must be > RTT x max_retries. Using that
should be as good enough though.
Post by Greg Troxel
keepalive interval should only be short enough to keep broken NAT
boxes working
A low setting can also be used to detect dead machines, so I can
understand why the above could be a good idea.
Except that it doesn't make any sense to set it lower than the TCP
retransmission timer.

(Actually, and IIRC, all the keepalive timeout does is force the
retransmit timeout sequence to start. Last time I timed this (on
a Linux box) it took tens of minutes to disconnect after the first
keepalive got sent.)

David
--
David Laight: ***@l8s.co.uk
