diff --git a/lib/socket.h b/lib/socket.h index f1922607..53e89edd 100644 --- a/lib/socket.h +++ b/lib/socket.h @@ -51,6 +51,7 @@ void sk_reallocate(sock *); /* Free and allocate tbuf & rbuf */ void sk_dump_all(void); int sk_set_ttl(sock *s, int ttl); /* Set TTL for given socket */ int sk_set_md5_auth(sock *s, ip_addr a, char *passwd); /* Add or remove security associations for given passive socket */ +int sk_rx_ready(sock *s); static inline int sk_send_buffer_empty(sock *sk) diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c index af5dbfcb..b76b7f97 100644 --- a/proto/bgp/bgp.c +++ b/proto/bgp/bgp.c @@ -431,8 +431,15 @@ bgp_hold_timeout(timer *t) { struct bgp_conn *conn = t->data; - DBG("BGP: Hold timeout, closing connection\n"); - bgp_error(conn, 4, 0, NULL, 0); + DBG("BGP: Hold timeout\n"); + + /* If there is something in input queue, we are probably congested + and perhaps just not processed BGP packets in time. */ + + if (sk_rx_ready(conn->sk) > 0) + bgp_start_timer(conn->hold_timer, 10); + else + bgp_error(conn, 4, 0, NULL, 0); } static void diff --git a/sysdep/unix/io.c b/sysdep/unix/io.c index 99d33936..330aea4f 100644 --- a/sysdep/unix/io.c +++ b/sysdep/unix/io.c @@ -30,12 +30,17 @@ #include "lib/unix.h" #include "lib/sysio.h" -/* Maximum number of calls of rx/tx handler for one socket in one +/* Maximum number of calls of tx handler for one socket in one * select iteration. Should be small enough to not monopolize CPU by * one protocol instance. */ #define MAX_STEPS 4 +/* Maximum number of calls of rx handler for all sockets in one select + iteration. RX callbacks are often much more costly so we limit + this to gen small latencies */ +#define MAX_RX_STEPS 4 + /* * Tracked Files */ @@ -493,6 +498,7 @@ tm_format_reltime(char *x, bird_clock_t t) static list sock_list; static struct birdsock *current_sock; +static struct birdsock *stored_sock; static int sock_recalc_fdsets_p; static inline sock * @@ -541,6 +547,8 @@ sk_free(resource *r) close(s->fd); if (s == current_sock) current_sock = sk_next(s); + if (s == stored_sock) + stored_sock = sk_next(s); rem_node(&s->n); sock_recalc_fdsets_p = 1; } @@ -1071,6 +1079,29 @@ sk_maybe_write(sock *s) } } +int +sk_rx_ready(sock *s) +{ + fd_set rd, wr; + struct timeval timo; + int rv; + + FD_ZERO(&rd); + FD_ZERO(&wr); + FD_SET(s->fd, &rd); + + timo.tv_sec = 0; + timo.tv_usec = 0; + + redo: + rv = select(s->fd+1, &rd, &wr, NULL, &timo); + + if ((rv < 0) && (errno == EINTR || errno == EAGAIN)) + goto redo; + + return rv; +} + /** * sk_send - send data to a socket * @s: socket @@ -1239,6 +1270,9 @@ io_init(void) srandom((int) now_real); } +static int short_loops = 0; +#define SHORT_LOOP_MAX 10 + void io_loop(void) { @@ -1317,8 +1351,8 @@ io_loop(void) } /* And finally enter select() to find active sockets */ - hi = select(hi+1, &rd, &wr, NULL, &timo); + if (hi < 0) { if (errno == EINTR || errno == EAGAIN) @@ -1327,13 +1361,17 @@ io_loop(void) } if (hi) { - current_sock = SKIP_BACK(sock, n, HEAD(sock_list)); /* guaranteed to be non-empty */ + /* guaranteed to be non-empty */ + current_sock = SKIP_BACK(sock, n, HEAD(sock_list)); + while (current_sock) { sock *s = current_sock; int e; - int steps = MAX_STEPS; - if (FD_ISSET(s->fd, &rd) && s->rx_hook) + int steps; + + steps = MAX_STEPS; + if ((s->type >= SK_MAGIC) && FD_ISSET(s->fd, &rd) && s->rx_hook) do { steps--; @@ -1356,6 +1394,35 @@ io_loop(void) current_sock = sk_next(s); next: ; } + + short_loops++; + if (events && (short_loops < SHORT_LOOP_MAX)) + continue; + short_loops = 0; + + int count = 0; + current_sock = stored_sock; + if (current_sock == NULL) + current_sock = SKIP_BACK(sock, n, HEAD(sock_list)); + + while (current_sock && count < MAX_RX_STEPS) + { + sock *s = current_sock; + int e; + int steps; + + if ((s->type < SK_MAGIC) && FD_ISSET(s->fd, &rd) && s->rx_hook) + { + count++; + e = sk_read(s); + if (s != current_sock) + goto next2; + } + current_sock = sk_next(s); + next2: ; + } + + stored_sock = current_sock; } } }