Unix: Rework of select-loop to poll-loop

This should lift the FD_SETSIZE limit and allow more than 1024 file descriptors.
The FD_SETSIZE limit no longer matters when creating new sockets.
This commit is contained in:
Jan Moskyto Matejka 2016-03-09 12:12:02 +01:00
parent ce95af7a5f
commit e1c13a5a7b

View file

@ -19,6 +19,7 @@
#include <sys/socket.h> #include <sys/socket.h>
#include <sys/uio.h> #include <sys/uio.h>
#include <sys/un.h> #include <sys/un.h>
#include <poll.h>
#include <unistd.h> #include <unistd.h>
#include <fcntl.h> #include <fcntl.h>
#include <errno.h> #include <errno.h>
@ -41,12 +42,12 @@
#include "lib/sysio.h" #include "lib/sysio.h"
/* Maximum number of calls of tx handler for one socket in one /* Maximum number of calls of tx handler for one socket in one
* select iteration. Should be small enough to not monopolize CPU by * poll iteration. Should be small enough to not monopolize CPU by
* one protocol instance. * one protocol instance.
*/ */
#define MAX_STEPS 4 #define MAX_STEPS 4
/* Maximum number of calls of rx handler for all sockets in one select /* Maximum number of calls of rx handler for all sockets in one poll
iteration. RX callbacks are often much more costly so we limit iteration. RX callbacks are often much more costly so we limit
this to get small latencies */ this to get small latencies */
#define MAX_RX_STEPS 4 #define MAX_RX_STEPS 4
@ -1022,7 +1023,6 @@ sk_log_error(sock *s, const char *p)
static list sock_list; static list sock_list;
static struct birdsock *current_sock; static struct birdsock *current_sock;
static struct birdsock *stored_sock; static struct birdsock *stored_sock;
static int sock_recalc_fdsets_p;
static inline sock * static inline sock *
sk_next(sock *s) sk_next(sock *s)
@ -1078,7 +1078,6 @@ sk_free(resource *r)
if (s == stored_sock) if (s == stored_sock)
stored_sock = sk_next(s); stored_sock = sk_next(s);
rem_node(&s->n); rem_node(&s->n);
sock_recalc_fdsets_p = 1;
} }
} }
@ -1276,7 +1275,6 @@ static void
sk_insert(sock *s) sk_insert(sock *s)
{ {
add_tail(&sock_list, &s->n); add_tail(&sock_list, &s->n);
sock_recalc_fdsets_p = 1;
} }
static void static void
@ -1328,18 +1326,6 @@ sk_passive_connected(sock *s, int type)
log(L_WARN "SOCK: Cannot get remote IP address for TCP<"); log(L_WARN "SOCK: Cannot get remote IP address for TCP<");
} }
if (fd >= FD_SETSIZE)
{
/* FIXME: Call err_hook instead ? */
log(L_ERR "SOCK: Incoming connection from %I%J (port %d) %s",
t->daddr, ipa_is_link_local(t->daddr) ? t->iface : NULL,
t->dport, "rejected due to FD_SETSIZE limit");
close(fd);
t->fd = -1;
rfree(t);
return 1;
}
if (sk_setup(t) < 0) if (sk_setup(t) < 0)
{ {
/* FIXME: Call err_hook instead ? */ /* FIXME: Call err_hook instead ? */
@ -1416,9 +1402,6 @@ sk_open(sock *s)
if (fd < 0) if (fd < 0)
ERR("socket"); ERR("socket");
if (fd >= FD_SETSIZE)
ERR2("FD_SETSIZE limit reached");
s->af = af; s->af = af;
s->fd = fd; s->fd = fd;
@ -2062,15 +2045,15 @@ static int short_loops = 0;
void void
io_loop(void) io_loop(void)
{ {
fd_set rd, wr; int poll_tout;
struct timeval timo;
time_t tout; time_t tout;
int hi, events; int nfds, events;
sock *s; sock *s;
node *n; node *n;
int fdmax = 256;
struct pollfd *pfd = xmalloc(fdmax * sizeof(struct pollfd));
watchdog_start1(); watchdog_start1();
sock_recalc_fdsets_p = 1;
for(;;) for(;;)
{ {
events = ev_run_list(&global_event_list); events = ev_run_list(&global_event_list);
@ -2081,43 +2064,43 @@ io_loop(void)
tm_shot(); tm_shot();
continue; continue;
} }
timo.tv_sec = events ? 0 : MIN(tout - now, 3); poll_tout = (events ? 0 : MIN(tout - now, 3)) * 1000; /* Time in milliseconds */
timo.tv_usec = 0;
io_close_event(); io_close_event();
if (sock_recalc_fdsets_p) nfds = 0;
{
sock_recalc_fdsets_p = 0;
FD_ZERO(&rd);
FD_ZERO(&wr);
}
hi = 0;
WALK_LIST(n, sock_list) WALK_LIST(n, sock_list)
{ {
pfd[nfds] = (struct pollfd) { .fd = -1 }; /* everything other set to 0 by this */
s = SKIP_BACK(sock, n, n); s = SKIP_BACK(sock, n, n);
if (s->rx_hook) if (s->rx_hook)
{ {
FD_SET(s->fd, &rd); pfd[nfds].fd = s->fd;
if (s->fd > hi) pfd[nfds].events |= POLLIN;
hi = s->fd;
} }
else
FD_CLR(s->fd, &rd);
if (s->tx_hook && s->ttx != s->tpos) if (s->tx_hook && s->ttx != s->tpos)
{ {
FD_SET(s->fd, &wr); pfd[nfds].fd = s->fd;
if (s->fd > hi) pfd[nfds].events |= POLLOUT;
hi = s->fd; }
if (pfd[nfds].fd != -1)
{
s->index = nfds;
nfds++;
} }
else else
FD_CLR(s->fd, &wr); s->index = -1;
if (nfds >= fdmax)
{
fdmax *= 2;
pfd = xrealloc(pfd, fdmax * sizeof(struct pollfd));
}
} }
/* /*
* Yes, this is racy. But even if the signal comes before this test * Yes, this is racy. But even if the signal comes before this test
* and entering select(), it gets caught on the next timer tick. * and entering poll(), it gets caught on the next timer tick.
*/ */
if (async_config_flag) if (async_config_flag)
@ -2142,18 +2125,18 @@ io_loop(void)
continue; continue;
} }
/* And finally enter select() to find active sockets */ /* And finally enter poll() to find active sockets */
watchdog_stop(); watchdog_stop();
hi = select(hi+1, &rd, &wr, NULL, &timo); events = poll(pfd, nfds, poll_tout);
watchdog_start(); watchdog_start();
if (hi < 0) if (events < 0)
{ {
if (errno == EINTR || errno == EAGAIN) if (errno == EINTR || errno == EAGAIN)
continue; continue;
die("select: %m"); die("poll: %m");
} }
if (hi) if (events)
{ {
/* guaranteed to be non-empty */ /* guaranteed to be non-empty */
current_sock = SKIP_BACK(sock, n, HEAD(sock_list)); current_sock = SKIP_BACK(sock, n, HEAD(sock_list));
@ -2161,11 +2144,17 @@ io_loop(void)
while (current_sock) while (current_sock)
{ {
sock *s = current_sock; sock *s = current_sock;
if (s->index == -1)
{
current_sock = sk_next(s);
goto next;
}
int e; int e;
int steps; int steps;
steps = MAX_STEPS; steps = MAX_STEPS;
if ((s->type >= SK_MAGIC) && FD_ISSET(s->fd, &rd) && s->rx_hook) if ((s->type >= SK_MAGIC) && (pfd[s->index].revents & (POLLIN | POLLHUP | POLLERR)) && s->rx_hook)
do do
{ {
steps--; steps--;
@ -2177,7 +2166,7 @@ io_loop(void)
while (e && s->rx_hook && steps); while (e && s->rx_hook && steps);
steps = MAX_STEPS; steps = MAX_STEPS;
if (FD_ISSET(s->fd, &wr)) if (pfd[s->index].revents & POLLOUT)
do do
{ {
steps--; steps--;
@ -2204,13 +2193,17 @@ io_loop(void)
while (current_sock && count < MAX_RX_STEPS) while (current_sock && count < MAX_RX_STEPS)
{ {
sock *s = current_sock; sock *s = current_sock;
int e UNUSED; if (s->index == -1)
{
current_sock = sk_next(s);
goto next2;
}
if ((s->type < SK_MAGIC) && FD_ISSET(s->fd, &rd) && s->rx_hook) if ((s->type < SK_MAGIC) && (pfd[s->index].revents & (POLLIN | POLLHUP | POLLERR)) && s->rx_hook)
{ {
count++; count++;
io_log_event(s->rx_hook, s->data); io_log_event(s->rx_hook, s->data);
e = sk_read(s); sk_read(s);
if (s != current_sock) if (s != current_sock)
goto next2; goto next2;
} }