KRT: Scan routing tables separately on Linux to avoid congestion

Remove the compile-time sysdep option CONFIG_ALL_TABLES_AT_ONCE and replace it
with the runtime ability to run either separate table scans or a shared scan.

On Linux, use separate table scans by default when the netlink socket
option NETLINK_GET_STRICT_CHK is available, but fall back to a shared scan
when setting that option fails.

Running separate table scans has advantages where some routing tables are
managed independently, e.g. when multiple routing daemons are running on
the same machine, as kernel routing table modification performance is
significantly reduced when the table is modified while it is being
scanned.

Thanks to Daniel Gröber for the original patch and Toke Høiland-Jørgensen
for suggestions.
This commit is contained in:
Ondrej Zajicek 2022-07-24 02:15:20 +02:00
parent 971721c9b5
commit 534d0a4b44
5 changed files with 78 additions and 49 deletions

View file

@ -4,7 +4,6 @@ Available configuration variables:
CONFIG_AUTO_ROUTES Device routes are added automagically by the kernel CONFIG_AUTO_ROUTES Device routes are added automagically by the kernel
CONFIG_SELF_CONSCIOUS We're able to recognize whether route was installed by us CONFIG_SELF_CONSCIOUS We're able to recognize whether route was installed by us
CONFIG_MULTIPLE_TABLES The kernel supports multiple routing tables CONFIG_MULTIPLE_TABLES The kernel supports multiple routing tables
CONFIG_ALL_TABLES_AT_ONCE Kernel scanner wants to process all tables at once
CONFIG_SINGLE_ROUTE There is only one route per network CONFIG_SINGLE_ROUTE There is only one route per network
CONFIG_MC_PROPER_SRC Multicast packets have source address according to socket saddr field CONFIG_MC_PROPER_SRC Multicast packets have source address according to socket saddr field

View file

@ -9,7 +9,6 @@
#define CONFIG_AUTO_ROUTES #define CONFIG_AUTO_ROUTES
#define CONFIG_SELF_CONSCIOUS #define CONFIG_SELF_CONSCIOUS
#define CONFIG_MULTIPLE_TABLES #define CONFIG_MULTIPLE_TABLES
#define CONFIG_ALL_TABLES_AT_ONCE
#define CONFIG_IP6_SADR_KERNEL #define CONFIG_IP6_SADR_KERNEL
#define CONFIG_MC_PROPER_SRC #define CONFIG_MC_PROPER_SRC

View file

@ -161,16 +161,13 @@ nl_open_sock(struct nl_sock *nl)
} }
} }
static void static int
nl_set_strict_dump(struct nl_sock *nl UNUSED, int strict UNUSED) nl_set_strict_dump(struct nl_sock *nl UNUSED, int strict UNUSED)
{ {
/*
* Strict checking is not necessary, it improves behavior on newer kernels.
* If it is not available (missing SOL_NETLINK compile-time, or ENOPROTOOPT
* run-time), we can just ignore it.
*/
#ifdef SOL_NETLINK #ifdef SOL_NETLINK
setsockopt(nl->fd, SOL_NETLINK, NETLINK_GET_STRICT_CHK, &strict, sizeof(strict)); return setsockopt(nl->fd, SOL_NETLINK, NETLINK_GET_STRICT_CHK, &strict, sizeof(strict));
#else
return -1;
#endif #endif
} }
@ -198,10 +195,17 @@ nl_cfg_rx_buffer_size(struct config *cfg)
static void static void
nl_open(void) nl_open(void)
{ {
if ((nl_scan.fd >= 0) && (nl_req.fd >= 0))
return;
nl_open_sock(&nl_scan); nl_open_sock(&nl_scan);
nl_open_sock(&nl_req); nl_open_sock(&nl_req);
nl_set_strict_dump(&nl_scan, 1); if (nl_set_strict_dump(&nl_scan, 1) < 0)
{
log(L_WARN "KRT: Netlink strict checking failed, will scan all tables at once");
krt_use_shared_scan();
}
} }
static void static void
@ -256,11 +260,13 @@ nl_request_dump_addr(int af)
} }
static void static void
nl_request_dump_route(int af) nl_request_dump_route(int af, int table_id)
{ {
struct { struct {
struct nlmsghdr nh; struct nlmsghdr nh;
struct rtmsg rtm; struct rtmsg rtm;
struct rtattr rta;
u32 table_id;
} req = { } req = {
.nh.nlmsg_type = RTM_GETROUTE, .nh.nlmsg_type = RTM_GETROUTE,
.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg)), .nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg)),
@ -269,7 +275,17 @@ nl_request_dump_route(int af)
.rtm.rtm_family = af, .rtm.rtm_family = af,
}; };
send(nl_scan.fd, &req, sizeof(req), 0); if (table_id < 256)
req.rtm.rtm_table = table_id;
else
{
req.rta.rta_type = RTA_TABLE;
req.rta.rta_len = RTA_LENGTH(4);
req.table_id = table_id;
req.nh.nlmsg_len = NLMSG_ALIGN(req.nh.nlmsg_len) + req.rta.rta_len;
}
send(nl_scan.fd, &req, req.nh.nlmsg_len, 0);
nl_scan.last_hdr = NULL; nl_scan.last_hdr = NULL;
} }
@ -1976,18 +1992,27 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h)
} }
void void
krt_do_scan(struct krt_proto *p UNUSED) /* CONFIG_ALL_TABLES_AT_ONCE => p is NULL */ krt_do_scan(struct krt_proto *p)
{ {
struct nlmsghdr *h; struct nlmsghdr *h;
struct nl_parse_state s; struct nl_parse_state s;
nl_parse_begin(&s, 1); nl_parse_begin(&s, 1);
nl_request_dump_route(AF_UNSPEC);
/* Table-specific scan or shared scan */
if (p)
nl_request_dump_route(p->af, krt_table_id(p));
else
nl_request_dump_route(AF_UNSPEC, 0);
while (h = nl_get_scan()) while (h = nl_get_scan())
{
if (h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE) if (h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE)
nl_parse_route(&s, h); nl_parse_route(&s, h);
else else
log(L_DEBUG "nl_scan_fire: Unknown packet received (type=%d)", h->nlmsg_type); log(L_DEBUG "nl_scan_fire: Unknown packet received (type=%d)", h->nlmsg_type);
}
nl_parse_end(&s); nl_parse_end(&s);
} }

View file

@ -783,18 +783,17 @@ krt_got_route_async(struct krt_proto *p, rte *e, int new)
rte_free(e); rte_free(e);
} }
/* /*
* Periodic scanning * Periodic scanning
*/ */
static timer *krt_scan_all_timer;
#ifdef CONFIG_ALL_TABLES_AT_ONCE static int krt_scan_all_count;
static _Bool krt_scan_all_tables;
static timer *krt_scan_timer;
static int krt_scan_count;
static void static void
krt_scan(timer *t UNUSED) krt_scan_all(timer *t UNUSED)
{ {
struct krt_proto *p; struct krt_proto *p;
node *n; node *n;
@ -815,35 +814,42 @@ krt_scan(timer *t UNUSED)
} }
static void static void
krt_scan_timer_start(struct krt_proto *p) krt_scan_all_timer_start(struct krt_proto *p)
{ {
if (!krt_scan_count) if (!krt_scan_all_count)
krt_scan_timer = tm_new_init(krt_pool, krt_scan, NULL, KRT_CF->scan_time, 0); krt_scan_all_timer = tm_new_init(krt_pool, krt_scan_all, NULL, KRT_CF->scan_time, 0);
krt_scan_count++; krt_scan_all_count++;
tm_start(krt_scan_timer, 1 S); tm_start(krt_scan_all_timer, 1 S);
} }
static void static void
krt_scan_timer_stop(struct krt_proto *p UNUSED) krt_scan_all_timer_stop(void)
{ {
krt_scan_count--; ASSERT(krt_scan_all_count > 0);
if (!krt_scan_count) krt_scan_all_count--;
if (!krt_scan_all_count)
{ {
rfree(krt_scan_timer); rfree(krt_scan_all_timer);
krt_scan_timer = NULL; krt_scan_all_timer = NULL;
} }
} }
static void static void
krt_scan_timer_kick(struct krt_proto *p UNUSED) krt_scan_all_timer_kick(void)
{ {
tm_start(krt_scan_timer, 0); tm_start(krt_scan_all_timer, 0);
}
void
krt_use_shared_scan(void)
{
krt_scan_all_tables = 1;
} }
#else
static void static void
krt_scan(timer *t) krt_scan(timer *t)
@ -860,27 +866,34 @@ krt_scan(timer *t)
static void static void
krt_scan_timer_start(struct krt_proto *p) krt_scan_timer_start(struct krt_proto *p)
{
if (krt_scan_all_tables)
krt_scan_all_timer_start(p);
else
{ {
p->scan_timer = tm_new_init(p->p.pool, krt_scan, p, KRT_CF->scan_time, 0); p->scan_timer = tm_new_init(p->p.pool, krt_scan, p, KRT_CF->scan_time, 0);
tm_start(p->scan_timer, 1 S); tm_start(p->scan_timer, 1 S);
} }
}
static void static void
krt_scan_timer_stop(struct krt_proto *p) krt_scan_timer_stop(struct krt_proto *p)
{ {
if (krt_scan_all_tables)
krt_scan_all_timer_stop();
else
tm_stop(p->scan_timer); tm_stop(p->scan_timer);
} }
static void static void
krt_scan_timer_kick(struct krt_proto *p) krt_scan_timer_kick(struct krt_proto *p)
{ {
if (krt_scan_all_tables)
krt_scan_all_timer_kick();
else
tm_start(p->scan_timer, 0); tm_start(p->scan_timer, 0);
} }
#endif
/* /*
* Updates * Updates
@ -1016,11 +1029,6 @@ krt_postconfig(struct proto_config *CF)
if (! proto_cf_main_channel(CF)) if (! proto_cf_main_channel(CF))
cf_error("Channel not specified"); cf_error("Channel not specified");
#ifdef CONFIG_ALL_TABLES_AT_ONCE
if (krt_cf->scan_time != cf->scan_time)
cf_error("All kernel syncers must use the same table scan interval");
#endif
struct channel_config *cc = proto_cf_main_channel(CF); struct channel_config *cc = proto_cf_main_channel(CF);
struct rtable_config *tab = cc->table; struct rtable_config *tab = cc->table;
if (tab->krt_attached) if (tab->krt_attached)

View file

@ -52,10 +52,7 @@ struct krt_proto {
struct rtable *krt_table; /* Internal table of inherited routes */ struct rtable *krt_table; /* Internal table of inherited routes */
#endif #endif
#ifndef CONFIG_ALL_TABLES_AT_ONCE
timer *scan_timer; timer *scan_timer;
#endif
struct bmap sync_map; /* Keeps track which exported routes were successfully written to kernel */ struct bmap sync_map; /* Keeps track which exported routes were successfully written to kernel */
struct bmap seen_map; /* Routes seen during last periodic scan */ struct bmap seen_map; /* Routes seen during last periodic scan */
node krt_node; /* Node in krt_proto_list */ node krt_node; /* Node in krt_proto_list */
@ -76,6 +73,7 @@ extern pool *krt_pool;
struct proto_config * kif_init_config(int class); struct proto_config * kif_init_config(int class);
void kif_request_scan(void); void kif_request_scan(void);
void krt_use_shared_scan(void);
void krt_got_route(struct krt_proto *p, struct rte *e); void krt_got_route(struct krt_proto *p, struct rte *e);
void krt_got_route_async(struct krt_proto *p, struct rte *e, int new); void krt_got_route_async(struct krt_proto *p, struct rte *e, int new);