KRT: Scan routing tables separetely on linux to avoid congestion
Remove compile-time sysdep option CONFIG_ALL_TABLES_AT_ONCE, replace it with runtime ability to run either separate table scans or shared scan. On Linux, use separate table scans by default when the netlink socket option NETLINK_GET_STRICT_CHK is available, but retreat to shared scan when it fails. Running separate table scans has advantages where some routing tables are managed independently, e.g. when multiple routing daemons are running on the same machine, as kernel routing table modification performance is significantly reduced when the table is modified while it is being scanned. Thanks Daniel Gröber for the original patch and Toke Høiland-Jørgensen for suggestions.
This commit is contained in:
parent
971721c9b5
commit
534d0a4b44
5 changed files with 78 additions and 49 deletions
|
@ -4,7 +4,6 @@ Available configuration variables:
|
||||||
CONFIG_AUTO_ROUTES Device routes are added automagically by the kernel
|
CONFIG_AUTO_ROUTES Device routes are added automagically by the kernel
|
||||||
CONFIG_SELF_CONSCIOUS We're able to recognize whether route was installed by us
|
CONFIG_SELF_CONSCIOUS We're able to recognize whether route was installed by us
|
||||||
CONFIG_MULTIPLE_TABLES The kernel supports multiple routing tables
|
CONFIG_MULTIPLE_TABLES The kernel supports multiple routing tables
|
||||||
CONFIG_ALL_TABLES_AT_ONCE Kernel scanner wants to process all tables at once
|
|
||||||
CONFIG_SINGLE_ROUTE There is only one route per network
|
CONFIG_SINGLE_ROUTE There is only one route per network
|
||||||
|
|
||||||
CONFIG_MC_PROPER_SRC Multicast packets have source address according to socket saddr field
|
CONFIG_MC_PROPER_SRC Multicast packets have source address according to socket saddr field
|
||||||
|
|
|
@ -9,7 +9,6 @@
|
||||||
#define CONFIG_AUTO_ROUTES
|
#define CONFIG_AUTO_ROUTES
|
||||||
#define CONFIG_SELF_CONSCIOUS
|
#define CONFIG_SELF_CONSCIOUS
|
||||||
#define CONFIG_MULTIPLE_TABLES
|
#define CONFIG_MULTIPLE_TABLES
|
||||||
#define CONFIG_ALL_TABLES_AT_ONCE
|
|
||||||
#define CONFIG_IP6_SADR_KERNEL
|
#define CONFIG_IP6_SADR_KERNEL
|
||||||
|
|
||||||
#define CONFIG_MC_PROPER_SRC
|
#define CONFIG_MC_PROPER_SRC
|
||||||
|
|
|
@ -161,16 +161,13 @@ nl_open_sock(struct nl_sock *nl)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static int
|
||||||
nl_set_strict_dump(struct nl_sock *nl UNUSED, int strict UNUSED)
|
nl_set_strict_dump(struct nl_sock *nl UNUSED, int strict UNUSED)
|
||||||
{
|
{
|
||||||
/*
|
|
||||||
* Strict checking is not necessary, it improves behavior on newer kernels.
|
|
||||||
* If it is not available (missing SOL_NETLINK compile-time, or ENOPROTOOPT
|
|
||||||
* run-time), we can just ignore it.
|
|
||||||
*/
|
|
||||||
#ifdef SOL_NETLINK
|
#ifdef SOL_NETLINK
|
||||||
setsockopt(nl->fd, SOL_NETLINK, NETLINK_GET_STRICT_CHK, &strict, sizeof(strict));
|
return setsockopt(nl->fd, SOL_NETLINK, NETLINK_GET_STRICT_CHK, &strict, sizeof(strict));
|
||||||
|
#else
|
||||||
|
return -1;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -198,10 +195,17 @@ nl_cfg_rx_buffer_size(struct config *cfg)
|
||||||
static void
|
static void
|
||||||
nl_open(void)
|
nl_open(void)
|
||||||
{
|
{
|
||||||
|
if ((nl_scan.fd >= 0) && (nl_req.fd >= 0))
|
||||||
|
return;
|
||||||
|
|
||||||
nl_open_sock(&nl_scan);
|
nl_open_sock(&nl_scan);
|
||||||
nl_open_sock(&nl_req);
|
nl_open_sock(&nl_req);
|
||||||
|
|
||||||
nl_set_strict_dump(&nl_scan, 1);
|
if (nl_set_strict_dump(&nl_scan, 1) < 0)
|
||||||
|
{
|
||||||
|
log(L_WARN "KRT: Netlink strict checking failed, will scan all tables at once");
|
||||||
|
krt_use_shared_scan();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
|
@ -256,11 +260,13 @@ nl_request_dump_addr(int af)
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
nl_request_dump_route(int af)
|
nl_request_dump_route(int af, int table_id)
|
||||||
{
|
{
|
||||||
struct {
|
struct {
|
||||||
struct nlmsghdr nh;
|
struct nlmsghdr nh;
|
||||||
struct rtmsg rtm;
|
struct rtmsg rtm;
|
||||||
|
struct rtattr rta;
|
||||||
|
u32 table_id;
|
||||||
} req = {
|
} req = {
|
||||||
.nh.nlmsg_type = RTM_GETROUTE,
|
.nh.nlmsg_type = RTM_GETROUTE,
|
||||||
.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg)),
|
.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg)),
|
||||||
|
@ -269,7 +275,17 @@ nl_request_dump_route(int af)
|
||||||
.rtm.rtm_family = af,
|
.rtm.rtm_family = af,
|
||||||
};
|
};
|
||||||
|
|
||||||
send(nl_scan.fd, &req, sizeof(req), 0);
|
if (table_id < 256)
|
||||||
|
req.rtm.rtm_table = table_id;
|
||||||
|
else
|
||||||
|
{
|
||||||
|
req.rta.rta_type = RTA_TABLE;
|
||||||
|
req.rta.rta_len = RTA_LENGTH(4);
|
||||||
|
req.table_id = table_id;
|
||||||
|
req.nh.nlmsg_len = NLMSG_ALIGN(req.nh.nlmsg_len) + req.rta.rta_len;
|
||||||
|
}
|
||||||
|
|
||||||
|
send(nl_scan.fd, &req, req.nh.nlmsg_len, 0);
|
||||||
nl_scan.last_hdr = NULL;
|
nl_scan.last_hdr = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1976,18 +1992,27 @@ nl_parse_route(struct nl_parse_state *s, struct nlmsghdr *h)
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
krt_do_scan(struct krt_proto *p UNUSED) /* CONFIG_ALL_TABLES_AT_ONCE => p is NULL */
|
krt_do_scan(struct krt_proto *p)
|
||||||
{
|
{
|
||||||
struct nlmsghdr *h;
|
struct nlmsghdr *h;
|
||||||
struct nl_parse_state s;
|
struct nl_parse_state s;
|
||||||
|
|
||||||
nl_parse_begin(&s, 1);
|
nl_parse_begin(&s, 1);
|
||||||
nl_request_dump_route(AF_UNSPEC);
|
|
||||||
|
/* Table-specific scan or shared scan */
|
||||||
|
if (p)
|
||||||
|
nl_request_dump_route(p->af, krt_table_id(p));
|
||||||
|
else
|
||||||
|
nl_request_dump_route(AF_UNSPEC, 0);
|
||||||
|
|
||||||
while (h = nl_get_scan())
|
while (h = nl_get_scan())
|
||||||
|
{
|
||||||
if (h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE)
|
if (h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE)
|
||||||
nl_parse_route(&s, h);
|
nl_parse_route(&s, h);
|
||||||
else
|
else
|
||||||
log(L_DEBUG "nl_scan_fire: Unknown packet received (type=%d)", h->nlmsg_type);
|
log(L_DEBUG "nl_scan_fire: Unknown packet received (type=%d)", h->nlmsg_type);
|
||||||
|
}
|
||||||
|
|
||||||
nl_parse_end(&s);
|
nl_parse_end(&s);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -783,18 +783,17 @@ krt_got_route_async(struct krt_proto *p, rte *e, int new)
|
||||||
rte_free(e);
|
rte_free(e);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Periodic scanning
|
* Periodic scanning
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
static timer *krt_scan_all_timer;
|
||||||
#ifdef CONFIG_ALL_TABLES_AT_ONCE
|
static int krt_scan_all_count;
|
||||||
|
static _Bool krt_scan_all_tables;
|
||||||
static timer *krt_scan_timer;
|
|
||||||
static int krt_scan_count;
|
|
||||||
|
|
||||||
static void
|
static void
|
||||||
krt_scan(timer *t UNUSED)
|
krt_scan_all(timer *t UNUSED)
|
||||||
{
|
{
|
||||||
struct krt_proto *p;
|
struct krt_proto *p;
|
||||||
node *n;
|
node *n;
|
||||||
|
@ -815,35 +814,42 @@ krt_scan(timer *t UNUSED)
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
krt_scan_timer_start(struct krt_proto *p)
|
krt_scan_all_timer_start(struct krt_proto *p)
|
||||||
{
|
{
|
||||||
if (!krt_scan_count)
|
if (!krt_scan_all_count)
|
||||||
krt_scan_timer = tm_new_init(krt_pool, krt_scan, NULL, KRT_CF->scan_time, 0);
|
krt_scan_all_timer = tm_new_init(krt_pool, krt_scan_all, NULL, KRT_CF->scan_time, 0);
|
||||||
|
|
||||||
krt_scan_count++;
|
krt_scan_all_count++;
|
||||||
|
|
||||||
tm_start(krt_scan_timer, 1 S);
|
tm_start(krt_scan_all_timer, 1 S);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
krt_scan_timer_stop(struct krt_proto *p UNUSED)
|
krt_scan_all_timer_stop(void)
|
||||||
{
|
{
|
||||||
krt_scan_count--;
|
ASSERT(krt_scan_all_count > 0);
|
||||||
|
|
||||||
if (!krt_scan_count)
|
krt_scan_all_count--;
|
||||||
|
|
||||||
|
if (!krt_scan_all_count)
|
||||||
{
|
{
|
||||||
rfree(krt_scan_timer);
|
rfree(krt_scan_all_timer);
|
||||||
krt_scan_timer = NULL;
|
krt_scan_all_timer = NULL;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
krt_scan_timer_kick(struct krt_proto *p UNUSED)
|
krt_scan_all_timer_kick(void)
|
||||||
{
|
{
|
||||||
tm_start(krt_scan_timer, 0);
|
tm_start(krt_scan_all_timer, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
krt_use_shared_scan(void)
|
||||||
|
{
|
||||||
|
krt_scan_all_tables = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
#else
|
|
||||||
|
|
||||||
static void
|
static void
|
||||||
krt_scan(timer *t)
|
krt_scan(timer *t)
|
||||||
|
@ -860,27 +866,34 @@ krt_scan(timer *t)
|
||||||
|
|
||||||
static void
|
static void
|
||||||
krt_scan_timer_start(struct krt_proto *p)
|
krt_scan_timer_start(struct krt_proto *p)
|
||||||
|
{
|
||||||
|
if (krt_scan_all_tables)
|
||||||
|
krt_scan_all_timer_start(p);
|
||||||
|
else
|
||||||
{
|
{
|
||||||
p->scan_timer = tm_new_init(p->p.pool, krt_scan, p, KRT_CF->scan_time, 0);
|
p->scan_timer = tm_new_init(p->p.pool, krt_scan, p, KRT_CF->scan_time, 0);
|
||||||
tm_start(p->scan_timer, 1 S);
|
tm_start(p->scan_timer, 1 S);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
krt_scan_timer_stop(struct krt_proto *p)
|
krt_scan_timer_stop(struct krt_proto *p)
|
||||||
{
|
{
|
||||||
|
if (krt_scan_all_tables)
|
||||||
|
krt_scan_all_timer_stop();
|
||||||
|
else
|
||||||
tm_stop(p->scan_timer);
|
tm_stop(p->scan_timer);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
krt_scan_timer_kick(struct krt_proto *p)
|
krt_scan_timer_kick(struct krt_proto *p)
|
||||||
{
|
{
|
||||||
|
if (krt_scan_all_tables)
|
||||||
|
krt_scan_all_timer_kick();
|
||||||
|
else
|
||||||
tm_start(p->scan_timer, 0);
|
tm_start(p->scan_timer, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Updates
|
* Updates
|
||||||
|
@ -1016,11 +1029,6 @@ krt_postconfig(struct proto_config *CF)
|
||||||
if (! proto_cf_main_channel(CF))
|
if (! proto_cf_main_channel(CF))
|
||||||
cf_error("Channel not specified");
|
cf_error("Channel not specified");
|
||||||
|
|
||||||
#ifdef CONFIG_ALL_TABLES_AT_ONCE
|
|
||||||
if (krt_cf->scan_time != cf->scan_time)
|
|
||||||
cf_error("All kernel syncers must use the same table scan interval");
|
|
||||||
#endif
|
|
||||||
|
|
||||||
struct channel_config *cc = proto_cf_main_channel(CF);
|
struct channel_config *cc = proto_cf_main_channel(CF);
|
||||||
struct rtable_config *tab = cc->table;
|
struct rtable_config *tab = cc->table;
|
||||||
if (tab->krt_attached)
|
if (tab->krt_attached)
|
||||||
|
|
|
@ -52,10 +52,7 @@ struct krt_proto {
|
||||||
struct rtable *krt_table; /* Internal table of inherited routes */
|
struct rtable *krt_table; /* Internal table of inherited routes */
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef CONFIG_ALL_TABLES_AT_ONCE
|
|
||||||
timer *scan_timer;
|
timer *scan_timer;
|
||||||
#endif
|
|
||||||
|
|
||||||
struct bmap sync_map; /* Keeps track which exported routes were successfully written to kernel */
|
struct bmap sync_map; /* Keeps track which exported routes were successfully written to kernel */
|
||||||
struct bmap seen_map; /* Routes seen during last periodic scan */
|
struct bmap seen_map; /* Routes seen during last periodic scan */
|
||||||
node krt_node; /* Node in krt_proto_list */
|
node krt_node; /* Node in krt_proto_list */
|
||||||
|
@ -76,6 +73,7 @@ extern pool *krt_pool;
|
||||||
|
|
||||||
struct proto_config * kif_init_config(int class);
|
struct proto_config * kif_init_config(int class);
|
||||||
void kif_request_scan(void);
|
void kif_request_scan(void);
|
||||||
|
void krt_use_shared_scan(void);
|
||||||
void krt_got_route(struct krt_proto *p, struct rte *e);
|
void krt_got_route(struct krt_proto *p, struct rte *e);
|
||||||
void krt_got_route_async(struct krt_proto *p, struct rte *e, int new);
|
void krt_got_route_async(struct krt_proto *p, struct rte *e, int new);
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue