Netlink: Add option to specify netlink socket receive buffer size

Add option 'netlink rx buffer' to specify netlink socket receive buffer
size. Uses SO_RCVBUFFORCE, so it can override rmem_max limit.

Thanks to Trisha Biswas and Michal for the original patches.
This commit is contained in:
Ondrej Zajicek (work) 2022-01-17 05:11:29 +01:00
parent bbc33f6ec3
commit 81ee6cda2e
5 changed files with 2243 additions and 1 deletions

View file

@ -3248,6 +3248,12 @@ channels.
allows to specify a limit on maximal number of nexthops in one route. By
default, multipath merging is disabled. If enabled, default value of the
limit is 16.
<tag><label id="krt-netlink-rx-buffer">netlink rx buffer <m/number/</tag> (Linux)
Set the kernel receive buffer size (in bytes) for the netlink socket. The default
value is OS-dependent (from the <file>/proc/sys/net/core/rmem_default</file>
file). If you get some "Kernel dropped some netlink message ..." warnings,
you may increase this value.
</descrip>
<sect1>Attributes

View file

@ -69,6 +69,7 @@ static inline struct ifa * kif_get_primary_ip(struct iface *i UNUSED) { return N
/*
 * System-dependent parameters of a kernel protocol configuration.
 * These are the fields filled in by the kern_sys_item configuration options.
 */
struct krt_params {
u32 table_id; /* Kernel table ID we sync with */
u32 metric; /* Kernel metric used for all routes */
uint netlink_rx_buffer; /* Requested rx buffer size for the netlink socket; the netlink code ignores a value of 0 */
};
struct krt_state {

View file

@ -10,7 +10,8 @@ CF_HDR
CF_DECLS
CF_KEYWORDS(KERNEL, TABLE, METRIC, KRT_PREFSRC, KRT_REALM, KRT_SCOPE, KRT_MTU, KRT_WINDOW,
CF_KEYWORDS(KERNEL, TABLE, METRIC, NETLINK, RX, BUFFER,
KRT_PREFSRC, KRT_REALM, KRT_SCOPE, KRT_MTU, KRT_WINDOW,
KRT_RTT, KRT_RTTVAR, KRT_SSTRESH, KRT_CWND, KRT_ADVMSS, KRT_REORDERING,
KRT_HOPLIMIT, KRT_INITCWND, KRT_RTO_MIN, KRT_INITRWND, KRT_QUICKACK,
KRT_LOCK_MTU, KRT_LOCK_WINDOW, KRT_LOCK_RTT, KRT_LOCK_RTTVAR,
@ -24,6 +25,7 @@ kern_proto: kern_proto kern_sys_item ';' ;
/* System-specific options of a kernel protocol; each one stores its value into THIS_KRT->sys */
kern_sys_item:
KERNEL TABLE expr { THIS_KRT->sys.table_id = $3; }
| METRIC expr { THIS_KRT->sys.metric = $2; }
| NETLINK RX BUFFER expr { THIS_KRT->sys.netlink_rx_buffer = $4; }
;
dynamic_attr: KRT_PREFSRC { $$ = f_new_dynamic_attr(EAF_TYPE_IP_ADDRESS, T_IP, EA_KRT_PREFSRC); } ;

View file

@ -174,6 +174,27 @@ nl_set_strict_dump(struct nl_sock *nl, int strict)
#endif
}
/*
 * Ask the kernel for a receive buffer of @val bytes on socket @fd.
 *
 * The request is made with SO_RCVBUFFORCE. A failure is not propagated to
 * the caller; it is only reported as a warning in the log.
 */
static void
nl_set_rcvbuf(int fd, uint val)
{
  int rv = setsockopt(fd, SOL_SOCKET, SO_RCVBUFFORCE, &val, sizeof(val));

  /* Success, nothing to report */
  if (rv >= 0)
    return;

  log(L_WARN "KRT: Cannot set netlink rx buffer size to %u: %m", val);
}
static uint
nl_cfg_rx_buffer_size(struct config *cfg)
{
uint bufsize = 0;
struct proto_config *pc;
WALK_LIST(pc, cfg->protos)
if ((pc->protocol == &proto_unix_kernel) && !pc->disabled)
bufsize = MAX(bufsize, ((struct krt_config *) pc)->sys.netlink_rx_buffer);
return bufsize;
}
static void
nl_open(void)
{
@ -1976,6 +1997,8 @@ krt_do_scan(struct krt_proto *p UNUSED) /* CONFIG_ALL_TABLES_AT_ONCE => p is NUL
static sock *nl_async_sk; /* BIRD socket for asynchronous notifications */
static byte *nl_async_rx_buffer; /* Receive buffer */
static uint nl_async_bufsize; /* Kernel rx buffer size for the netlink socket */
static struct config *nl_last_config; /* For tracking changes to nl_async_bufsize */
static void
nl_async_msg(struct nlmsghdr *h)
@ -2111,6 +2134,32 @@ nl_open_async(void)
bug("Netlink: sk_open failed");
}
/*
 * Bring the rx buffer size of the async netlink socket in line with the
 * current configuration.
 *
 * The work is done at most once per configuration (tracked through
 * nl_last_config), so repeated calls from several kernel protocols during
 * one (re)configuration are cheap. A requested size of 0 leaves the socket
 * untouched.
 */
static void
nl_update_async_bufsize(void)
{
  /* Skip when there is no async socket or this config was already processed */
  if (!nl_async_sk || (nl_last_config == config))
    return;

  uint wanted = nl_cfg_rx_buffer_size(config);
  int changed = wanted && (wanted != nl_async_bufsize);

  if (changed)
  {
    /* Stay silent on the very first call, announce only reconfigurations */
    if (nl_last_config)
      log(L_INFO "KRT: Changing netlink rx buffer size to %u", wanted);

    nl_set_rcvbuf(nl_async_sk->fd, wanted);

    /* Recorded even if the setsockopt failed; nl_set_rcvbuf reports no status */
    nl_async_bufsize = wanted;
  }

  /* Remember which configuration the socket now corresponds to */
  nl_last_config = config;
}
/*
* Interface to the UNIX krt module
@ -2139,6 +2188,7 @@ krt_sys_start(struct krt_proto *p)
nl_open();
nl_open_async();
nl_update_async_bufsize();
return 1;
}
@ -2146,12 +2196,16 @@ krt_sys_start(struct krt_proto *p)
/*
 * System-dependent part of kernel protocol shutdown for protocol @p.
 *
 * Re-evaluates the netlink rx buffer size first (presumably because the
 * set of active kernel protocols may have changed -- confirm against the
 * callers), then removes @p from the nl_table_map hash.
 */
void
krt_sys_shutdown(struct krt_proto *p)
{
nl_update_async_bufsize();
HASH_REMOVE2(nl_table_map, RTH, krt_pool, p);
}
/*
 * System-dependent part of kernel protocol reconfiguration.
 *
 * Refreshes the netlink rx buffer size for the current configuration and
 * then compares the new (@n) and old (@o) system parameters. Returns
 * non-zero when both the kernel table ID and the metric are unchanged.
 * The netlink_rx_buffer value does not take part in this comparison.
 */
int
krt_sys_reconfigure(struct krt_proto *p UNUSED, struct krt_config *n, struct krt_config *o)
{
  nl_update_async_bufsize();

  int same_table = (n->sys.table_id == o->sys.table_id);
  int same_metric = (n->sys.metric == o->sys.metric);

  return same_table && same_metric;
}

2179
sysdep/linux/netlink.c.orig Normal file

File diff suppressed because it is too large Load diff