Netlink: Add option to specify netlink socket receive buffer size

Add option 'netlink rx buffer' to specify netlink socket receive buffer
size. Uses SO_RCVBUFFORCE, so it can override rmem_max limit.

Thanks to Trisha Biswas and Michal for the original patches.
This commit is contained in:
Ondrej Zajicek (work) 2022-01-17 05:11:29 +01:00
parent bbc33f6ec3
commit 81ee6cda2e
5 changed files with 2243 additions and 1 deletions

View file

@ -3248,6 +3248,12 @@ channels.
allows to specify a limit on maximal number of nexthops in one route. By allows to specify a limit on maximal number of nexthops in one route. By
default, multipath merging is disabled. If enabled, default value of the default, multipath merging is disabled. If enabled, default value of the
limit is 16. limit is 16.
<tag><label id="krt-netlink-rx-buffer">netlink rx buffer <m/number/</tag> (Linux)
Set kernel receive buffer size (in bytes) for the netlink socket. The default
value is OS-dependent (from the <file>/proc/sys/net/core/rmem_default</file>
file). If you get some "Kernel dropped some netlink message ..." warnings,
you may increase this value.
</descrip> </descrip>
<sect1>Attributes <sect1>Attributes

View file

@ -69,6 +69,7 @@ static inline struct ifa * kif_get_primary_ip(struct iface *i UNUSED) { return N
struct krt_params { struct krt_params {
u32 table_id; /* Kernel table ID we sync with */ u32 table_id; /* Kernel table ID we sync with */
u32 metric; /* Kernel metric used for all routes */ u32 metric; /* Kernel metric used for all routes */
uint netlink_rx_buffer; /* Rx buffer size for the netlink socket */
}; };
struct krt_state { struct krt_state {

View file

@ -10,7 +10,8 @@ CF_HDR
CF_DECLS CF_DECLS
CF_KEYWORDS(KERNEL, TABLE, METRIC, KRT_PREFSRC, KRT_REALM, KRT_SCOPE, KRT_MTU, KRT_WINDOW, CF_KEYWORDS(KERNEL, TABLE, METRIC, NETLINK, RX, BUFFER,
KRT_PREFSRC, KRT_REALM, KRT_SCOPE, KRT_MTU, KRT_WINDOW,
KRT_RTT, KRT_RTTVAR, KRT_SSTRESH, KRT_CWND, KRT_ADVMSS, KRT_REORDERING, KRT_RTT, KRT_RTTVAR, KRT_SSTRESH, KRT_CWND, KRT_ADVMSS, KRT_REORDERING,
KRT_HOPLIMIT, KRT_INITCWND, KRT_RTO_MIN, KRT_INITRWND, KRT_QUICKACK, KRT_HOPLIMIT, KRT_INITCWND, KRT_RTO_MIN, KRT_INITRWND, KRT_QUICKACK,
KRT_LOCK_MTU, KRT_LOCK_WINDOW, KRT_LOCK_RTT, KRT_LOCK_RTTVAR, KRT_LOCK_MTU, KRT_LOCK_WINDOW, KRT_LOCK_RTT, KRT_LOCK_RTTVAR,
@ -24,6 +25,7 @@ kern_proto: kern_proto kern_sys_item ';' ;
kern_sys_item: kern_sys_item:
KERNEL TABLE expr { THIS_KRT->sys.table_id = $3; } KERNEL TABLE expr { THIS_KRT->sys.table_id = $3; }
| METRIC expr { THIS_KRT->sys.metric = $2; } | METRIC expr { THIS_KRT->sys.metric = $2; }
| NETLINK RX BUFFER expr { THIS_KRT->sys.netlink_rx_buffer = $4; }
; ;
dynamic_attr: KRT_PREFSRC { $$ = f_new_dynamic_attr(EAF_TYPE_IP_ADDRESS, T_IP, EA_KRT_PREFSRC); } ; dynamic_attr: KRT_PREFSRC { $$ = f_new_dynamic_attr(EAF_TYPE_IP_ADDRESS, T_IP, EA_KRT_PREFSRC); } ;

View file

@ -174,6 +174,27 @@ nl_set_strict_dump(struct nl_sock *nl, int strict)
#endif #endif
} }
/*
 * Request a kernel receive buffer of @val bytes for the socket @fd.
 *
 * The request is made with SO_RCVBUFFORCE rather than SO_RCVBUF. A failure
 * is not fatal: it is only reported as a warning and the socket is left as
 * it was.
 */
static void
nl_set_rcvbuf(int fd, uint val)
{
  int rv = setsockopt(fd, SOL_SOCKET, SO_RCVBUFFORCE, &val, sizeof(val));

  if (rv < 0)
    log(L_WARN "KRT: Cannot set netlink rx buffer size to %u: %m", val);
}
static uint
nl_cfg_rx_buffer_size(struct config *cfg)
{
uint bufsize = 0;
struct proto_config *pc;
WALK_LIST(pc, cfg->protos)
if ((pc->protocol == &proto_unix_kernel) && !pc->disabled)
bufsize = MAX(bufsize, ((struct krt_config *) pc)->sys.netlink_rx_buffer);
return bufsize;
}
static void static void
nl_open(void) nl_open(void)
{ {
@ -1976,6 +1997,8 @@ krt_do_scan(struct krt_proto *p UNUSED) /* CONFIG_ALL_TABLES_AT_ONCE => p is NUL
static sock *nl_async_sk; /* BIRD socket for asynchronous notifications */ static sock *nl_async_sk; /* BIRD socket for asynchronous notifications */
static byte *nl_async_rx_buffer; /* Receive buffer */ static byte *nl_async_rx_buffer; /* Receive buffer */
static uint nl_async_bufsize; /* Kernel rx buffer size for the netlink socket */
static struct config *nl_last_config; /* For tracking changes to nl_async_bufsize */
static void static void
nl_async_msg(struct nlmsghdr *h) nl_async_msg(struct nlmsghdr *h)
@ -2111,6 +2134,32 @@ nl_open_async(void)
bug("Netlink: sk_open failed"); bug("Netlink: sk_open failed");
} }
/*
 * Bring the rx buffer size of the async netlink socket in line with the
 * current configuration.
 *
 * Called from krt_sys_start(), krt_sys_shutdown() and krt_sys_reconfigure().
 * The work is done at most once per configuration: nl_last_config remembers
 * the configuration that was processed last, so repeated calls for the same
 * config return immediately.
 *
 * A requested size of 0 (no kernel protocol asks for a specific size) leaves
 * the socket untouched. Note that nl_async_bufsize is updated even when
 * nl_set_rcvbuf() fails, as that function only logs the failure.
 */
static void
nl_update_async_bufsize(void)
{
  /* Nothing to do without an async socket or when this config was already handled */
  if (!nl_async_sk || (nl_last_config == config))
    return;

  uint wanted = nl_cfg_rx_buffer_size(config);
  int changed = wanted && (wanted != nl_async_bufsize);

  if (changed)
  {
    /* The first call (no previous config) stays silent; only reconfigurations are logged */
    if (nl_last_config)
      log(L_INFO "KRT: Changing netlink rx buffer size to %u", wanted);

    nl_set_rcvbuf(nl_async_sk->fd, wanted);
    nl_async_bufsize = wanted;
  }

  nl_last_config = config;
}
/* /*
* Interface to the UNIX krt module * Interface to the UNIX krt module
@ -2139,6 +2188,7 @@ krt_sys_start(struct krt_proto *p)
nl_open(); nl_open();
nl_open_async(); nl_open_async();
nl_update_async_bufsize();
return 1; return 1;
} }
@ -2146,12 +2196,16 @@ krt_sys_start(struct krt_proto *p)
void void
krt_sys_shutdown(struct krt_proto *p) krt_sys_shutdown(struct krt_proto *p)
{ {
nl_update_async_bufsize();
HASH_REMOVE2(nl_table_map, RTH, krt_pool, p); HASH_REMOVE2(nl_table_map, RTH, krt_pool, p);
} }
int int
krt_sys_reconfigure(struct krt_proto *p UNUSED, struct krt_config *n, struct krt_config *o) krt_sys_reconfigure(struct krt_proto *p UNUSED, struct krt_config *n, struct krt_config *o)
{ {
nl_update_async_bufsize();
return (n->sys.table_id == o->sys.table_id) && (n->sys.metric == o->sys.metric); return (n->sys.table_id == o->sys.table_id) && (n->sys.metric == o->sys.metric);
} }

2179
sysdep/linux/netlink.c.orig Normal file

File diff suppressed because it is too large Load diff