Implements multiple routing table support for FreeBSD and OpenBSD.

Inspired by the patch from Alexander V. Chernikov.
This commit is contained in:
Ondrej Zajicek 2013-07-07 12:11:42 +02:00
parent c6964c305b
commit c01a94663c
6 changed files with 422 additions and 92 deletions

View file

@ -1,3 +1,4 @@
krt-sock.c krt-sock.c
krt-sock.Y
krt-sys.h krt-sys.h
sysio.h sysio.h

32
sysdep/bsd/krt-sock.Y Normal file
View file

@ -0,0 +1,32 @@
/*
* BIRD -- BSD Kernel Syncer Configuration
*
* (c) 1999--2000 Martin Mares <mj@ucw.cz>
*
* Can be freely distributed and used under the terms of the GNU GPL.
*/
CF_HDR
CF_DECLS
CF_KEYWORDS(KERNEL, TABLE)
CF_GRAMMAR
CF_ADDTO(kern_proto, kern_proto kern_sys_item ';')
kern_sys_item:
KERNEL TABLE expr {
if ($3 && (krt_max_tables == 1))
cf_error("Multiple kernel routing tables not supported");
if ($3 < 0 || $3 >= krt_max_tables)
cf_error("Kernel table id must be in range 0-%d", krt_max_tables - 1);
THIS_KRT->sys.table_id = $3;
}
;
CF_CODE
CF_END

View file

@ -1,5 +1,5 @@
/* /*
* BIRD -- Unix Routing Table Syncing * BIRD -- BSD Routing Table Syncing
* *
* (c) 2004 Ondrej Filip <feela@network.cz> * (c) 2004 Ondrej Filip <feela@network.cz>
* *
@ -7,6 +7,7 @@
*/ */
#include <stdio.h> #include <stdio.h>
#include <stdlib.h>
#include <ctype.h> #include <ctype.h>
#include <fcntl.h> #include <fcntl.h>
#include <unistd.h> #include <unistd.h>
@ -34,18 +35,112 @@
#include "lib/socket.h" #include "lib/socket.h"
#ifndef RTAX_MAX /*
#define RTAX_MAX 8 * There are significant differences in multiple tables support between BSD variants.
*
* OpenBSD has table_id field for routes in route socket protocol, therefore all
* tables could be managed by one kernel socket. FreeBSD lacks such field,
* therefore multiple sockets (locked to specific table using SO_SETFIB socket
* option) must be used.
*
* Both FreeBSD and OpenBSD uses separate scans for each table. In OpenBSD,
* table_id is specified explicitly as sysctl scan argument, while in FreeBSD it
* is handled implicitly by changing default table using setfib() syscall.
*
* KRT_SHARED_SOCKET - use shared kernel socked instead of one for each krt_proto
* KRT_USE_SETFIB_SCAN - use setfib() for sysctl() route scan
* KRT_USE_SETFIB_SOCK - use SO_SETFIB socket option for kernel sockets
* KRT_USE_SYSCTL_7 - use 7-th arg of sysctl() as table id for route scans
* KRT_USE_SYSCTL_NET_FIBS - use net.fibs sysctl() for dynamic max number of fibs
*/
#ifdef __FreeBSD__
#define KRT_MAX_TABLES 256
#define KRT_USE_SETFIB_SCAN
#define KRT_USE_SETFIB_SOCK
#define KRT_USE_SYSCTL_NET_FIBS
#endif #endif
struct ks_msg #ifdef __OpenBSD__
#define KRT_MAX_TABLES (RT_TABLEID_MAX+1)
#define KRT_SHARED_SOCKET
#define KRT_USE_SYSCTL_7
#endif
#ifndef KRT_MAX_TABLES
#define KRT_MAX_TABLES 1
#endif
/* Dynamic max number of tables */
int krt_max_tables;
#ifdef KRT_USE_SYSCTL_NET_FIBS
static int
krt_get_max_tables(void)
{ {
struct rt_msghdr rtm; int fibs;
struct sockaddr_storage buf[RTAX_MAX]; size_t fibs_len = sizeof(fibs);
};
if (sysctlbyname("net.fibs", &fibs, &fibs_len, NULL, 0) < 0)
{
log(L_WARN "KRT: unable to get max number of fib tables: %m");
return 1;
}
return MIN(fibs, KRT_MAX_TABLES);
}
#else
static int
krt_get_max_tables(void)
{
return KRT_MAX_TABLES;
}
#endif /* KRT_USE_SYSCTL_NET_FIBS */
static int rt_sock = 0; /* setfib() syscall for FreeBSD scans */
#ifdef KRT_USE_SETFIB_SCAN
/*
static int krt_default_fib;
static int
krt_get_active_fib(void)
{
int fib;
size_t fib_len = sizeof(fib);
if (sysctlbyname("net.my_fibnum", &fib, &fib_len, NULL, 0) < 0)
{
log(L_WARN "KRT: unable to get active fib number: %m");
return 0;
}
return fib;
}
*/
extern int setfib(int fib);
#endif /* KRT_USE_SETFIB_SCAN */
/* table_id -> krt_proto map */
#ifdef KRT_SHARED_SOCKET
static struct krt_proto *krt_table_map[KRT_MAX_TABLES];
#endif
/* Route socket message processing */
int int
krt_capable(rte *e) krt_capable(rte *e)
@ -65,6 +160,16 @@ krt_capable(rte *e)
); );
} }
#ifndef RTAX_MAX
#define RTAX_MAX 8
#endif
struct ks_msg
{
struct rt_msghdr rtm;
struct sockaddr_storage buf[RTAX_MAX];
};
#define ROUNDUP(a) \ #define ROUNDUP(a) \
((a) > 0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long)) ((a) > 0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long))
@ -81,7 +186,7 @@ krt_capable(rte *e)
body += l;} body += l;}
static int static int
krt_sock_send(int cmd, rte *e) krt_send_route(struct krt_proto *p, int cmd, rte *e)
{ {
net *net = e->net; net *net = e->net;
rta *a = e->attrs; rta *a = e->attrs;
@ -103,13 +208,13 @@ krt_sock_send(int cmd, rte *e)
msg.rtm.rtm_flags = RTF_UP | RTF_PROTO1; msg.rtm.rtm_flags = RTF_UP | RTF_PROTO1;
if (net->n.pxlen == MAX_PREFIX_LENGTH) if (net->n.pxlen == MAX_PREFIX_LENGTH)
{
msg.rtm.rtm_flags |= RTF_HOST; msg.rtm.rtm_flags |= RTF_HOST;
}
else else
{
msg.rtm.rtm_addrs |= RTA_NETMASK; msg.rtm.rtm_addrs |= RTA_NETMASK;
}
#ifdef KRT_SHARED_SOCKET
msg.rtm.rtm_tableid = KRT_CF->sys.table_id;
#endif
#ifdef RTF_REJECT #ifdef RTF_REJECT
if(a->dest == RTD_UNREACHABLE) if(a->dest == RTD_UNREACHABLE)
@ -192,7 +297,7 @@ krt_sock_send(int cmd, rte *e)
l = body - (char *)&msg; l = body - (char *)&msg;
msg.rtm.rtm_msglen = l; msg.rtm.rtm_msglen = l;
if ((l = write(rt_sock, (char *)&msg, l)) < 0) { if ((l = write(p->sys.sk->fd, (char *)&msg, l)) < 0) {
log(L_ERR "KRT: Error sending route %I/%d to kernel: %m", net->n.prefix, net->n.pxlen); log(L_ERR "KRT: Error sending route %I/%d to kernel: %m", net->n.prefix, net->n.pxlen);
return -1; return -1;
} }
@ -201,16 +306,16 @@ krt_sock_send(int cmd, rte *e)
} }
void void
krt_replace_rte(struct krt_proto *p UNUSED, net *n, rte *new, rte *old, krt_replace_rte(struct krt_proto *p, net *n, rte *new, rte *old,
struct ea_list *eattrs UNUSED) struct ea_list *eattrs UNUSED)
{ {
int err = 0; int err = 0;
if (old) if (old)
krt_sock_send(RTM_DELETE, old); krt_send_route(p, RTM_DELETE, old);
if (new) if (new)
err = krt_sock_send(RTM_ADD, new); err = krt_send_route(p, RTM_ADD, new);
if (err < 0) if (err < 0)
n->n.flags |= KRF_SYNC_ERROR; n->n.flags |= KRF_SYNC_ERROR;
@ -221,8 +326,10 @@ krt_replace_rte(struct krt_proto *p UNUSED, net *n, rte *new, rte *old,
#define SKIP(ARG...) do { DBG("KRT: Ignoring route - " ARG); return; } while(0) #define SKIP(ARG...) do { DBG("KRT: Ignoring route - " ARG); return; } while(0)
static void static void
krt_read_rt(struct ks_msg *msg, struct krt_proto *p, int scan) krt_read_route(struct ks_msg *msg, struct krt_proto *p, int scan)
{ {
/* p is NULL iff KRT_SHARED_SOCKET and !scan */
rte *e; rte *e;
net *net; net *net;
sockaddr dst, gate, mask; sockaddr dst, gate, mask;
@ -244,6 +351,17 @@ krt_read_rt(struct ks_msg *msg, struct krt_proto *p, int scan)
if (flags & RTF_LLINFO) if (flags & RTF_LLINFO)
SKIP("link-local\n"); SKIP("link-local\n");
#ifdef KRT_SHARED_SOCKET
if (!scan)
{
int table_id = msg->rtm.rtm_tableid;
p = (table_id < KRT_MAX_TABLES) ? krt_table_map[table_id] : NULL;
if (!p)
SKIP("unknown table id %d\n", table_id);
}
#endif
GETADDR(&dst, RTA_DST); GETADDR(&dst, RTA_DST);
GETADDR(&gate, RTA_GATEWAY); GETADDR(&gate, RTA_GATEWAY);
GETADDR(&mask, RTA_NETMASK); GETADDR(&mask, RTA_NETMASK);
@ -594,17 +712,18 @@ krt_read_addr(struct ks_msg *msg)
ifa_delete(&ifa); ifa_delete(&ifa);
} }
static void
void
krt_read_msg(struct proto *p, struct ks_msg *msg, int scan) krt_read_msg(struct proto *p, struct ks_msg *msg, int scan)
{ {
/* p is NULL iff KRT_SHARED_SOCKET and !scan */
switch (msg->rtm.rtm_type) switch (msg->rtm.rtm_type)
{ {
case RTM_GET: case RTM_GET:
if(!scan) return; if(!scan) return;
case RTM_ADD: case RTM_ADD:
case RTM_DELETE: case RTM_DELETE:
krt_read_rt(msg, (struct krt_proto *)p, scan); krt_read_route(msg, (struct krt_proto *)p, scan);
break; break;
case RTM_IFANNOUNCE: case RTM_IFANNOUNCE:
krt_read_ifannounce(msg); krt_read_ifannounce(msg);
@ -621,14 +740,57 @@ krt_read_msg(struct proto *p, struct ks_msg *msg, int scan)
} }
} }
static void
krt_sysctl_scan(struct proto *p, pool *pool, byte **buf, size_t *bl, int cmd) /* Sysctl based scans */
static byte *krt_buffer;
static size_t krt_buflen, krt_bufmin;
static struct proto *krt_buffer_owner;
static byte *
krt_buffer_update(struct proto *p, size_t *needed)
{ {
byte *next; size_t req = *needed;
int mib[6];
size_t obl, needed; if ((req > krt_buflen) ||
((p == krt_buffer_owner) && (req < krt_bufmin)))
{
/* min buflen is 32 kB, step is 8 kB, or 128 kB if > 1 MB */
size_t step = (req < 0x100000) ? 0x2000 : 0x20000;
krt_buflen = (req < 0x6000) ? 0x8000 : (req + step);
krt_bufmin = (req < 0x8000) ? 0 : (req - 2*step);
if (krt_buffer)
mb_free(krt_buffer);
krt_buffer = mb_alloc(krt_pool, krt_buflen);
krt_buffer_owner = p;
}
*needed = krt_buflen;
return krt_buffer;
}
static void
krt_buffer_release(struct proto *p)
{
if (p == krt_buffer_owner)
{
mb_free(krt_buffer);
krt_buffer = NULL;
krt_buflen = 0;
krt_buffer_owner = 0;
}
}
static void
krt_sysctl_scan(struct proto *p, int cmd, int table_id)
{
byte *buf, *next;
int mib[7], mcnt;
size_t needed;
struct ks_msg *m; struct ks_msg *m;
int retries = 3; int retries = 3;
int rv;
mib[0] = CTL_NET; mib[0] = CTL_NET;
mib[1] = PF_ROUTE; mib[1] = PF_ROUTE;
@ -636,111 +798,249 @@ krt_sysctl_scan(struct proto *p, pool *pool, byte **buf, size_t *bl, int cmd)
mib[3] = BIRD_PF; mib[3] = BIRD_PF;
mib[4] = cmd; mib[4] = cmd;
mib[5] = 0; mib[5] = 0;
mcnt = 6;
try: #ifdef KRT_USE_SYSCTL_7
if (sysctl(mib, 6 , NULL , &needed, NULL, 0) < 0) if (table_id >= 0)
die("krt_sysctl_scan 1: %m");
obl = *bl;
while (needed > *bl) *bl *= 2;
while (needed < (*bl/2)) *bl /= 2;
if ((obl!=*bl) || !*buf)
{ {
if (*buf) mb_free(*buf); mib[6] = table_id;
if ((*buf = mb_alloc(pool, *bl)) == NULL) die("RT scan buf alloc"); mcnt = 7;
} }
#endif
if (sysctl(mib, 6 , *buf, &needed, NULL, 0) < 0) #ifdef KRT_USE_SETFIB_SCAN
if (table_id > 0)
if (setfib(table_id) < 0)
{ {
if (errno == ENOMEM) log(L_ERR "KRT: setfib(%d) failed: %m", table_id);
{
/* The buffer size changed since last sysctl ('needed' is not changed) */
if (retries--)
goto try;
log(L_ERR "KRT: Route scan failed");
return; return;
} }
die("krt_sysctl_scan 2: %m"); #endif
try:
rv = sysctl(mib, mcnt, NULL, &needed, NULL, 0);
if (rv < 0)
{
/* OpenBSD returns EINVAL for not yet used tables */
if ((errno == EINVAL) && (table_id > 0))
goto exit;
log(L_ERR "KRT: Route scan estimate failed: %m");
goto exit;
} }
for (next = *buf; next < (*buf + needed); next += m->rtm.rtm_msglen) /* The table is empty */
if (needed == 0)
goto exit;
buf = krt_buffer_update(p, &needed);
rv = sysctl(mib, mcnt, buf, &needed, NULL, 0);
if (rv < 0)
{
/* The buffer size changed since last sysctl ('needed' is not changed) */
if ((errno == ENOMEM) && retries--)
goto try;
log(L_ERR "KRT: Route scan failed: %m");
goto exit;
}
#ifdef KRT_USE_SETFIB_SCAN
if (table_id > 0)
if (setfib(0) < 0)
die("KRT: setfib(%d) failed: %m", 0);
#endif
/* Process received messages */
for (next = buf; next < (buf + needed); next += m->rtm.rtm_msglen)
{ {
m = (struct ks_msg *)next; m = (struct ks_msg *)next;
krt_read_msg(p, m, 1); krt_read_msg(p, m, 1);
} }
}
static byte *krt_buffer = NULL; return;
static byte *kif_buffer = NULL;
static size_t krt_buflen = 32768; exit:
static size_t kif_buflen = 4096; krt_buffer_release(p);
#ifdef KRT_USE_SETFIB_SCAN
if (table_id > 0)
if (setfib(0) < 0)
die("KRT: setfib(%d) failed: %m", 0);
#endif
}
void void
krt_do_scan(struct krt_proto *p) krt_do_scan(struct krt_proto *p)
{ {
krt_sysctl_scan(&p->p, p->p.pool, &krt_buffer, &krt_buflen, NET_RT_DUMP); krt_sysctl_scan(&p->p, NET_RT_DUMP, KRT_CF->sys.table_id);
} }
void void
kif_do_scan(struct kif_proto *p) kif_do_scan(struct kif_proto *p)
{ {
if_start_update(); if_start_update();
krt_sysctl_scan(&p->p, p->p.pool, &kif_buffer, &kif_buflen, NET_RT_IFLIST); krt_sysctl_scan(&p->p, NET_RT_IFLIST, -1);
if_end_update(); if_end_update();
} }
/* Kernel sockets */
static int static int
krt_sock_hook(sock *sk, int size UNUSED) krt_sock_hook(sock *sk, int size UNUSED)
{ {
struct ks_msg msg; struct ks_msg msg;
int l = read(sk->fd, (char *)&msg, sizeof(msg)); int l = read(sk->fd, (char *)&msg, sizeof(msg));
if(l <= 0) if (l <= 0)
log(L_ERR "krt-sock: read failed"); log(L_ERR "krt-sock: read failed");
else else
krt_read_msg((struct proto *)sk->data, &msg, 0); krt_read_msg((struct proto *) sk->data, &msg, 0);
return 0; return 0;
} }
void static sock *
krt_sys_start(struct krt_proto *x) krt_sock_open(pool *pool, void *data, int table_id)
{ {
sock *sk_rt; sock *sk;
static int ks_open_tried = 0; int fd;
if (ks_open_tried) fd = socket(PF_ROUTE, SOCK_RAW, AF_UNSPEC);
return; if (fd < 0)
ks_open_tried = 1;
DBG("KRT: Opening kernel socket\n");
if( (rt_sock = socket(PF_ROUTE, SOCK_RAW, AF_UNSPEC)) < 0)
die("Cannot open kernel socket for routes"); die("Cannot open kernel socket for routes");
sk_rt = sk_new(krt_pool); #ifdef KRT_USE_SETFIB_SOCK
sk_rt->type = SK_MAGIC; if (table_id > 0)
sk_rt->rx_hook = krt_sock_hook; {
sk_rt->fd = rt_sock; if (setsockopt(fd, SOL_SOCKET, SO_SETFIB, &table_id, sizeof(table_id)) < 0)
sk_rt->data = x; die("Cannot set FIB %d for kernel socket: %m", table_id);
if (sk_open(sk_rt)) }
#endif
sk = sk_new(pool);
sk->type = SK_MAGIC;
sk->rx_hook = krt_sock_hook;
sk->fd = fd;
sk->data = data;
if (sk_open(sk) < 0)
bug("krt-sock: sk_open failed"); bug("krt-sock: sk_open failed");
return sk;
}
#ifdef KRT_SHARED_SOCKET
static sock *krt_sock;
static int krt_sock_count;
static void
krt_sock_open_shared(void)
{
if (!krt_sock_count)
krt_sock = krt_sock_open(krt_pool, NULL, -1);
krt_sock_count++;
}
static void
krt_sock_close_shared(void)
{
krt_sock_count--;
if (!krt_sock_count)
{
rfree(krt_sock);
krt_sock = NULL;
}
} }
void void
krt_sys_shutdown(struct krt_proto *x UNUSED) krt_sys_start(struct krt_proto *p)
{ {
if (!krt_buffer) krt_table_map[KRT_CF->sys.table_id] = p;
return;
mb_free(krt_buffer); krt_sock_open_shared();
krt_buffer = NULL; p->sys.sk = krt_sock;
} }
void
krt_sys_shutdown(struct krt_proto *p)
{
krt_sock_close_shared();
p->sys.sk = NULL;
krt_table_map[KRT_CF->sys.table_id] = NULL;
krt_buffer_release(&p->p);
}
#else
void
krt_sys_start(struct krt_proto *p)
{
p->sys.sk = krt_sock_open(p->p.pool, p, KRT_CF->sys.table_id);
}
void
krt_sys_shutdown(struct krt_proto *p)
{
rfree(p->sys.sk);
p->sys.sk = NULL;
krt_buffer_release(&p->p);
}
#endif /* KRT_SHARED_SOCKET */
/* KRT configuration callbacks */
static u32 krt_table_cf[(KRT_MAX_TABLES+31) / 32];
int
krt_sys_reconfigure(struct krt_proto *p UNUSED, struct krt_config *n, struct krt_config *o)
{
return n->sys.table_id == o->sys.table_id;
}
void
krt_sys_preconfig(struct config *c UNUSED)
{
krt_max_tables = krt_get_max_tables();
bzero(&krt_table_cf, sizeof(krt_table_cf));
}
void
krt_sys_postconfig(struct krt_config *x)
{
u32 *tbl = krt_table_cf;
int id = x->sys.table_id;
if (tbl[id/32] & (1 << (id%32)))
cf_error("Multiple kernel syncers defined for table #%d", id);
tbl[id/32] |= (1 << (id%32));
}
void krt_sys_init_config(struct krt_config *c)
{
c->sys.table_id = 0; /* Default table */
}
void krt_sys_copy_config(struct krt_config *d, struct krt_config *s)
{
d->sys.table_id = s->sys.table_id;
}
/* KIF misc code */
void void
kif_sys_start(struct kif_proto *p UNUSED) kif_sys_start(struct kif_proto *p UNUSED)
@ -748,12 +1048,8 @@ kif_sys_start(struct kif_proto *p UNUSED)
} }
void void
kif_sys_shutdown(struct kif_proto *p UNUSED) kif_sys_shutdown(struct kif_proto *p)
{ {
if (!kif_buffer) krt_buffer_release(&p->p);
return;
mb_free(kif_buffer);
kif_buffer = NULL;
} }

View file

@ -9,6 +9,7 @@
#ifndef _BIRD_KRT_SYS_H_ #ifndef _BIRD_KRT_SYS_H_
#define _BIRD_KRT_SYS_H_ #define _BIRD_KRT_SYS_H_
struct birdsock;
/* Kernel interfaces */ /* Kernel interfaces */
@ -30,20 +31,18 @@ static inline void kif_sys_copy_config(struct kif_config *d UNUSED, struct kif_c
/* Kernel routes */ /* Kernel routes */
extern int krt_max_tables;
struct krt_params { struct krt_params {
int table_id; /* Kernel table ID we sync with */
}; };
struct krt_state { struct krt_state {
struct birdsock *sk;
}; };
static inline void krt_sys_init(struct krt_proto *p UNUSED) { } static inline void krt_sys_init(struct krt_proto *p UNUSED) { }
static inline int krt_sys_reconfigure(struct krt_proto *p UNUSED, struct krt_config *n UNUSED, struct krt_config *o UNUSED) { return 1; }
static inline void krt_sys_preconfig(struct config *c UNUSED) { }
static inline void krt_sys_postconfig(struct krt_config *c UNUSED) { }
static inline void krt_sys_init_config(struct krt_config *c UNUSED) { }
static inline void krt_sys_copy_config(struct krt_config *d UNUSED, struct krt_config *s UNUSED) { }
#endif #endif

View file

@ -10,6 +10,7 @@
#define CONFIG_AUTO_ROUTES #define CONFIG_AUTO_ROUTES
#define CONFIG_SELF_CONSCIOUS #define CONFIG_SELF_CONSCIOUS
#define CONFIG_MULTIPLE_TABLES
#define CONFIG_SKIP_MC_BIND #define CONFIG_SKIP_MC_BIND

View file

@ -8,6 +8,7 @@
#define CONFIG_AUTO_ROUTES #define CONFIG_AUTO_ROUTES
#define CONFIG_SELF_CONSCIOUS #define CONFIG_SELF_CONSCIOUS
#define CONFIG_MULTIPLE_TABLES
#define CONFIG_SKIP_MC_BIND #define CONFIG_SKIP_MC_BIND