diff --git a/NEWS b/NEWS index f7e384b4..f689d195 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,20 @@ +Version 1.6.0 (2016-04-29) + o Major RIP protocol redesign + o New Babel routing protocol + o BGP multipath support + o KRT: Add support for plenty of kernel route metrics + o KRT: Allow more than 256 routing tables + o Static: Allow to specify attributes for static routes + o Static: Support for BFD controlled static routes + o FreeBSD: Setup password for BGP MD5 authentication + o IO: Remove socket number limit + o Plenty of bug fixes + + Upgrade notes: + + For RIP, most protocol options were moved to interface blocks. + + Version 1.5.0 (2015-04-20) o Major OSPF protocol redesign. o OSPFv2 multi-instance extension (RFC 6549). diff --git a/client/birdc.c b/client/birdc.c index ccf758be..8aa01c17 100644 --- a/client/birdc.c +++ b/client/birdc.c @@ -153,7 +153,7 @@ input_init(void) // readline library does strange things when stdin is nonblocking. // if (fcntl(0, F_SETFL, O_NONBLOCK) < 0) - // die("fcntl: %m"); + // DIE("fcntl"); } static void diff --git a/client/birdcl.c b/client/birdcl.c index 2d5e1067..7b567a9f 100644 --- a/client/birdcl.c +++ b/client/birdcl.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -109,7 +110,7 @@ more_begin(void) tty.c_lflag &= (~ICANON); if (tcsetattr (0, TCSANOW, &tty) < 0) - die("tcsetattr: %m"); + DIE("tcsetattr"); more_active = 1; } @@ -120,7 +121,7 @@ more_end(void) more_active = 0; if (tcsetattr (0, TCSANOW, &stored_tty) < 0) - die("tcsetattr: %m"); + DIE("tcsetattr"); } static void @@ -137,7 +138,7 @@ input_init(void) return; if (tcgetattr(0, &stored_tty) < 0) - die("tcgetattr: %m"); + DIE("tcgetattr"); if (signal(SIGINT, sig_handler) == SIG_IGN) signal(SIGINT, SIG_IGN); diff --git a/client/client.c b/client/client.c index b938f344..0d4bdf3e 100644 --- a/client/client.c +++ b/client/client.c @@ -37,7 +37,7 @@ #define SERVER_READ_BUF_LEN 4096 -static char *opt_list = "s:vr"; +static char *opt_list = 
"s:vrl"; static int verbose, restricted, once; static char *init_cmd; @@ -59,13 +59,14 @@ int term_lns, term_cls; static void usage(char *name) { - fprintf(stderr, "Usage: %s [-s ] [-v] [-r]\n", name); + fprintf(stderr, "Usage: %s [-s ] [-v] [-r] [-l]\n", name); exit(1); } static void parse_args(int argc, char **argv) { + int server_changed = 0; int c; while ((c = getopt(argc, argv, opt_list)) >= 0) @@ -73,6 +74,7 @@ parse_args(int argc, char **argv) { case 's': server_path = optarg; + server_changed = 1; break; case 'v': verbose++; @@ -80,6 +82,10 @@ parse_args(int argc, char **argv) case 'r': restricted = 1; break; + case 'l': + if (!server_changed) + server_path = xbasename(server_path); + break; default: usage(argv[0]); } @@ -242,7 +248,7 @@ server_connect(void) server_fd = socket(AF_UNIX, SOCK_STREAM, 0); if (server_fd < 0) - die("Cannot create socket: %m"); + DIE("Cannot create socket"); if (strlen(server_path) >= sizeof(sa.sun_path)) die("server_connect: path too long"); @@ -251,9 +257,9 @@ server_connect(void) sa.sun_family = AF_UNIX; strcpy(sa.sun_path, server_path); if (connect(server_fd, (struct sockaddr *) &sa, SUN_LEN(&sa)) < 0) - die("Unable to connect to server control socket (%s): %m", server_path); + DIE("Unable to connect to server control socket (%s)", server_path); if (fcntl(server_fd, F_SETFL, O_NONBLOCK) < 0) - die("fcntl: %m"); + DIE("fcntl"); } @@ -303,13 +309,13 @@ server_read(void) redo: c = read(server_fd, server_read_pos, server_read_buf + sizeof(server_read_buf) - server_read_pos); if (!c) - die("Connection closed by server."); + die("Connection closed by server"); if (c < 0) { if (errno == EINTR) goto redo; else - die("Server read error: %m"); + DIE("Server read error"); } start = server_read_buf; @@ -360,7 +366,7 @@ select_loop(void) if (errno == EINTR) continue; else - die("select: %m"); + DIE("select"); } if (FD_ISSET(0, &select_fds)) @@ -393,7 +399,7 @@ wait_for_write(int fd) if (errno == EINTR) continue; else - die("select: %m"); 
+ DIE("select"); } if (FD_ISSET(server_fd, &set)) @@ -420,7 +426,7 @@ server_send(char *cmd) else if (errno == EINTR) continue; else - die("Server write error: %m"); + DIE("Server write error"); } else { @@ -430,19 +436,6 @@ server_send(char *cmd) } } - -/* XXXX - - get_term_size(); - - if (tcgetattr(0, &tty_save) != 0) - { - perror("tcgetattr error"); - return(EXIT_FAILURE); - } - } - - */ int main(int argc, char **argv) { diff --git a/client/client.h b/client/client.h index b194a772..f9693def 100644 --- a/client/client.h +++ b/client/client.h @@ -34,3 +34,6 @@ char *cmd_expand(char *cmd); /* client.c */ void submit_command(char *cmd_raw); + +/* die() with system error messages */ +#define DIE(x, y...) die(x ": %s", ##y, strerror(errno)) diff --git a/client/commands.c b/client/commands.c index 226ae048..2dae23e1 100644 --- a/client/commands.c +++ b/client/commands.c @@ -60,7 +60,7 @@ cmd_build_tree(void) if (!new) { int size = sizeof(struct cmd_node) + c-d; - new = xmalloc(size); + new = malloc(size); bzero(new, size); *old->plastson = new; old->plastson = &new->sibling; @@ -314,7 +314,7 @@ cmd_expand(char *cmd) puts("No such command. Press `?' for help."); return NULL; } - b = xmalloc(strlen(n->cmd->command) + strlen(args) + 1); + b = malloc(strlen(n->cmd->command) + strlen(args) + 1); sprintf(b, "%s%s", n->cmd->command, args); return b; } diff --git a/client/util.c b/client/util.c index 050224b9..c35cf8f4 100644 --- a/client/util.c +++ b/client/util.c @@ -21,8 +21,11 @@ vlog(const char *msg, va_list args) { char buf[1024]; - if (bvsnprintf(buf, sizeof(buf)-1, msg, args) < 0) - bsprintf(buf + sizeof(buf) - 100, " ... "); + int n = vsnprintf(buf, sizeof(buf), msg, args); + if (n < 0) + snprintf(buf, sizeof(buf), "???"); + if (n >= sizeof(buf)) + snprintf(buf + sizeof(buf) - 100, 100, " ... 
"); fputs(buf, stderr); fputc('\n', stderr); } diff --git a/conf/cf-lex.l b/conf/cf-lex.l index ccf5826a..dd99b497 100644 --- a/conf/cf-lex.l +++ b/conf/cf-lex.l @@ -563,6 +563,7 @@ cf_lex_init_kh(void) /** * cf_lex_init - initialize the lexer * @is_cli: true if we're going to parse CLI command, false for configuration + * @c: configuration structure * * cf_lex_init() initializes the lexical analyzer and prepares it for * parsing of a new input. diff --git a/configure.in b/configure.in index f1697c12..a0db0fbd 100644 --- a/configure.in +++ b/configure.in @@ -167,7 +167,7 @@ fi AC_SUBST(iproutedir) -# all_protocols="$proto_bfd bgp ospf pipe radv rip static" +# all_protocols="$proto_bfd babel bgp ospf pipe radv rip static" all_protocols="$proto_bfd ospf pipe radv rip static" all_protocols=`echo $all_protocols | sed 's/ /,/g'` diff --git a/doc/bird.sgml b/doc/bird.sgml index 5e5aeee4..014225d1 100644 --- a/doc/bird.sgml +++ b/doc/bird.sgml @@ -171,6 +171,11 @@ BIRD executable by configuring out routing protocols you don't use, and -f run bird in foreground. + -l + look for a configuration file and a communication socket in the current + working directory instead of in default system locations. However, paths + specified by options -R apply graceful restart recovery after start. @@ -190,7 +195,7 @@ privileges (capabilities CAP_NET_*). Note that the control socket is created before the privileges are dropped, but the config file is read after that. The privilege restriction is not implemented in BSD port of BIRD. -

A nonprivileged user (as an argument to An unprivileged user (as an argument to show interfaces [summary] + Show the list of interfaces. For each interface, print its type, state, + MTU and addresses assigned. + show protocols [all] Show list of protocol instances along with tables they are connected to and protocol status, possibly giving verbose information, if show rip interfaces [ + Show detailed information about RIP interfaces. + + show rip neighbors [ + Show a list of RIP neighbors and associated state. + show static [ Show detailed information about static routes. show bfd sessions [ Show information about BFD sessions. - show interfaces [summary] - Show the list of interfaces. For each interface, print its type, state, - MTU and addresses assigned. - show symbols [table|filter|function|protocol|template|roa| Show the list of symbols defined in the configuration (names of protocols, routing tables etc.). @@ -824,7 +835,7 @@ This argument can be omitted if there exists only a single instance. configuration could be either confirmed using Protocols +Babel + +Introduction + +

The Babel protocol (RFC6126) is a loop-avoiding distance-vector routing +protocol that is robust and efficient both in ordinary wired networks and in +wireless mesh networks. Babel is conceptually very simple in its operation and +"just works" in its default configuration, though some configuration is possible +and in some cases desirable. + +

While the Babel protocol is dual stack (i.e., can carry both IPv4 and IPv6 +routes over the same IPv6 transport), BIRD presently implements only the IPv6 +subset of the protocol. No Babel extensions are implemented, but the BIRD +implementation can coexist with implementations using the extensions (and will +just ignore extension messages). + +

The Babel protocol implementation in BIRD is currently in alpha stage. + +Configuration + +

Babel supports no global configuration options apart from those common to all +other protocols, but supports the following per-interface configuration options: + + +protocol babel [] { + interface { + type ; + rxcost ; + hello interval ; + update interval ; + port ; + tx class|dscp ; + tx priority ; + rx buffer ; + tx length ; + check link ; + }; +} + + + + type wired|wireless + This option specifies the interface type: Wired or wireless. Wired + interfaces are considered more reliable, and so the default hello + interval is higher, and a neighbour is considered unreachable after only + a small number of "hello" packets are lost. On wireless interfaces, + hello packets are sent more often, and the ETX link quality estimation + technique is used to compute the metrics of routes discovered over this + interface. This technique will gradually degrade the metric of routes + when packets are lost rather than the more binary up/down mechanism of + wired type links. Default: rxcost + This specifies the RX cost of the interface. The route metrics will be + computed from this value with a mechanism determined by the interface + hello interval + Interval at which periodic "hello" messages are sent on this interface, + in seconds. Default: 4 seconds. + + update interval + Interval at which periodic (full) updates are sent. Default: 4 times the + hello interval. + + port + This option selects an UDP port to operate on. The default is to operate + on port 6696 as specified in the Babel RFC. + + tx class|dscp|priority + These options specify the ToS/DiffServ/Traffic class/Priority of the + outgoing Babel packets. See common + option for detailed description. + + rx buffer + This option specifies the size of buffers used for packet processing. + The buffer size should be bigger than maximal size of received packets. + The default value is the interface MTU, and the value will be clamped to a + minimum of 512 bytes + IP packet overhead. 
+ + tx length + This option specifies the maximum length of generated Babel packets. To + avoid IP fragmentation, it should not exceed the interface MTU value. + The default value is the interface MTU value, and the value will be + clamped to a minimum of 512 bytes + IP packet overhead. + + check link + If set, the hardware link state (as reported by OS) is taken into + consideration. When the link disappears (e.g. an ethernet cable is + unplugged), neighbors are immediately considered unreachable and all + routes received from them are withdrawn. It is possible that some + hardware drivers or platforms do not implement this feature. Default: + yes. + + +

The only configurable thing about direct is what interfaces it watches: +

There are just a few configuration options for the Direct protocol:

interface @@ -2161,6 +2279,12 @@ on Linux systems BIRD cannot change non-BIRD route in the kernel routing table. interfaces), just use this clause. See common option for detailed description. The Direct protocol uses extended interface clauses. + + check link + If enabled, a hardware link state (reported by OS) is taken into + consideration. Routes for directly connected networks are generated only + if link up is reported and they are withdrawn when link disappears + (e.g., an ethernet cable is unplugged). Default value is no.

Direct device routes don't contain any specific attributes. @@ -2634,7 +2758,7 @@ protocol ospf <name> { dead num When the router does not receive any messages from a neighbor in secondary switch On BSD systems, older versions of BIRD supported OSPFv2 only for the @@ -3365,6 +3489,11 @@ protocol rip [<name>] { RIP, the option is not supported for RIPng, as no further versions are defined. + version only + Regardless of RIP version configured for the interface, BIRD accepts + incoming packets of any RIP version. This option restricts accepted + packets to the configured version. Default: no. + split horizon Split horizon is a scheme for preventing routing loops. When split horizon is active, routes are not regularly propagated back to the @@ -3509,7 +3638,7 @@ default route to prevent routing loops). packets to a neighboring router, multipath routes specifying several (possibly weighted) neighboring routers, device routes specifying forwarding to hosts on a directly connected network, recursive routes computing their nexthops by doing -route table lookups for a given IP and special routes (sink, blackhole etc.) +route table lookups for a given IP, and special routes (sink, blackhole etc.) which specify a special action to be done instead of forwarding the packet.

When the particular destination is not available (the interface is down or @@ -3517,8 +3646,26 @@ the next hop of the route is not a neighbor at the moment), Static just uninstalls the route from the table it is connected to and adds it again as soon as the destination becomes adjacent again. -

The Static protocol does not have many configuration options. The definition -of the protocol contains mainly a list of static routes: +

There are three classes of definitions in Static protocol configuration -- +global options, static route definitions, and per-route options. Usually, the +definition of the protocol contains mainly a list of static routes. + +

Global options: + + + check link + If set, hardware link states of network interfaces are taken into + consideration. When link disappears (e.g. ethernet cable is unplugged), + static routes directing to that interface are removed. It is possible + that some hardware drivers or platforms do not implement this feature. + Default: off. + + igp table + Specifies a table that is used for route table lookups of recursive + routes. Default: the same table as the protocol is connected to. + + +

Route definitions (each may also contain a block of per-route options): route @@ -3526,9 +3673,9 @@ of the protocol contains mainly a list of static routes: interface can be specified as a part of the address (e.g., route + route Static multipath route. Contains several nexthops (gateways), possibly - with their weights. + with their weights. route Static device route through an interface to hosts on a directly @@ -3542,17 +3689,33 @@ of the protocol contains mainly a list of static routes: Special routes specifying to silently drop the packet, return it as unreachable or return it as administratively prohibited. First two targets are also known as - check link - If set, hardware link states of network interfaces are taken into - consideration. When link disappears (e.g. ethernet cable is unplugged), - static routes directing to that interface are removed. It is possible - that some hardware drivers or platforms do not implement this feature. - Default: off. +

Per-route options: - igp table - Specifies a table that is used for route table lookups of recursive - routes. Default: the same table as the protocol is connected to. + + bfd + The Static protocol could use BFD protocol for next hop liveness + detection. If enabled, a BFD session to the route next hop is created + and the static route is BFD-controlled -- the static route is announced + only if the next hop liveness is confirmed by BFD. If the BFD session + fails, the static route is removed. Note that this is a bit different + compared to other protocols, which may use BFD as an advisory mechanism + for fast failure detection but ignore it if a BFD session is not even + established. + + This option can be used for static routes with a direct next hop, or + also for individual next hops in a static multipath route (see + above). Note that BFD protocol also has to be configured, see + section for details. Default value is no. + + + This is a special option that allows filter expressions to be configured + on per-route basis. Can be used multiple times. These expressions are + evaluated when the route is originated, similarly to the import filter + of the static protocol. This is especially useful for configuring route + attributes, e.g.,

Static routes have no specific attributes. @@ -3561,14 +3724,23 @@ of the protocol contains mainly a list of static routes:

protocol static { - table testable; # Connect to a non-default routing table + table testable; # Connect to a non-default routing table + check link; # Advertise routes only if link is up route 0.0.0.0/0 via 198.51.100.130; # Default route - route 10.0.0.0/8 multipath # Multipath route + route 10.0.0.0/8 multipath # Multipath route via 198.51.100.10 weight 2 - via 198.51.100.20 + via 198.51.100.20 bfd # BFD-controlled next hop via 192.0.2.1; route 203.0.113.0/24 unreachable; # Sink route - route 10.2.0.0/24 via "arc0"; # Secondary network + route 10.2.0.0/24 via "arc0"; # Secondary network + route 192.168.10.0/24 via 198.51.100.100 { + ospf_metric1 = 20; # Set extended attribute + } + route 192.168.10.0/24 via 198.51.100.100 { + ospf_metric2 = 100; # Set extended attribute + ospf_tag = 2; # Set extended attribute + bfd; # BFD-controlled route + } } diff --git a/doc/reply_codes b/doc/reply_codes index 79a7eb92..3a7f2c90 100644 --- a/doc/reply_codes +++ b/doc/reply_codes @@ -57,6 +57,9 @@ Reply codes of BIRD command-line interface 1020 Show BFD sessions 1021 Show RIP interface 1022 Show RIP neighbors +1023 Show Babel interfaces +1024 Show Babel neighbors +1025 Show Babel entries 8000 Reply too long 8001 Route not found diff --git a/lib/Makefile b/lib/Makefile index a9aae66f..8e372bd3 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -1,7 +1,3 @@ -src := bitops.c checksum.c ip.c lists.c md5.c net.c patmatch.c printf.c sha1.c sha256.c sha512.c slists.c xmalloc.c -obj := $(src-o-files) -$(all-client) - src := bitops.c checksum.c event.c idm.c ip.c lists.c md5.c mempool.c net.c patmatch.c printf.c resource.c sha1.c sha256.c sha512.c slab.c slists.c tbf.c xmalloc.c obj := $(src-o-files) $(all-daemon) diff --git a/lib/birdlib.h b/lib/birdlib.h index 78df81d6..188e59b2 100644 --- a/lib/birdlib.h +++ b/lib/birdlib.h @@ -61,6 +61,7 @@ static inline int u64_cmp(u64 i1, u64 i2) #define NORET __attribute__((noreturn)) #define UNUSED __attribute__((unused)) +#define PACKED 
__attribute__((packed)) /* Microsecond time */ diff --git a/lib/bitops.h b/lib/bitops.h index 82bef699..9f954374 100644 --- a/lib/bitops.h +++ b/lib/bitops.h @@ -25,5 +25,6 @@ u32 u32_log2(u32 v); static inline u32 u32_hash(u32 v) { return v * 2902958171u; } -#endif +static inline u8 u32_popcount(u32 v) { return __builtin_popcount(v); } +#endif diff --git a/lib/ip.h b/lib/ip.h index ffc1b232..6541ce1e 100644 --- a/lib/ip.h +++ b/lib/ip.h @@ -26,6 +26,7 @@ #define IP6_OSPF_ALL_ROUTERS ipa_build6(0xFF020000, 0, 0, 5) #define IP6_OSPF_DES_ROUTERS ipa_build6(0xFF020000, 0, 0, 6) #define IP6_RIP_ROUTERS ipa_build6(0xFF020000, 0, 0, 9) +#define IP6_BABEL_ROUTERS ipa_build6(0xFF020000, 0, 0, 0x00010006) #define IP4_NONE _MI4(0) #define IP6_NONE _MI6(0,0,0,0) diff --git a/lib/printf.c b/lib/printf.c index 318cee2c..844f5969 100644 --- a/lib/printf.c +++ b/lib/printf.c @@ -124,9 +124,10 @@ static char * number(char * str, long num, int base, int size, int precision, * standard IP address width which depends on whether we use IPv4 or IPv6; |%I4| * or |%I6| can be used for explicit ip4_addr / ip6_addr arguments, |%N| for * generic network addresses (net_addr *), |%R| for Router / Network ID (u32 - * value printed as IPv4 address) and |%m| resp. |%M| for error messages (uses - * strerror() to translate @errno code to message text). On the other hand, it - * doesn't support floating point numbers. + * value printed as IPv4 address), |%lR| for 64bit Router / Network ID (u64 + * value printed as eight :-separated octets) and |%m| resp. |%M| for error + * messages (uses strerror() to translate @errno code to message text). On the + * other hand, it doesn't support floating point numbers. * * Result: number of characters of the output string or -1 if * the buffer space was insufficient. 
@@ -137,6 +138,7 @@ int bvsnprintf(char *buf, int size, const char *fmt, va_list args) unsigned long num; int i, base; u32 x; + u64 X; char *str, *start; const char *s; char ipbuf[NET_MAX_TEXT_LENGTH+1]; @@ -338,8 +340,23 @@ int bvsnprintf(char *buf, int size, const char *fmt, va_list args) /* Router/Network ID - essentially IPv4 address in u32 value */ case 'R': - x = va_arg(args, u32); - ip4_ntop(ip4_from_u32(x), ipbuf); + if (qualifier == 'l') { + X = va_arg(args, u64); + bsprintf(ipbuf, "%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x", + ((X >> 56) & 0xff), + ((X >> 48) & 0xff), + ((X >> 40) & 0xff), + ((X >> 32) & 0xff), + ((X >> 24) & 0xff), + ((X >> 16) & 0xff), + ((X >> 8) & 0xff), + (X & 0xff)); + } + else + { + x = va_arg(args, u32); + ip4_ntop(ip4_from_u32(x), ipbuf); + } s = ipbuf; goto str; diff --git a/lib/socket.h b/lib/socket.h index 91ae9db3..d12ea3c5 100644 --- a/lib/socket.h +++ b/lib/socket.h @@ -79,7 +79,7 @@ int sk_leave_group(sock *s, ip_addr maddr); /* Leave multicast group on sk iface int sk_setup_broadcast(sock *s); int sk_set_ttl(sock *s, int ttl); /* Set transmit TTL for given socket */ int sk_set_min_ttl(sock *s, int ttl); /* Set minimal accepted TTL for given socket */ -int sk_set_md5_auth(sock *s, ip_addr a, struct iface *ifa, char *passwd); +int sk_set_md5_auth(sock *s, ip_addr local, ip_addr remote, struct iface *ifa, char *passwd, int setkey); int sk_set_ipv6_checksum(sock *s, int offset); int sk_set_icmp6_filter(sock *s, int p1, int p2); void sk_log_error(sock *s, const char *p); diff --git a/lib/string.h b/lib/string.h index 218f7b1c..9af49b9e 100644 --- a/lib/string.h +++ b/lib/string.h @@ -24,4 +24,12 @@ void buffer_puts(buffer *buf, const char *str); int patmatch(const byte *pat, const byte *str); +static inline char *xbasename(const char *str) +{ + char *s = strrchr(str, '/'); + return s ? 
s+1 : (char *) str; +} + +#define ROUTER_ID_64_LENGTH 23 + #endif diff --git a/misc/bird.spec b/misc/bird.spec index e6b699a0..857f03e5 100644 --- a/misc/bird.spec +++ b/misc/bird.spec @@ -1,6 +1,6 @@ Summary: BIRD Internet Routing Daemon Name: bird -Version: 1.5.0 +Version: 1.6.0 Release: 1 Copyright: GPL Group: Networking/Daemons diff --git a/nest/config.Y b/nest/config.Y index 890a6d09..2961dafb 100644 --- a/nest/config.Y +++ b/nest/config.Y @@ -357,6 +357,7 @@ dev_proto: | dev_proto proto_item ';' | dev_proto proto_channel ';' | dev_proto dev_iface_patt ';' + | dev_proto CHECK LINK bool ';' { DIRECT_CFG->check_link = $4; } ; dev_iface_init: diff --git a/nest/neighbor.c b/nest/neighbor.c index 69f09423..2c7f9b84 100644 --- a/nest/neighbor.c +++ b/nest/neighbor.c @@ -340,7 +340,7 @@ neigh_if_link(struct iface *i) /** * neigh_ifa_update: notify neighbor cache about interface address add or remove event - * @ifa: interface address in question + * @a: interface address in question * * Tell the neighbor cache that an address was added or removed. * diff --git a/nest/proto-hooks.c b/nest/proto-hooks.c index 5923ff67..92863f8e 100644 --- a/nest/proto-hooks.c +++ b/nest/proto-hooks.c @@ -148,6 +148,7 @@ void get_route_info(rte *e, byte *buf, ea_list *attrs) * get_attr - get attribute information * @a: an extended attribute * @buf: buffer to be filled with attribute information + * @buflen: a length of the @buf parameter * * The get_attr() hook is called by the core to obtain a user friendly * representation of an extended route attribute. It can either leave diff --git a/nest/proto.c b/nest/proto.c index ce859f13..f2416748 100644 --- a/nest/proto.c +++ b/nest/proto.c @@ -1068,6 +1068,7 @@ graceful_restart_init(void) /** * graceful_restart_done - finalize graceful restart + * @t: unused * * When there are no locks on graceful restart, the functions finalizes the * graceful restart recovery. 
Protocols postponing route export until the end of @@ -1258,6 +1259,9 @@ protos_build(void) proto_build(&proto_bfd); bfd_init_all(); #endif +#ifdef CONFIG_BABEL + proto_build(&proto_babel); +#endif proto_pool = rp_new(&root_pool, "Protocols"); proto_shutdown_timer = tm_new(proto_pool); diff --git a/nest/protocol.h b/nest/protocol.h index 2d640504..4b7bfdf3 100644 --- a/nest/protocol.h +++ b/nest/protocol.h @@ -81,7 +81,7 @@ void protos_dump_all(void); extern struct protocol proto_device, proto_radv, proto_rip, proto_static, - proto_ospf, proto_pipe, proto_bgp, proto_bfd; + proto_ospf, proto_pipe, proto_bgp, proto_bfd, proto_babel; /* * Routing Protocol Instance diff --git a/nest/route.h b/nest/route.h index 865b0907..b5885ee3 100644 --- a/nest/route.h +++ b/nest/route.h @@ -228,6 +228,12 @@ typedef struct rte { struct { u8 suppressed; /* Used for deterministic MED comparison */ } bgp; +#endif +#ifdef CONFIG_BABEL + struct { + u16 metric; /* Babel metric */ + u64 router_id; /* Babel router id */ + } babel; #endif struct { /* Routes generated by krt sync (both temporary and inherited ones) */ s8 src; /* Alleged route source (see krt.h) */ @@ -377,6 +383,7 @@ typedef struct rta { #define RTS_OSPF_EXT2 10 /* OSPF external route type 2 */ #define RTS_BGP 11 /* BGP route */ #define RTS_PIPE 12 /* Inter-table wormhole */ +#define RTS_BABEL 13 /* Babel route */ #define RTC_UNICAST 0 #define RTC_BROADCAST 1 @@ -425,7 +432,8 @@ typedef struct eattr { #define EAP_RIP 2 /* RIP */ #define EAP_OSPF 3 /* OSPF */ #define EAP_KRT 4 /* Kernel route attributes */ -#define EAP_MAX 5 +#define EAP_BABEL 5 /* Babel attributes */ +#define EAP_MAX 6 #define EA_CODE(proto,id) (((proto) << 8) | (id)) #define EA_PROTO(ea) ((ea) >> 8) @@ -550,6 +558,7 @@ extern struct protocol *attr_class_to_protocol[EAP_MAX]; #define DEF_PREF_DIRECT 240 /* Directly connected */ #define DEF_PREF_STATIC 200 /* Static route */ #define DEF_PREF_OSPF 150 /* OSPF intra-area, inter-area and type 1 external routes */ 
+#define DEF_PREF_BABEL 130 /* Babel */ #define DEF_PREF_RIP 120 /* RIP */ #define DEF_PREF_BGP 100 /* BGP */ #define DEF_PREF_INHERITED 10 /* Routes inherited from other routing daemons */ diff --git a/nest/rt-attr.c b/nest/rt-attr.c index 6ec69a7f..167bfc44 100644 --- a/nest/rt-attr.c +++ b/nest/rt-attr.c @@ -353,7 +353,7 @@ ea_find(ea_list *e, unsigned id) * for first occurrences of attributes with ID in specified interval from @id to * (@id + @max - 1), returning pointers to found &eattr structures, storing its * walk state in @s for subsequent calls. - + * * The function ea_walk() is supposed to be called in a loop, with initially * zeroed walk state structure @s with filled the initial extended attribute * list, returning one found attribute in each call or %NULL when no other diff --git a/nest/rt-dev.c b/nest/rt-dev.c index 098885b9..d98cd79f 100644 --- a/nest/rt-dev.c +++ b/nest/rt-dev.c @@ -68,6 +68,9 @@ dev_ifa_notify(struct proto *P, uint flags, struct ifa *ad) DBG("dev_if_notify: %s:%I going up\n", ad->iface->name, ad->ip); + if (cf->check_link && !(ad->iface->flags & IF_LINK_UP)) + return; + /* Use iface ID as local source ID */ struct rte_src *src = rt_get_source(P, ad->iface->index); @@ -87,6 +90,25 @@ dev_ifa_notify(struct proto *P, uint flags, struct ifa *ad) } } +static void +dev_if_notify(struct proto *p, uint c, struct iface *iface) +{ + struct rt_dev_config *cf = (void *) p->cf; + + if (c & (IF_CHANGE_UP | IF_CHANGE_DOWN)) + return; + + if ((c & IF_CHANGE_LINK) && cf->check_link) + { + uint ac = (iface->flags & IF_LINK_UP) ? 
IF_CHANGE_UP : IF_CHANGE_DOWN; + + struct ifa *a; + WALK_LIST(a, iface->addrs) + dev_ifa_notify(p, ac, a); + } +} + + static struct proto * dev_init(struct proto_config *CF) { @@ -97,6 +119,7 @@ dev_init(struct proto_config *CF) proto_configure_channel(P, &p->ip4_channel, proto_cf_find_channel(CF, NET_IP4)); proto_configure_channel(P, &p->ip6_channel, proto_cf_find_channel(CF, NET_IP6)); + P->if_notify = dev_if_notify; P->ifa_notify = dev_ifa_notify; return P; @@ -109,7 +132,8 @@ dev_reconfigure(struct proto *P, struct proto_config *CF) struct rt_dev_config *o = (void *) P->cf; struct rt_dev_config *n = (void *) CF; - if (!iface_patts_equal(&o->iface_list, &n->iface_list, NULL)) + if (!iface_patts_equal(&o->iface_list, &n->iface_list, NULL) || + (o->check_link != n->check_link)) return 0; return @@ -131,6 +155,8 @@ dev_copy_config(struct proto_config *dest, struct proto_config *src) * old nodes cannot be modified (although they contain internal lists). */ cfg_copy_list(&d->iface_list, &s->iface_list, sizeof(struct iface_patt)); + + d->check_link = s->check_link; } struct protocol proto_device = { diff --git a/nest/rt-dev.h b/nest/rt-dev.h index c9012336..20b88a64 100644 --- a/nest/rt-dev.h +++ b/nest/rt-dev.h @@ -12,6 +12,7 @@ struct rt_dev_config { struct proto_config c; list iface_list; /* list of struct iface_patt */ + int check_link; }; struct rt_dev_proto { diff --git a/nest/rt-table.c b/nest/rt-table.c index 2329bb53..9e9d4c7a 100644 --- a/nest/rt-table.c +++ b/nest/rt-table.c @@ -834,16 +834,20 @@ rt_notify_merged(struct channel *c, net *net, rte *new_changed, rte *old_changed * @net: network in question * @new: the new route to be announced * @old: the previous route for the same network + * @new_best: the new best route for the same network + * @old_best: the previous best route for the same network + * @before_old: The previous route before @old for the same network. + * If @before_old is NULL @old was the first. 
* * This function gets a routing table update and announces it * to all protocols that acccepts given type of route announcement * and are connected to the same table by their announcement hooks. * - * Route announcement of type RA_OPTIMAL si generated when optimal + * Route announcement of type %RA_OPTIMAL si generated when optimal * route (in routing table @tab) changes. In that case @old stores the * old optimal route. * - * Route announcement of type RA_ANY si generated when any route (in + * Route announcement of type %RA_ANY si generated when any route (in * routing table @tab) changes In that case @old stores the old route * from the same protocol. * diff --git a/proto/Doc b/proto/Doc index 7863472f..04c25bc0 100644 --- a/proto/Doc +++ b/proto/Doc @@ -1,4 +1,5 @@ H Protocols +C babel C bfd C bgp C ospf diff --git a/proto/babel/Doc b/proto/babel/Doc new file mode 100644 index 00000000..80026f91 --- /dev/null +++ b/proto/babel/Doc @@ -0,0 +1,2 @@ +S babel.c +S packets.c diff --git a/proto/babel/Makefile b/proto/babel/Makefile new file mode 100644 index 00000000..400ffbac --- /dev/null +++ b/proto/babel/Makefile @@ -0,0 +1,5 @@ +source=babel.c packets.c +root-rel=../../ +dir-name=proto/babel + +include ../../Rules diff --git a/proto/babel/babel.c b/proto/babel/babel.c new file mode 100644 index 00000000..8e104d60 --- /dev/null +++ b/proto/babel/babel.c @@ -0,0 +1,2055 @@ +/* + * BIRD -- The Babel protocol + * + * Copyright (c) 2015--2016 Toke Hoiland-Jorgensen + * + * Can be freely distributed and used under the terms of the GNU GPL. + * + * This file contains the main routines for handling and sending TLVs, as + * well as timers and interaction with the nest. + */ + +/** + * DOC: The Babel protocol + * + * Babel (RFC6126) is a loop-avoiding distance-vector routing protocol that is + * robust and efficient both in ordinary wired networks and in wireless mesh + * networks. 
+ * + * The Babel protocol keeps state for each neighbour in a &babel_neighbor + * struct, tracking received Hello and I Heard You (IHU) messages. A + * &babel_interface struct keeps hello and update times for each interface, and + * a separate hello seqno is maintained for each interface. + * + * For each prefix, Babel keeps track of both the possible routes (with next hop + * and router IDs), as well as the feasibility distance for each prefix and + * router id. The prefix itself is tracked in a &babel_entry struct, while the + * possible routes for the prefix are tracked as &babel_route entries and the + * feasibility distance is maintained through &babel_source structures. + * + * The main route selection is done in babel_select_route(). This is called when + * an entry is updated by receiving updates from the network or when modified by + * internal timers. It performs feasibility checks on the available routes for + * the prefix and selects the one with the lowest metric to be announced to the + * core. + */ + +#include +#include "babel.h" + + +#define OUR_ROUTE(r) (r->neigh == NULL) + +/* + * Is one number greater or equal than another mod 2^16? This is based on the + * definition of serial number space in RFC 1982. Note that arguments are of + * uint type to avoid integer promotion to signed integer. 
+ */ +static inline int ge_mod64k(uint a, uint b) +{ return (u16)(a - b) < 0x8000; } + +static void babel_dump_entry(struct babel_entry *e); +static void babel_dump_route(struct babel_route *r); +static void babel_select_route(struct babel_entry *e); +static void babel_send_route_request(struct babel_entry *e, struct babel_neighbor *n); +static void babel_send_wildcard_request(struct babel_iface *ifa); +static int babel_cache_seqno_request(struct babel_proto *p, ip_addr prefix, u8 plen, + u64 router_id, u16 seqno); +static void babel_trigger_iface_update(struct babel_iface *ifa); +static void babel_trigger_update(struct babel_proto *p); +static void babel_send_seqno_request(struct babel_entry *e); +static inline void babel_kick_timer(struct babel_proto *p); +static inline void babel_iface_kick_timer(struct babel_iface *ifa); + + +/* + * Functions to maintain data structures + */ + +static void +babel_init_entry(struct fib_node *n) +{ + struct babel_entry *e = (void *) n; + e->proto = NULL; + e->selected_in = NULL; + e->selected_out = NULL; + e->updated = now; + init_list(&e->sources); + init_list(&e->routes); +} + +static inline struct babel_entry * +babel_find_entry(struct babel_proto *p, ip_addr prefix, u8 plen) +{ + return fib_find(&p->rtable, &prefix, plen); +} + +static struct babel_entry * +babel_get_entry(struct babel_proto *p, ip_addr prefix, u8 plen) +{ + struct babel_entry *e = fib_get(&p->rtable, &prefix, plen); + e->proto = p; + return e; +} + +static struct babel_source * +babel_find_source(struct babel_entry *e, u64 router_id) +{ + struct babel_source *s; + + WALK_LIST(s, e->sources) + if (s->router_id == router_id) + return s; + + return NULL; +} + +static struct babel_source * +babel_get_source(struct babel_entry *e, u64 router_id) +{ + struct babel_proto *p = e->proto; + struct babel_source *s = babel_find_source(e, router_id); + + if (s) + return s; + + s = sl_alloc(p->source_slab); + s->router_id = router_id; + s->expires = now + 
BABEL_GARBAGE_INTERVAL; + s->seqno = 0; + s->metric = BABEL_INFINITY; + add_tail(&e->sources, NODE s); + + return s; +} + +static void +babel_expire_sources(struct babel_entry *e) +{ + struct babel_proto *p = e->proto; + struct babel_source *n, *nx; + + WALK_LIST_DELSAFE(n, nx, e->sources) + { + if (n->expires && n->expires <= now) + { + rem_node(NODE n); + sl_free(p->source_slab, n); + } + } +} + +static struct babel_route * +babel_find_route(struct babel_entry *e, struct babel_neighbor *n) +{ + struct babel_route *r; + + WALK_LIST(r, e->routes) + if (r->neigh == n) + return r; + + return NULL; +} + +static struct babel_route * +babel_get_route(struct babel_entry *e, struct babel_neighbor *nbr) +{ + struct babel_proto *p = e->proto; + struct babel_route *r = babel_find_route(e, nbr); + + if (r) + return r; + + r = sl_alloc(p->route_slab); + memset(r, 0, sizeof(*r)); + r->e = e; + add_tail(&e->routes, NODE r); + + if (nbr) + { + r->neigh = nbr; + r->expires = now + BABEL_GARBAGE_INTERVAL; + add_tail(&nbr->routes, NODE &r->neigh_route); + } + + return r; +} + +static void +babel_flush_route(struct babel_route *r) +{ + struct babel_proto *p = r->e->proto; + + DBG("Babel: Flush route %I/%d router_id %lR neigh %I\n", + r->e->n.prefix, r->e->n.pxlen, r->router_id, r->neigh ? 
r->neigh->addr : IPA_NONE); + + rem_node(NODE r); + + if (r->neigh) + rem_node(&r->neigh_route); + + if (r->e->selected_in == r) + r->e->selected_in = NULL; + + if (r->e->selected_out == r) + r->e->selected_out = NULL; + + sl_free(p->route_slab, r); +} + +static void +babel_expire_route(struct babel_route *r) +{ + struct babel_proto *p = r->e->proto; + struct babel_entry *e = r->e; + + TRACE(D_EVENTS, "Route expiry timer for %I/%d router-id %lR fired", + e->n.prefix, e->n.pxlen, r->router_id); + + if (r->metric < BABEL_INFINITY) + { + r->metric = BABEL_INFINITY; + r->expires = now + r->expiry_interval; + } + else + { + babel_flush_route(r); + } +} + +static void +babel_refresh_route(struct babel_route *r) +{ + if (!OUR_ROUTE(r) && (r == r->e->selected_in)) + babel_send_route_request(r->e, r->neigh); + + r->refresh_time = 0; +} + +static void +babel_expire_routes(struct babel_proto *p) +{ + struct babel_entry *e; + struct babel_route *r, *rx; + struct fib_iterator fit; + + FIB_ITERATE_INIT(&fit, &p->rtable); + +loop: + FIB_ITERATE_START(&p->rtable, &fit, n) + { + e = (struct babel_entry *) n; + int changed = 0; + + WALK_LIST_DELSAFE(r, rx, e->routes) + { + if (r->refresh_time && r->refresh_time <= now) + babel_refresh_route(r); + + if (r->expires && r->expires <= now) + { + babel_expire_route(r); + changed = 1; + } + } + + if (changed) + { + /* + * We have to restart the iteration because there may be a cascade of + * synchronous events babel_select_route() -> nest table change -> + * babel_rt_notify() -> p->rtable change, invalidating hidden variables. 
+ */ + + FIB_ITERATE_PUT(&fit, n); + babel_select_route(e); + goto loop; + } + + babel_expire_sources(e); + + /* Remove empty entries */ + if (EMPTY_LIST(e->sources) && EMPTY_LIST(e->routes)) + { + FIB_ITERATE_PUT(&fit, n); + fib_delete(&p->rtable, e); + goto loop; + } + } + FIB_ITERATE_END(n); +} + +static struct babel_neighbor * +babel_find_neighbor(struct babel_iface *ifa, ip_addr addr) +{ + struct babel_neighbor *nbr; + + WALK_LIST(nbr, ifa->neigh_list) + if (ipa_equal(nbr->addr, addr)) + return nbr; + + return NULL; +} + +static struct babel_neighbor * +babel_get_neighbor(struct babel_iface *ifa, ip_addr addr) +{ + struct babel_neighbor *nbr = babel_find_neighbor(ifa, addr); + + if (nbr) + return nbr; + + nbr = mb_allocz(ifa->pool, sizeof(struct babel_neighbor)); + nbr->ifa = ifa; + nbr->addr = addr; + nbr->txcost = BABEL_INFINITY; + init_list(&nbr->routes); + add_tail(&ifa->neigh_list, NODE nbr); + + return nbr; +} + +static void +babel_flush_neighbor(struct babel_neighbor *nbr) +{ + struct babel_proto *p = nbr->ifa->proto; + node *n; + + TRACE(D_EVENTS, "Flushing neighbor %I", nbr->addr); + + WALK_LIST_FIRST(n, nbr->routes) + { + struct babel_route *r = SKIP_BACK(struct babel_route, neigh_route, n); + struct babel_entry *e = r->e; + int selected = (r == e->selected_in); + + babel_flush_route(r); + + if (selected) + babel_select_route(e); + } + + rem_node(NODE nbr); + mb_free(nbr); +} + +static void +babel_expire_ihu(struct babel_neighbor *nbr) +{ + nbr->txcost = BABEL_INFINITY; +} + +static void +babel_expire_hello(struct babel_neighbor *nbr) +{ + nbr->hello_map <<= 1; + + if (nbr->hello_cnt < 16) + nbr->hello_cnt++; + + if (!nbr->hello_map) + babel_flush_neighbor(nbr); +} + +static void +babel_expire_neighbors(struct babel_proto *p) +{ + struct babel_iface *ifa; + struct babel_neighbor *nbr, *nbx; + + WALK_LIST(ifa, p->interfaces) + { + WALK_LIST_DELSAFE(nbr, nbx, ifa->neigh_list) + { + if (nbr->ihu_expiry && nbr->ihu_expiry <= now) + babel_expire_ihu(nbr); 
+ + if (nbr->hello_expiry && nbr->hello_expiry <= now) + babel_expire_hello(nbr); + } + } +} + + +/* + * Best route selection + */ + +/* + * From the RFC (section 3.5.1): + * + * a route advertisement carrying the quintuple (prefix, plen, router-id, seqno, + * metric) is feasible if one of the following conditions holds: + * + * - metric is infinite; or + * + * - no entry exists in the source table indexed by (id, prefix, plen); or + * + * - an entry (prefix, plen, router-id, seqno', metric') exists in the source + * table, and either + * - seqno' < seqno or + * - seqno = seqno' and metric < metric'. + */ +static inline int +babel_is_feasible(struct babel_source *s, u16 seqno, u16 metric) +{ + return !s || + (metric == BABEL_INFINITY) || + (seqno > s->seqno) || + ((seqno == s->seqno) && (metric < s->metric)); +} + +static u16 +babel_compute_rxcost(struct babel_neighbor *n) +{ + struct babel_iface *ifa = n->ifa; + u8 cnt, missed; + u16 map=n->hello_map; + + if (!map) return BABEL_INFINITY; + cnt = u32_popcount(map); // number of bits set + missed = n->hello_cnt-cnt; + + if (ifa->cf->type == BABEL_IFACE_TYPE_WIRELESS) + { + /* ETX - Appendix 2.2 in the RFC. + + beta = prob. of successful transmission. + rxcost = BABEL_RXCOST_WIRELESS/beta + + Since: beta = 1-missed/n->hello_cnt = cnt/n->hello_cnt + Then: rxcost = BABEL_RXCOST_WIRELESS * n->hello_cnt / cnt + */ + if (!cnt) return BABEL_INFINITY; + return BABEL_RXCOST_WIRELESS * n->hello_cnt / cnt; + } + else + { + /* k-out-of-j selection - Appendix 2.1 in the RFC. */ + DBG("Babel: Missed %d hellos from %I\n", missed, n->addr); + /* Link is bad if more than half the expected hellos were lost */ + return (missed > n->hello_cnt/2) ? 
BABEL_INFINITY : ifa->cf->rxcost; + } +} + + +static u16 +babel_compute_cost(struct babel_neighbor *n) +{ + struct babel_iface *ifa = n->ifa; + u16 rxcost = babel_compute_rxcost(n); + if (rxcost == BABEL_INFINITY) return rxcost; + else if (ifa->cf->type == BABEL_IFACE_TYPE_WIRELESS) + { + /* ETX - Appendix 2.2 in the RFC */ + return (MAX(n->txcost, BABEL_RXCOST_WIRELESS) * rxcost)/BABEL_RXCOST_WIRELESS; + } + else + { + /* k-out-of-j selection - Appendix 2.1 in the RFC. */ + return n->txcost; + } +} + +/* Simple additive metric - Appendix 3.1 in the RFC */ +static u16 +babel_compute_metric(struct babel_neighbor *n, uint metric) +{ + metric += babel_compute_cost(n); + return MIN(metric, BABEL_INFINITY); +} + + +/** + * babel_announce_rte - announce selected route to the core + * @p: Babel protocol instance + * @e: Babel route entry to announce + * + * This function announces a Babel entry to the core if it has a selected + * incoming path, and retracts it otherwise. If the selected entry has infinite + * metric, the route is announced as unreachable. + */ +static void +babel_announce_rte(struct babel_proto *p, struct babel_entry *e) +{ + struct babel_route *r = e->selected_in; + + if (r) + { + net *n = net_get(p->p.table, e->n.prefix, e->n.pxlen); + rta A = { + .src = p->p.main_source, + .source = RTS_BABEL, + .scope = SCOPE_UNIVERSE, + .cast = RTC_UNICAST, + .dest = r->metric == BABEL_INFINITY ? 
RTD_UNREACHABLE : RTD_ROUTER, + .flags = 0, + .from = r->neigh->addr, + .iface = r->neigh->ifa->iface, + }; + + if (r->metric < BABEL_INFINITY) + A.gw = r->next_hop; + + rta *a = rta_lookup(&A); + rte *rte = rte_get_temp(a); + rte->u.babel.metric = r->metric; + rte->u.babel.router_id = r->router_id; + rte->net = n; + rte->pflags = 0; + + rte_update(&p->p, n, rte); + } + else + { + /* Retraction */ + net *n = net_find(p->p.table, e->n.prefix, e->n.pxlen); + rte_update(&p->p, n, NULL); + } +} + +/** + * babel_select_route - select best route for given route entry + * @e: Babel entry to select the best route for + * + * Select the best feasible route for a given prefix among the routes received + * from peers, and propagate it to the nest. This just selects the feasible + * route with the lowest metric. + * + * If no feasible route is available for a prefix that previously had a route + * selected, a seqno request is sent to try to get a valid route. In the + * meantime, the route is marked as infeasible in the nest (to blackhole packets + * going to it, as per the RFC). + * + * If no feasible route is available, and no previous route is selected, the + * route is removed from the nest entirely. 
+ */ +static void +babel_select_route(struct babel_entry *e) +{ + struct babel_proto *p = e->proto; + struct babel_route *r, *cur = e->selected_in; + + /* try to find the best feasible route */ + WALK_LIST(r, e->routes) + if (!OUR_ROUTE(r) && /* prevent propagating our own routes back to core */ + (!cur || r->metric < cur->metric) && + babel_is_feasible(babel_find_source(e, r->router_id), r->seqno, r->advert_metric)) + cur = r; + + if (cur && !OUR_ROUTE(cur) && + ((!e->selected_in && cur->metric < BABEL_INFINITY) || + (e->selected_in && cur->metric < e->selected_in->metric))) + { + TRACE(D_EVENTS, "Picked new route for prefix %I/%d: router id %lR metric %d", + e->n.prefix, e->n.pxlen, cur->router_id, cur->metric); + + e->selected_in = cur; + e->updated = now; + babel_announce_rte(p, e); + } + else if (!cur || cur->metric == BABEL_INFINITY) + { + /* Couldn't find a feasible route. If we have a selected route, that means + it just became infeasible; so set it's metric to infinite and install it + (as unreachable), then send a seqno request. + + babel_build_rte() will set the unreachable flag if the metric is BABEL_INFINITY.*/ + if (e->selected_in) + { + TRACE(D_EVENTS, "Lost feasible route for prefix %I/%d", + e->n.prefix, e->n.pxlen); + + e->selected_in->metric = BABEL_INFINITY; + e->updated = now; + + babel_send_seqno_request(e); + babel_announce_rte(p, e); + } + else + { + /* No route currently selected, and no new one selected; this means we + don't have a route to this destination anymore (and were probably + called from an expiry timer). Remove the route from the nest. 
*/ + TRACE(D_EVENTS, "Flushing route for prefix %I/%d", e->n.prefix, e->n.pxlen); + + e->selected_in = NULL; + e->updated = now; + babel_announce_rte(p, e); + } + } +} + +/* + * Functions to send replies + */ + +static void +babel_send_ack(struct babel_iface *ifa, ip_addr dest, u16 nonce) +{ + struct babel_proto *p = ifa->proto; + union babel_msg msg = {}; + + TRACE(D_PACKETS, "Sending ACK to %I with nonce %d", dest, nonce); + + msg.type = BABEL_TLV_ACK; + msg.ack.nonce = nonce; + + babel_send_unicast(&msg, ifa, dest); +} + +static void +babel_build_ihu(union babel_msg *msg, struct babel_iface *ifa, struct babel_neighbor *n) +{ + struct babel_proto *p = ifa->proto; + + msg->type = BABEL_TLV_IHU; + msg->ihu.addr = n->addr; + msg->ihu.rxcost = babel_compute_rxcost(n); + msg->ihu.interval = ifa->cf->ihu_interval; + + TRACE(D_PACKETS, "Sending IHU for %I with rxcost %d interval %d", + msg->ihu.addr, msg->ihu.rxcost, msg->ihu.interval); +} + +static void +babel_send_ihu(struct babel_iface *ifa, struct babel_neighbor *n) +{ + union babel_msg msg = {}; + babel_build_ihu(&msg, ifa, n); + babel_send_unicast(&msg, ifa, n->addr); +} + +static void +babel_send_ihus(struct babel_iface *ifa) +{ + struct babel_neighbor *n; + WALK_LIST(n, ifa->neigh_list) + { + union babel_msg msg = {}; + babel_build_ihu(&msg, ifa, n); + babel_enqueue(&msg, ifa); + } +} + +static void +babel_send_hello(struct babel_iface *ifa, u8 send_ihu) +{ + struct babel_proto *p = ifa->proto; + union babel_msg msg = {}; + + msg.type = BABEL_TLV_HELLO; + msg.hello.seqno = ifa->hello_seqno++; + msg.hello.interval = ifa->cf->hello_interval; + + TRACE(D_PACKETS, "Sending hello on %s with seqno %d interval %d", + ifa->ifname, msg.hello.seqno, msg.hello.interval); + + babel_enqueue(&msg, ifa); + + if (send_ihu) + babel_send_ihus(ifa); +} + +static void +babel_send_route_request(struct babel_entry *e, struct babel_neighbor *n) +{ + struct babel_proto *p = e->proto; + struct babel_iface *ifa = n->ifa; + union 
babel_msg msg = {}; + + TRACE(D_PACKETS, "Sending route request for %I/%d to %I", + e->n.prefix, e->n.pxlen, n->addr); + + msg.type = BABEL_TLV_ROUTE_REQUEST; + msg.route_request.prefix = e->n.prefix; + msg.route_request.plen = e->n.pxlen; + + babel_send_unicast(&msg, ifa, n->addr); +} + +static void +babel_send_wildcard_request(struct babel_iface *ifa) +{ + struct babel_proto *p = ifa->proto; + union babel_msg msg = {}; + + TRACE(D_PACKETS, "Sending wildcard route request on %s", + ifa->ifname); + + msg.type = BABEL_TLV_ROUTE_REQUEST; + msg.route_request.full = 1; + + babel_enqueue(&msg, ifa); +} + +static void +babel_send_seqno_request(struct babel_entry *e) +{ + struct babel_proto *p = e->proto; + struct babel_route *r = e->selected_in; + struct babel_iface *ifa = NULL; + struct babel_source *s = NULL; + union babel_msg msg = {}; + + s = babel_find_source(e, r->router_id); + if (!s || !babel_cache_seqno_request(p, e->n.prefix, e->n.pxlen, r->router_id, s->seqno + 1)) + return; + + TRACE(D_PACKETS, "Sending seqno request for %I/%d router-id %lR seqno %d", + e->n.prefix, e->n.pxlen, r->router_id, s->seqno + 1); + + msg.type = BABEL_TLV_SEQNO_REQUEST; + msg.seqno_request.plen = e->n.pxlen; + msg.seqno_request.seqno = s->seqno + 1; + msg.seqno_request.hop_count = BABEL_INITIAL_HOP_COUNT; + msg.seqno_request.router_id = r->router_id; + msg.seqno_request.prefix = e->n.prefix; + + WALK_LIST(ifa, p->interfaces) + babel_enqueue(&msg, ifa); +} + +static void +babel_unicast_seqno_request(struct babel_route *r) +{ + struct babel_entry *e = r->e; + struct babel_proto *p = e->proto; + struct babel_iface *ifa = r->neigh->ifa; + struct babel_source *s = NULL; + union babel_msg msg = {}; + + s = babel_find_source(e, r->router_id); + if (!s || !babel_cache_seqno_request(p, e->n.prefix, e->n.pxlen, r->router_id, s->seqno + 1)) + return; + + TRACE(D_PACKETS, "Sending seqno request for %I/%d router-id %lR seqno %d", + e->n.prefix, e->n.pxlen, r->router_id, s->seqno + 1); + + 
msg.type = BABEL_TLV_SEQNO_REQUEST; + msg.seqno_request.plen = e->n.pxlen; + msg.seqno_request.seqno = s->seqno + 1; + msg.seqno_request.hop_count = BABEL_INITIAL_HOP_COUNT; + msg.seqno_request.router_id = r->router_id; + msg.seqno_request.prefix = e->n.prefix; + + babel_send_unicast(&msg, ifa, r->neigh->addr); +} + +/** + * babel_send_update - send route table updates + * @ifa: Interface to transmit on + * @changed: Only send entries changed since this time + * + * This function produces update TLVs for all entries changed since the time + * indicated by the &changed parameter and queues them for transmission on the + * selected interface. During the process, the feasibility distance for each + * transmitted entry is updated. + */ +static void +babel_send_update(struct babel_iface *ifa, bird_clock_t changed) +{ + struct babel_proto *p = ifa->proto; + + FIB_WALK(&p->rtable, n) + { + struct babel_entry *e = (void *) n; + struct babel_route *r = e->selected_out; + + if (!r) + continue; + + /* Our own seqno might have changed, in which case we update the routes we + originate. 
*/ + if ((r->router_id == p->router_id) && (r->seqno < p->update_seqno)) + { + r->seqno = p->update_seqno; + e->updated = now; + } + + /* Skip routes that weren't updated since 'changed' time */ + if (e->updated < changed) + continue; + + TRACE(D_PACKETS, "Sending update for %I/%d router-id %lR seqno %d metric %d", + e->n.prefix, e->n.pxlen, r->router_id, r->seqno, r->metric); + + union babel_msg msg = {}; + msg.type = BABEL_TLV_UPDATE; + msg.update.plen = e->n.pxlen; + msg.update.interval = ifa->cf->update_interval; + msg.update.seqno = r->seqno; + msg.update.metric = r->metric; + msg.update.prefix = e->n.prefix; + msg.update.router_id = r->router_id; + + /* Update feasibility distance */ + struct babel_source *s = babel_get_source(e, r->router_id); + s->expires = now + BABEL_GARBAGE_INTERVAL; + if ((msg.update.seqno > s->seqno) || + ((msg.update.seqno == s->seqno) && (msg.update.metric < s->metric))) + { + s->seqno = msg.update.seqno; + s->metric = msg.update.metric; + } + babel_enqueue(&msg, ifa); + } + FIB_WALK_END; +} + +static void +babel_trigger_iface_update(struct babel_iface *ifa) +{ + struct babel_proto *p = ifa->proto; + + /* Interface not active or already scheduled */ + if (!ifa->up || ifa->want_triggered) + return; + + TRACE(D_EVENTS, "Scheduling triggered updates for %s seqno %d", + ifa->iface->name, p->update_seqno); + + ifa->want_triggered = now; + babel_iface_kick_timer(ifa); +} + +/* Sends and update on all interfaces. 
*/ +static void +babel_trigger_update(struct babel_proto *p) +{ + if (p->triggered) + return; + + struct babel_iface *ifa; + WALK_LIST(ifa, p->interfaces) + babel_trigger_iface_update(ifa); + + p->triggered = 1; +} + +/* A retraction is an update with an infinite metric */ +static void +babel_send_retraction(struct babel_iface *ifa, ip_addr prefix, int plen) +{ + struct babel_proto *p = ifa->proto; + union babel_msg msg = {}; + + TRACE(D_PACKETS, "Sending retraction for %I/%d router-id %lR seqno %d", + prefix, plen, p->router_id, p->update_seqno); + + msg.type = BABEL_TLV_UPDATE; + msg.update.plen = plen; + msg.update.interval = ifa->cf->update_interval; + msg.update.seqno = p->update_seqno; + msg.update.metric = BABEL_INFINITY; + msg.update.prefix = prefix; + msg.update.router_id = p->router_id; + + babel_enqueue(&msg, ifa); +} + + +/* + * TLV handler helpers + */ + +/* Update hello history according to Appendix A1 of the RFC */ +static void +babel_update_hello_history(struct babel_neighbor *n, u16 seqno, u16 interval) +{ + /* + * Compute the difference between expected and received seqno (modulo 2^16). + * If the expected and received seqnos are within 16 of each other, the modular + * difference is going to be less than 16 for one of the directions. Otherwise, + * the values differ too much, so just reset the state. + */ + + u16 delta = ((uint) seqno - (uint) n->next_hello_seqno); + + if (delta == 0) + { + /* Do nothing */ + } + else if (delta <= 16) + { + /* Sending node decreased interval; fast-forward */ + n->hello_map <<= delta; + n->hello_cnt = MIN(n->hello_cnt + delta, 16); + } + else if (delta >= 0xfff0) + { + u8 diff = (0xffff - delta); + /* Sending node increased interval; undo history */ + n->hello_map >>= diff; + n->hello_cnt = (diff < n->hello_cnt) ? 
n->hello_cnt - diff : 0; + } + else + { + /* Note state reset - flush entries */ + n->hello_map = n->hello_cnt = 0; + } + + /* Current entry */ + n->hello_map = (n->hello_map << 1) | 1; + n->next_hello_seqno = seqno+1; + if (n->hello_cnt < 16) n->hello_cnt++; + n->hello_expiry = now + BABEL_HELLO_EXPIRY_FACTOR(interval); +} + +static void +babel_expire_seqno_requests(struct babel_proto *p) +{ + struct babel_seqno_request *n, *nx; + WALK_LIST_DELSAFE(n, nx, p->seqno_cache) + { + if ((n->updated + BABEL_SEQNO_REQUEST_EXPIRY) <= now) + { + rem_node(NODE n); + sl_free(p->seqno_slab, n); + } + } +} + +/* + * Checks the seqno request cache for a matching request and returns failure if + * found. Otherwise, a new entry is stored in the cache. + */ +static int +babel_cache_seqno_request(struct babel_proto *p, ip_addr prefix, u8 plen, + u64 router_id, u16 seqno) +{ + struct babel_seqno_request *r; + + WALK_LIST(r, p->seqno_cache) + { + if (ipa_equal(r->prefix, prefix) && (r->plen == plen) && + (r->router_id == router_id) && (r->seqno == seqno)) + return 0; + } + + /* no entries found */ + r = sl_alloc(p->seqno_slab); + r->prefix = prefix; + r->plen = plen; + r->router_id = router_id; + r->seqno = seqno; + r->updated = now; + add_tail(&p->seqno_cache, NODE r); + + return 1; +} + +static void +babel_forward_seqno_request(struct babel_entry *e, + struct babel_msg_seqno_request *in, + ip_addr sender) +{ + struct babel_proto *p = e->proto; + struct babel_route *r; + + TRACE(D_PACKETS, "Forwarding seqno request for %I/%d router-id %lR seqno %d", + e->n.prefix, e->n.pxlen, in->router_id, in->seqno); + + WALK_LIST(r, e->routes) + { + if ((r->router_id == in->router_id) && + !OUR_ROUTE(r) && + !ipa_equal(r->neigh->addr, sender)) + { + if (!babel_cache_seqno_request(p, e->n.prefix, e->n.pxlen, in->router_id, in->seqno)) + return; + + union babel_msg msg = {}; + msg.type = BABEL_TLV_SEQNO_REQUEST; + msg.seqno_request.plen = in->plen; + msg.seqno_request.seqno = in->seqno; + 
msg.seqno_request.hop_count = in->hop_count-1; + msg.seqno_request.router_id = in->router_id; + msg.seqno_request.prefix = e->n.prefix; + + babel_send_unicast(&msg, r->neigh->ifa, r->neigh->addr); + return; + } + } +} + + +/* + * TLV handlers + */ + +void +babel_handle_ack_req(union babel_msg *m, struct babel_iface *ifa) +{ + struct babel_proto *p = ifa->proto; + struct babel_msg_ack_req *msg = &m->ack_req; + + TRACE(D_PACKETS, "Handling ACK request nonce %d interval %d", + msg->nonce, msg->interval); + + babel_send_ack(ifa, msg->sender, msg->nonce); +} + +void +babel_handle_hello(union babel_msg *m, struct babel_iface *ifa) +{ + struct babel_proto *p = ifa->proto; + struct babel_msg_hello *msg = &m->hello; + + TRACE(D_PACKETS, "Handling hello seqno %d interval %d", + msg->seqno, msg->interval); + + struct babel_neighbor *n = babel_get_neighbor(ifa, msg->sender); + babel_update_hello_history(n, msg->seqno, msg->interval); + if (ifa->cf->type == BABEL_IFACE_TYPE_WIRELESS) + babel_send_ihu(ifa, n); +} + +void +babel_handle_ihu(union babel_msg *m, struct babel_iface *ifa) +{ + struct babel_proto *p = ifa->proto; + struct babel_msg_ihu *msg = &m->ihu; + + /* Ignore IHUs that are not about us */ + if ((msg->ae != BABEL_AE_WILDCARD) && !ipa_equal(msg->addr, ifa->addr)) + return; + + TRACE(D_PACKETS, "Handling IHU rxcost %d interval %d", + msg->rxcost, msg->interval); + + struct babel_neighbor *n = babel_get_neighbor(ifa, msg->sender); + n->txcost = msg->rxcost; + n->ihu_expiry = now + BABEL_IHU_EXPIRY_FACTOR(msg->interval); +} + +/** + * babel_handle_update - handle incoming route updates + * @m: Incoming update TLV + * @ifa: Interface the update was received on + * + * This function is called as a handler for update TLVs and handles the updating + * and maintenance of route entries in Babel's internal routing cache. 
The + * handling follows the actions described in the Babel RFC, and at the end of + * each update handling, babel_select_route() is called on the affected entry to + * optionally update the selected routes and propagate them to the core. + */ +void +babel_handle_update(union babel_msg *m, struct babel_iface *ifa) +{ + struct babel_proto *p = ifa->proto; + struct babel_msg_update *msg = &m->update; + + struct babel_neighbor *n; + struct babel_entry *e; + struct babel_source *s; + struct babel_route *r; + int feasible; + + TRACE(D_PACKETS, "Handling update for %I/%d with seqno %d metric %d", + msg->prefix, msg->plen, msg->seqno, msg->metric); + + n = babel_find_neighbor(ifa, msg->sender); + if (!n) + { + DBG("Babel: Haven't heard from neighbor %I; ignoring update.\n", msg->sender); + return; + } + + if (msg->router_id == p->router_id) + { + DBG("Babel: Ignoring update for our own router ID.\n"); + return; + } + + /* + * RFC section 3.5.4: + * + * When a Babel node receives an update (id, prefix, seqno, metric) from a + * neighbour neigh with a link cost value equal to cost, it checks whether it + * already has a routing table entry indexed by (neigh, id, prefix). + * + * If no such entry exists: + * + * o if the update is unfeasible, it is ignored; + * + * o if the metric is infinite (the update is a retraction), the update is + * ignored; + * + * o otherwise, a new route table entry is created, indexed by (neigh, id, + * prefix), with seqno equal to seqno and an advertised metric equal to the + * metric carried by the update. + * + * If such an entry exists: + * + * o if the entry is currently installed and the update is unfeasible, then + * the behaviour depends on whether the router-ids of the two entries match. + * If the router-ids are different, the update is treated as though it were + * a retraction (i.e., as though the metric were FFFF hexadecimal). 
If the + * router-ids are equal, the update is ignored; + * + * o otherwise (i.e., if either the update is feasible or the entry is not + * currently installed), then the entry's sequence number, advertised + * metric, metric, and router-id are updated and, unless the advertised + * metric is infinite, the route's expiry timer is reset to a small multiple + * of the Interval value included in the update. + */ + + if (msg->metric == BABEL_INFINITY) + e = babel_find_entry(p, msg->prefix, msg->plen); + else + e = babel_get_entry(p, msg->prefix, msg->plen); + + if (!e) + return; + + s = babel_find_source(e, msg->router_id); /* for feasibility */ + r = babel_find_route(e, n); /* the route entry indexed by neighbour */ + feasible = babel_is_feasible(s, msg->seqno, msg->metric); + + if (!r) + { + if (!feasible || (msg->metric == BABEL_INFINITY)) + return; + + r = babel_get_route(e, n); + r->advert_metric = msg->metric; + r->router_id = msg->router_id; + r->metric = babel_compute_metric(n, msg->metric); + r->next_hop = msg->next_hop; + r->seqno = msg->seqno; + } + else if (r == r->e->selected_in && !feasible) + { + /* Route is installed and update is infeasible - we may lose the route, so + send a unicast seqno request (section 3.8.2.2 second paragraph). 
*/ + babel_unicast_seqno_request(r); + + if (msg->router_id == r->router_id) return; + r->metric = BABEL_INFINITY; /* retraction */ + } + else + { + /* Last paragraph above - update the entry */ + r->advert_metric = msg->metric; + r->metric = babel_compute_metric(n, msg->metric); + r->router_id = msg->router_id; + r->next_hop = msg->next_hop; + r->seqno = msg->seqno; + + if (msg->metric != BABEL_INFINITY) + { + r->expiry_interval = BABEL_ROUTE_EXPIRY_FACTOR(msg->interval); + r->expires = now + r->expiry_interval; + if (r->expiry_interval > BABEL_ROUTE_REFRESH_INTERVAL) + r->refresh_time = now + r->expiry_interval - BABEL_ROUTE_REFRESH_INTERVAL; + } + + /* If the route is not feasible at this point, it means it is from another + neighbour than the one currently selected; so send a unicast seqno + request to try to get a better route (section 3.8.2.2 last paragraph). */ + if (!feasible) + babel_unicast_seqno_request(r); + } + + babel_select_route(e); +} + +void +babel_handle_route_request(union babel_msg *m, struct babel_iface *ifa) +{ + struct babel_proto *p = ifa->proto; + struct babel_msg_route_request *msg = &m->route_request; + + /* RFC 6126 3.8.1.1 */ + + /* Wildcard request - full update on the interface */ + if (msg->full) + { + TRACE(D_PACKETS, "Handling wildcard route request"); + ifa->want_triggered = 1; + return; + } + + TRACE(D_PACKETS, "Handling route request for %I/%d", msg->prefix, msg->plen); + + /* Non-wildcard request - see if we have an entry for the route. + If not, send a retraction, otherwise send an update. 
*/ + struct babel_entry *e = babel_find_entry(p, msg->prefix, msg->plen); + if (!e) + { + babel_send_retraction(ifa, msg->prefix, msg->plen); + } + else + { + babel_trigger_iface_update(ifa); + e->updated = now; + } +} + + +void +babel_handle_seqno_request(union babel_msg *m, struct babel_iface *ifa) +{ + struct babel_proto *p = ifa->proto; + struct babel_msg_seqno_request *msg = &m->seqno_request; + + /* RFC 6126 3.8.1.2 */ + + TRACE(D_PACKETS, "Handling seqno request for %I/%d router-id %lR seqno %d hop count %d", + msg->prefix, msg->plen, msg->router_id, msg->seqno, msg->hop_count); + + /* Ignore if we have no such entry or entry has infinite metric */ + struct babel_entry *e = babel_find_entry(p, msg->prefix, msg->plen); + if (!e || !e->selected_out || (e->selected_out->metric == BABEL_INFINITY)) + return; + + /* Trigger update on incoming interface if we have a selected route with + different router id or seqno no smaller than requested */ + struct babel_route *r = e->selected_out; + if ((r->router_id != msg->router_id) || ge_mod64k(r->seqno, msg->seqno)) + { + babel_trigger_iface_update(ifa); + e->updated = now; + return; + } + + /* Seqno is larger; check if we own the router id */ + if (msg->router_id == p->router_id) + { + /* Ours; update seqno and trigger global update */ + p->update_seqno++; + babel_trigger_update(p); + } + else + { + /* Not ours; forward if TTL allows it */ + if (msg->hop_count > 1) + babel_forward_seqno_request(e, msg, msg->sender); + } +} + + +/* + * Babel interfaces + */ + +/** + * babel_iface_timer - Babel interface timer handler + * @t: Timer + * + * This function is called by the per-interface timer and triggers sending of + * periodic Hello's and both triggered and periodic updates. Periodic Hello's + * and updates are simply handled by setting the next_{hello,regular} variables + * on the interface, and triggering an update (and resetting the variable) + * whenever 'now' exceeds that value. 
+ * + * For triggered updates, babel_trigger_iface_update() will set the + * want_triggered field on the interface to a timestamp value. If this is set + * (and the next_triggered time has passed; this is a rate limiting mechanism), + * babel_send_update() will be called with this timestamp as the second + * parameter. This causes updates to be sent consisting of only the routes that + * have changed since the time saved in want_triggered. + * + * Mostly when an update is triggered, the route being modified will be set to + * the value of 'now' at the time of the trigger; the >= comparison for + * selecting which routes to send in the update will make sure this is included. + */ +static void +babel_iface_timer(timer *t) +{ + struct babel_iface *ifa = t->data; + struct babel_proto *p = ifa->proto; + bird_clock_t hello_period = ifa->cf->hello_interval; + bird_clock_t update_period = ifa->cf->update_interval; + + if (now >= ifa->next_hello) + { + babel_send_hello(ifa, (ifa->cf->type == BABEL_IFACE_TYPE_WIRELESS || + ifa->hello_seqno % BABEL_IHU_INTERVAL_FACTOR == 0)); + ifa->next_hello += hello_period * (1 + (now - ifa->next_hello) / hello_period); + } + + if (now >= ifa->next_regular) + { + TRACE(D_EVENTS, "Sending regular updates on %s", ifa->ifname); + babel_send_update(ifa, 0); + ifa->next_regular += update_period * (1 + (now - ifa->next_regular) / update_period); + ifa->want_triggered = 0; + p->triggered = 0; + } + else if (ifa->want_triggered && (now >= ifa->next_triggered)) + { + TRACE(D_EVENTS, "Sending triggered updates on %s", ifa->ifname); + babel_send_update(ifa, ifa->want_triggered); + ifa->next_triggered = now + MIN(5, update_period / 2 + 1); + ifa->want_triggered = 0; + p->triggered = 0; + } + + bird_clock_t next_event = MIN(ifa->next_hello, ifa->next_regular); + tm_start(ifa->timer, ifa->want_triggered ? 
1 : (next_event - now)); +} + +static inline void +babel_iface_kick_timer(struct babel_iface *ifa) +{ + if (ifa->timer->expires > (now + 1)) + tm_start(ifa->timer, 1); +} + +static void +babel_iface_start(struct babel_iface *ifa) +{ + struct babel_proto *p = ifa->proto; + + TRACE(D_EVENTS, "Starting interface %s", ifa->ifname); + + ifa->next_hello = now + (random() % ifa->cf->hello_interval) + 1; + ifa->next_regular = now + (random() % ifa->cf->update_interval) + 1; + ifa->next_triggered = now + MIN(5, ifa->cf->update_interval / 2 + 1); + ifa->want_triggered = 0; /* We send an immediate update (below) */ + tm_start(ifa->timer, 1); + ifa->up = 1; + + babel_send_hello(ifa, 0); + babel_send_wildcard_request(ifa); + babel_send_update(ifa, 0); /* Full update */ +} + +static void +babel_iface_stop(struct babel_iface *ifa) +{ + struct babel_proto *p = ifa->proto; + struct babel_neighbor *nbr; + struct babel_route *r; + node *n; + + TRACE(D_EVENTS, "Stopping interface %s", ifa->ifname); + + /* + * Rather than just flushing the neighbours, we set the metric of their routes + * to infinity. This allows us to keep the neighbour hello state for when the + * interface comes back up. The routes will also be kept until they expire. 
+ */ + WALK_LIST(nbr, ifa->neigh_list) + { + WALK_LIST(n, nbr->routes) + { + r = SKIP_BACK(struct babel_route, neigh_route, n); + r->metric = BABEL_INFINITY; + r->expires = now + r->expiry_interval; + babel_select_route(r->e); + } + } + + tm_stop(ifa->timer); + ifa->up = 0; +} + +static inline int +babel_iface_link_up(struct babel_iface *ifa) +{ + return !ifa->cf->check_link || (ifa->iface->flags & IF_LINK_UP); +} + +static void +babel_iface_update_state(struct babel_iface *ifa) +{ + int up = ifa->sk && babel_iface_link_up(ifa); + + if (up == ifa->up) + return; + + if (up) + babel_iface_start(ifa); + else + babel_iface_stop(ifa); +} + +static void +babel_iface_update_buffers(struct babel_iface *ifa) +{ + if (!ifa->sk) + return; + + uint mtu = MAX(BABEL_MIN_MTU, ifa->iface->mtu); + uint rbsize = ifa->cf->rx_buffer ?: mtu; + uint tbsize = ifa->cf->tx_length ?: mtu; + rbsize = MAX(rbsize, tbsize); + + sk_set_rbsize(ifa->sk, rbsize); + sk_set_tbsize(ifa->sk, tbsize); + + ifa->tx_length = tbsize - BABEL_OVERHEAD; +} + +static struct babel_iface* +babel_find_iface(struct babel_proto *p, struct iface *what) +{ + struct babel_iface *ifa; + + WALK_LIST (ifa, p->interfaces) + if (ifa->iface == what) + return ifa; + + return NULL; +} + +static void +babel_iface_locked(struct object_lock *lock) +{ + struct babel_iface *ifa = lock->data; + struct babel_proto *p = ifa->proto; + + if (!babel_open_socket(ifa)) + { + log(L_ERR "%s: Cannot open socket for %s", p->p.name, ifa->iface->name); + return; + } + + babel_iface_update_buffers(ifa); + babel_iface_update_state(ifa); +} + +static void +babel_add_iface(struct babel_proto *p, struct iface *new, struct babel_iface_config *ic) +{ + struct babel_iface *ifa; + + TRACE(D_EVENTS, "Adding interface %s", new->name); + + pool *pool = rp_new(p->p.pool, new->name); + + ifa = mb_allocz(pool, sizeof(struct babel_iface)); + ifa->proto = p; + ifa->iface = new; + ifa->cf = ic; + ifa->pool = pool; + ifa->ifname = new->name; + + 
add_tail(&p->interfaces, NODE ifa); + + struct ifa *addr; + WALK_LIST(addr, new->addrs) + if (ipa_is_link_local(addr->ip)) + ifa->addr = addr->ip; + + if (ipa_zero(ifa->addr)) + log(L_WARN "%s: Cannot find link-local addr on %s", p->p.name, new->name); + + init_list(&ifa->neigh_list); + ifa->hello_seqno = 1; + + ifa->timer = tm_new_set(ifa->pool, babel_iface_timer, ifa, 0, 0); + + init_list(&ifa->msg_queue); + ifa->send_event = ev_new(ifa->pool); + ifa->send_event->hook = babel_send_queue; + ifa->send_event->data = ifa; + + struct object_lock *lock = olock_new(ifa->pool); + lock->type = OBJLOCK_UDP; + lock->addr = IP6_BABEL_ROUTERS; + lock->port = ifa->cf->port; + lock->iface = ifa->iface; + lock->hook = babel_iface_locked; + lock->data = ifa; + + olock_acquire(lock); +} + +static void +babel_remove_iface(struct babel_proto *p, struct babel_iface *ifa) +{ + TRACE(D_EVENTS, "Removing interface %s", ifa->iface->name); + + struct babel_neighbor *n; + WALK_LIST_FIRST(n, ifa->neigh_list) + babel_flush_neighbor(n); + + rem_node(NODE ifa); + + rfree(ifa->pool); /* contains ifa itself, locks, socket, etc */ +} + +static void +babel_if_notify(struct proto *P, unsigned flags, struct iface *iface) +{ + struct babel_proto *p = (void *) P; + struct babel_config *cf = (void *) P->cf; + + if (iface->flags & IF_IGNORE) + return; + + if (flags & IF_CHANGE_UP) + { + struct babel_iface_config *ic = (void *) iface_patt_find(&cf->iface_list, iface, iface->addr); + + /* we only speak multicast */ + if (!(iface->flags & IF_MULTICAST)) + return; + + if (ic) + babel_add_iface(p, iface, ic); + + return; + } + + struct babel_iface *ifa = babel_find_iface(p, iface); + + if (!ifa) + return; + + if (flags & IF_CHANGE_DOWN) + { + babel_remove_iface(p, ifa); + return; + } + + if (flags & IF_CHANGE_MTU) + babel_iface_update_buffers(ifa); + + if (flags & IF_CHANGE_LINK) + babel_iface_update_state(ifa); +} + +static int +babel_reconfigure_iface(struct babel_proto *p, struct babel_iface *ifa, struct 
babel_iface_config *new) +{ + struct babel_iface_config *old = ifa->cf; + + /* Change of these options would require to reset the iface socket */ + if ((new->port != old->port) || + (new->tx_tos != old->tx_tos) || + (new->tx_priority != old->tx_priority)) + return 0; + + TRACE(D_EVENTS, "Reconfiguring interface %s", ifa->iface->name); + + ifa->cf = new; + + if (ifa->next_regular > (now + new->update_interval)) + ifa->next_regular = now + (random() % new->update_interval) + 1; + + if ((new->tx_length != old->tx_length) || (new->rx_buffer != old->rx_buffer)) + babel_iface_update_buffers(ifa); + + if (new->check_link != old->check_link) + babel_iface_update_state(ifa); + + if (ifa->up) + babel_iface_kick_timer(ifa); + + return 1; +} + +static void +babel_reconfigure_ifaces(struct babel_proto *p, struct babel_config *cf) +{ + struct iface *iface; + + WALK_LIST(iface, iface_list) + { + if (! (iface->flags & IF_UP)) + continue; + + struct babel_iface *ifa = babel_find_iface(p, iface); + struct babel_iface_config *ic = (void *) iface_patt_find(&cf->iface_list, iface, NULL); + + if (ifa && ic) + { + if (babel_reconfigure_iface(p, ifa, ic)) + continue; + + /* Hard restart */ + log(L_INFO "%s: Restarting interface %s", p->p.name, ifa->iface->name); + babel_remove_iface(p, ifa); + babel_add_iface(p, iface, ic); + } + + if (ifa && !ic) + babel_remove_iface(p, ifa); + + if (!ifa && ic) + babel_add_iface(p, iface, ic); + } +} + + +/* + * Debugging and info output functions + */ + +static void +babel_dump_source(struct babel_source *s) +{ + debug("Source router_id %lR seqno %d metric %d expires %d\n", + s->router_id, s->seqno, s->metric, s->expires ? s->expires-now : 0); +} + +static void +babel_dump_route(struct babel_route *r) +{ + debug("Route neigh %I if %s seqno %d metric %d/%d router_id %lR expires %d\n", + r->neigh ? r->neigh->addr : IPA_NONE, + r->neigh ? r->neigh->ifa->ifname : "(none)", + r->seqno, r->advert_metric, r->metric, + r->router_id, r->expires ? 
r->expires-now : 0); +} + +static void +babel_dump_entry(struct babel_entry *e) +{ + struct babel_source *s; + struct babel_route *r; + + debug("Babel: Entry %I/%d:\n", e->n.prefix, e->n.pxlen); + + WALK_LIST(s,e->sources) + { debug(" "); babel_dump_source(s); } + + WALK_LIST(r,e->routes) + { + debug(" "); + if (r == e->selected_out) debug("*"); + if (r == e->selected_in) debug("+"); + babel_dump_route(r); + } +} + +static void +babel_dump_neighbor(struct babel_neighbor *n) +{ + debug("Neighbor %I txcost %d hello_map %x next seqno %d expires %d/%d\n", + n->addr, n->txcost, n->hello_map, n->next_hello_seqno, + n->hello_expiry ? n->hello_expiry - now : 0, + n->ihu_expiry ? n->ihu_expiry - now : 0); +} + +static void +babel_dump_iface(struct babel_iface *ifa) +{ + struct babel_neighbor *n; + + debug("Babel: Interface %s addr %I rxcost %d type %d hello seqno %d intervals %d %d\n", + ifa->ifname, ifa->addr, ifa->cf->rxcost, ifa->cf->type, ifa->hello_seqno, + ifa->cf->hello_interval, ifa->cf->update_interval); + + WALK_LIST(n, ifa->neigh_list) + { debug(" "); babel_dump_neighbor(n); } +} + +static void +babel_dump(struct proto *P) +{ + struct babel_proto *p = (struct babel_proto *) P; + struct babel_iface *ifa; + + debug("Babel: router id %lR update seqno %d\n", p->router_id, p->update_seqno); + + WALK_LIST(ifa, p->interfaces) + babel_dump_iface(ifa); + + FIB_WALK(&p->rtable, n) + { + babel_dump_entry((struct babel_entry *) n); + } + FIB_WALK_END; +} + +static void +babel_get_route_info(rte *rte, byte *buf, ea_list *attrs) +{ + buf += bsprintf(buf, " (%d/%d) [%lR]", rte->pref, rte->u.babel.metric, rte->u.babel.router_id); +} + +static int +babel_get_attr(eattr *a, byte *buf, int buflen UNUSED) +{ + switch (a->id) + { + case EA_BABEL_METRIC: + bsprintf(buf, "metric: %d", a->u.data); + return GA_FULL; + + case EA_BABEL_ROUTER_ID: + { + u64 rid = 0; + memcpy(&rid, a->u.ptr->data, sizeof(u64)); + bsprintf(buf, "router_id: %lR", rid); + return GA_FULL; + } + + default: + 
return GA_UNKNOWN; + } +} + +void +babel_show_interfaces(struct proto *P, char *iff) +{ + struct babel_proto *p = (void *) P; + struct babel_iface *ifa = NULL; + struct babel_neighbor *nbr = NULL; + + if (p->p.proto_state != PS_UP) + { + cli_msg(-1023, "%s: is not up", p->p.name); + cli_msg(0, ""); + return; + } + + cli_msg(-1023, "%s:", p->p.name); + cli_msg(-1023, "%-10s %-6s %7s %6s %6s", + "Interface", "State", "RX cost", "Nbrs", "Timer"); + + WALK_LIST(ifa, p->interfaces) + { + if (iff && !patmatch(iff, ifa->iface->name)) + continue; + + int nbrs = 0; + WALK_LIST(nbr, ifa->neigh_list) + nbrs++; + + int timer = MIN(ifa->next_regular, ifa->next_hello) - now; + cli_msg(-1023, "%-10s %-6s %7u %6u %6u", + ifa->iface->name, (ifa->up ? "Up" : "Down"), ifa->cf->rxcost, nbrs, MAX(timer, 0)); + } + + cli_msg(0, ""); +} + +void +babel_show_neighbors(struct proto *P, char *iff) +{ + struct babel_proto *p = (void *) P; + struct babel_iface *ifa = NULL; + struct babel_neighbor *n = NULL; + struct babel_route *r = NULL; + + if (p->p.proto_state != PS_UP) + { + cli_msg(-1024, "%s: is not up", p->p.name); + cli_msg(0, ""); + return; + } + + cli_msg(-1024, "%s:", p->p.name); + cli_msg(-1024, "%-25s %-10s %6s %6s %10s", + "IP address", "Interface", "Metric", "Routes", "Next hello"); + + WALK_LIST(ifa, p->interfaces) + { + if (iff && !patmatch(iff, ifa->iface->name)) + continue; + + WALK_LIST(n, ifa->neigh_list) + { + int rts = 0; + WALK_LIST(r, n->routes) + rts++; + + int timer = n->hello_expiry - now; + cli_msg(-1024, "%-25I %-10s %6u %6u %10u", + n->addr, ifa->iface->name, n->txcost, rts, MAX(timer, 0)); + } + } + + cli_msg(0, ""); +} + +void +babel_show_entries(struct proto *P) +{ + struct babel_proto *p = (void *) P; + struct babel_entry *e = NULL; + struct babel_source *s = NULL; + struct babel_route *r = NULL; + + char ipbuf[STD_ADDRESS_P_LENGTH+5]; + char ridbuf[ROUTER_ID_64_LENGTH+1]; + + if (p->p.proto_state != PS_UP) + { + cli_msg(-1025, "%s: is not up", p->p.name); + 
cli_msg(0, ""); + return; + } + + cli_msg(-1025, "%s:", p->p.name); + cli_msg(-1025, "%-29s %-23s %6s %5s %7s %7s", + "Prefix", "Router ID", "Metric", "Seqno", "Expires", "Sources"); + + FIB_WALK(&p->rtable, n) + { + e = (struct babel_entry *) n; + r = e->selected_in ? e->selected_in : e->selected_out; + + int srcs = 0; + WALK_LIST(s, e->sources) + srcs++; + + bsprintf(ipbuf, "%I/%u", e->n.prefix, e->n.pxlen); + + if (r) + { + if (r->router_id == p->router_id) + bsprintf(ridbuf, "%s", ""); + else + bsprintf(ridbuf, "%lR", r->router_id); + + int time = r->expires ? r->expires - now : 0; + cli_msg(-1025, "%-29s %-23s %6u %5u %7u %7u", + ipbuf, ridbuf, r->metric, r->seqno, MAX(time, 0), srcs); + } + else + { + cli_msg(-1025, "%-29s %-44s %7u", ipbuf, "", srcs); + } + } + FIB_WALK_END; + + cli_msg(0, ""); +} + + +/* + * Babel protocol glue + */ + +/** + * babel_timer - global timer hook + * @t: Timer + * + * This function is called by the global protocol instance timer and handles + * expiration of routes and neighbours as well as pruning of the seqno request + * cache. 
+ */ +static void +babel_timer(timer *t) +{ + struct babel_proto *p = t->data; + + babel_expire_routes(p); + babel_expire_seqno_requests(p); + babel_expire_neighbors(p); +} + +static inline void +babel_kick_timer(struct babel_proto *p) +{ + if (p->timer->expires > (now + 1)) + tm_start(p->timer, 1); +} + + +static struct ea_list * +babel_prepare_attrs(struct linpool *pool, ea_list *next, uint metric, u64 router_id) +{ + struct ea_list *l = lp_alloc(pool, sizeof(struct ea_list) + 2*sizeof(eattr)); + struct adata *rid = lp_alloc(pool, sizeof(struct adata) + sizeof(u64)); + rid->length = sizeof(u64); + memcpy(&rid->data, &router_id, sizeof(u64)); + + l->next = next; + l->flags = EALF_SORTED; + l->count = 2; + + l->attrs[0].id = EA_BABEL_METRIC; + l->attrs[0].flags = 0; + l->attrs[0].type = EAF_TYPE_INT | EAF_TEMP; + l->attrs[0].u.data = metric; + + l->attrs[1].id = EA_BABEL_ROUTER_ID; + l->attrs[1].flags = 0; + l->attrs[1].type = EAF_TYPE_OPAQUE | EAF_TEMP; + l->attrs[1].u.ptr = rid; + + return l; +} + + +static int +babel_import_control(struct proto *P, struct rte **rt, struct ea_list **attrs, struct linpool *pool) +{ + struct babel_proto *p = (void *) P; + + /* Prepare attributes with initial values */ + if ((*rt)->attrs->source != RTS_BABEL) + *attrs = babel_prepare_attrs(pool, NULL, 0, p->router_id); + + return 0; +} + +static struct ea_list * +babel_make_tmp_attrs(struct rte *rt, struct linpool *pool) +{ + return babel_prepare_attrs(pool, NULL, rt->u.babel.metric, rt->u.babel.router_id); +} + +static void +babel_store_tmp_attrs(struct rte *rt, struct ea_list *attrs) +{ + rt->u.babel.metric = ea_get_int(attrs, EA_BABEL_METRIC, 0); +} + +/* + * babel_rt_notify - core tells us about new route (possibly our own), + * so store it into our data structures. 
+ */ +static void +babel_rt_notify(struct proto *P, struct rtable *table UNUSED, struct network *net, + struct rte *new, struct rte *old, struct ea_list *attrs) +{ + struct babel_proto *p = (void *) P; + struct babel_entry *e; + struct babel_route *r; + + if (new) + { + /* Update */ + e = babel_get_entry(p, net->n.prefix, net->n.pxlen); + + if (new->attrs->src->proto != P) + { + r = babel_get_route(e, NULL); + r->seqno = p->update_seqno; + r->router_id = p->router_id; + r->metric = 0; /* FIXME: should be selectable */ + } + else + r = e->selected_in; + + if (r != e->selected_out) + { + e->selected_out = r; + e->updated = now; + babel_trigger_update(p); + } + } + else + { + /* Withdraw */ + e = babel_find_entry(p, net->n.prefix, net->n.pxlen); + if (!e || !e->selected_out) + return; + + if (OUR_ROUTE(e->selected_out)) + { + /* + * We originate this route, so set its metric to infinity and set an + * expiry time. This causes a retraction to be sent, and later the route + * to be flushed once the hold time has passed. + */ + e->selected_out->metric = BABEL_INFINITY; + e->selected_out->expires = now + BABEL_HOLD_TIME; + e->updated = now; + babel_trigger_update(p); + } + else + { + /* + * This is a route originating from someone else that was lost; presumably + * because an export filter was updated to filter it. This means we can't + * set the metric to infinity (it would be overridden on subsequent + * updates from the peer originating the route), so just clear the + * exported route. + * + * This causes peers to expire the route after a while (like if we just + * shut down), but it's the best we can do in these circumstances; and + * since export filters presumably aren't updated that often this is + * acceptable. 
+ */ + e->selected_out = NULL; + } + } +} + +static int +babel_rte_better(struct rte *new, struct rte *old) +{ + return new->u.babel.metric < old->u.babel.metric; +} + +static int +babel_rte_same(struct rte *new, struct rte *old) +{ + return ((new->u.babel.router_id == old->u.babel.router_id) && + (new->u.babel.metric == old->u.babel.metric)); +} + + +static struct proto * +babel_init(struct proto_config *cfg) +{ + struct proto *P = proto_new(cfg, sizeof(struct babel_proto)); + + P->accept_ra_types = RA_OPTIMAL; + P->if_notify = babel_if_notify; + P->rt_notify = babel_rt_notify; + P->import_control = babel_import_control; + P->make_tmp_attrs = babel_make_tmp_attrs; + P->store_tmp_attrs = babel_store_tmp_attrs; + P->rte_better = babel_rte_better; + P->rte_same = babel_rte_same; + + return P; +} + +static int +babel_start(struct proto *P) +{ + struct babel_proto *p = (void *) P; + struct babel_config *cf = (void *) P->cf; + + fib_init(&p->rtable, P->pool, sizeof(struct babel_entry), 0, babel_init_entry); + init_list(&p->interfaces); + p->timer = tm_new_set(P->pool, babel_timer, p, 0, 1); + tm_start(p->timer, 2); + p->update_seqno = 1; + p->router_id = proto_get_router_id(&cf->c); + + p->route_slab = sl_new(P->pool, sizeof(struct babel_route)); + p->source_slab = sl_new(P->pool, sizeof(struct babel_source)); + p->msg_slab = sl_new(P->pool, sizeof(struct babel_msg_node)); + p->seqno_slab = sl_new(P->pool, sizeof(struct babel_seqno_request)); + init_list(&p->seqno_cache); + + p->log_pkt_tbf = (struct tbf){ .rate = 1, .burst = 5 }; + + return PS_UP; +} + +static int +babel_reconfigure(struct proto *P, struct proto_config *c) +{ + struct babel_proto *p = (void *) P; + struct babel_config *new = (void *) c; + + TRACE(D_EVENTS, "Reconfiguring"); + + p->p.cf = c; + babel_reconfigure_ifaces(p, new); + + babel_trigger_update(p); + babel_kick_timer(p); + + return 1; +} + + +struct protocol proto_babel = { + .name = "Babel", + .template = "babel%d", + .attr_class = EAP_BABEL, + 
.preference = DEF_PREF_BABEL, + .config_size = sizeof(struct babel_config), + .init = babel_init, + .dump = babel_dump, + .start = babel_start, + .reconfigure = babel_reconfigure, + .get_route_info = babel_get_route_info, + .get_attr = babel_get_attr +}; diff --git a/proto/babel/babel.h b/proto/babel/babel.h new file mode 100644 index 00000000..aea0dd88 --- /dev/null +++ b/proto/babel/babel.h @@ -0,0 +1,335 @@ +/* + * BIRD -- The Babel protocol + * + * Copyright (c) 2015--2016 Toke Hoiland-Jorgensen + * + * Can be freely distributed and used under the terms of the GNU GPL. + * + * This file contains the data structures used by Babel. + */ + +#ifndef _BIRD_BABEL_H_ +#define _BIRD_BABEL_H_ + +#include "nest/bird.h" +#include "nest/cli.h" +#include "nest/iface.h" +#include "nest/route.h" +#include "nest/protocol.h" +#include "nest/locks.h" +#include "lib/resource.h" +#include "lib/lists.h" +#include "lib/socket.h" +#include "lib/string.h" +#include "lib/timer.h" + +#ifndef IPV6 +#error "The Babel protocol only speaks IPv6" +#endif + +#define EA_BABEL_METRIC EA_CODE(EAP_BABEL, 0) +#define EA_BABEL_ROUTER_ID EA_CODE(EAP_BABEL, 1) + +#define BABEL_MAGIC 42 +#define BABEL_VERSION 2 +#define BABEL_PORT 6696 +#define BABEL_INFINITY 0xFFFF + + +#define BABEL_HELLO_INTERVAL_WIRED 4 /* Default hello intervals in seconds */ +#define BABEL_HELLO_INTERVAL_WIRELESS 4 +#define BABEL_UPDATE_INTERVAL_FACTOR 4 +#define BABEL_IHU_INTERVAL_FACTOR 3 +#define BABEL_IHU_EXPIRY_FACTOR(X) ((X)*3/2) /* 1.5 */ +#define BABEL_HELLO_EXPIRY_FACTOR(X) ((X)*3/2) /* 1.5 */ +#define BABEL_ROUTE_EXPIRY_FACTOR(X) ((X)*7/2) /* 3.5 */ +#define BABEL_ROUTE_REFRESH_INTERVAL 2 /* Seconds before route expiry to send route request */ +#define BABEL_HOLD_TIME 10 /* Expiry time for our own routes */ +#define BABEL_RXCOST_WIRED 96 +#define BABEL_RXCOST_WIRELESS 256 +#define BABEL_INITIAL_HOP_COUNT 255 +#define BABEL_MAX_SEND_INTERVAL 5 +#define BABEL_TIME_UNITS 100 /* On-wire times are counted in centiseconds */ 
+ +#define BABEL_SEQNO_REQUEST_EXPIRY 60 +#define BABEL_GARBAGE_INTERVAL 300 + +#define BABEL_OVERHEAD (SIZE_OF_IP_HEADER+UDP_HEADER_LENGTH) +#define BABEL_MIN_MTU (512 + BABEL_OVERHEAD) + + +enum babel_tlv_type { + BABEL_TLV_PAD1 = 0, + BABEL_TLV_PADN = 1, + BABEL_TLV_ACK_REQ = 2, + BABEL_TLV_ACK = 3, + BABEL_TLV_HELLO = 4, + BABEL_TLV_IHU = 5, + BABEL_TLV_ROUTER_ID = 6, + BABEL_TLV_NEXT_HOP = 7, + BABEL_TLV_UPDATE = 8, + BABEL_TLV_ROUTE_REQUEST = 9, + BABEL_TLV_SEQNO_REQUEST = 10, + /* extensions - not implemented + BABEL_TLV_TS_PC = 11, + BABEL_TLV_HMAC = 12, + BABEL_TLV_SS_UPDATE = 13, + BABEL_TLV_SS_REQUEST = 14, + BABEL_TLV_SS_SEQNO_REQUEST = 15, + */ + BABEL_TLV_MAX +}; + +enum babel_iface_type { + /* In practice, UNDEF and WIRED give equivalent behaviour */ + BABEL_IFACE_TYPE_UNDEF = 0, + BABEL_IFACE_TYPE_WIRED = 1, + BABEL_IFACE_TYPE_WIRELESS = 2, + BABEL_IFACE_TYPE_MAX +}; + +enum babel_ae_type { + BABEL_AE_WILDCARD = 0, + BABEL_AE_IP4 = 1, + BABEL_AE_IP6 = 2, + BABEL_AE_IP6_LL = 3, + BABEL_AE_MAX +}; + + +struct babel_config { + struct proto_config c; + + list iface_list; /* Patterns configured -- keep it first; see babel_reconfigure why */ +}; + +struct babel_iface_config { + struct iface_patt i; + + u16 rxcost; + u8 type; + u8 check_link; + int port; + u16 hello_interval; + u16 ihu_interval; + u16 update_interval; + + u16 rx_buffer; /* RX buffer size, 0 for MTU */ + u16 tx_length; /* TX packet length limit (including headers), 0 for MTU */ + int tx_tos; + int tx_priority; +}; + +struct babel_proto { + struct proto p; + timer *timer; + struct fib rtable; + list interfaces; /* Interfaces we really know about (struct babel_iface) */ + u64 router_id; + u16 update_seqno; /* To be increased on request */ + u8 triggered; /* For triggering global updates */ + + slab *route_slab; + slab *source_slab; + slab *msg_slab; + + slab *seqno_slab; + list seqno_cache; /* Seqno requests in the cache (struct babel_seqno_request) */ + + struct tbf log_pkt_tbf; /* TBF for 
packet messages */ +}; + +struct babel_iface { + node n; + + struct babel_proto *proto; + struct iface *iface; + + struct babel_iface_config *cf; + + u8 up; + + pool *pool; + char *ifname; + sock *sk; + ip_addr addr; + int tx_length; + list neigh_list; /* List of neighbors seen on this iface (struct babel_neighbor) */ + list msg_queue; + + u16 hello_seqno; /* To be increased on each hello */ + + bird_clock_t next_hello; + bird_clock_t next_regular; + bird_clock_t next_triggered; + bird_clock_t want_triggered; + + timer *timer; + event *send_event; +}; + +struct babel_neighbor { + node n; + struct babel_iface *ifa; + + ip_addr addr; + u16 txcost; + u8 hello_cnt; + u16 hello_map; + u16 next_hello_seqno; + /* expiry timers */ + bird_clock_t hello_expiry; + bird_clock_t ihu_expiry; + + list routes; /* Routes this neighbour has sent us (struct babel_route) */ +}; + +struct babel_source { + node n; + + u64 router_id; + u16 seqno; + u16 metric; + bird_clock_t expires; +}; + +struct babel_route { + node n; + node neigh_route; + struct babel_entry *e; + struct babel_neighbor *neigh; + + u16 seqno; + u16 advert_metric; + u16 metric; + u64 router_id; + ip_addr next_hop; + bird_clock_t refresh_time; + bird_clock_t expires; + u16 expiry_interval; +}; + +struct babel_entry { + struct fib_node n; + struct babel_proto *proto; + struct babel_route *selected_in; + struct babel_route *selected_out; + + bird_clock_t updated; + + list sources; /* Source entries for this prefix (struct babel_source). */ + list routes; /* Routes for this prefix (struct babel_route) */ +}; + +/* Stores forwarded seqno requests for duplicate suppression. 
*/ +struct babel_seqno_request { + node n; + ip_addr prefix; + u8 plen; + u64 router_id; + u16 seqno; + bird_clock_t updated; +}; + + +/* + * Internal TLV messages + */ + +struct babel_msg_ack_req { + u8 type; + u16 nonce; + u16 interval; + ip_addr sender; +}; + +struct babel_msg_ack { + u8 type; + u16 nonce; +}; + +struct babel_msg_hello { + u8 type; + u16 seqno; + u16 interval; + ip_addr sender; +}; + +struct babel_msg_ihu { + u8 type; + u8 ae; + u16 rxcost; + u16 interval; + ip_addr addr; + ip_addr sender; +}; + +struct babel_msg_update { + u8 type; + u8 ae; + u8 plen; + u16 interval; + u16 seqno; + u16 metric; + ip_addr prefix; + u64 router_id; + ip_addr next_hop; + ip_addr sender; +}; + +struct babel_msg_route_request { + u8 type; + u8 full; + u8 plen; + ip_addr prefix; +}; + +struct babel_msg_seqno_request { + u8 type; + u8 plen; + u16 seqno; + u8 hop_count; + u64 router_id; + ip_addr prefix; + ip_addr sender; +}; + +union babel_msg { + u8 type; + struct babel_msg_ack_req ack_req; + struct babel_msg_ack ack; + struct babel_msg_hello hello; + struct babel_msg_ihu ihu; + struct babel_msg_update update; + struct babel_msg_route_request route_request; + struct babel_msg_seqno_request seqno_request; +}; + +struct babel_msg_node { + node n; + union babel_msg msg; +}; + + +/* babel.c */ +void babel_handle_ack_req(union babel_msg *msg, struct babel_iface *ifa); +void babel_handle_ack(union babel_msg *msg, struct babel_iface *ifa); +void babel_handle_hello(union babel_msg *msg, struct babel_iface *ifa); +void babel_handle_ihu(union babel_msg *msg, struct babel_iface *ifa); +void babel_handle_router_id(union babel_msg *msg, struct babel_iface *ifa); +void babel_handle_update(union babel_msg *msg, struct babel_iface *ifa); +void babel_handle_route_request(union babel_msg *msg, struct babel_iface *ifa); +void babel_handle_seqno_request(union babel_msg *msg, struct babel_iface *ifa); + +void babel_show_interfaces(struct proto *P, char *iff); +void 
babel_show_neighbors(struct proto *P, char *iff); +void babel_show_entries(struct proto *P); + +/* packets.c */ +void babel_enqueue(union babel_msg *msg, struct babel_iface *ifa); +void babel_send_unicast(union babel_msg *msg, struct babel_iface *ifa, ip_addr dest); +int babel_open_socket(struct babel_iface *ifa); +void babel_send_queue(void *arg); + + +#endif diff --git a/proto/babel/config.Y b/proto/babel/config.Y new file mode 100644 index 00000000..e7ce6a93 --- /dev/null +++ b/proto/babel/config.Y @@ -0,0 +1,129 @@ +/* + * BIRD -- Babel Configuration + * + * Copyright (c) 2015-2016 Toke Hoiland-Jorgensen + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + + + +CF_HDR + +#include "proto/babel/babel.h" +#include "nest/iface.h" + +CF_DEFINES + +#define BABEL_CFG ((struct babel_config *) this_proto) +#define BABEL_IFACE ((struct babel_iface_config *) this_ipatt) + +CF_DECLS + +CF_KEYWORDS(BABEL, METRIC, RXCOST, HELLO, UPDATE, INTERVAL, PORT, WIRED, +WIRELESS, RX, TX, BUFFER, LENGTH, CHECK, LINK, BABEL_METRIC) + +CF_GRAMMAR + +CF_ADDTO(proto, babel_proto) + +babel_proto_start: proto_start BABEL +{ + this_proto = proto_config_new(&proto_babel, $1); + init_list(&BABEL_CFG->iface_list); +}; + +babel_proto_item: + proto_item + | INTERFACE babel_iface + ; + +babel_proto_opts: + /* empty */ + | babel_proto_opts babel_proto_item ';' + ; + +babel_proto: + babel_proto_start proto_name '{' babel_proto_opts '}'; + + +babel_iface_start: +{ + this_ipatt = cfg_allocz(sizeof(struct babel_iface_config)); + add_tail(&BABEL_CFG->iface_list, NODE this_ipatt); + init_list(&this_ipatt->ipn_list); + BABEL_IFACE->port = BABEL_PORT; + BABEL_IFACE->type = BABEL_IFACE_TYPE_WIRED; + BABEL_IFACE->tx_tos = IP_PREC_INTERNET_CONTROL; + BABEL_IFACE->tx_priority = sk_priority_control; + BABEL_IFACE->check_link = 1; +}; + + +babel_iface_finish: +{ + if (BABEL_IFACE->type == BABEL_IFACE_TYPE_WIRELESS) + { + if (!BABEL_IFACE->hello_interval) + BABEL_IFACE->hello_interval 
= BABEL_HELLO_INTERVAL_WIRELESS; + if (!BABEL_IFACE->rxcost) + BABEL_IFACE->rxcost = BABEL_RXCOST_WIRELESS; + } + else + { + if (!BABEL_IFACE->hello_interval) + BABEL_IFACE->hello_interval = BABEL_HELLO_INTERVAL_WIRED; + if (!BABEL_IFACE->rxcost) + BABEL_IFACE->rxcost = BABEL_RXCOST_WIRED; + } + + if (!BABEL_IFACE->update_interval) + BABEL_IFACE->update_interval = BABEL_IFACE->hello_interval*BABEL_UPDATE_INTERVAL_FACTOR; + BABEL_IFACE->ihu_interval = BABEL_IFACE->hello_interval*BABEL_IHU_INTERVAL_FACTOR; /* always derived from the hello interval; there is no IHU option in babel_iface_item */ +}; + + +babel_iface_item: /* the first alternative is empty, so a bare ';' is accepted by babel_iface_opts */ + | PORT expr { BABEL_IFACE->port = $2; if (($2<1) || ($2>65535)) cf_error("Invalid port number"); } + | RXCOST expr { BABEL_IFACE->rxcost = $2; if (($2<1) || ($2>65535)) cf_error("Invalid rxcost"); } + | HELLO INTERVAL expr { BABEL_IFACE->hello_interval = $3; if (($3<1) || ($3>65535)) cf_error("Invalid hello interval"); } + | UPDATE INTERVAL expr { BABEL_IFACE->update_interval = $3; if (($3<1) || ($3>65535)) cf_error("Invalid update interval"); } + | TYPE WIRED { BABEL_IFACE->type = BABEL_IFACE_TYPE_WIRED; } + | TYPE WIRELESS { BABEL_IFACE->type = BABEL_IFACE_TYPE_WIRELESS; } + | RX BUFFER expr { BABEL_IFACE->rx_buffer = $3; if (($3<256) || ($3>65535)) cf_error("RX buffer must be in range 256-65535"); } + | TX LENGTH expr { BABEL_IFACE->tx_length = $3; if (($3<256) || ($3>65535)) cf_error("TX length must be in range 256-65535"); } + | TX tos { BABEL_IFACE->tx_tos = $2; } + | TX PRIORITY expr { BABEL_IFACE->tx_priority = $3; } + | CHECK LINK bool { BABEL_IFACE->check_link = $3; } + ; + +babel_iface_opts: + /* empty */ + | babel_iface_opts babel_iface_item ';' + ; + +babel_iface_opt_list: + /* empty */ + | '{' babel_iface_opts '}' + ; + + +babel_iface: + babel_iface_start iface_patt_list_nopx babel_iface_opt_list babel_iface_finish; + +CF_ADDTO(dynamic_attr, BABEL_METRIC { $$ = f_new_dynamic_attr(EAF_TYPE_INT | EAF_TEMP, T_INT, EA_BABEL_METRIC); }) + +CF_CLI_HELP(SHOW BABEL, ..., [[Show information about Babel protocol]]); + +CF_CLI(SHOW 
BABEL INTERFACES, optsym opttext, [] [\"\"], [[Show information about Babel interfaces]]) +{ babel_show_interfaces(proto_get_named($4, &proto_babel), $5); }; + +CF_CLI(SHOW BABEL NEIGHBORS, optsym opttext, [] [\"\"], [[Show information about Babel neighbors]]) +{ babel_show_neighbors(proto_get_named($4, &proto_babel), $5); }; + +CF_CLI(SHOW BABEL ENTRIES, optsym opttext, [], [[Show information about Babel prefix entries]]) +{ babel_show_entries(proto_get_named($4, &proto_babel)); }; + +CF_CODE + +CF_END diff --git a/proto/babel/packets.c b/proto/babel/packets.c new file mode 100644 index 00000000..be47aa75 --- /dev/null +++ b/proto/babel/packets.c @@ -0,0 +1,1093 @@ +/* + * BIRD -- The Babel protocol + * + * Copyright (c) 2015--2016 Toke Hoiland-Jorgensen + * + * Can be freely distributed and used under the terms of the GNU GPL. + * + * This file contains the packet and TLV handling code for the protocol. + */ + +#include "babel.h" + + +struct babel_pkt_header { + u8 magic; + u8 version; + u16 length; +} PACKED; + +struct babel_tlv { + u8 type; + u8 length; + u8 value[0]; +} PACKED; + +struct babel_tlv_ack_req { + u8 type; + u8 length; + u16 reserved; + u16 nonce; + u16 interval; +} PACKED; + +struct babel_tlv_ack { + u8 type; + u8 length; + u16 nonce; +} PACKED; + +struct babel_tlv_hello { + u8 type; + u8 length; + u16 reserved; + u16 seqno; + u16 interval; +} PACKED; + +struct babel_tlv_ihu { + u8 type; + u8 length; + u8 ae; + u8 reserved; + u16 rxcost; + u16 interval; + u8 addr[0]; +} PACKED; + +struct babel_tlv_router_id { + u8 type; + u8 length; + u16 reserved; + u64 router_id; +} PACKED; + +struct babel_tlv_next_hop { + u8 type; + u8 length; + u8 ae; + u8 reserved; + u8 addr[0]; +} PACKED; + +struct babel_tlv_update { + u8 type; + u8 length; + u8 ae; + u8 flags; + u8 plen; + u8 omitted; + u16 interval; + u16 seqno; + u16 metric; + u8 addr[0]; +} PACKED; + +struct babel_tlv_route_request { + u8 type; + u8 length; + u8 ae; + u8 plen; + u8 addr[0]; +} PACKED; + 
+struct babel_tlv_seqno_request { + u8 type; + u8 length; + u8 ae; + u8 plen; + u16 seqno; + u8 hop_count; + u8 reserved; + u64 router_id; + u8 addr[0]; +} PACKED; + + +#define BABEL_FLAG_DEF_PREFIX 0x80 +#define BABEL_FLAG_ROUTER_ID 0x40 + + +struct babel_parse_state { + struct babel_proto *proto; + struct babel_iface *ifa; + ip_addr saddr; + ip_addr next_hop; + u64 router_id; /* Router ID used in subsequent updates */ + u8 def_ip6_prefix[16]; /* Implicit IPv6 prefix in network order */ + u8 def_ip4_prefix[4]; /* Implicit IPv4 prefix in network order */ + u8 router_id_seen; /* router_id field is valid */ + u8 def_ip6_prefix_seen; /* def_ip6_prefix is valid */ + u8 def_ip4_prefix_seen; /* def_ip4_prefix is valid */ +}; + +enum parse_result { + PARSE_SUCCESS, + PARSE_ERROR, + PARSE_IGNORE, +}; + +struct babel_write_state { + u64 router_id; + u8 router_id_seen; +// ip_addr next_hop; +}; + + +#define DROP(DSC,VAL) do { err_dsc = DSC; err_val = VAL; goto drop; } while(0) +#define DROP1(DSC) do { err_dsc = DSC; goto drop; } while(0) +#define LOG_PKT(msg, args...) \ + log_rl(&p->log_pkt_tbf, L_REMOTE "%s: " msg, p->p.name, args) + +#define FIRST_TLV(p) ((struct babel_tlv *) (((struct babel_pkt_header *) p) + 1)) +#define NEXT_TLV(t) ((struct babel_tlv *) (((byte *) t) + TLV_LENGTH(t))) +#define TLV_LENGTH(t) (t->type == BABEL_TLV_PAD1 ? 
1 : t->length + sizeof(struct babel_tlv)) +#define TLV_OPT_LENGTH(t) (t->length + sizeof(struct babel_tlv) - sizeof(*t)) +#define TLV_HDR(tlv,t,l) ({ tlv->type = t; tlv->length = l - sizeof(struct babel_tlv); }) +#define TLV_HDR0(tlv,t) TLV_HDR(tlv, t, tlv_data[t].min_length) + + +static inline u16 +get_time16(const void *p) +{ + u16 v = get_u16(p) / BABEL_TIME_UNITS; + return MAX(1, v); +} + +static inline void +put_time16(void *p, u16 v) +{ + put_u16(p, v * BABEL_TIME_UNITS); +} + +static inline ip6_addr +get_ip6_px(const void *p, int plen) +{ + ip6_addr addr = IPA_NONE; + memcpy(&addr, p, (plen + 7) / 8); + return ip6_ntoh(addr); +} + +static inline void +put_ip6_px(void *p, ip6_addr addr, int plen) +{ + addr = ip6_hton(addr); + memcpy(p, &addr, (plen + 7) / 8); +} + +static inline ip6_addr +get_ip6_ll(const void *p) +{ + return ip6_build(0xfe800000, 0, get_u32(p+0), get_u32(p+4)); +} + +static inline void +put_ip6_ll(void *p, ip6_addr addr) +{ + put_u32(p+0, _I2(addr)); + put_u32(p+4, _I3(addr)); +} + + +/* + * TLV read/write functions + */ + +static int babel_read_ack_req(struct babel_tlv *hdr, union babel_msg *msg, struct babel_parse_state *state); +static int babel_read_hello(struct babel_tlv *hdr, union babel_msg *msg, struct babel_parse_state *state); +static int babel_read_ihu(struct babel_tlv *hdr, union babel_msg *msg, struct babel_parse_state *state); +static int babel_read_router_id(struct babel_tlv *hdr, union babel_msg *msg, struct babel_parse_state *state); +static int babel_read_next_hop(struct babel_tlv *hdr, union babel_msg *msg, struct babel_parse_state *state); +static int babel_read_update(struct babel_tlv *hdr, union babel_msg *msg, struct babel_parse_state *state); +static int babel_read_route_request(struct babel_tlv *hdr, union babel_msg *msg, struct babel_parse_state *state); +static int babel_read_seqno_request(struct babel_tlv *hdr, union babel_msg *msg, struct babel_parse_state *state); + +static int babel_write_ack(struct babel_tlv 
*hdr, union babel_msg *msg, struct babel_write_state *state, int max_len); +static int babel_write_hello(struct babel_tlv *hdr, union babel_msg *msg, struct babel_write_state *state, int max_len); +static int babel_write_ihu(struct babel_tlv *hdr, union babel_msg *msg, struct babel_write_state *state, int max_len); +static int babel_write_update(struct babel_tlv *hdr, union babel_msg *msg, struct babel_write_state *state, int max_len); +static int babel_write_route_request(struct babel_tlv *hdr, union babel_msg *msg, struct babel_write_state *state, int max_len); +static int babel_write_seqno_request(struct babel_tlv *hdr, union babel_msg *msg, struct babel_write_state *state, int max_len); + +struct babel_tlv_data { + u8 min_length; + int (*read_tlv)(struct babel_tlv *hdr, union babel_msg *m, struct babel_parse_state *state); + int (*write_tlv)(struct babel_tlv *hdr, union babel_msg *m, struct babel_write_state *state, int max_len); + void (*handle_tlv)(union babel_msg *m, struct babel_iface *ifa); +}; + +const static struct babel_tlv_data tlv_data[BABEL_TLV_MAX] = { + [BABEL_TLV_ACK_REQ] = { + sizeof(struct babel_tlv_ack_req), + babel_read_ack_req, + NULL, + babel_handle_ack_req + }, + [BABEL_TLV_ACK] = { + sizeof(struct babel_tlv_ack), + NULL, + babel_write_ack, + NULL + }, + [BABEL_TLV_HELLO] = { + sizeof(struct babel_tlv_hello), + babel_read_hello, + babel_write_hello, + babel_handle_hello + }, + [BABEL_TLV_IHU] = { + sizeof(struct babel_tlv_ihu), + babel_read_ihu, + babel_write_ihu, + babel_handle_ihu + }, + [BABEL_TLV_ROUTER_ID] = { + sizeof(struct babel_tlv_router_id), + babel_read_router_id, + NULL, + NULL + }, + [BABEL_TLV_NEXT_HOP] = { + sizeof(struct babel_tlv_next_hop), + babel_read_next_hop, + NULL, + NULL + }, + [BABEL_TLV_UPDATE] = { + sizeof(struct babel_tlv_update), + babel_read_update, + babel_write_update, + babel_handle_update + }, + [BABEL_TLV_ROUTE_REQUEST] = { + sizeof(struct babel_tlv_route_request), + babel_read_route_request, + 
babel_write_route_request, + babel_handle_route_request + }, + [BABEL_TLV_SEQNO_REQUEST] = { + sizeof(struct babel_tlv_seqno_request), + babel_read_seqno_request, + babel_write_seqno_request, + babel_handle_seqno_request + }, +}; + +static int +babel_read_ack_req(struct babel_tlv *hdr, union babel_msg *m, + struct babel_parse_state *state) +{ + struct babel_tlv_ack_req *tlv = (void *) hdr; + struct babel_msg_ack_req *msg = &m->ack_req; + + msg->type = BABEL_TLV_ACK_REQ; + msg->nonce = get_u16(&tlv->nonce); + msg->interval = get_time16(&tlv->interval); + msg->sender = state->saddr; + + if (!msg->interval) + return PARSE_ERROR; + + return PARSE_SUCCESS; +} + +static int +babel_write_ack(struct babel_tlv *hdr, union babel_msg *m, + struct babel_write_state *state, int max_len) +{ + struct babel_tlv_ack *tlv = (void *) hdr; + struct babel_msg_ack *msg = &m->ack; + + TLV_HDR0(tlv, BABEL_TLV_ACK); + put_u16(&tlv->nonce, msg->nonce); + + return sizeof(struct babel_tlv_ack); +} + +static int +babel_read_hello(struct babel_tlv *hdr, union babel_msg *m, + struct babel_parse_state *state) +{ + struct babel_tlv_hello *tlv = (void *) hdr; + struct babel_msg_hello *msg = &m->hello; + + msg->type = BABEL_TLV_HELLO; + msg->seqno = get_u16(&tlv->seqno); + msg->interval = get_time16(&tlv->interval); + msg->sender = state->saddr; + + return PARSE_SUCCESS; +} + +static int +babel_write_hello(struct babel_tlv *hdr, union babel_msg *m, + struct babel_write_state *state, int max_len) +{ + struct babel_tlv_hello *tlv = (void *) hdr; + struct babel_msg_hello *msg = &m->hello; + + TLV_HDR0(tlv, BABEL_TLV_HELLO); + put_u16(&tlv->seqno, msg->seqno); + put_time16(&tlv->interval, msg->interval); + + return sizeof(struct babel_tlv_hello); +} + +static int +babel_read_ihu(struct babel_tlv *hdr, union babel_msg *m, + struct babel_parse_state *state) +{ + struct babel_tlv_ihu *tlv = (void *) hdr; + struct babel_msg_ihu *msg = &m->ihu; + + msg->type = BABEL_TLV_IHU; + msg->ae = tlv->ae; + 
msg->rxcost = get_u16(&tlv->rxcost); + msg->interval = get_time16(&tlv->interval); + msg->addr = IPA_NONE; + msg->sender = state->saddr; + + if (msg->ae >= BABEL_AE_MAX) + return PARSE_IGNORE; + + // We handle link-local IPs. In every other case, the addr field will be 0 but + // validation will succeed. The handler takes care of these cases. + if (msg->ae == BABEL_AE_IP6_LL) + { + if (TLV_OPT_LENGTH(tlv) < 8) + return PARSE_ERROR; + + msg->addr = ipa_from_ip6(get_ip6_ll(&tlv->addr)); + } + + return PARSE_SUCCESS; +} + +static int +babel_write_ihu(struct babel_tlv *hdr, union babel_msg *m, + struct babel_write_state *state, int max_len) +{ + struct babel_tlv_ihu *tlv = (void *) hdr; + struct babel_msg_ihu *msg = &m->ihu; + + if (ipa_is_link_local(msg->addr) && max_len < sizeof(struct babel_tlv_ihu) + 8) + return 0; + + TLV_HDR0(tlv, BABEL_TLV_IHU); + put_u16(&tlv->rxcost, msg->rxcost); + put_time16(&tlv->interval, msg->interval); + + if (!ipa_is_link_local(msg->addr)) + { + tlv->ae = BABEL_AE_WILDCARD; + return sizeof(struct babel_tlv_ihu); + } + put_ip6_ll(&tlv->addr, msg->addr); + tlv->ae = BABEL_AE_IP6_LL; + hdr->length += 8; + return sizeof(struct babel_tlv_ihu) + 8; +} + +static int +babel_read_router_id(struct babel_tlv *hdr, union babel_msg *m UNUSED, + struct babel_parse_state *state) +{ + struct babel_tlv_router_id *tlv = (void *) hdr; + + state->router_id = get_u64(&tlv->router_id); + state->router_id_seen = 1; + + return PARSE_IGNORE; +} + +/* This is called directly from babel_write_update() */ +static int +babel_write_router_id(struct babel_tlv *hdr, u64 router_id, + struct babel_write_state *state, int max_len UNUSED) +{ + struct babel_tlv_router_id *tlv = (void *) hdr; + + /* We still assume that first min_length bytes are available and zeroed */ + + TLV_HDR0(tlv, BABEL_TLV_ROUTER_ID); + put_u64(&tlv->router_id, router_id); + + state->router_id = router_id; + state->router_id_seen = 1; + + return sizeof(struct babel_tlv_router_id); +} + +static int 
+babel_read_next_hop(struct babel_tlv *hdr, union babel_msg *m UNUSED, + struct babel_parse_state *state) +{ + struct babel_tlv_next_hop *tlv = (void *) hdr; + + switch (tlv->ae) + { + case BABEL_AE_WILDCARD: + return PARSE_ERROR; + + case BABEL_AE_IP4: + /* TODO */ + return PARSE_IGNORE; + + case BABEL_AE_IP6: + if (TLV_OPT_LENGTH(tlv) < sizeof(ip6_addr)) + return PARSE_ERROR; + + state->next_hop = ipa_from_ip6(get_ip6(&tlv->addr)); + return PARSE_IGNORE; + + case BABEL_AE_IP6_LL: + if (TLV_OPT_LENGTH(tlv) < 8) + return PARSE_ERROR; + + state->next_hop = ipa_from_ip6(get_ip6_ll(&tlv->addr)); + return PARSE_IGNORE; + + default: + return PARSE_IGNORE; + } + + return PARSE_IGNORE; +} + +static int +babel_read_update(struct babel_tlv *hdr, union babel_msg *m, + struct babel_parse_state *state) +{ + struct babel_tlv_update *tlv = (void *) hdr; + struct babel_msg_update *msg = &m->update; + + msg->type = BABEL_TLV_UPDATE; + msg->ae = tlv->ae; + msg->interval = get_time16(&tlv->interval); + msg->seqno = get_u16(&tlv->seqno); + msg->metric = get_u16(&tlv->metric); + + /* Length of received prefix data without omitted part */ + int len = (tlv->plen + 7)/8 - (int) tlv->omitted; + u8 buf[16] = {}; + + if ((len < 0) || (len > TLV_OPT_LENGTH(tlv))) + return PARSE_ERROR; + + switch (tlv->ae) + { + case BABEL_AE_WILDCARD: + if (tlv->plen > 0) + return PARSE_ERROR; + + msg->prefix = IPA_NONE; + break; + + case BABEL_AE_IP4: + /* TODO */ + return PARSE_IGNORE; + + case BABEL_AE_IP6: + if (tlv->plen > MAX_PREFIX_LENGTH) + return PARSE_ERROR; + + /* Cannot omit data if there is no saved prefix */ + if (tlv->omitted && !state->def_ip6_prefix_seen) + return PARSE_ERROR; + + /* Merge saved prefix and received prefix parts */ + memcpy(buf, state->def_ip6_prefix, tlv->omitted); + memcpy(buf + tlv->omitted, tlv->addr, len); + + msg->plen = tlv->plen; + msg->prefix = ipa_from_ip6(get_ip6(buf)); + + if (tlv->flags & BABEL_FLAG_DEF_PREFIX) + { + put_ip6(state->def_ip6_prefix, msg->prefix); + 
state->def_ip6_prefix_seen = 1; + } + + if (tlv->flags & BABEL_FLAG_ROUTER_ID) + { + state->router_id = ((u64) _I2(msg->prefix)) << 32 | _I3(msg->prefix); + state->router_id_seen = 1; + } + break; + + case BABEL_AE_IP6_LL: + /* ??? */ + return PARSE_IGNORE; + + default: + return PARSE_IGNORE; + } + + if (!state->router_id_seen) + { + DBG("Babel: No router ID seen before update\n"); + return PARSE_ERROR; + } + + msg->router_id = state->router_id; + msg->next_hop = state->next_hop; + msg->sender = state->saddr; + + return PARSE_SUCCESS; +} + +static int +babel_write_update(struct babel_tlv *hdr, union babel_msg *m, + struct babel_write_state *state, int max_len) +{ + struct babel_tlv_update *tlv = (void *) hdr; + struct babel_msg_update *msg = &m->update; + int len0 = 0; + + /* + * When needed, we write Router-ID TLV before Update TLV and return size of + * both of them. There is enough space for the Router-ID TLV, because + * sizeof(struct babel_tlv_router_id) == sizeof(struct babel_tlv_update). 
+ */ + if (!state->router_id_seen || (msg->router_id != state->router_id)) + { + len0 = babel_write_router_id(hdr, msg->router_id, state, max_len); + tlv = (struct babel_tlv_update *) NEXT_TLV(tlv); + } + + int len = sizeof(struct babel_tlv_update) + (msg->plen + 7)/8; + + if (len0 + len > max_len) + return 0; + + memset(tlv, 0, sizeof(struct babel_tlv_update)); + TLV_HDR(tlv, BABEL_TLV_UPDATE, len); + tlv->ae = BABEL_AE_IP6; + tlv->plen = msg->plen; + put_time16(&tlv->interval, msg->interval); + put_u16(&tlv->seqno, msg->seqno); + put_u16(&tlv->metric, msg->metric); + put_ip6_px(tlv->addr, msg->prefix, msg->plen); + + return len0 + len; +} + +static int +babel_read_route_request(struct babel_tlv *hdr, union babel_msg *m, + struct babel_parse_state *state) +{ + struct babel_tlv_route_request *tlv = (void *) hdr; + struct babel_msg_route_request *msg = &m->route_request; + + msg->type = BABEL_TLV_ROUTE_REQUEST; + + switch (tlv->ae) + { + case BABEL_AE_WILDCARD: + /* Wildcard requests must have plen 0 */ + if (tlv->plen > 0) + return PARSE_ERROR; + + msg->full = 1; + return PARSE_SUCCESS; + + case BABEL_AE_IP4: + /* TODO */ + return PARSE_IGNORE; + + case BABEL_AE_IP6: + if (tlv->plen > MAX_PREFIX_LENGTH) + return PARSE_ERROR; + + if (TLV_OPT_LENGTH(tlv) < (tlv->plen + 7)/8) + return PARSE_ERROR; + + msg->plen = tlv->plen; + msg->prefix = get_ip6_px(tlv->addr, tlv->plen); + return PARSE_SUCCESS; + + case BABEL_AE_IP6_LL: + return PARSE_ERROR; + + default: + return PARSE_IGNORE; + } + + return PARSE_IGNORE; +} + +static int +babel_write_route_request(struct babel_tlv *hdr, union babel_msg *m, + struct babel_write_state *state, int max_len) +{ + struct babel_tlv_route_request *tlv = (void *) hdr; + struct babel_msg_route_request *msg = &m->route_request; + + int len = sizeof(struct babel_tlv_route_request) + (msg->plen + 7)/8; + + if (len > max_len) + return 0; + + TLV_HDR(tlv, BABEL_TLV_ROUTE_REQUEST, len); + + if (msg->full) + { + tlv->ae = BABEL_AE_WILDCARD; + 
tlv->plen = 0; + } + else + { + tlv->ae = BABEL_AE_IP6; + tlv->plen = msg->plen; + put_ip6_px(tlv->addr, msg->prefix, msg->plen); + } + + return len; +} + +static int +babel_read_seqno_request(struct babel_tlv *hdr, union babel_msg *m, + struct babel_parse_state *state) +{ + struct babel_tlv_seqno_request *tlv = (void *) hdr; + struct babel_msg_seqno_request *msg = &m->seqno_request; + + msg->type = BABEL_TLV_SEQNO_REQUEST; + msg->seqno = get_u16(&tlv->seqno); + msg->hop_count = tlv->hop_count; + msg->router_id = get_u64(&tlv->router_id); + msg->sender = state->saddr; + + if (tlv->hop_count == 0) + return PARSE_ERROR; + + switch (tlv->ae) + { + case BABEL_AE_WILDCARD: + return PARSE_ERROR; + + case BABEL_AE_IP4: + /* TODO */ + return PARSE_IGNORE; + + case BABEL_AE_IP6: + if (tlv->plen > MAX_PREFIX_LENGTH) + return PARSE_ERROR; + + if (TLV_OPT_LENGTH(tlv) < (tlv->plen + 7)/8) + return PARSE_ERROR; + + msg->plen = tlv->plen; + msg->prefix = get_ip6_px(tlv->addr, tlv->plen); + return PARSE_SUCCESS; + + case BABEL_AE_IP6_LL: + return PARSE_ERROR; + + default: + return PARSE_IGNORE; + } + + return PARSE_IGNORE; +} + +static int +babel_write_seqno_request(struct babel_tlv *hdr, union babel_msg *m, + struct babel_write_state *state, int max_len) +{ + struct babel_tlv_seqno_request *tlv = (void *) hdr; + struct babel_msg_seqno_request *msg = &m->seqno_request; + + int len = sizeof(struct babel_tlv_seqno_request) + (msg->plen + 7)/8; + + if (len > max_len) + return 0; + + TLV_HDR(tlv, BABEL_TLV_SEQNO_REQUEST, len); + tlv->ae = BABEL_AE_IP6; + tlv->plen = msg->plen; + put_u16(&tlv->seqno, msg->seqno); + tlv->hop_count = msg->hop_count; + put_u64(&tlv->router_id, msg->router_id); + put_ip6_px(tlv->addr, msg->prefix, msg->plen); + + return len; +} + +static inline int +babel_read_tlv(struct babel_tlv *hdr, + union babel_msg *msg, + struct babel_parse_state *state) +{ + if ((hdr->type <= BABEL_TLV_PADN) || + (hdr->type >= BABEL_TLV_MAX) || + !tlv_data[hdr->type].read_tlv) + 
return PARSE_IGNORE; + + if (TLV_LENGTH(hdr) < tlv_data[hdr->type].min_length) + return PARSE_ERROR; + + memset(msg, 0, sizeof(*msg)); + return tlv_data[hdr->type].read_tlv(hdr, msg, state); +} + +static int +babel_write_tlv(struct babel_tlv *hdr, + union babel_msg *msg, + struct babel_write_state *state, + int max_len) +{ + if ((msg->type <= BABEL_TLV_PADN) || + (msg->type >= BABEL_TLV_MAX) || + !tlv_data[msg->type].write_tlv) + return 0; + + if (tlv_data[msg->type].min_length > max_len) + return 0; + + memset(hdr, 0, tlv_data[msg->type].min_length); + return tlv_data[msg->type].write_tlv(hdr, msg, state, max_len); +} + + +/* + * Packet RX/TX functions + */ + +static int +babel_send_to(struct babel_iface *ifa, ip_addr dest) +{ + sock *sk = ifa->sk; + struct babel_pkt_header *hdr = (void *) sk->tbuf; + int len = get_u16(&hdr->length) + sizeof(struct babel_pkt_header); + + DBG("Babel: Sending %d bytes to %I\n", len, dest); + return sk_send_to(sk, len, dest, 0); +} + +/** + * babel_write_queue - Write a TLV queue to a transmission buffer + * @ifa: Interface holding the transmission buffer + * @queue: TLV queue to write (containing internal-format TLVs) + * + * This function writes a packet to the interface transmission buffer with as + * many TLVs from the &queue as will fit in the buffer. It returns the number of + * bytes written (NOT counting the packet header). The function is called by + * babel_send_queue() and babel_send_unicast() to construct packets for + * transmission, and uses per-TLV helper functions to convert the + * internal-format TLVs to their wire representations. + * + * The TLVs in the queue are freed after they are written to the buffer. 
+ */ +static int +babel_write_queue(struct babel_iface *ifa, list *queue) +{ + struct babel_proto *p = ifa->proto; + struct babel_write_state state = {}; + + if (EMPTY_LIST(*queue)) + return 0; + + byte *pos = ifa->sk->tbuf; + byte *end = pos + ifa->tx_length; + + struct babel_pkt_header *pkt = (void *) pos; + pkt->magic = BABEL_MAGIC; + pkt->version = BABEL_VERSION; + pkt->length = 0; + pos += sizeof(struct babel_pkt_header); + + struct babel_msg_node *msg; + WALK_LIST_FIRST(msg, *queue) + { + int len = babel_write_tlv((struct babel_tlv *) pos, &msg->msg, &state, end - pos); + + if (!len) + break; + + pos += len; + rem_node(NODE msg); + sl_free(p->msg_slab, msg); + } + + int plen = pos - (byte *) pkt; + put_u16(&pkt->length, plen - sizeof(struct babel_pkt_header)); + + return plen; +} + +void +babel_send_queue(void *arg) +{ + struct babel_iface *ifa = arg; + while ((babel_write_queue(ifa, &ifa->msg_queue) > 0) && + (babel_send_to(ifa, IP6_BABEL_ROUTERS) > 0)); +} + +static inline void +babel_kick_queue(struct babel_iface *ifa) +{ + /* + * Only schedule send event if there is not already data in the socket buffer. + * Otherwise we may overwrite the data already in the buffer. + */ + + if ((ifa->sk->tpos == ifa->sk->tbuf) && !ev_active(ifa->send_event)) + ev_schedule(ifa->send_event); +} + +/** + * babel_send_unicast - send a single TLV via unicast to a destination + * @msg: TLV to send + * @ifa: Interface to send via + * @dest: Destination of the TLV + * + * This function is used to send a single TLV via unicast to a designated + * receiver. This is used for replying to certain incoming requests, and for + * sending unicast requests to refresh routes before they expire. 
+ */ +void +babel_send_unicast(union babel_msg *msg, struct babel_iface *ifa, ip_addr dest) +{ + struct babel_proto *p = ifa->proto; + struct babel_msg_node *msgn = sl_alloc(p->msg_slab); + list queue; + + msgn->msg = *msg; + init_list(&queue); + add_tail(&queue, NODE msgn); + babel_write_queue(ifa, &queue); + babel_send_to(ifa, dest); + + /* We could overwrite waiting packet here, we may have to kick TX queue */ + if (!EMPTY_LIST(ifa->msg_queue)) + babel_kick_queue(ifa); +} + +/** + * babel_enqueue - enqueue a TLV for transmission on an interface + * @msg: TLV to enqueue (in internal TLV format) + * @ifa: Interface to enqueue to + * + * This function is called to enqueue a TLV for subsequent transmission on an + * interface. The transmission event is triggered whenever a TLV is enqueued; + * this ensures that TLVs will be transmitted in a timely manner, but that TLVs + * which are enqueued in rapid succession can be transmitted together in one + * packet. + */ +void +babel_enqueue(union babel_msg *msg, struct babel_iface *ifa) +{ + struct babel_proto *p = ifa->proto; + struct babel_msg_node *msgn = sl_alloc(p->msg_slab); + msgn->msg = *msg; + add_tail(&ifa->msg_queue, NODE msgn); + babel_kick_queue(ifa); +} + +/** + * babel_process_packet - process incoming data packet + * @pkt: Pointer to the packet data + * @len: Length of received packet + * @saddr: Address of packet sender + * @ifa: Interface packet was received on. + * + * This function is the main processing hook of incoming Babel packets. It + * checks that the packet header is well-formed, then processes the TLVs + * contained in the packet. This is done in two passes: First all TLVs are + * parsed into the internal TLV format. If a TLV parser fails, processing of the + * rest of the packet is aborted. + * + * After the parsing step, the TLV handlers are called for each parsed TLV in + * order. 
+ */ +static void +babel_process_packet(struct babel_pkt_header *pkt, int len, + ip_addr saddr, struct babel_iface *ifa) +{ + struct babel_proto *p = ifa->proto; + struct babel_tlv *tlv; + struct babel_msg_node *msg; + list msgs; + int res; + + int plen = sizeof(struct babel_pkt_header) + get_u16(&pkt->length); + byte *pos; + byte *end = (byte *)pkt + plen; + + struct babel_parse_state state = { + .proto = p, + .ifa = ifa, + .saddr = saddr, + .next_hop = saddr, + }; + + if ((pkt->magic != BABEL_MAGIC) || (pkt->version != BABEL_VERSION)) + { + TRACE(D_PACKETS, "Strange packet from %I via %s - magic %d version %d", + saddr, ifa->iface->name, pkt->magic, pkt->version); + return; + } + + if (plen > len) + { + LOG_PKT("Bad packet from %I via %s - %s (%u)", + saddr, ifa->iface->name, "length mismatch", plen); + return; + } + + TRACE(D_PACKETS, "Packet received from %I via %s", + saddr, ifa->iface->name); + + init_list(&msgs); + + /* First pass through the packet TLV by TLV, parsing each into internal data + structures. 
*/ + for (tlv = FIRST_TLV(pkt); + (byte *)tlv < end; + tlv = NEXT_TLV(tlv)) + { + /* Ugly special case */ + if (tlv->type == BABEL_TLV_PAD1) + continue; + + /* The end of the common TLV header */ + pos = (byte *)tlv + sizeof(struct babel_tlv); + if ((pos > end) || (pos + tlv->length > end)) + { + LOG_PKT("Bad TLV from %I via %s type %d pos %d - framing error", + saddr, ifa->iface->name, tlv->type, (byte *)tlv - (byte *)pkt); + break; + } + + msg = sl_alloc(p->msg_slab); + res = babel_read_tlv(tlv, &msg->msg, &state); + if (res == PARSE_SUCCESS) + { + add_tail(&msgs, NODE msg); + } + else if (res == PARSE_IGNORE) + { + DBG("Babel: Ignoring TLV of type %d\n", tlv->type); + sl_free(p->msg_slab, msg); + } + else /* PARSE_ERROR */ + { + LOG_PKT("Bad TLV from %I via %s type %d pos %d - parse error", + saddr, ifa->iface->name, tlv->type, (byte *)tlv - (byte *)pkt); + sl_free(p->msg_slab, msg); + break; + } + } + + /* Parsing done, handle all parsed TLVs */ + WALK_LIST_FIRST(msg, msgs) + { + if (tlv_data[msg->msg.type].handle_tlv) + tlv_data[msg->msg.type].handle_tlv(&msg->msg, ifa); + rem_node(NODE msg); + sl_free(p->msg_slab, msg); + } +} + +static void +babel_err_hook(sock *sk, int err) +{ + struct babel_iface *ifa = sk->data; + struct babel_proto *p = ifa->proto; + + log(L_ERR "%s: Socket error on %s: %M", p->p.name, ifa->iface->name, err); + /* FIXME: Drop queued TLVs here? 
*/ +} + + +static void +babel_tx_hook(sock *sk) +{ + struct babel_iface *ifa = sk->data; + + DBG("Babel: TX hook called (iface %s, src %I, dst %I)\n", + sk->iface->name, sk->saddr, sk->daddr); + + babel_send_queue(ifa); +} + + +static int +babel_rx_hook(sock *sk, int len) +{ + struct babel_iface *ifa = sk->data; + struct babel_proto *p = ifa->proto; + const char *err_dsc = NULL; + uint err_val = 0; + + if (sk->lifindex != ifa->iface->index) + return 1; + + DBG("Babel: RX hook called (iface %s, src %I, dst %I)\n", + sk->iface->name, sk->faddr, sk->laddr); + + /* Silently ignore my own packets */ + if (ipa_equal(ifa->iface->addr->ip, sk->faddr)) + return 1; + + if (!ipa_is_link_local(sk->faddr)) + DROP1("wrong src address"); + + if (sk->fport != ifa->cf->port) + DROP("wrong src port", sk->fport); + + if (len < sizeof(struct babel_pkt_header)) + DROP("too short", len); + + if (sk->flags & SKF_TRUNCATED) + DROP("truncated", len); + + babel_process_packet((struct babel_pkt_header *) sk->rbuf, len, sk->faddr, ifa); + return 1; + +drop: + LOG_PKT("Bad packet from %I via %s - %s (%u)", + sk->faddr, sk->iface->name, err_dsc, err_val); + return 1; +} + +int +babel_open_socket(struct babel_iface *ifa) +{ + struct babel_proto *p = ifa->proto; + + sock *sk; + sk = sk_new(ifa->pool); + sk->type = SK_UDP; + sk->sport = ifa->cf->port; + sk->dport = ifa->cf->port; + sk->iface = ifa->iface; + + sk->rx_hook = babel_rx_hook; + sk->tx_hook = babel_tx_hook; + sk->err_hook = babel_err_hook; + sk->data = ifa; + + sk->tos = ifa->cf->tx_tos; + sk->priority = ifa->cf->tx_priority; + sk->ttl = 1; + sk->flags = SKF_LADDR_RX; + + if (sk_open(sk) < 0) + goto err; + + if (sk_setup_multicast(sk) < 0) + goto err; + + if (sk_join_group(sk, IP6_BABEL_ROUTERS) < 0) + goto err; + + ifa->sk = sk; + return 1; + +err: + sk_log_error(sk, p->p.name); + rfree(sk); + return 0; +} diff --git a/proto/bgp/bgp.c b/proto/bgp/bgp.c index 61b5cba2..0ae3db7b 100644 --- a/proto/bgp/bgp.c +++ b/proto/bgp/bgp.c @@ 
-121,7 +121,8 @@ bgp_open(struct bgp_proto *p) bgp_counter++; if (p->cf->password) - if (sk_set_md5_auth(bgp_listen_sk, p->cf->remote_ip, p->cf->iface, p->cf->password) < 0) + if (sk_set_md5_auth(bgp_listen_sk, p->cf->source_addr, p->cf->remote_ip, + p->cf->iface, p->cf->password, p->cf->setkey) < 0) { sk_log_error(bgp_listen_sk, p->p.name); bgp_close(p, 0); @@ -191,7 +192,8 @@ bgp_close(struct bgp_proto *p, int apply_md5) bgp_counter--; if (p->cf->password && apply_md5) - if (sk_set_md5_auth(bgp_listen_sk, p->cf->remote_ip, p->cf->iface, NULL) < 0) + if (sk_set_md5_auth(bgp_listen_sk, p->cf->source_addr, p->cf->remote_ip, + p->cf->iface, NULL, p->cf->setkey) < 0) sk_log_error(bgp_listen_sk, p->p.name); if (!bgp_counter) diff --git a/proto/bgp/bgp.h b/proto/bgp/bgp.h index 274794f1..b1cca2d9 100644 --- a/proto/bgp/bgp.h +++ b/proto/bgp/bgp.h @@ -51,6 +51,7 @@ struct bgp_config { int add_path; /* Use ADD-PATH extension [draft] */ int allow_local_as; /* Allow that number of local ASNs in incoming AS_PATHs */ int gr_mode; /* Graceful restart mode (BGP_GR_*) */ + int setkey; /* Set MD5 password to system SA/SP database */ unsigned gr_time; /* Graceful restart timeout */ unsigned connect_delay_time; /* Minimum delay between connect attempts */ unsigned connect_retry_time; /* Timeout for connect attempts */ diff --git a/proto/bgp/config.Y b/proto/bgp/config.Y index 614ef08c..33561bff 100644 --- a/proto/bgp/config.Y +++ b/proto/bgp/config.Y @@ -27,7 +27,7 @@ CF_KEYWORDS(BGP, LOCAL, NEIGHBOR, AS, HOLD, TIME, CONNECT, RETRY, INTERPRET, COMMUNITIES, BGP_ORIGINATOR_ID, BGP_CLUSTER_LIST, IGP, TABLE, GATEWAY, DIRECT, RECURSIVE, MED, TTL, SECURITY, DETERMINISTIC, SECONDARY, ALLOW, BFD, ADD, PATHS, RX, TX, GRACEFUL, RESTART, AWARE, - CHECK, LINK, PORT, EXTENDED, MESSAGES) + CHECK, LINK, PORT, EXTENDED, MESSAGES, SETKEY) CF_GRAMMAR @@ -54,6 +54,7 @@ bgp_proto_start: proto_start BGP { BGP_CFG->default_local_pref = 100; BGP_CFG->gr_mode = BGP_GR_AWARE; BGP_CFG->gr_time = 120; + 
BGP_CFG->setkey = 1; } ; @@ -112,6 +113,7 @@ bgp_proto: | bgp_proto CAPABILITIES bool ';' { BGP_CFG->capabilities = $3; } | bgp_proto ADVERTISE IPV4 bool ';' { BGP_CFG->advertise_ipv4 = $4; } | bgp_proto PASSWORD text ';' { BGP_CFG->password = $3; } + | bgp_proto SETKEY bool ';' { BGP_CFG->setkey = $3; } | bgp_proto PASSIVE bool ';' { BGP_CFG->passive = $3; } | bgp_proto INTERPRET COMMUNITIES bool ';' { BGP_CFG->interpret_communities = $4; } | bgp_proto SECONDARY bool ';' { BGP_CFG->secondary = $3; } diff --git a/proto/ospf/dbdes.c b/proto/ospf/dbdes.c index 65bdb3ec..195b03ad 100644 --- a/proto/ospf/dbdes.c +++ b/proto/ospf/dbdes.c @@ -192,6 +192,7 @@ ospf_do_send_dbdes(struct ospf_proto *p, struct ospf_neighbor *n) /** * ospf_send_dbdes - transmit database description packet + * @p: OSPF protocol instance * @n: neighbor * * Sending of a database description packet is described in 10.8 of RFC 2328. diff --git a/proto/ospf/lsalib.c b/proto/ospf/lsalib.c index 5564bee7..b4d2faba 100644 --- a/proto/ospf/lsalib.c +++ b/proto/ospf/lsalib.c @@ -552,12 +552,13 @@ lsa_validate_prefix(struct ospf_lsa_header *lsa, struct ospf_lsa_prefix *body) /** * lsa_validate - check whether given LSA is valid * @lsa: LSA header + * @lsa_type: internal LSA type (%LSA_T_xxx) + * @ospf2: %true for OSPFv2, %false for OSPFv3 * @body: pointer to LSA body * * Checks internal structure of given LSA body (minimal length, * consistency). Returns true if valid. */ - int lsa_validate(struct ospf_lsa_header *lsa, u32 lsa_type, int ospf2, void *body) { diff --git a/proto/ospf/packet.c b/proto/ospf/packet.c index 35ef7c6e..7ce6d99f 100644 --- a/proto/ospf/packet.c +++ b/proto/ospf/packet.c @@ -207,7 +207,7 @@ drop: /** * ospf_rx_hook * @sk: socket we received the packet. - * @size: size of the packet + * @len: length of the packet * * This is the entry point for messages from neighbors. 
Many checks (like * authentication, checksums, size) are done before the packet is passed to diff --git a/sysdep/autoconf.h.in b/sysdep/autoconf.h.in index a9e46e27..c73270c3 100644 --- a/sysdep/autoconf.h.in +++ b/sysdep/autoconf.h.in @@ -43,6 +43,7 @@ #undef CONFIG_BGP #undef CONFIG_OSPF #undef CONFIG_PIPE +#undef CONFIG_BABEL /* We use multithreading */ #undef USE_PTHREADS diff --git a/sysdep/bsd/setkey.h b/sysdep/bsd/setkey.h new file mode 100644 index 00000000..b417faca --- /dev/null +++ b/sysdep/bsd/setkey.h @@ -0,0 +1,170 @@ +/* + * BIRD -- Manipulation the IPsec SA/SP database using setkey(8) utility + * + * (c) 2016 CZ.NIC z.s.p.o. + */ + +#include +#include +#include +#include +#include + +#include "nest/bird.h" +#include "lib/unix.h" + + +/* + * Open a socket for manage the IPsec SA/SP database entries + */ +static int +setkey_open_socket(void) +{ + int s = socket(PF_KEY, SOCK_RAW, PF_KEY_V2); + if (s < 0) + { + log(L_ERR "SETKEY: socket: %m"); + return -1; + } + + return s; +} + +static int +setkey_send(struct sadb_msg *msg, uint len) +{ + int s = setkey_open_socket(); + if (s < 0) + return -1; + + if (msg->sadb_msg_type == SADB_ADD) + { + /* Delete possible current key in the IPsec SA/SP database */ + msg->sadb_msg_type = SADB_DELETE; + send(s, msg, len, 0); + msg->sadb_msg_type = SADB_ADD; + } + + if (send(s, msg, len, 0) < 0) + { + log(L_ERR "SETKEY: send: %m"); + close(s); + return -1; + } + + close(s); + return 0; +} + +/* + * Perform setkey(8)-like operation for set the password for TCP MD5 Signature. + * Could be called with SABD_ADD or SADB_DELETE argument. Note that SADB_ADD + * argument is internally processed as a pair of SADB_ADD and SADB_DELETE + * operations to implement replace. + */ +static int +setkey_md5(sockaddr *src, sockaddr *dst, char *passwd, uint type) +{ + uint passwd_len = passwd ? 
strlen(passwd) : 0; + + uint total = + sizeof(struct sadb_msg) + + sizeof(struct sadb_key) + PFKEY_ALIGN8(passwd_len) + + sizeof(struct sadb_sa) + + sizeof(struct sadb_x_sa2) + + sizeof(struct sadb_address) + PFKEY_ALIGN8(src->sa.sa_len) + + sizeof(struct sadb_address) + PFKEY_ALIGN8(dst->sa.sa_len); + + char *buf = alloca(total); + char *pos = buf; + uint len; + + memset(buf, 0, total); + + struct sadb_msg *msg = (void *) pos; + len = sizeof(struct sadb_msg); + msg->sadb_msg_version = PF_KEY_V2; + msg->sadb_msg_type = type; + msg->sadb_msg_satype = SADB_X_SATYPE_TCPSIGNATURE; + msg->sadb_msg_len = 0; /* Fix it later */ + msg->sadb_msg_pid = getpid(); + pos += len; + + /* Set authentication algorithm and password */ + struct sadb_key *key = (void *) pos; + len = sizeof(struct sadb_key) + PFKEY_ALIGN8(passwd_len); + key->sadb_key_len = PFKEY_UNIT64(len); + key->sadb_key_exttype = SADB_EXT_KEY_AUTH; + key->sadb_key_bits = passwd_len * 8; + memcpy(pos + sizeof(struct sadb_key), passwd, passwd_len); + pos += len; + + struct sadb_sa *sa = (void *) pos; + len = sizeof(struct sadb_sa); + sa->sadb_sa_len = PFKEY_UNIT64(len); + sa->sadb_sa_exttype = SADB_EXT_SA; + sa->sadb_sa_spi = htonl((u32) TCP_SIG_SPI); + sa->sadb_sa_auth = SADB_X_AALG_TCP_MD5; + sa->sadb_sa_encrypt = SADB_EALG_NONE; + sa->sadb_sa_flags = SADB_X_EXT_CYCSEQ; + pos += len; + + struct sadb_x_sa2 *sa2 = (void *) pos; + len = sizeof(struct sadb_x_sa2); + sa2->sadb_x_sa2_len = PFKEY_UNIT64(len); + sa2->sadb_x_sa2_exttype = SADB_X_EXT_SA2; + sa2->sadb_x_sa2_mode = IPSEC_MODE_ANY; + pos += len; + + /* Set source address */ + struct sadb_address *saddr = (void *) pos; + len = sizeof(struct sadb_address) + PFKEY_ALIGN8(src->sa.sa_len); + saddr->sadb_address_len = PFKEY_UNIT64(len); + saddr->sadb_address_exttype = SADB_EXT_ADDRESS_SRC; + saddr->sadb_address_proto = IPSEC_ULPROTO_ANY; + saddr->sadb_address_prefixlen = MAX_PREFIX_LENGTH; + memcpy(pos + sizeof(struct sadb_address), &src->sa, src->sa.sa_len); + pos += 
len; + + /* Set destination address */ + struct sadb_address *daddr = (void *) pos; + len = sizeof(struct sadb_address) + PFKEY_ALIGN8(dst->sa.sa_len); + daddr->sadb_address_len = PFKEY_UNIT64(len); + daddr->sadb_address_exttype = SADB_EXT_ADDRESS_DST; + daddr->sadb_address_proto = IPSEC_ULPROTO_ANY; + daddr->sadb_address_prefixlen = MAX_PREFIX_LENGTH; + memcpy(pos + sizeof(struct sadb_address), &dst->sa, dst->sa.sa_len); + pos += len; + + len = pos - buf; + msg->sadb_msg_len = PFKEY_UNIT64(len); + + return setkey_send(msg, len); +} + +/* + * Manipulation with the IPsec SA/SP database + */ +static int +sk_set_md5_in_sasp_db(sock *s, ip_addr local, ip_addr remote, struct iface *ifa, char *passwd) +{ + sockaddr src, dst; + sockaddr_fill(&src, s->af, local, ifa, 0); + sockaddr_fill(&dst, s->af, remote, ifa, 0); + + if (passwd && *passwd) + { + int len = strlen(passwd); + if (len > TCP_KEYLEN_MAX) + ERR_MSG("The password for TCP MD5 Signature is too long"); + + if (setkey_md5(&src, &dst, passwd, SADB_ADD) < 0) + ERR_MSG("Cannot add TCP-MD5 password into the IPsec SA/SP database"); + } + else + { + if (setkey_md5(&src, &dst, NULL, SADB_DELETE) < 0) + ERR_MSG("Cannot delete TCP-MD5 password from the IPsec SA/SP database"); + } + return 0; +} diff --git a/sysdep/bsd/sysio.h b/sysdep/bsd/sysio.h index c82d7a1e..6c20733f 100644 --- a/sysdep/bsd/sysio.h +++ b/sysdep/bsd/sysio.h @@ -189,30 +189,26 @@ sk_prepare_ip_header(sock *s, void *hdr, int dlen) #ifndef TCP_KEYLEN_MAX #define TCP_KEYLEN_MAX 80 #endif + #ifndef TCP_SIG_SPI #define TCP_SIG_SPI 0x1000 #endif -/* - * FIXME: Passwords has to be set by setkey(8) command. This is the same - * behaviour like Quagga. We need to add code for SA/SP entries - * management. 
- */ +#if defined(__FreeBSD__) +#define USE_MD5SIG_SETKEY +#include "lib/setkey.h" +#endif int -sk_set_md5_auth(sock *s, ip_addr a, struct iface *ifa, char *passwd) +sk_set_md5_auth(sock *s, ip_addr local, ip_addr remote, struct iface *ifa, char *passwd, int setkey UNUSED) { - int enable = 0; - - if (passwd && *passwd) - { - int len = strlen(passwd); - enable = TCP_SIG_SPI; - - if (len > TCP_KEYLEN_MAX) - ERR_MSG("MD5 password too long"); - } +#ifdef USE_MD5SIG_SETKEY + if (setkey) + if (sk_set_md5_in_sasp_db(s, local, remote, ifa, passwd) < 0) + return -1; +#endif + int enable = (passwd && *passwd) ? TCP_SIG_SPI : 0; if (setsockopt(s->fd, IPPROTO_TCP, TCP_MD5SIG, &enable, sizeof(enable)) < 0) { if (errno == ENOPROTOOPT) diff --git a/sysdep/config.h b/sysdep/config.h index 08c15fe9..a8d58349 100644 --- a/sysdep/config.h +++ b/sysdep/config.h @@ -7,7 +7,7 @@ #define _BIRD_CONFIG_H_ /* BIRD version */ -#define BIRD_VERSION "1.5.0" +#define BIRD_VERSION "1.6.0" /* Include parameters determined by configure script */ #include "sysdep/autoconf.h" diff --git a/sysdep/linux/netlink.c b/sysdep/linux/netlink.c index 8166d5f5..8146072b 100644 --- a/sysdep/linux/netlink.c +++ b/sysdep/linux/netlink.c @@ -126,7 +126,12 @@ nl_get_reply(struct nl_sock *nl) { struct iovec iov = { nl->rx_buffer, NL_RX_SIZE }; struct sockaddr_nl sa; - struct msghdr m = { (struct sockaddr *) &sa, sizeof(sa), &iov, 1, NULL, 0, 0 }; + struct msghdr m = { + .msg_name = &sa, + .msg_namelen = sizeof(sa), + .msg_iov = &iov, + .msg_iovlen = 1, + }; int x = recvmsg(nl->fd, &m, 0); if (x < 0) die("nl_get_reply: %m"); @@ -1343,7 +1348,12 @@ nl_async_hook(sock *sk, int size UNUSED) { struct iovec iov = { nl_async_rx_buffer, NL_RX_SIZE }; struct sockaddr_nl sa; - struct msghdr m = { (struct sockaddr *) &sa, sizeof(sa), &iov, 1, NULL, 0, 0 }; + struct msghdr m = { + .msg_name = &sa, + .msg_namelen = sizeof(sa), + .msg_iov = &iov, + .msg_iovlen = 1, + }; struct nlmsghdr *h; int x; uint len; diff --git 
a/sysdep/linux/sysio.h b/sysdep/linux/sysio.h index 6386940f..3c07a3e7 100644 --- a/sysdep/linux/sysio.h +++ b/sysdep/linux/sysio.h @@ -179,19 +179,19 @@ sk_prepare_cmsgs4(sock *s, struct msghdr *msg, void *cbuf, size_t cbuflen) */ int -sk_set_md5_auth(sock *s, ip_addr a, struct iface *ifa, char *passwd) +sk_set_md5_auth(sock *s, ip_addr local UNUSED, ip_addr remote, struct iface *ifa, char *passwd, int setkey UNUSED) { struct tcp_md5sig md5; memset(&md5, 0, sizeof(md5)); - sockaddr_fill((sockaddr *) &md5.tcpm_addr, fam_to_af[s->fam], a, ifa, 0); + sockaddr_fill((sockaddr *) &md5.tcpm_addr, fam_to_af[s->fam], remote, ifa, 0); if (passwd) { int len = strlen(passwd); if (len > TCP_MD5SIG_MAXKEYLEN) - ERR_MSG("MD5 password too long"); + ERR_MSG("The password for TCP MD5 Signature is too long"); md5.tcpm_keylen = len; memcpy(&md5.tcpm_key, passwd, len); diff --git a/sysdep/unix/io.c b/sysdep/unix/io.c index 08521d75..69c17d60 100644 --- a/sysdep/unix/io.c +++ b/sysdep/unix/io.c @@ -448,6 +448,7 @@ tm_format_reltime(char *x, struct tm *tm, bird_clock_t delta) /** * tm_format_datetime - convert date and time to textual representation * @x: destination buffer of size %TM_DATETIME_BUFFER_SIZE + * @fmt_spec: specification of resulting textual representation of the time * @t: time * * This function formats the given relative time value @t to a textual @@ -952,23 +953,32 @@ sk_set_min_ttl(sock *s, int ttl) /** * sk_set_md5_auth - add / remove MD5 security association for given socket * @s: socket - * @a: IP address of the other side + * @local: IP address of local side + * @remote: IP address of remote side * @ifa: Interface for link-local IP address - * @passwd: password used for MD5 authentication + * @passwd: Password used for MD5 authentication + * @setkey: Update also system SA/SP database * - * In TCP MD5 handling code in kernel, there is a set of pairs (address, - * password) used to choose password according to address of the other side. 
- * This function is useful for listening socket, for active sockets it is enough - * to set s->password field. + * In TCP MD5 handling code in kernel, there is a set of security associations + * used for choosing password and other authentication parameters according to + * the local and remote address. This function is useful for listening socket, + * for active sockets it may be enough to set s->password field. * * When called with passwd != NULL, the new pair is added, * When called with passwd == NULL, the existing pair is removed. * + * Note that while in Linux, the MD5 SAs are specific to socket, in BSD they are + * stored in global SA/SP database (but the behavior also must be enabled on + * per-socket basis). In case of multiple sockets to the same neighbor, the + * socket-specific state must be configured for each socket while global state + * just once per src-dst pair. The @setkey argument controls whether the global + * state (SA/SP database) is also updated. + * * Result: 0 for success, -1 for an error. */ int -sk_set_md5_auth(sock *s, ip_addr a, struct iface *ifa, char *passwd) +sk_set_md5_auth(sock *s, ip_addr local, ip_addr remote, struct iface *ifa, char *passwd, int setkey) { DUMMY; } #endif @@ -1436,7 +1446,7 @@ sk_open(sock *s) } if (s->password) - if (sk_set_md5_auth(s, s->daddr, s->iface, s->password) < 0) + if (sk_set_md5_auth(s, s->saddr, s->daddr, s->iface, s->password, 0) < 0) goto err; switch (s->type) diff --git a/sysdep/unix/log.c b/sysdep/unix/log.c index 43d98f7b..9c56eb24 100644 --- a/sysdep/unix/log.c +++ b/sysdep/unix/log.c @@ -89,6 +89,7 @@ static char *class_names[] = { /** * log_commit - commit a log message * @class: message class information (%L_DEBUG to %L_BUG, see |lib/birdlib.h|) + * @buf: message to write * * This function writes a message prepared in the log buffer to the * log file (as specified in the configuration). 
The log buffer is diff --git a/sysdep/unix/main.c b/sysdep/unix/main.c index 691fee2d..1f47680e 100644 --- a/sysdep/unix/main.c +++ b/sysdep/unix/main.c @@ -621,7 +621,7 @@ signal_init(void) * Parsing of command-line arguments */ -static char *opt_list = "c:dD:ps:P:u:g:fR"; +static char *opt_list = "c:dD:ps:P:u:g:flR"; static int parse_and_exit; char *bird_name; static char *use_user; @@ -631,7 +631,7 @@ static int run_in_foreground = 0; static void usage(void) { - fprintf(stderr, "Usage: %s [-c ] [-d] [-D ] [-p] [-s ] [-P ] [-u ] [-g ] [-f] [-R]\n", bird_name); + fprintf(stderr, "Usage: %s [-c ] [-d] [-D ] [-p] [-s ] [-P ] [-u ] [-g ] [-f] [-l] [-R]\n", bird_name); exit(1); } @@ -681,7 +681,7 @@ get_gid(const char *s) if (!s) return 0; - + errno = 0; rv = strtol(s, &endptr, 10); @@ -698,6 +698,8 @@ get_gid(const char *s) static void parse_args(int argc, char **argv) { + int config_changed = 0; + int socket_changed = 0; int c; bird_name = get_bird_name(argv[0], "bird"); @@ -716,6 +718,7 @@ parse_args(int argc, char **argv) { case 'c': config_name = optarg; + config_changed = 1; break; case 'd': debug_flag |= 1; @@ -729,6 +732,7 @@ parse_args(int argc, char **argv) break; case 's': path_control_socket = optarg; + socket_changed = 1; break; case 'P': pid_file = optarg; @@ -742,6 +746,12 @@ parse_args(int argc, char **argv) case 'f': run_in_foreground = 1; break; + case 'l': + if (!config_changed) + config_name = xbasename(config_name); + if (!socket_changed) + path_control_socket = xbasename(path_control_socket); + break; case 'R': graceful_restart_recovery(); break;