From 48e5f32db676645640f84ab3d630cce975aa6b20 Mon Sep 17 00:00:00 2001 From: Ondrej Zajicek Date: Thu, 6 Feb 2014 17:46:01 +0100 Subject: [PATCH] Many changes in I/O and OSPF sockets and packet handling. I/O: - BSD: specify src addr on IP sockets by IP_HDRINCL - BSD: specify src addr on UDP sockets by IP_SENDSRCADDR - Linux: specify src addr on IP/UDP sockets by IP_PKTINFO - IPv6: specify src addr on IP/UDP sockets by IPV6_PKTINFO - Alternative SKF_BIND flag for binding to IP address - Allows IP/UDP sockets without tx_hook, on these sockets a packet is discarded when TX queue is full - Use consistently SOL_ for socket layer values. OSPF: - Packet src addr is always explicitly set - Support for secondary addresses in BSD - Dynamic RX/TX buffers - Fixes some minor buffer overruns - Interface option 'tx length' - Names for vlink pseudoifaces (vlinkX) - Vlinks use separate socket for TX - Vlinks do not use fixed associated iface - Fixes TTL for direct unicast packets - Fixes DONTROUTE for OSPF sockets - Use ifa->ifname instead of ifa->iface->name --- doc/bird.sgml | 30 +++- lib/resource.c | 17 +- lib/socket.h | 18 ++- proto/bfd/packets.c | 2 +- proto/ospf/config.Y | 11 +- proto/ospf/dbdes.c | 37 +++-- proto/ospf/hello.c | 21 +-- proto/ospf/iface.c | 353 ++++++++++++++++++++++++++---------------- proto/ospf/iface.h | 5 + proto/ospf/lsack.c | 6 +- proto/ospf/lsreq.c | 4 +- proto/ospf/lsupd.c | 32 ++-- proto/ospf/neighbor.c | 23 ++- proto/ospf/ospf.c | 20 +-- proto/ospf/ospf.h | 28 ++-- proto/ospf/packet.c | 54 ++++--- proto/ospf/packet.h | 17 +- proto/ospf/rt.c | 38 +++-- proto/ospf/topology.c | 23 ++- proto/radv/packets.c | 4 +- proto/rip/rip.c | 1 - sysdep/bsd/sysio.h | 53 ++++--- sysdep/cf/README | 2 + sysdep/cf/bsd.h | 1 + sysdep/linux/sysio.h | 56 ++----- sysdep/unix/io.c | 294 ++++++++++++++++++++++++----------- 26 files changed, 701 insertions(+), 449 deletions(-) diff --git a/doc/bird.sgml b/doc/bird.sgml index 13ffa94e..2f40a750 100644 --- a/doc/bird.sgml +++ 
b/doc/bird.sgml @@ -2212,7 +2212,9 @@ protocol ospf <name> { wait <num>; dead count <num>; dead <num>; + secondary <switch>; rx buffer [normal|large|<num>]; + tx length <num>; type [broadcast|bcast|pointopoint|ptp| nonbroadcast|nbma|pointomultipoint|ptmp]; strict nonbroadcast <switch>; @@ -2419,12 +2421,32 @@ protocol ospf <name> { dead num When the router does not receive any messages from a neighbor in secondary switch + On BSD systems, older versions of BIRD supported OSPFv2 only for the + primary IP address of an interface, other IP ranges on the interface + were handled as stub networks. Since v1.4.1, regular operation on + secondary IP addresses is supported, but disabled by default for + compatibility. This option enables it. The option is a + transitional measure and will be removed in the next major release as the + behavior will be changed. On Linux systems, the option is irrelevant, as + operation on non-primary addresses is already the regular behavior. rx buffer num - This sets the size of buffer used for receiving packets. The buffer should - be bigger than maximal size of any packets. Value NORMAL (default) - means 2*MTU, value LARGE means maximal allowed packet - 65535. + This option specifies the size of buffers used for packet + processing. The buffer size should be bigger than the maximal size of any + packets. By default, buffers are dynamically resized as needed, but a + fixed value could be specified. Value LARGE means the maximal allowed + packet size - 65535. tx length num + Transmitted OSPF messages that contain a large amount of information are + segmented into separate OSPF packets to avoid IP fragmentation. This + option specifies the soft ceiling for the length of generated OSPF + packets. Default value is the MTU of the network interface. Note that + larger OSPF packets may still be generated if underlying OSPF messages + cannot be split (e.g. when one large LSA is propagated). 
type broadcast|bcast BIRD detects a type of a connected network automatically, but diff --git a/lib/resource.c b/lib/resource.c index bf4b3ae9..64f9a39c 100644 --- a/lib/resource.c +++ b/lib/resource.c @@ -157,13 +157,13 @@ rfree(void *res) { resource *r = res; - if (r) - { - if (r->n.next) - rem_node(&r->n); - r->class->free(r); - xfree(r); - } + if (!r) + return; + + if (r->n.next) + rem_node(&r->n); + r->class->free(r); + xfree(r); } /** @@ -408,6 +408,9 @@ mb_realloc(void *m, unsigned size) void mb_free(void *m) { + if (!m) + return; + struct mblock *b = SKIP_BACK(struct mblock, data, m); rfree(b); } diff --git a/lib/socket.h b/lib/socket.h index 780d596b..894d5561 100644 --- a/lib/socket.h +++ b/lib/socket.h @@ -57,6 +57,9 @@ int sk_open(sock *); /* Open socket */ int sk_send(sock *, unsigned len); /* Send data, <0=err, >0=ok, 0=sleep */ int sk_send_to(sock *, unsigned len, ip_addr to, unsigned port); /* sk_send to given destination */ void sk_reallocate(sock *); /* Free and allocate tbuf & rbuf */ +void sk_set_rbsize(sock *s, uint val); /* Resize RX buffer */ +void sk_set_tbsize(sock *s, uint val); /* Resize TX buffer, keeping content */ +void sk_set_tbuf(sock *s, void *tbuf); /* Switch TX buffer, NULL-> return to internal */ void sk_dump_all(void); int sk_set_ttl(sock *s, int ttl); /* Set transmit TTL for given socket */ int sk_set_min_ttl(sock *s, int ttl); /* Set minimal accepted TTL for given socket */ @@ -89,10 +92,13 @@ extern int sk_priority_control; /* Suggested priority for control traffic, shoul #define SKF_V6ONLY 1 /* Use IPV6_V6ONLY socket option */ #define SKF_LADDR_RX 2 /* Report local address for RX packets */ -#define SKF_LADDR_TX 4 /* Allow to specify local address for TX packets */ -#define SKF_TTL_RX 8 /* Report TTL / Hop Limit for RX packets */ +#define SKF_TTL_RX 4 /* Report TTL / Hop Limit for RX packets */ +#define SKF_BIND 8 /* Bind datagram socket to given source address */ #define SKF_THREAD 0x100 /* Socked used in thread, Do not add 
to main loop */ +#define SKF_TRUNCATED 0x200 /* Received packet was truncated, set by IO layer */ +#define SKF_HDRINCL 0x400 /* Used internally */ +#define SKF_PKTINFO 0x800 /* Used internally */ /* * Socket types SA SP DA DP IF TTL SendTo (?=may, -=must not, *=must) @@ -118,6 +124,14 @@ extern int sk_priority_control; /* Suggested priority for control traffic, shoul * call sk_setup_multicast() to enable multicast on that socket, * and then use sk_join_group() and sk_leave_group() to manage * a set of received multicast groups. + * + * For datagram (SK_UDP, SK_IP) sockets, there are two ways to handle + * source address. The socket could be bound to it using bind() + * syscall, but that also forbids the reception of multicast packets, + * or the address could be set on per-packet basis using platform + * dependent options (but these are not available in some corner + * cases). The first way is used when SKF_BIND is specified, the + * second way is used otherwise. */ #endif diff --git a/proto/bfd/packets.c b/proto/bfd/packets.c index fc2616ca..964172d8 100644 --- a/proto/bfd/packets.c +++ b/proto/bfd/packets.c @@ -230,7 +230,7 @@ bfd_open_tx_sk(struct bfd_proto *p, ip_addr local, struct iface *ifa) sk->tos = IP_PREC_INTERNET_CONTROL; sk->priority = sk_priority_control; sk->ttl = ifa ? 
255 : -1; - sk->flags = SKF_THREAD; + sk->flags = SKF_THREAD | SKF_BIND; #ifdef IPV6 sk->flags |= SKF_V6ONLY; diff --git a/proto/ospf/config.Y b/proto/ospf/config.Y index c47a8cd2..f894f134 100644 --- a/proto/ospf/config.Y +++ b/proto/ospf/config.Y @@ -131,7 +131,8 @@ CF_KEYWORDS(NONE, SIMPLE, AUTHENTICATION, STRICT, CRYPTOGRAPHIC, TTL, SECURITY) CF_KEYWORDS(ELIGIBLE, POLL, NETWORKS, HIDDEN, VIRTUAL, CHECK, LINK, ONLY, BFD) CF_KEYWORDS(RX, BUFFER, LARGE, NORMAL, STUBNET, HIDDEN, SUMMARY, TAG, EXTERNAL) CF_KEYWORDS(WAIT, DELAY, LSADB, ECMP, LIMIT, WEIGHT, NSSA, TRANSLATOR, STABILITY) -CF_KEYWORDS(GLOBAL, LSID, ROUTER, SELF, INSTANCE, REAL, NETMASK, TX, PRIORITY) +CF_KEYWORDS(GLOBAL, LSID, ROUTER, SELF, INSTANCE, REAL, NETMASK, TX, PRIORITY, LENGTH) +CF_KEYWORDS(SECONDARY) %type opttext %type lsadb_args @@ -302,14 +303,16 @@ ospf_iface_item: | AUTHENTICATION NONE { OSPF_PATT->autype = OSPF_AUTH_NONE ; } | AUTHENTICATION SIMPLE { OSPF_PATT->autype = OSPF_AUTH_SIMPLE ; } | AUTHENTICATION CRYPTOGRAPHIC { OSPF_PATT->autype = OSPF_AUTH_CRYPT ; } - | RX BUFFER LARGE { OSPF_PATT->rxbuf = OSPF_RXBUF_LARGE ; } - | RX BUFFER NORMAL { OSPF_PATT->rxbuf = OSPF_RXBUF_NORMAL ; } - | RX BUFFER expr { OSPF_PATT->rxbuf = $3 ; if (($3 < OSPF_RXBUF_MINSIZE) || ($3 > OSPF_MAX_PKT_SIZE)) cf_error("Buffer size must be in range 256-65535"); } + | RX BUFFER NORMAL { OSPF_PATT->rx_buffer = 0; } + | RX BUFFER LARGE { OSPF_PATT->rx_buffer = OSPF_MAX_PKT_SIZE; } + | RX BUFFER expr { OSPF_PATT->rx_buffer = $3; if (($3 < OSPF_MIN_PKT_SIZE) || ($3 > OSPF_MAX_PKT_SIZE)) cf_error("Buffer size must be in range 256-65535"); } | TX tos { OSPF_PATT->tx_tos = $2; } | TX PRIORITY expr { OSPF_PATT->tx_priority = $3; } + | TX LENGTH expr { OSPF_PATT->tx_length = $3; if (($3 < OSPF_MIN_PKT_SIZE) || ($3 > OSPF_MAX_PKT_SIZE)) cf_error("TX length must be in range 256-65535"); } | TTL SECURITY bool { OSPF_PATT->ttl_security = $3; } | TTL SECURITY TX ONLY { OSPF_PATT->ttl_security = 2; } | BFD bool { 
OSPF_PATT->bfd = $2; cf_check_bfd($2); } + | SECONDARY bool { OSPF_PATT->bsd_secondary = $2; } | password_list ; diff --git a/proto/ospf/dbdes.c b/proto/ospf/dbdes.c index 75ecf24c..6b291344 100644 --- a/proto/ospf/dbdes.c +++ b/proto/ospf/dbdes.c @@ -103,7 +103,7 @@ ospf_dbdes_send(struct ospf_neighbor *n, int next) length = sizeof(struct ospf_dbdes_packet); op->length = htons(length); - OSPF_PACKET(ospf_dump_dbdes, pkt, "DBDES packet sent to %I via %s", n->ip, ifa->iface->name); + OSPF_PACKET(ospf_dump_dbdes, pkt, "DBDES packet sent to %I via %s", n->ip, ifa->ifname); ospf_send_to(ifa, n->ip); break; @@ -115,7 +115,14 @@ ospf_dbdes_send(struct ospf_neighbor *n, int next) snode *sn; struct ospf_lsa_header *lsa; - pkt = n->ldbdes; + if (n->ldd_bsize != ifa->tx_length) + { + mb_free(n->ldd_buffer); + n->ldd_buffer = mb_allocz(n->pool, ifa->tx_length); + n->ldd_bsize = ifa->tx_length; + } + + pkt = n->ldd_buffer; op = (struct ospf_packet *) pkt; ospf_pkt_fill_hdr(ifa, pkt, DBDES_P); @@ -124,7 +131,7 @@ ospf_dbdes_send(struct ospf_neighbor *n, int next) pkt->options = hton_opt(oa->options); j = i = (ospf_pkt_maxsize(ifa) - sizeof(struct ospf_dbdes_packet)) / sizeof(struct ospf_lsa_header); /* Number of possible lsaheaders to send */ - lsa = (n->ldbdes + sizeof(struct ospf_dbdes_packet)); + lsa = (n->ldd_buffer + sizeof(struct ospf_dbdes_packet)); if (n->myimms.bit.m) { @@ -175,7 +182,7 @@ ospf_dbdes_send(struct ospf_neighbor *n, int next) case NEIGHBOR_LOADING: case NEIGHBOR_FULL: - length = ntohs(((struct ospf_packet *) n->ldbdes)->length); + length = n->ldd_buffer ? 
ntohs(((struct ospf_packet *) n->ldd_buffer)->length) : 0; if (!length) { @@ -184,12 +191,13 @@ ospf_dbdes_send(struct ospf_neighbor *n, int next) return; } - /* Copy last sent packet again */ - pkt = ospf_tx_buffer(ifa); - memcpy(pkt, n->ldbdes, length); + /* Send last packet from ldd buffer */ - OSPF_PACKET(ospf_dump_dbdes, pkt, "DBDES packet sent to %I via %s", n->ip, ifa->iface->name); + OSPF_PACKET(ospf_dump_dbdes, n->ldd_buffer, "DBDES packet sent to %I via %s", n->ip, ifa->ifname); + + sk_set_tbuf(ifa->sk, n->ldd_buffer); ospf_send_to(ifa, n->ip); + sk_set_tbuf(ifa->sk, NULL); if(n->myimms.bit.ms) tm_start(n->rxmt_timer, n->ifa->rxmtint); /* Restart timer */ @@ -262,7 +270,7 @@ ospf_dbdes_receive(struct ospf_packet *ps_i, struct ospf_iface *ifa, u32 ps_options = ntoh_opt(ps->options); u16 ps_iface_mtu = ntohs(ps->iface_mtu); - OSPF_PACKET(ospf_dump_dbdes, ps, "DBDES packet received from %I via %s", n->ip, ifa->iface->name); + OSPF_PACKET(ospf_dump_dbdes, ps, "DBDES packet received from %I via %s", n->ip, ifa->ifname); ospf_neigh_sm(n, INM_HELLOREC); @@ -279,10 +287,10 @@ ospf_dbdes_receive(struct ospf_packet *ps_i, struct ospf_iface *ifa, return; case NEIGHBOR_EXSTART: - if ((ps_iface_mtu != ifa->iface->mtu) && (ifa->type != OSPF_IT_VLINK) + if ((ifa->type != OSPF_IT_VLINK) && (ps_iface_mtu != ifa->iface->mtu) && (ps_iface_mtu != 0) && (ifa->iface->mtu != 0)) log(L_WARN "OSPF: MTU mismatch with neighbour %I on interface %s (remote %d, local %d)", - n->ip, ifa->iface->name, ps_iface_mtu, ifa->iface->mtu); + n->ip, ifa->ifname, ps_iface_mtu, ifa->iface->mtu); if ((ps->imms.bit.m && ps->imms.bit.ms && ps->imms.bit.i) && (n->rid > po->router_id) && (size == sizeof(struct ospf_dbdes_packet))) @@ -361,8 +369,8 @@ ospf_dbdes_receive(struct ospf_packet *ps_i, struct ospf_iface *ifa, { if (ps_ddseq != n->dds) /* MASTER */ { - OSPF_TRACE(D_PACKETS, - "dbdes - sequence mismatch neighbor %I (master)", n->ip); + OSPF_TRACE(D_PACKETS, "dbdes - sequence mismatch neighbor 
%I (master)", + n->ip); ospf_neigh_sm(n, INM_SEQMIS); break; } @@ -383,8 +391,7 @@ ospf_dbdes_receive(struct ospf_packet *ps_i, struct ospf_iface *ifa, { if (ps_ddseq != (n->dds + 1)) /* SLAVE */ { - OSPF_TRACE(D_PACKETS, "dbdes - sequence mismatch neighbor %I (slave)", - n->ip); + OSPF_TRACE(D_PACKETS, "dbdes - sequence mismatch neighbor %I (slave)", n->ip); ospf_neigh_sm(n, INM_SEQMIS); break; } diff --git a/proto/ospf/hello.c b/proto/ospf/hello.c index b6b11004..e8bce09f 100644 --- a/proto/ospf/hello.c +++ b/proto/ospf/hello.c @@ -61,8 +61,7 @@ ospf_hello_receive(struct ospf_packet *ps_i, struct ospf_iface *ifa, struct ospf_hello_packet *ps = (void *) ps_i; - OSPF_TRACE(D_PACKETS, "HELLO packet received from %I via %s%s", faddr, - (ifa->type == OSPF_IT_VLINK ? "vlink-" : ""), ifa->iface->name); + OSPF_TRACE(D_PACKETS, "HELLO packet received from %I via %s", faddr, ifa->ifname); #ifdef OSPFv2 ip_addr mask = ps->netmask; @@ -120,8 +119,7 @@ ospf_hello_receive(struct ospf_packet *ps_i, struct ospf_iface *ifa, if (!nn && ifa->strictnbma) { - log(L_WARN "Ignoring new neighbor: %I on %s", faddr, - ifa->iface->name); + log(L_WARN "Ignoring new neighbor: %I on %s", faddr, ifa->ifname); return; } @@ -129,8 +127,7 @@ ospf_hello_receive(struct ospf_packet *ps_i, struct ospf_iface *ifa, (((ps->priority == 0) && nn->eligible) || ((ps->priority > 0) && !nn->eligible))) { - log(L_ERR "Eligibility mismatch for neighbor: %I on %s", - faddr, ifa->iface->name); + log(L_ERR "Eligibility mismatch for neighbor: %I on %s", faddr, ifa->ifname); return; } @@ -138,8 +135,7 @@ ospf_hello_receive(struct ospf_packet *ps_i, struct ospf_iface *ifa, nn->found = 1; } - OSPF_TRACE(D_EVENTS, "New neighbor found: %I on %s", faddr, - ifa->iface->name); + OSPF_TRACE(D_EVENTS, "New neighbor found: %I on %s", faddr, ifa->ifname); n = ospf_neighbor_new(ifa); @@ -263,7 +259,7 @@ ospf_hello_send(struct ospf_iface *ifa, int kind, struct ospf_neighbor *dirn) p = (struct proto *) (ifa->oa->po); DBG("%s: 
Hello/Poll timer fired on interface %s with IP %I\n", - p->name, ifa->iface->name, ifa->addr->ip); + p->name, ifa->ifname, ifa->addr->ip); /* Now we should send a hello packet */ pkt = ospf_tx_buffer(ifa); @@ -309,9 +305,9 @@ ospf_hello_send(struct ospf_iface *ifa, int kind, struct ospf_neighbor *dirn) u32 *pp = (u32 *) (((u8 *) pkt) + sizeof(struct ospf_hello_packet)); WALK_LIST(neigh, ifa->neigh_list) { - if ((i+1) * sizeof(u32) + sizeof(struct ospf_hello_packet) > ospf_pkt_bufsize(ifa)) + if ((i+1) * sizeof(u32) + sizeof(struct ospf_hello_packet) > ospf_pkt_maxsize(ifa)) { - log(L_WARN "%s: Too many neighbors on interface %s", p->name, ifa->iface->name); + log(L_WARN "%s: Too many neighbors on interface %s", p->name, ifa->ifname); break; } *(pp + i) = htonl(neigh->rid); @@ -376,6 +372,5 @@ ospf_hello_send(struct ospf_iface *ifa, int kind, struct ospf_neighbor *dirn) bug("Bug in ospf_hello_send()"); } - OSPF_TRACE(D_PACKETS, "HELLO packet sent via %s%s", - (ifa->type == OSPF_IT_VLINK ? 
"vlink-" : ""), ifa->iface->name); + OSPF_TRACE(D_PACKETS, "HELLO packet sent via %s", ifa->ifname); } diff --git a/proto/ospf/iface.c b/proto/ospf/iface.c index 333c2a6d..f4d9be55 100644 --- a/proto/ospf/iface.c +++ b/proto/ospf/iface.c @@ -36,29 +36,46 @@ wait_timer_hook(timer * timer) struct ospf_iface *ifa = (struct ospf_iface *) timer->data; struct proto *p = &ifa->oa->po->proto; - OSPF_TRACE(D_EVENTS, "Wait timer fired on interface %s.", ifa->iface->name); + OSPF_TRACE(D_EVENTS, "Wait timer fired on interface %s.", ifa->ifname); ospf_iface_sm(ifa, ISM_WAITF); } -static void ospf_iface_change_mtu(struct proto_ospf *po, struct ospf_iface *ifa); - -u32 -rxbufsize(struct ospf_iface *ifa) +static inline uint +ifa_tx_length(struct ospf_iface *ifa) { - switch(ifa->rxbuf) - { - case OSPF_RXBUF_NORMAL: - return (ifa->iface->mtu * 2); - break; - case OSPF_RXBUF_LARGE: - return OSPF_MAX_PKT_SIZE; - break; - default: - return ifa->rxbuf; - break; - } + return ifa->cf->tx_length ?: ifa->iface->mtu; } +static inline uint +ifa_bufsize(struct ospf_iface *ifa) +{ + uint bsize = ifa->cf->rx_buffer ?: ifa->iface->mtu; + return MAX(bsize, ifa->tx_length); +} + +int +ospf_iface_assure_bufsize(struct ospf_iface *ifa, uint plen) +{ + plen += SIZE_OF_IP_HEADER; + +#ifdef OSPFv2 + if (ifa->autype == OSPF_AUTH_CRYPT) + plen += OSPF_AUTH_CRYPT_SIZE; +#endif + + if (plen <= ifa->sk->tbsize) + return 0; + + if (ifa->cf->rx_buffer || (plen > 0xffff)) + return -1; + + plen = BIRD_ALIGN(plen, 1024); + plen = MIN(plen, 0xffff); + sk_set_tbsize(ifa->sk, plen); + return 1; +} + + struct nbma_node * find_nbma_node_in(list *nnl, ip_addr ip) { @@ -69,27 +86,27 @@ find_nbma_node_in(list *nnl, ip_addr ip) return NULL; } + static int ospf_sk_open(struct ospf_iface *ifa) { sock *sk = sk_new(ifa->pool); sk->type = SK_IP; sk->dport = OSPF_PROTO; - sk->saddr = IPA_NONE; + sk->saddr = ifa->addr->ip; + sk->iface = ifa->iface; sk->tos = ifa->cf->tx_tos; sk->priority = ifa->cf->tx_priority; sk->rx_hook = 
ospf_rx_hook; - sk->tx_hook = ospf_tx_hook; + // sk->tx_hook = ospf_tx_hook; sk->err_hook = ospf_err_hook; - sk->iface = ifa->iface; - sk->rbsize = rxbufsize(ifa); - sk->tbsize = rxbufsize(ifa); + sk->rbsize = sk->tbsize = ifa_bufsize(ifa); sk->data = (void *) ifa; sk->flags = SKF_LADDR_RX | (ifa->check_ttl ? SKF_TTL_RX : 0); - sk->ttl = ifa->cf->ttl_security ? 255 : -1; + sk->ttl = ifa->cf->ttl_security ? 255 : 1; - if (sk_open(sk) != 0) + if (sk_open(sk) < 0) goto err; #ifdef OSPFv3 @@ -98,28 +115,6 @@ ospf_sk_open(struct ospf_iface *ifa) goto err; #endif - /* - * For OSPFv2: When sending a packet, it is important to have a - * proper source address. We expect that when we send one-hop - * unicast packets, OS chooses a source address according to the - * destination address (to be in the same prefix). We also expect - * that when we send multicast packets, OS uses the source address - * from sk->saddr registered to OS by sk_setup_multicast(). This - * behavior is needed to implement multiple virtual ifaces (struct - * ospf_iface) on one physical iface and is signalized by - * CONFIG_MC_PROPER_SRC. - * - * If this behavior is not available (for example on BSD), we create - * non-stub iface just for the primary IP address (see - * ospf_iface_stubby()) and we expect OS to use primary IP address - * as a source address for both unicast and multicast packets. - * - * FIXME: the primary IP address is currently just the - * lexicographically smallest address on an interface, it should be - * signalized by sysdep code which one is really the primary. - */ - - sk->saddr = ifa->addr->ip; if ((ifa->type == OSPF_IT_BCAST) || (ifa->type == OSPF_IT_PTP)) { if (ifa->cf->real_bcast) @@ -132,7 +127,6 @@ ospf_sk_open(struct ospf_iface *ifa) else { ifa->all_routers = AllSPFRouters; - sk->ttl = ifa->cf->ttl_security ? 
255 : 1; if (sk_setup_multicast(sk) < 0) goto err; @@ -171,6 +165,42 @@ ospf_sk_leave_dr(struct ospf_iface *ifa) ifa->sk_dr = 0; } +void +ospf_open_vlink_sk(struct proto_ospf *po) +{ + struct proto *p = &po->proto; + + sock *sk = sk_new(po->proto.pool); + sk->type = SK_IP; + sk->dport = OSPF_PROTO; + + /* FIXME: configurable tos/priority ? */ + sk->tos = IP_PREC_INTERNET_CONTROL; + sk->priority = sk_priority_control; + sk->err_hook = ospf_verr_hook; + + sk->rbsize = 0; + sk->tbsize = OSPF_VLINK_MTU; + sk->data = (void *) po; + sk->flags = 0; + + if (sk_open(sk) < 0) + goto err; + +#ifdef OSPFv3 + /* 12 is an offset of the checksum in an OSPF packet */ + if (sk_set_ipv6_checksum(sk, 12) < 0) + goto err; +#endif + + po->vlink_sk = sk; + return; + + err: + rfree(sk); + log(L_ERR "%s: Cannot open virtual link socket", p->name); +} + static void ospf_iface_down(struct ospf_iface *ifa) { @@ -183,10 +213,10 @@ ospf_iface_down(struct ospf_iface *ifa) { #ifdef OSPFv2 OSPF_TRACE(D_EVENTS, "Removing interface %s (%I/%d) from area %R", - ifa->iface->name, ifa->addr->prefix, ifa->addr->pxlen, ifa->oa->areaid); + ifa->ifname, ifa->addr->prefix, ifa->addr->pxlen, ifa->oa->areaid); #else OSPF_TRACE(D_EVENTS, "Removing interface %s (IID %d) from area %R", - ifa->iface->name, ifa->instance_id, ifa->oa->areaid); + ifa->ifname, ifa->instance_id, ifa->oa->areaid); #endif /* First of all kill all the related vlinks */ @@ -215,9 +245,7 @@ ospf_iface_down(struct ospf_iface *ifa) if (ifa->type == OSPF_IT_VLINK) { ifa->vifa = NULL; - ifa->iface = NULL; ifa->addr = NULL; - ifa->sk = NULL; ifa->cost = 0; ifa->vip = IPA_NONE; } @@ -276,7 +304,7 @@ ospf_iface_chstate(struct ospf_iface *ifa, u8 state) ifa->vid, ospf_is[oldstate], ospf_is[state]); else OSPF_TRACE(D_EVENTS, "Changing state of iface %s from %s to %s", - ifa->iface->name, ospf_is[oldstate], ospf_is[state]); + ifa->ifname, ospf_is[oldstate], ospf_is[state]); if ((ifa->type == OSPF_IT_BCAST) && !ifa->cf->real_bcast && ifa->sk) { @@ 
-318,8 +346,7 @@ ospf_iface_chstate(struct ospf_iface *ifa, u8 state) void ospf_iface_sm(struct ospf_iface *ifa, int event) { - DBG("SM on %s %s. Event is '%s'\n", (ifa->type == OSPF_IT_VLINK) ? "vlink" : "iface", - ifa->iface ? ifa->iface->name : "(none)" , ospf_ism[event]); + DBG("SM on iface %s. Event is '%s'\n", ifa->ifname, ospf_ism[event]); switch (event) { @@ -436,7 +463,7 @@ ospf_iface_add(struct object_lock *lock) /* Open socket if interface is not stub */ if (! ifa->stub && ! ospf_sk_open(ifa)) { - log(L_ERR "%s: Socket open failed on interface %s, declaring as stub", p->name, ifa->iface->name); + log(L_ERR "%s: Socket open failed on interface %s, declaring as stub", p->name, ifa->ifname); ifa->ioprob = OSPF_I_SK; ifa->stub = 1; } @@ -469,9 +496,6 @@ add_nbma_node(struct ospf_iface *ifa, struct nbma_node *src, int found) static int ospf_iface_stubby(struct ospf_iface_patt *ip, struct ifa *addr) { - if (! addr) - return 0; - /* a host address */ if (addr->flags & IA_HOST) return 1; @@ -481,12 +505,11 @@ ospf_iface_stubby(struct ospf_iface_patt *ip, struct ifa *addr) return 1; /* - * We cannot properly support multiple OSPF ifaces on real iface - * with multiple prefixes, therefore we force OSPF ifaces with - * non-primary IP prefixes to be stub. + * For compatibility reasons on BSD systems, we force OSPF + * interfaces with non-primary IP prefixes to be stub. */ #if defined(OSPFv2) && !defined(CONFIG_MC_PROPER_SRC) - if (! (addr->flags & IA_PRIMARY)) + if (!ip->bsd_secondary && !(addr->flags & IA_PRIMARY)) return 1; #endif @@ -497,25 +520,17 @@ void ospf_iface_new(struct ospf_area *oa, struct ifa *addr, struct ospf_iface_patt *ip) { struct proto *p = &oa->po->proto; - struct iface *iface = addr ? 
addr->iface : NULL; + struct iface *iface = addr->iface; + struct ospf_iface *ifa; struct pool *pool; - struct ospf_iface *ifa; - struct nbma_node *nb; - struct object_lock *lock; - - if (ip->type == OSPF_IT_VLINK) - OSPF_TRACE(D_EVENTS, "Adding vlink to %R via area %R", ip->vid, ip->voa); - else - { #ifdef OSPFv2 - OSPF_TRACE(D_EVENTS, "Adding interface %s (%I/%d) to area %R", - iface->name, addr->prefix, addr->pxlen, oa->areaid); + OSPF_TRACE(D_EVENTS, "Adding interface %s (%I/%d) to area %R", + iface->name, addr->prefix, addr->pxlen, oa->areaid); #else - OSPF_TRACE(D_EVENTS, "Adding interface %s (IID %d) to area %R", - iface->name, ip->instance_id, oa->areaid); + OSPF_TRACE(D_EVENTS, "Adding interface %s (IID %d) to area %R", + iface->name, ip->instance_id, oa->areaid); #endif - } pool = rp_new(p->pool, "OSPF Interface"); ifa = mb_allocz(pool, sizeof(struct ospf_iface)); @@ -525,6 +540,9 @@ ospf_iface_new(struct ospf_area *oa, struct ifa *addr, struct ospf_iface_patt *i ifa->cf = ip; ifa->pool = pool; + ifa->iface_id = iface->index; + ifa->ifname = iface->name; + ifa->cost = ip->cost; ifa->rxmtint = ip->rxmtint; ifa->inftransdelay = ip->inftransdelay; @@ -536,7 +554,8 @@ ospf_iface_new(struct ospf_area *oa, struct ifa *addr, struct ospf_iface_patt *i ifa->deadint = ip->deadint; ifa->stub = ospf_iface_stubby(ip, addr); ifa->ioprob = OSPF_I_OK; - ifa->rxbuf = ip->rxbuf; + + ifa->tx_length = ifa_tx_length(ifa); ifa->check_link = ip->check_link; ifa->ecmp_weight = ip->ecmp_weight; ifa->check_ttl = (ip->ttl_security == 1); @@ -545,7 +564,7 @@ ospf_iface_new(struct ospf_area *oa, struct ifa *addr, struct ospf_iface_patt *i #ifdef OSPFv2 ifa->autype = ip->autype; ifa->passwords = ip->passwords; - ifa->ptp_netmask = addr ? 
!(addr->flags & IA_PEER) : 0; + ifa->ptp_netmask = !(addr->flags & IA_PEER); if (ip->ptp_netmask < 2) ifa->ptp_netmask = ip->ptp_netmask; #endif @@ -554,6 +573,7 @@ ospf_iface_new(struct ospf_area *oa, struct ifa *addr, struct ospf_iface_patt *i ifa->instance_id = ip->instance_id; #endif + ifa->type = ospf_iface_classify(ip->type, addr); /* Check validity of interface type */ @@ -578,12 +598,12 @@ ospf_iface_new(struct ospf_area *oa, struct ifa *addr, struct ospf_iface_patt *i log(L_WARN "%s: Cannot use interface %s as %s, forcing %s", p->name, iface->name, ospf_it[old_type], ospf_it[ifa->type]); - /* Assign iface ID, for vlinks, this is ugly hack */ - ifa->iface_id = (ifa->type != OSPF_IT_VLINK) ? iface->index : oa->po->last_vlink_id++; + ifa->state = OSPF_IS_DOWN; init_list(&ifa->neigh_list); init_list(&ifa->nbma_list); + struct nbma_node *nb; WALK_LIST(nb, ip->nbma_list) { /* In OSPFv3, addr is link-local while configured neighbors could @@ -602,19 +622,8 @@ ospf_iface_new(struct ospf_area *oa, struct ifa *addr, struct ospf_iface_patt *i add_nbma_node(ifa, nb, 0); } - ifa->state = OSPF_IS_DOWN; add_tail(&oa->po->iface_list, NODE ifa); - if (ifa->type == OSPF_IT_VLINK) - { - ifa->voa = ospf_find_area(oa->po, ip->voa); - ifa->vid = ip->vid; - - ifa->hello_timer = tm_new_set(ifa->pool, hello_timer_hook, ifa, 0, ifa->helloint); - - return; /* Don't lock, don't add sockets */ - } - /* * In some cases we allow more ospf_ifaces on one physical iface. * In OSPFv2, if they use different IP address prefix. @@ -622,7 +631,7 @@ ospf_iface_new(struct ospf_area *oa, struct ifa *addr, struct ospf_iface_patt *i * Therefore, we store such info to lock->addr field. 
*/ - lock = olock_new(pool); + struct object_lock *lock = olock_new(pool); #ifdef OSPFv2 lock->addr = ifa->addr->prefix; #else /* OSPFv3 */ @@ -637,6 +646,63 @@ ospf_iface_new(struct ospf_area *oa, struct ifa *addr, struct ospf_iface_patt *i olock_acquire(lock); } +void +ospf_iface_new_vlink(struct proto_ospf *po, struct ospf_iface_patt *ip) +{ + struct proto *p = &po->proto; + struct ospf_iface *ifa; + struct pool *pool; + + if (!po->vlink_sk) + return; + + OSPF_TRACE(D_EVENTS, "Adding vlink to %R via area %R", ip->vid, ip->voa); + + /* Vlink ifname is stored just after the ospf_iface structure */ + + pool = rp_new(p->pool, "OSPF Vlink"); + ifa = mb_allocz(pool, sizeof(struct ospf_iface) + 16); + ifa->oa = po->backbone; + ifa->cf = ip; + ifa->pool = pool; + + /* Assign iface ID, for vlinks, this is ugly hack */ + u32 vlink_id = po->last_vlink_id++; + ifa->iface_id = vlink_id + OSPF_VLINK_ID_OFFSET; + ifa->ifname = (void *) (ifa + 1); + bsprintf(ifa->ifname, "vlink%d", vlink_id); + + ifa->voa = ospf_find_area(po, ip->voa); + ifa->vid = ip->vid; + ifa->sk = po->vlink_sk; + + ifa->helloint = ip->helloint; + ifa->rxmtint = ip->rxmtint; + ifa->waitint = ip->waitint; + ifa->deadint = ip->deadint; + ifa->inftransdelay = ip->inftransdelay; + ifa->tx_length = OSPF_VLINK_MTU; + +#ifdef OSPFv2 + ifa->autype = ip->autype; + ifa->passwords = ip->passwords; +#endif + +#ifdef OSPFv3 + ifa->instance_id = ip->instance_id; +#endif + + ifa->type = OSPF_IT_VLINK; + + ifa->state = OSPF_IS_DOWN; + init_list(&ifa->neigh_list); + init_list(&ifa->nbma_list); + + add_tail(&po->iface_list, NODE ifa); + + ifa->hello_timer = tm_new_set(ifa->pool, hello_timer_hook, ifa, 0, ifa->helloint); +} + static void ospf_iface_change_timer(timer *tm, unsigned val) { @@ -653,12 +719,12 @@ int ospf_iface_reconfigure(struct ospf_iface *ifa, struct ospf_iface_patt *new) { struct proto *p = &ifa->oa->po->proto; - struct nbma_node *nb, *nbx; - char *ifname = (ifa->type != OSPF_IT_VLINK) ? 
ifa->iface->name : "vlink"; + struct ospf_iface_patt *old = ifa->cf; + char *ifname = ifa->ifname; /* Type could be changed in ospf_iface_new(), but if config values are same then also results are same */ - int old_type = ospf_iface_classify(ifa->cf->type, ifa->addr); + int old_type = ospf_iface_classify(old->type, ifa->addr); int new_type = ospf_iface_classify(new->type, ifa->addr); if (old_type != new_type) return 0; @@ -668,10 +734,10 @@ ospf_iface_reconfigure(struct ospf_iface *ifa, struct ospf_iface_patt *new) return 0; /* Change of these options would require to reset the iface socket */ - if ((new->real_bcast != ifa->cf->real_bcast) || - (new->tx_tos != ifa->cf->tx_tos) || - (new->tx_priority != ifa->cf->tx_priority) || - (new->ttl_security != ifa->cf->ttl_security)) + if ((new->real_bcast != old->real_bcast) || + (new->tx_tos != old->tx_tos) || + (new->tx_priority != old->tx_priority) || + (new->ttl_security != old->ttl_security)) return 0; ifa->cf = new; @@ -775,6 +841,8 @@ ospf_iface_reconfigure(struct ospf_iface *ifa, struct ospf_iface_patt *new) ifa->strictnbma = new->strictnbma; } + struct nbma_node *nb, *nbx; + /* NBMA LIST - remove or update old */ WALK_LIST_DELSAFE(nb, nbx, ifa->nbma_list) { @@ -817,13 +885,35 @@ ospf_iface_reconfigure(struct ospf_iface *ifa, struct ospf_iface_patt *new) } } - /* RX BUFF */ - if (ifa->rxbuf != new->rxbuf) + int update_buffers = 0; + + /* TX LENGTH */ + if (old->tx_length != new->tx_length) { - OSPF_TRACE(D_EVENTS, "Changing rxbuf interface %s from %d to %d", - ifname, ifa->rxbuf, new->rxbuf); - ifa->rxbuf = new->rxbuf; - ospf_iface_change_mtu(ifa->oa->po, ifa); + OSPF_TRACE(D_EVENTS, "Changing TX length on interface %s from %d to %d", + ifname, old->tx_length, new->tx_length); + + /* ifa cannot be vlink */ + ifa->tx_length = ifa_tx_length(ifa); + update_buffers = 1; + } + + /* RX BUFFER */ + if (old->rx_buffer != new->rx_buffer) + { + OSPF_TRACE(D_EVENTS, "Changing buffer size on interface %s from %d to %d", + 
ifname, old->rx_buffer, new->rx_buffer); + + /* ifa cannot be vlink */ + update_buffers = 1; + } + + /* Buffer size depends on both tx_length and rx_buffer options */ + if (update_buffers && ifa->sk) + { + uint bsize = ifa_bufsize(ifa); + sk_set_rbsize(ifa->sk, bsize); + sk_set_tbsize(ifa->sk, bsize); } /* LINK */ @@ -833,6 +923,7 @@ ospf_iface_reconfigure(struct ospf_iface *ifa, struct ospf_iface_patt *new) new->check_link ? "Enabling" : "Disabling", ifname); ifa->check_link = new->check_link; + /* ifa cannot be vlink */ if (!(ifa->iface->flags & IF_LINK_UP)) ospf_iface_sm(ifa, ifa->check_link ? ISM_LOOP : ISM_UNLOOP); } @@ -929,6 +1020,7 @@ ospf_iface_find_by_key(struct ospf_area *oa, struct ifa *a) void ospf_ifaces_reconfigure(struct ospf_area *oa, struct ospf_area_config *nac) { + struct proto *p = &oa->po->proto; struct ospf_iface_patt *ip; struct iface *iface; struct ifa *a; @@ -956,6 +1048,8 @@ ospf_ifaces_reconfigure(struct ospf_area *oa, struct ospf_area_config *nac) continue; /* Hard restart */ + log(L_INFO "%s: Restarting interface %s (%I/%d) in area %R", + p->name, ifa->ifname, a->prefix, a->pxlen, oa->areaid); ospf_iface_shutdown(ifa); ospf_iface_remove(ifa); } @@ -1062,6 +1156,7 @@ ospf_iface_find_by_key(struct ospf_area *oa, struct ifa *a, int iid) void ospf_ifaces_reconfigure(struct ospf_area *oa, struct ospf_area_config *nac) { + struct proto *p = &oa->po->proto; struct ospf_iface_patt *ip; struct iface *iface; struct ifa *a; @@ -1092,6 +1187,8 @@ ospf_ifaces_reconfigure(struct ospf_area *oa, struct ospf_area_config *nac) continue; /* Hard restart */ + log(L_INFO "%s: Restarting interface %s (IID %d) in area %R", + p->name, ifa->ifname, ifa->instance_id, oa->areaid); ospf_iface_shutdown(ifa); ospf_iface_remove(ifa); } @@ -1108,32 +1205,29 @@ static void ospf_iface_change_mtu(struct proto_ospf *po, struct ospf_iface *ifa) { struct proto *p = &po->proto; - struct ospf_packet *op; - struct ospf_neighbor *n; - OSPF_TRACE(D_EVENTS, "Changing MTU on 
interface %s", ifa->iface->name); - if (ifa->sk) - { - ifa->sk->rbsize = rxbufsize(ifa); - ifa->sk->tbsize = rxbufsize(ifa); - sk_reallocate(ifa->sk); - } + /* ifa is not vlink */ - WALK_LIST(n, ifa->neigh_list) - { - op = (struct ospf_packet *) n->ldbdes; - n->ldbdes = mb_allocz(n->pool, ifa->iface->mtu); + OSPF_TRACE(D_EVENTS, "Changing MTU on interface %s", ifa->ifname); - if (ntohs(op->length) <= ifa->iface->mtu) /* If the packet in old buffer is bigger, let it filled by zeros */ - memcpy(n->ldbdes, op, ifa->iface->mtu); /* If the packet is old is same or smaller, copy it */ + ifa->tx_length = ifa_tx_length(ifa); - mb_free(op); - } + if (!ifa->sk) + return; + + /* We do not shrink dynamic buffers */ + uint bsize = ifa_bufsize(ifa); + if (bsize > ifa->sk->rbsize) + sk_set_rbsize(ifa->sk, bsize); + if (bsize > ifa->sk->tbsize) + sk_set_tbsize(ifa->sk, bsize); } static void ospf_iface_notify(struct proto_ospf *po, unsigned flags, struct ospf_iface *ifa) { + /* ifa is not vlink */ + if (flags & IF_CHANGE_DOWN) { ospf_iface_remove(ifa); @@ -1163,7 +1257,7 @@ ospf_if_notify(struct proto *p, unsigned flags, struct iface *iface) struct ospf_iface *ifa, *ifx; WALK_LIST_DELSAFE(ifa, ifx, po->iface_list) - if ((ifa->type != OSPF_IT_VLINK) && (ifa->iface == iface)) + if (ifa->iface == iface) ospf_iface_notify(po, flags, ifa); /* We use here that even shutting down iface also shuts down @@ -1186,22 +1280,19 @@ ospf_iface_info(struct ospf_iface *ifa) if (ifa->type == OSPF_IT_VLINK) { - cli_msg(-1015, "Virtual link to %R:", ifa->vid); + cli_msg(-1015, "Virtual link %s to %R:", ifa->ifname, ifa->vid); cli_msg(-1015, "\tPeer IP: %I", ifa->vip); - cli_msg(-1015, "\tTransit area: %R (%u)", ifa->voa->areaid, - ifa->voa->areaid); - cli_msg(-1015, "\tInterface: \"%s\"", - (ifa->iface ? 
ifa->iface->name : "(none)")); + cli_msg(-1015, "\tTransit area: %R (%u)", ifa->voa->areaid, ifa->voa->areaid); } else { #ifdef OSPFv2 if (ifa->addr->flags & IA_PEER) - cli_msg(-1015, "Interface %s (peer %I)", ifa->iface->name, ifa->addr->opposite); + cli_msg(-1015, "Interface %s (peer %I)", ifa->ifname, ifa->addr->opposite); else - cli_msg(-1015, "Interface %s (%I/%d)", ifa->iface->name, ifa->addr->prefix, ifa->addr->pxlen); + cli_msg(-1015, "Interface %s (%I/%d)", ifa->ifname, ifa->addr->prefix, ifa->addr->pxlen); #else /* OSPFv3 */ - cli_msg(-1015, "Interface %s (IID %d)", ifa->iface->name, ifa->instance_id); + cli_msg(-1015, "Interface %s (IID %d)", ifa->ifname, ifa->instance_id); #endif cli_msg(-1015, "\tType: %s%s", ospf_it[ifa->type], more); cli_msg(-1015, "\tArea: %R (%u)", ifa->oa->areaid, ifa->oa->areaid); diff --git a/proto/ospf/iface.h b/proto/ospf/iface.h index 3f887728..5a250e0a 100644 --- a/proto/ospf/iface.h +++ b/proto/ospf/iface.h @@ -17,11 +17,16 @@ void ospf_if_notify(struct proto *p, unsigned flags, struct iface *iface); void ospf_ifa_notify(struct proto *p, unsigned flags, struct ifa *a); void ospf_iface_info(struct ospf_iface *ifa); void ospf_iface_new(struct ospf_area *oa, struct ifa *addr, struct ospf_iface_patt *ip); +void ospf_iface_new_vlink(struct proto_ospf *po, struct ospf_iface_patt *ip); void ospf_iface_remove(struct ospf_iface *ifa); void ospf_iface_shutdown(struct ospf_iface *ifa); int ospf_iface_reconfigure(struct ospf_iface *ifa, struct ospf_iface_patt *new); void ospf_ifaces_reconfigure(struct ospf_area *oa, struct ospf_area_config *nac); +int ospf_iface_assure_bufsize(struct ospf_iface *ifa, uint plen); + +void ospf_open_vlink_sk(struct proto_ospf *po); + struct nbma_node *find_nbma_node_in(list *nnl, ip_addr ip); static inline struct nbma_node * diff --git a/proto/ospf/lsack.c b/proto/ospf/lsack.c index 00c50caf..fd8ead01 100644 --- a/proto/ospf/lsack.c +++ b/proto/ospf/lsack.c @@ -92,7 +92,7 @@ ospf_lsack_send(struct 
ospf_neighbor *n, int queue) op->length = htons(len); DBG("Sending and continuing! Len=%u\n", len); - OSPF_PACKET(ospf_dump_lsack, pk, "LSACK packet sent via %s", ifa->iface->name); + OSPF_PACKET(ospf_dump_lsack, pk, "LSACK packet sent via %s", ifa->ifname); if (ifa->type == OSPF_IT_BCAST) { @@ -121,7 +121,7 @@ ospf_lsack_send(struct ospf_neighbor *n, int queue) op->length = htons(len); DBG("Sending! Len=%u\n", len); - OSPF_PACKET(ospf_dump_lsack, pk, "LSACK packet sent via %s", ifa->iface->name); + OSPF_PACKET(ospf_dump_lsack, pk, "LSACK packet sent via %s", ifa->ifname); if (ifa->type == OSPF_IT_BCAST) { @@ -153,7 +153,7 @@ ospf_lsack_receive(struct ospf_packet *ps_i, struct ospf_iface *ifa, } struct ospf_lsack_packet *ps = (void *) ps_i; - OSPF_PACKET(ospf_dump_lsack, ps, "LSACK packet received from %I via %s", n->ip, ifa->iface->name); + OSPF_PACKET(ospf_dump_lsack, ps, "LSACK packet received from %I via %s", n->ip, ifa->ifname); ospf_neigh_sm(n, INM_HELLOREC); diff --git a/proto/ospf/lsreq.c b/proto/ospf/lsreq.c index 1ba4fff9..15854ce7 100644 --- a/proto/ospf/lsreq.c +++ b/proto/ospf/lsreq.c @@ -82,7 +82,7 @@ ospf_lsreq_send(struct ospf_neighbor *n) i) * sizeof(struct ospf_lsreq_header); op->length = htons(length); - OSPF_PACKET(ospf_dump_lsreq, pk, "LSREQ packet sent to %I via %s", n->ip, n->ifa->iface->name); + OSPF_PACKET(ospf_dump_lsreq, pk, "LSREQ packet sent to %I via %s", n->ip, n->ifa->ifname); ospf_send_to(n->ifa, n->ip); } @@ -107,7 +107,7 @@ ospf_lsreq_receive(struct ospf_packet *ps_i, struct ospf_iface *ifa, } struct ospf_lsreq_packet *ps = (void *) ps_i; - OSPF_PACKET(ospf_dump_lsreq, ps, "LSREQ packet received from %I via %s", n->ip, ifa->iface->name); + OSPF_PACKET(ospf_dump_lsreq, ps, "LSREQ packet received from %I via %s", n->ip, ifa->ifname); if (n->state < NEIGHBOR_EXCHANGE) return; diff --git a/proto/ospf/lsupd.c b/proto/ospf/lsupd.c index beac6c83..1859867b 100644 --- a/proto/ospf/lsupd.c +++ b/proto/ospf/lsupd.c @@ -278,22 +278,22 @@ 
ospf_lsupd_flood(struct proto_ospf *po, struct ospf_packet *op; struct ospf_lsa_header *lh; + /* Check iface buffer size */ + uint len2 = sizeof(struct ospf_lsupd_packet) + (hn ? ntohs(hn->length) : hh->length); + if (ospf_iface_assure_bufsize(ifa, len2) < 0) + { + /* Cannot fit in a tx buffer, skip that iface */ + log(L_ERR "OSPF: LSA too large to flood on %s (Type: %04x, Id: %R, Rt: %R)", + ifa->ifname, hh->type, hh->id, hh->rt); + continue; + } + pk = ospf_tx_buffer(ifa); op = &pk->ospf_packet; ospf_pkt_fill_hdr(ifa, pk, LSUPD_P); pk->lsano = htonl(1); - /* Check iface buffer size */ - int len2 = sizeof(struct ospf_lsupd_packet) + (hn ? ntohs(hn->length) : hh->length); - if (len2 > ospf_pkt_bufsize(ifa)) - { - /* Cannot fit in a tx buffer, skip that iface */ - log(L_ERR "OSPF: LSA too large to flood on %s (Type: %04x, Id: %R, Rt: %R)", - ifa->iface->name, hh->type, hh->id, hh->rt); - continue; - } - lh = (struct ospf_lsa_header *) (pk + 1); /* Copy LSA into the packet */ @@ -322,7 +322,7 @@ ospf_lsupd_flood(struct proto_ospf *po, op->length = htons(len); - OSPF_PACKET(ospf_dump_lsupd, pk, "LSUPD packet flooded via %s", ifa->iface->name); + OSPF_PACKET(ospf_dump_lsupd, pk, "LSUPD packet flooded via %s", ifa->ifname); switch (ifa->type) { @@ -406,7 +406,7 @@ ospf_lsupd_send_list(struct ospf_neighbor *n, list * l) break; /* LSA is larger than MTU, check buffer size */ - if (len2 > ospf_pkt_bufsize(n->ifa)) + if (ospf_iface_assure_bufsize(n->ifa, len2) < 0) { /* Cannot fit in a tx buffer, skip that */ log(L_ERR "OSPF: LSA too large to send (Type: %04x, Id: %R, Rt: %R)", @@ -414,6 +414,10 @@ ospf_lsupd_send_list(struct ospf_neighbor *n, list * l) lsr = NODE_NEXT(lsr); continue; } + + /* TX buffer could be reallocated */ + pkt = ospf_tx_buffer(n->ifa); + buf = (void *) pkt; } /* Copy the LSA to the packet */ @@ -432,7 +436,7 @@ ospf_lsupd_send_list(struct ospf_neighbor *n, list * l) pkt->lsano = htonl(lsano); pkt->ospf_packet.length = htons(len); 
OSPF_PACKET(ospf_dump_lsupd, pkt, "LSUPD packet sent to %I via %s", - n->ip, n->ifa->iface->name); + n->ip, n->ifa->ifname); ospf_send_to(n->ifa, n->ip); } } @@ -455,7 +459,7 @@ ospf_lsupd_receive(struct ospf_packet *ps_i, struct ospf_iface *ifa, } struct ospf_lsupd_packet *ps = (void *) ps_i; - OSPF_PACKET(ospf_dump_lsupd, ps, "LSUPD packet received from %I via %s", n->ip, ifa->iface->name); + OSPF_PACKET(ospf_dump_lsupd, ps, "LSUPD packet received from %I via %s", n->ip, ifa->ifname); if (n->state < NEIGHBOR_EXCHANGE) { diff --git a/proto/ospf/neighbor.c b/proto/ospf/neighbor.c index 61224ec2..faaaf232 100644 --- a/proto/ospf/neighbor.c +++ b/proto/ospf/neighbor.c @@ -69,7 +69,6 @@ ospf_neighbor_new(struct ospf_iface *ifa) add_tail(&ifa->neigh_list, NODE n); n->adj = 0; n->csn = 0; - n->ldbdes = mb_allocz(pool, ifa->iface->mtu); n->state = NEIGHBOR_DOWN; init_lists(n); @@ -286,10 +285,10 @@ can_do_adj(struct ospf_neighbor *n) { case OSPF_IS_DOWN: case OSPF_IS_LOOP: - bug("%s: Iface %s in down state?", p->name, ifa->iface->name); + bug("%s: Iface %s in down state?", p->name, ifa->ifname); break; case OSPF_IS_WAITING: - DBG("%s: Neighbor? on iface %s\n", p->name, ifa->iface->name); + DBG("%s: Neighbor? 
on iface %s\n", p->name, ifa->ifname); break; case OSPF_IS_DROTHER: if (((n->rid == ifa->drid) || (n->rid == ifa->bdrid)) @@ -303,15 +302,15 @@ can_do_adj(struct ospf_neighbor *n) i = 1; break; default: - bug("%s: Iface %s in unknown state?", p->name, ifa->iface->name); + bug("%s: Iface %s in unknown state?", p->name, ifa->ifname); break; } break; default: - bug("%s: Iface %s is unknown type?", p->name, ifa->iface->name); + bug("%s: Iface %s is unknown type?", p->name, ifa->ifname); break; } - DBG("%s: Iface %s can_do_adj=%d\n", p->name, ifa->iface->name, i); + DBG("%s: Iface %s can_do_adj=%d\n", p->name, ifa->ifname, i); return i; } @@ -556,9 +555,8 @@ neighbor_timer_hook(timer * timer) struct ospf_iface *ifa = n->ifa; struct proto *p = &ifa->oa->po->proto; - OSPF_TRACE(D_EVENTS, - "Inactivity timer fired on interface %s for neighbor %I.", - ifa->iface->name, n->ip); + OSPF_TRACE(D_EVENTS, "Inactivity timer fired on interface %s for neighbor %I.", + ifa->ifname, n->ip); ospf_neigh_remove(n); } @@ -591,7 +589,7 @@ ospf_neigh_bfd_hook(struct bfd_request *req) if (req->down) { OSPF_TRACE(D_EVENTS, "BFD session down for %I on %s", - n->ip, n->ifa->iface->name); + n->ip, n->ifa->ifname); ospf_neigh_remove(n); } @@ -641,8 +639,7 @@ ospf_sh_neigh_info(struct ospf_neighbor *n) pos = "ptp "; cli_msg(-1013, "%-1R\t%3u\t%s/%s\t%-5s\t%-10s %-1I", n->rid, n->priority, - ospf_ns[n->state], pos, etime, - (ifa->type == OSPF_IT_VLINK ? 
"vlink" : ifa->iface->name), n->ip); + ospf_ns[n->state], pos, etime, ifa->ifname, n->ip); } static void @@ -653,7 +650,7 @@ rxmt_timer_hook(timer * timer) struct top_hash_entry *en; DBG("%s: RXMT timer fired on interface %s for neigh: %I.\n", - p->name, n->ifa->iface->name, n->ip); + p->name, n->ifa->ifname, n->ip); if(n->state < NEIGHBOR_EXSTART) return; diff --git a/proto/ospf/ospf.c b/proto/ospf/ospf.c index 232f3f6c..cf520401 100644 --- a/proto/ospf/ospf.c +++ b/proto/ospf/ospf.c @@ -232,7 +232,6 @@ ospf_start(struct proto *p) struct ospf_area_config *ac; po->router_id = proto_get_router_id(p->cf); - po->last_vlink_id = 0x80000000; po->rfc1583 = c->rfc1583; po->stub_router = c->stub_router; po->ebit = 0; @@ -258,10 +257,13 @@ ospf_start(struct proto *p) WALK_LIST(ac, c->area_list) ospf_area_add(po, ac, 0); + if (c->abr) + ospf_open_vlink_sk(po); + /* Add all virtual links */ struct ospf_iface_patt *ic; WALK_LIST(ic, c->vlink_list) - ospf_iface_new(po->backbone, NULL, ic); + ospf_iface_new_vlink(po, ic); return PS_UP; } @@ -277,7 +279,7 @@ ospf_dump(struct proto *p) WALK_LIST(ifa, po->iface_list) { - OSPF_TRACE(D_EVENTS, "Interface: %s", (ifa->iface ? 
ifa->iface->name : "(null)")); + OSPF_TRACE(D_EVENTS, "Interface: %s", ifa->ifname); OSPF_TRACE(D_EVENTS, "state: %u", ifa->state); OSPF_TRACE(D_EVENTS, "DR: %R", ifa->drid); OSPF_TRACE(D_EVENTS, "BDR: %R", ifa->bdrid); @@ -381,7 +383,7 @@ schedule_net_lsa(struct ospf_iface *ifa) { struct proto *p = &ifa->oa->po->proto; - OSPF_TRACE(D_EVENTS, "Scheduling network-LSA origination for iface %s", ifa->iface->name); + OSPF_TRACE(D_EVENTS, "Scheduling network-LSA origination for iface %s", ifa->ifname); ifa->orignet = 1; } @@ -391,7 +393,7 @@ schedule_link_lsa(struct ospf_iface *ifa) { struct proto *p = &ifa->oa->po->proto; - OSPF_TRACE(D_EVENTS, "Scheduling link-LSA origination for iface %s", ifa->iface->name); + OSPF_TRACE(D_EVENTS, "Scheduling link-LSA origination for iface %s", ifa->ifname); ifa->origlink = 1; } #endif @@ -631,7 +633,7 @@ ospf_get_route_info(rte * rte, byte * buf, ea_list * attrs UNUSED) { char *type = ""; - switch(rte->attrs->source) + switch (rte->attrs->source) { case RTS_OSPF: type = "I"; @@ -769,7 +771,7 @@ ospf_reconfigure(struct proto *p, struct proto_config *c) if (ifa) ospf_iface_reconfigure(ifa, ip); else - ospf_iface_new(po->backbone, NULL, ip); + ospf_iface_new_vlink(po, ip); } /* Delete remaining ifaces and areas */ @@ -808,7 +810,7 @@ ospf_sh_neigh(struct proto *p, char *iff) cli_msg(-1013, "%-12s\t%3s\t%-15s\t%-5s\t%-10s %-12s", "Router ID", "Pri", " State", "DTime", "Interface", "Router IP"); WALK_LIST(ifa, po->iface_list) - if ((iff == NULL) || patmatch(iff, ifa->iface->name)) + if ((iff == NULL) || patmatch(iff, ifa->ifname)) WALK_LIST(n, ifa->neigh_list) ospf_sh_neigh_info(n); cli_msg(0, ""); @@ -917,7 +919,7 @@ ospf_sh_iface(struct proto *p, char *iff) cli_msg(-1015, "%s:", p->name); WALK_LIST(ifa, po->iface_list) - if ((iff == NULL) || patmatch(iff, ifa->iface->name)) + if ((iff == NULL) || patmatch(iff, ifa->ifname)) ospf_iface_info(ifa); cli_msg(0, ""); } diff --git a/proto/ospf/ospf.h b/proto/ospf/ospf.h index 
46a1c3c1..66719e30 100644 --- a/proto/ospf/ospf.h +++ b/proto/ospf/ospf.h @@ -10,14 +10,8 @@ #define _BIRD_OSPF_H_ #define MAXNETS 10 +#define OSPF_MIN_PKT_SIZE 256 #define OSPF_MAX_PKT_SIZE 65535 -/* - * RFC 2328 says, maximum packet size is 65535 (IP packet size - * limit). Really a bit less for OSPF, because this contains also IP - * header. This could be too much for small systems, so I normally - * allocate 2*mtu (i found one cisco sending packets mtu+16). OSPF - * packets are almost always sent small enough to not be fragmented. - */ #ifdef LOCAL_DEBUG #define OSPF_FORCE_DEBUG 1 @@ -78,6 +72,8 @@ do { if ((p->debug & D_PACKETS) || OSPF_FORCE_DEBUG) \ #define DEFAULT_ECMP_LIMIT 16 #define DEFAULT_TRANSINT 40 +#define OSPF_VLINK_ID_OFFSET 0x80000000 + struct ospf_config { @@ -179,12 +175,14 @@ struct ospf_area_config struct ospf_iface { node n; - struct iface *iface; /* Nest's iface, non-NULL (unless type OSPF_IT_VLINK) */ + struct iface *iface; /* Nest's iface (NULL for vlinks) */ struct ifa *addr; /* IP prefix associated with that OSPF iface */ struct ospf_area *oa; struct ospf_iface_patt *cf; + char *ifname; /* Interface name (iface->name), new one for vlinks */ + pool *pool; - sock *sk; /* IP socket (for DD ...) 
*/ + sock *sk; /* IP socket */ list neigh_list; /* List of neigbours */ u32 cost; /* Cost of iface */ u32 waitint; /* number of sec before changing state from wait */ @@ -273,6 +271,7 @@ struct ospf_iface u8 sk_dr; /* Socket is a member of DRouters group */ u8 marked; /* Used in OSPF reconfigure */ u16 rxbuf; /* Buffer size */ + u16 tx_length; /* Soft TX packet length limit, usually MTU */ u8 check_link; /* Whether iface link change is used */ u8 ecmp_weight; /* Weight used for ECMP */ u8 ptp_netmask; /* Send real netmask for P2P */ @@ -704,13 +703,14 @@ struct ospf_neighbor slist lsrtl; /* Link state retransmission list */ siterator lsrti; struct top_graph *lsrth; - void *ldbdes; /* Last database description packet */ timer *rxmt_timer; /* RXMT timer */ list ackl[2]; #define ACKL_DIRECT 0 #define ACKL_DELAY 1 timer *ackd_timer; /* Delayed ack timer */ struct bfd_request *bfd_req; /* BFD request, if BFD is used */ + void *ldd_buffer; /* Last database description packet */ + u32 ldd_bsize; /* Buffer size for ldd_buffer */ u32 csn; /* Last received crypt seq number (for MD5) */ }; @@ -783,6 +783,7 @@ struct proto_ospf void *lsab; /* LSA buffer used when originating router LSAs */ int lsab_size, lsab_used; linpool *nhpool; /* Linpool used for next hops computed in SPF */ + sock *vlink_sk; /* IP socket used for vlink TX */ u32 router_id; u32 last_vlink_id; /* Interface IDs for vlinks (starts at 0x80000000) */ }; @@ -806,9 +807,9 @@ struct ospf_iface_patt u32 vid; int tx_tos; int tx_priority; - u16 rxbuf; -#define OSPF_RXBUF_NORMAL 0 -#define OSPF_RXBUF_LARGE 1 + u16 tx_length; + u16 rx_buffer; + #define OSPF_RXBUF_MINSIZE 256 /* Minimal allowed size */ u16 autype; /* Not really used in OSPFv3 */ #define OSPF_AUTH_NONE 0 @@ -822,6 +823,7 @@ struct ospf_iface_patt u8 ptp_netmask; /* bool + 2 for unspecified */ u8 ttl_security; /* bool + 2 for TX only */ u8 bfd; + u8 bsd_secondary; #ifdef OSPFv2 list *passwords; diff --git a/proto/ospf/packet.c b/proto/ospf/packet.c index 
4338bc1a..cd4b8a97 100644 --- a/proto/ospf/packet.c +++ b/proto/ospf/packet.c @@ -39,7 +39,6 @@ ospf_pkt_fill_hdr(struct ospf_iface *ifa, void *buf, u8 h_type) unsigned ospf_pkt_maxsize(struct ospf_iface *ifa) { - unsigned mtu = (ifa->type == OSPF_IT_VLINK) ? OSPF_VLINK_MTU : ifa->iface->mtu; unsigned headers = SIZE_OF_IP_HEADER; #ifdef OSPFv2 @@ -47,7 +46,7 @@ ospf_pkt_maxsize(struct ospf_iface *ifa) headers += OSPF_AUTH_CRYPT_SIZE; #endif - return mtu - headers; + return ifa->tx_length - headers; } #ifdef OSPFv2 @@ -263,7 +262,7 @@ ospf_rx_hook(sock *sk, int size) return 1; DBG("OSPF: RX hook called (iface %s, src %I, dst %I)\n", - sk->iface->name, sk->faddr, sk->laddr); + sk->ifname, sk->faddr, sk->laddr); /* Initially, the packet is associated with the 'master' iface */ struct ospf_iface *ifa = sk->data; @@ -321,22 +320,31 @@ ospf_rx_hook(sock *sk, int size) return 1; } - int osize = ntohs(ps->length); - if ((unsigned) osize < sizeof(struct ospf_packet)) + uint plen = ntohs(ps->length); + if ((plen < sizeof(struct ospf_packet)) || ((plen % 4) != 0)) { - log(L_ERR "%s%I - too low value in size field (%u bytes)", mesg, sk->faddr, osize); + log(L_ERR "%s%I - invalid length (%u)", mesg, sk->faddr, plen); return 1; } - if ((osize > size) || ((osize % 4) != 0)) + if (sk->flags & SKF_TRUNCATED) { - log(L_ERR "%s%I - size field does not match (%d/%d)", mesg, sk->faddr, osize, size); + log(L_WARN "%s%I - too large (%d/%d)", mesg, sk->faddr, plen, size); + + /* If we have dynamic buffers and received truncated message, we expand RX buffer */ + + uint bs = plen + 256; + bs = BIRD_ALIGN(bs, 1024); + + if (!ifa->cf->rx_buffer && (bs > sk->rbsize)) + sk_set_rbsize(sk, bs); + return 1; } - if ((unsigned) size > sk->rbsize) + if (plen > size) { - log(L_ERR "%s%I - too large (%d vs %d)", mesg, sk->faddr, size, sk->rbsize); + log(L_ERR "%s%I - size field does not match (%d/%d)", mesg, sk->faddr, plen, size); return 1; } @@ -349,7 +357,7 @@ ospf_rx_hook(sock *sk, int size) #ifdef 
OSPFv2 if ((ps->autype != htons(OSPF_AUTH_CRYPT)) && (!ipsum_verify(ps, 16, (void *) ps + sizeof(struct ospf_packet), - osize - sizeof(struct ospf_packet), NULL))) + plen - sizeof(struct ospf_packet), NULL))) { log(L_ERR "%s%I - bad checksum", mesg, sk->faddr); return 1; @@ -448,7 +456,7 @@ ospf_rx_hook(sock *sk, int size) if(!n && (ps->type != HELLO_P)) { log(L_WARN "OSPF: Received non-hello packet from unknown neighbor (src %I, iface %s)", - sk->faddr, ifa->iface->name); + sk->faddr, ifa->ifname); return 1; } @@ -495,20 +503,30 @@ ospf_rx_hook(sock *sk, int size) return 1; } +/* void ospf_tx_hook(sock * sk) { struct ospf_iface *ifa= (struct ospf_iface *) (sk->data); // struct proto *p = (struct proto *) (ifa->oa->po); - log(L_ERR "OSPF: TX hook called on %s", ifa->iface->name); + log(L_ERR "OSPF: TX hook called on %s", ifa->ifname); } +*/ void ospf_err_hook(sock * sk, int err) { struct ospf_iface *ifa= (struct ospf_iface *) (sk->data); -// struct proto *p = (struct proto *) (ifa->oa->po); - log(L_ERR "OSPF: Socket error on %s: %M", ifa->iface->name, err); + struct proto *p = &(ifa->oa->po->proto); + log(L_ERR "%s: Socket error on %s: %M", p->name, ifa->ifname, err); +} + +void +ospf_verr_hook(sock *sk, int err) +{ + struct proto_ospf *po = (struct proto_ospf *) (sk->data); + struct proto *p = &po->proto; + log(L_ERR "%s: Vlink socket error: %M", p->name, err); } void @@ -543,9 +561,9 @@ ospf_send_to(struct ospf_iface *ifa, ip_addr dst) #endif ospf_pkt_finalize(ifa, pkt); - if (sk->tbuf != sk->tpos) - log(L_ERR "Aiee, old packet was overwritten in TX buffer"); - sk_send_to(sk, len, dst, 0); + int done = sk_send_to(sk, len, dst, 0); + if (!done) + log(L_WARN "OSPF: TX queue full on %s", ifa->ifname); } diff --git a/proto/ospf/packet.h b/proto/ospf/packet.h index fbcb4288..4ba1f08c 100644 --- a/proto/ospf/packet.h +++ b/proto/ospf/packet.h @@ -11,10 +11,11 @@ #define _BIRD_OSPF_PACKET_H_ void ospf_pkt_fill_hdr(struct ospf_iface *ifa, void *buf, u8 h_type); -unsigned 
ospf_pkt_maxsize(struct ospf_iface *ifa); +uint ospf_pkt_maxsize(struct ospf_iface *ifa); int ospf_rx_hook(sock * sk, int size); -void ospf_tx_hook(sock * sk); +// void ospf_tx_hook(sock * sk); void ospf_err_hook(sock * sk, int err); +void ospf_verr_hook(sock *sk, int err); void ospf_send_to_agt(struct ospf_iface *ifa, u8 state); void ospf_send_to_bdr(struct ospf_iface *ifa); void ospf_send_to(struct ospf_iface *ifa, ip_addr ip); @@ -23,17 +24,5 @@ static inline void ospf_send_to_all(struct ospf_iface *ifa) { ospf_send_to(ifa, static inline void * ospf_tx_buffer(struct ospf_iface *ifa) { return ifa->sk->tbuf; } -static inline unsigned -ospf_pkt_bufsize(struct ospf_iface *ifa) -{ -#ifdef OSPFv2 - unsigned headers = (ifa->autype == OSPF_AUTH_CRYPT) ? OSPF_AUTH_CRYPT_SIZE : 0; -#else - unsigned headers = 0; -#endif - - return ifa->sk->tbsize - headers; -} - #endif /* _BIRD_OSPF_PACKET_H_ */ diff --git a/proto/ospf/rt.c b/proto/ospf/rt.c index 52110aa1..1b39bda0 100644 --- a/proto/ospf/rt.c +++ b/proto/ospf/rt.c @@ -1079,44 +1079,42 @@ ospf_check_vlinks(struct proto_ospf *po) { struct proto *p = &po->proto; - struct ospf_iface *iface; - WALK_LIST(iface, po->iface_list) + struct ospf_iface *ifa; + WALK_LIST(ifa, po->iface_list) { - if (iface->type == OSPF_IT_VLINK) + if (ifa->type == OSPF_IT_VLINK) { struct top_hash_entry *tmp; - tmp = ospf_hash_find_rt(po->gr, iface->voa->areaid, iface->vid); + tmp = ospf_hash_find_rt(po->gr, ifa->voa->areaid, ifa->vid); if (tmp && (tmp->color == INSPF) && ipa_nonzero(tmp->lb) && tmp->nhs) { struct ospf_iface *nhi = ospf_iface_find(po, tmp->nhs->iface); - if ((iface->state != OSPF_IS_PTP) - || (iface->vifa != nhi) - || !ipa_equal(iface->vip, tmp->lb)) + if ((ifa->state != OSPF_IS_PTP) + || (ifa->vifa != nhi) + || !ipa_equal(ifa->vip, tmp->lb)) { OSPF_TRACE(D_EVENTS, "Vlink peer %R found", tmp->lsa.id); - ospf_iface_sm(iface, ISM_DOWN); - iface->vifa = nhi; - iface->iface = nhi->iface; - iface->addr = nhi->addr; - iface->sk = nhi->sk; - 
iface->cost = tmp->dist; - iface->vip = tmp->lb; - ospf_iface_sm(iface, ISM_UP); + ospf_iface_sm(ifa, ISM_DOWN); + ifa->vifa = nhi; + ifa->addr = nhi->addr; + ifa->cost = tmp->dist; + ifa->vip = tmp->lb; + ospf_iface_sm(ifa, ISM_UP); } - else if ((iface->state == OSPF_IS_PTP) && (iface->cost != tmp->dist)) + else if ((ifa->state == OSPF_IS_PTP) && (ifa->cost != tmp->dist)) { - iface->cost = tmp->dist; + ifa->cost = tmp->dist; schedule_rt_lsa(po->backbone); } } else { - if (iface->state > OSPF_IS_DOWN) + if (ifa->state > OSPF_IS_DOWN) { - OSPF_TRACE(D_EVENTS, "Vlink peer %R lost", iface->vid); - ospf_iface_sm(iface, ISM_DOWN); + OSPF_TRACE(D_EVENTS, "Vlink peer %R lost", ifa->vid); + ospf_iface_sm(ifa, ISM_DOWN); } } } diff --git a/proto/ospf/topology.c b/proto/ospf/topology.c index f25db9a7..4af5afa5 100644 --- a/proto/ospf/topology.c +++ b/proto/ospf/topology.c @@ -306,7 +306,7 @@ originate_rt_lsa_body(struct ospf_area *oa, u16 *length) break; default: - log("Unknown interface type %s", ifa->iface->name); + log("Unknown interface type %s", ifa->ifname); break; } @@ -447,7 +447,7 @@ originate_rt_lsa_body(struct ospf_area *oa, u16 *length) break; default: - log("Unknown interface type %s", ifa->iface->name); + log("Unknown interface type %s", ifa->ifname); break; } @@ -596,8 +596,7 @@ originate_net_lsa(struct ospf_iface *ifa) void *body; - OSPF_TRACE(D_EVENTS, "Originating network-LSA for iface %s", - ifa->iface->name); + OSPF_TRACE(D_EVENTS, "Originating network-LSA for iface %s", ifa->ifname); lsa.age = 0; lsa.type = LSA_T_NET; @@ -628,8 +627,7 @@ flush_net_lsa(struct ospf_iface *ifa) if (ifa->net_lsa == NULL) return; - OSPF_TRACE(D_EVENTS, "Flushing network-LSA for iface %s", - ifa->iface->name); + OSPF_TRACE(D_EVENTS, "Flushing network-LSA for iface %s", ifa->ifname); ifa->net_lsa->lsa.sn += 1; ifa->net_lsa->lsa.age = LSA_MAXAGE; lsasum_calculate(&ifa->net_lsa->lsa, ifa->net_lsa->lsa_body); @@ -1212,8 +1210,11 @@ originate_link_lsa(struct ospf_iface *ifa) struct 
proto *p = &po->proto; void *body; - /* FIXME check for vlink and skip that? */ - OSPF_TRACE(D_EVENTS, "Originating link-LSA for iface %s", ifa->iface->name); + /* Vlinks do not have link-LSAs */ + if (ifa->type == OSPF_IT_VLINK) + return; + + OSPF_TRACE(D_EVENTS, "Originating link-LSA for iface %s", ifa->ifname); lsa.age = 0; lsa.type = LSA_T_LINK; @@ -1498,8 +1499,7 @@ originate_prefix_net_lsa(struct ospf_iface *ifa) struct ospf_lsa_header lsa; void *body; - OSPF_TRACE(D_EVENTS, "Originating network prefix-LSA for iface %s", - ifa->iface->name); + OSPF_TRACE(D_EVENTS, "Originating network prefix-LSA for iface %s", ifa->ifname); lsa.age = 0; lsa.type = LSA_T_PREFIX; @@ -1525,8 +1525,7 @@ flush_prefix_net_lsa(struct ospf_iface *ifa) if (en == NULL) return; - OSPF_TRACE(D_EVENTS, "Flushing network prefix-LSA for iface %s", - ifa->iface->name); + OSPF_TRACE(D_EVENTS, "Flushing network prefix-LSA for iface %s", ifa->ifname); en->lsa.sn += 1; en->lsa.age = LSA_MAXAGE; diff --git a/proto/radv/packets.c b/proto/radv/packets.c index 38abaa4c..997fda3d 100644 --- a/proto/radv/packets.c +++ b/proto/radv/packets.c @@ -404,7 +404,7 @@ radv_sk_open(struct radv_iface *ifa) sock *sk = sk_new(ifa->ra->p.pool); sk->type = SK_IP; sk->dport = ICMPV6_PROTO; - sk->saddr = IPA_NONE; + sk->saddr = ifa->addr->ip; sk->ttl = 255; /* Mandatory for Neighbor Discovery packets */ sk->rx_hook = radv_rx_hook; @@ -419,8 +419,6 @@ radv_sk_open(struct radv_iface *ifa) if (sk_open(sk) != 0) goto err; - sk->saddr = ifa->addr->ip; - /* We want listen just to ICMPv6 messages of type RS and RA */ if (sk_set_icmp_filter(sk, ICMPV6_RS, ICMPV6_RA) < 0) goto err; diff --git a/proto/rip/rip.c b/proto/rip/rip.c index 5cc40403..9730df77 100644 --- a/proto/rip/rip.c +++ b/proto/rip/rip.c @@ -717,7 +717,6 @@ new_iface(struct proto *p, struct iface *new, unsigned long flags, struct iface_ if (new) { if (new->addr->flags & IA_PEER) log( L_WARN "%s: rip is not defined over unnumbered links", p->name ); - 
rif->sock->saddr = IPA_NONE; if (rif->multicast) { #ifndef IPV6 rif->sock->daddr = ipa_from_u32(0xe0000009); diff --git a/sysdep/bsd/sysio.h b/sysdep/bsd/sysio.h index cf049a0b..e45deb6f 100644 --- a/sysdep/bsd/sysio.h +++ b/sysdep/bsd/sysio.h @@ -38,18 +38,13 @@ get_inaddr(ip_addr *a, struct in6_addr *ia) ipa_ntoh(*a); } -static inline char * -sysio_bind_to_iface(sock *s) -{ - /* Unfortunately not available */ - return NULL; -} - #else #include #include +#include // Workaround for some BSDs +#include static inline void set_inaddr(struct in_addr * ia, ip_addr a) @@ -93,7 +88,7 @@ sysio_setup_multicast(sock *s) static inline char * sysio_join_group(sock *s, ip_addr maddr) { - struct ip_mreq mreq; + struct ip_mreq mreq; bzero(&mreq, sizeof(mreq)); set_inaddr(&mreq.imr_interface, s->iface->addr->ip); @@ -152,7 +147,7 @@ sysio_register_cmsgs(sock *s) return NULL; } -static void +static inline void sysio_process_rx_cmsgs(sock *s, struct msghdr *msg) { struct cmsghdr *cm; @@ -190,26 +185,17 @@ sysio_process_rx_cmsgs(sock *s, struct msghdr *msg) } /* Unfortunately, IP_SENDSRCADDR does not work for raw IP sockets on BSD kernels */ -/* -static void + +static inline void sysio_prepare_tx_cmsgs(sock *s, struct msghdr *msg, void *cbuf, size_t cbuflen) { +#ifdef IP_SENDSRCADDR struct cmsghdr *cm; struct in_addr *sa; - if (!(s->flags & SKF_LADDR_TX)) - return; - msg->msg_control = cbuf; msg->msg_controllen = cbuflen; - if (s->iface) - { - struct in_addr m; - set_inaddr(&m, s->saddr); - setsockopt(s->fd, IPPROTO_IP, IP_MULTICAST_IF, &m, sizeof(m)); - } - cm = CMSG_FIRSTHDR(msg); cm->cmsg_level = IPPROTO_IP; cm->cmsg_type = IP_SENDSRCADDR; @@ -219,8 +205,31 @@ sysio_prepare_tx_cmsgs(sock *s, struct msghdr *msg, void *cbuf, size_t cbuflen) set_inaddr(sa, s->saddr); msg->msg_controllen = cm->cmsg_len; +#endif +} + + +static void +fill_ip_header(sock *s, void *hdr, int dlen) +{ + struct ip *ip = hdr; + + bzero(ip, 20); + + ip->ip_v = 4; + ip->ip_hl = 5; + ip->ip_tos = (s->tos < 0) ? 
0 : s->tos; + ip->ip_len = 20 + dlen; + ip->ip_ttl = (s->ttl < 0) ? 64 : s->ttl; + ip->ip_p = s->dport; + set_inaddr(&ip->ip_src, s->saddr); + set_inaddr(&ip->ip_dst, s->daddr); + +#ifdef __OpenBSD__ + /* OpenBSD expects ip_len in network order, other BSDs expect host order */ + ip->ip_len = htons(ip->ip_len); +#endif } -*/ #endif diff --git a/sysdep/cf/README b/sysdep/cf/README index 1c11edcf..768a3727 100644 --- a/sysdep/cf/README +++ b/sysdep/cf/README @@ -8,6 +8,8 @@ CONFIG_ALL_TABLES_AT_ONCE Kernel scanner wants to process all tables at once CONFIG_MC_PROPER_SRC Multicast packets have source address according to socket saddr field CONFIG_SKIP_MC_BIND Don't call bind on multicast socket (def for *BSD) +CONFIG_NO_IFACE_BIND Bind to iface is not available, use workarounds (def for *BSD) CONFIG_UNIX_DONTROUTE Use setsockopts DONTROUTE (undef for *BSD) +CONFIG_USE_HDRINCL Use IP_HDRINCL instead of control messages for source address on raw IP sockets. CONFIG_RESTRICTED_PRIVILEGES Implements restricted privileges using drop_uid() diff --git a/sysdep/cf/bsd.h b/sysdep/cf/bsd.h index 5e6d03e8..df199199 100644 --- a/sysdep/cf/bsd.h +++ b/sysdep/cf/bsd.h @@ -12,6 +12,7 @@ #define CONFIG_SKIP_MC_BIND #define CONFIG_NO_IFACE_BIND +#define CONFIG_USE_HDRINCL /* Link: sysdep/unix diff --git a/sysdep/linux/sysio.h b/sysdep/linux/sysio.h index 250ed586..56c3387d 100644 --- a/sysdep/linux/sysio.h +++ b/sysdep/linux/sysio.h @@ -30,17 +30,6 @@ get_inaddr(ip_addr *a, struct in6_addr *ia) ipa_ntoh(*a); } -static inline char * -sysio_bind_to_iface(sock *s) -{ - struct ifreq ifr; - strcpy(ifr.ifr_name, s->iface->name); - if (setsockopt(s->fd, SOL_SOCKET, SO_BINDTODEVICE, &ifr, sizeof(ifr)) < 0) - return "SO_BINDTODEVICE"; - - return NULL; -} - #else static inline void @@ -69,11 +58,10 @@ struct ip_mreqn #endif -static inline void fill_mreqn(struct ip_mreqn *m, struct iface *ifa, ip_addr saddr, ip_addr maddr) +static inline void fill_mreqn(struct ip_mreqn *m, ip_addr maddr, struct 
iface *ifa) { bzero(m, sizeof(*m)); m->imr_ifindex = ifa->index; - set_inaddr(&m->imr_address, saddr); set_inaddr(&m->imr_multiaddr, maddr); } @@ -90,16 +78,10 @@ sysio_setup_multicast(sock *s) return "IP_MULTICAST_TTL"; /* This defines where should we send _outgoing_ multicasts */ - fill_mreqn(&m, s->iface, s->saddr, IPA_NONE); + fill_mreqn(&m, IPA_NONE, s->iface); if (setsockopt(s->fd, SOL_IP, IP_MULTICAST_IF, &m, sizeof(m)) < 0) return "IP_MULTICAST_IF"; - /* Is this necessary? */ - struct ifreq ifr; - strcpy(ifr.ifr_name, s->iface->name); - if (setsockopt(s->fd, SOL_SOCKET, SO_BINDTODEVICE, &ifr, sizeof(ifr)) < 0) - return "SO_BINDTODEVICE"; - return NULL; } @@ -109,7 +91,7 @@ sysio_join_group(sock *s, ip_addr maddr) struct ip_mreqn m; /* And this one sets interface for _receiving_ multicasts from */ - fill_mreqn(&m, s->iface, s->saddr, maddr); + fill_mreqn(&m, maddr, s->iface); if (setsockopt(s->fd, SOL_IP, IP_ADD_MEMBERSHIP, &m, sizeof(m)) < 0) return "IP_ADD_MEMBERSHIP"; @@ -122,7 +104,7 @@ sysio_leave_group(sock *s, ip_addr maddr) struct ip_mreqn m; /* And this one sets interface for _receiving_ multicasts from */ - fill_mreqn(&m, s->iface, s->saddr, maddr); + fill_mreqn(&m, maddr, s->iface); if (setsockopt(s->fd, SOL_IP, IP_DROP_MEMBERSHIP, &m, sizeof(m)) < 0) return "IP_DROP_MEMBERSHIP"; @@ -132,10 +114,7 @@ sysio_leave_group(sock *s, ip_addr maddr) #endif -#include -#include - -/* For the case that we have older kernel headers */ +/* For the case that we have older libc headers */ /* Copied from Linux kernel file include/linux/tcp.h */ #ifndef TCP_MD5SIG @@ -175,7 +154,7 @@ sk_set_md5_auth_int(sock *s, sockaddr *sa, char *passwd) memcpy(&md5.tcpm_key, passwd, len); } - int rv = setsockopt(s->fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5)); + int rv = setsockopt(s->fd, SOL_TCP, TCP_MD5SIG, &md5, sizeof(md5)); if (rv < 0) { @@ -203,11 +182,11 @@ sysio_register_cmsgs(sock *s) int ok = 1; if ((s->flags & SKF_LADDR_RX) && - (setsockopt(s->fd, IPPROTO_IP, 
IP_PKTINFO, &ok, sizeof(ok)) < 0)) + (setsockopt(s->fd, SOL_IP, IP_PKTINFO, &ok, sizeof(ok)) < 0)) return "IP_PKTINFO"; if ((s->flags & SKF_TTL_RX) && - (setsockopt(s->fd, IPPROTO_IP, IP_RECVTTL, &ok, sizeof(ok)) < 0)) + (setsockopt(s->fd, SOL_IP, IP_RECVTTL, &ok, sizeof(ok)) < 0)) return "IP_RECVTTL"; return NULL; @@ -222,10 +201,10 @@ sysio_process_rx_cmsgs(sock *s, struct msghdr *msg) for (cm = CMSG_FIRSTHDR(msg); cm != NULL; cm = CMSG_NXTHDR(msg, cm)) { - if (cm->cmsg_level == IPPROTO_IP && cm->cmsg_type == IP_PKTINFO) + if (cm->cmsg_level == SOL_IP && cm->cmsg_type == IP_PKTINFO) pi = (struct in_pktinfo *) CMSG_DATA(cm); - if (cm->cmsg_level == IPPROTO_IP && cm->cmsg_type == IP_TTL) + if (cm->cmsg_level == SOL_IP && cm->cmsg_type == IP_TTL) ttl = (int *) CMSG_DATA(cm); } @@ -249,31 +228,28 @@ sysio_process_rx_cmsgs(sock *s, struct msghdr *msg) return; } -/* static void sysio_prepare_tx_cmsgs(sock *s, struct msghdr *msg, void *cbuf, size_t cbuflen) { struct cmsghdr *cm; struct in_pktinfo *pi; - if (!(s->flags & SKF_LADDR_TX)) - return; - msg->msg_control = cbuf; msg->msg_controllen = cbuflen; cm = CMSG_FIRSTHDR(msg); - cm->cmsg_level = IPPROTO_IP; + cm->cmsg_level = SOL_IP; cm->cmsg_type = IP_PKTINFO; cm->cmsg_len = CMSG_LEN(sizeof(*pi)); pi = (struct in_pktinfo *) CMSG_DATA(cm); - set_inaddr(&pi->ipi_spec_dst, s->saddr); pi->ipi_ifindex = s->iface ? 
s->iface->index : 0; + set_inaddr(&pi->ipi_spec_dst, s->saddr); + set_inaddr(&pi->ipi_addr, IPA_NONE); msg->msg_controllen = cm->cmsg_len; } -*/ + #endif @@ -292,7 +268,7 @@ sysio_prepare_tx_cmsgs(sock *s, struct msghdr *msg, void *cbuf, size_t cbuflen) static int sk_set_min_ttl4(sock *s, int ttl) { - if (setsockopt(s->fd, IPPROTO_IP, IP_MINTTL, &ttl, sizeof(ttl)) < 0) + if (setsockopt(s->fd, SOL_IP, IP_MINTTL, &ttl, sizeof(ttl)) < 0) { if (errno == ENOPROTOOPT) log(L_ERR "Kernel does not support IPv4 TTL security"); @@ -310,7 +286,7 @@ sk_set_min_ttl4(sock *s, int ttl) static int sk_set_min_ttl6(sock *s, int ttl) { - if (setsockopt(s->fd, IPPROTO_IPV6, IPV6_MINHOPCOUNT, &ttl, sizeof(ttl)) < 0) + if (setsockopt(s->fd, SOL_IPV6, IPV6_MINHOPCOUNT, &ttl, sizeof(ttl)) < 0) { if (errno == ENOPROTOOPT) log(L_ERR "Kernel does not support IPv6 TTL security"); diff --git a/sysdep/unix/io.c b/sysdep/unix/io.c index 6e3f1e4d..428f24cc 100644 --- a/sysdep/unix/io.c +++ b/sysdep/unix/io.c @@ -23,6 +23,8 @@ #include #include #include +#include +#include #include #include "nest/bird.h" @@ -489,6 +491,11 @@ tm_format_datetime(char *x, struct timeformat *fmt_spec, bird_clock_t t) #define SOL_IPV6 IPPROTO_IPV6 #endif +#ifndef SOL_ICMPV6 +#define SOL_ICMPV6 IPPROTO_ICMPV6 +#endif + + static list sock_list; static struct birdsock *current_sock; static struct birdsock *stored_sock; @@ -552,6 +559,43 @@ sk_free(resource *r) } } +void +sk_set_rbsize(sock *s, uint val) +{ + ASSERT(s->rbuf_alloc == s->rbuf); + + if (s->rbsize == val) + return; + + s->rbsize = val; + xfree(s->rbuf_alloc); + s->rbuf_alloc = xmalloc(val); + s->rpos = s->rbuf = s->rbuf_alloc; +} + +void +sk_set_tbsize(sock *s, uint val) +{ + ASSERT(s->tbuf_alloc == s->tbuf); + + if (s->tbsize == val) + return; + + byte *old_tbuf = s->tbuf; + + s->tbsize = val; + s->tbuf = s->tbuf_alloc = xrealloc(s->tbuf_alloc, val); + s->tpos = s->tbuf + (s->tpos - old_tbuf); + s->ttx = s->tbuf + (s->ttx - old_tbuf); +} + +void 
+sk_set_tbuf(sock *s, void *tbuf) +{ + s->tbuf = tbuf ?: s->tbuf_alloc; + s->ttx = s->tpos = s->tbuf; +} + void sk_reallocate(sock *s) { @@ -703,11 +747,11 @@ sysio_register_cmsgs(sock *s) int ok = 1; if ((s->flags & SKF_LADDR_RX) && - (setsockopt(s->fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &ok, sizeof(ok)) < 0)) + (setsockopt(s->fd, SOL_IPV6, IPV6_RECVPKTINFO, &ok, sizeof(ok)) < 0)) return "IPV6_RECVPKTINFO"; if ((s->flags & SKF_TTL_RX) && - (setsockopt(s->fd, IPPROTO_IPV6, IPV6_RECVHOPLIMIT, &ok, sizeof(ok)) < 0)) + (setsockopt(s->fd, SOL_IPV6, IPV6_RECVHOPLIMIT, &ok, sizeof(ok)) < 0)) return "IPV6_RECVHOPLIMIT"; return NULL; @@ -722,10 +766,10 @@ sysio_process_rx_cmsgs(sock *s, struct msghdr *msg) for (cm = CMSG_FIRSTHDR(msg); cm != NULL; cm = CMSG_NXTHDR(msg, cm)) { - if (cm->cmsg_level == IPPROTO_IPV6 && cm->cmsg_type == IPV6_PKTINFO) + if (cm->cmsg_level == SOL_IPV6 && cm->cmsg_type == IPV6_PKTINFO) pi = (struct in6_pktinfo *) CMSG_DATA(cm); - if (cm->cmsg_level == IPPROTO_IPV6 && cm->cmsg_type == IPV6_HOPLIMIT) + if (cm->cmsg_level == SOL_IPV6 && cm->cmsg_type == IPV6_HOPLIMIT) hlim = (int *) CMSG_DATA(cm); } @@ -749,32 +793,27 @@ sysio_process_rx_cmsgs(sock *s, struct msghdr *msg) return; } -/* static void sysio_prepare_tx_cmsgs(sock *s, struct msghdr *msg, void *cbuf, size_t cbuflen) { struct cmsghdr *cm; struct in6_pktinfo *pi; - if (!(s->flags & SKF_LADDR_TX)) - return; - msg->msg_control = cbuf; msg->msg_controllen = cbuflen; cm = CMSG_FIRSTHDR(msg); - cm->cmsg_level = IPPROTO_IPV6; + cm->cmsg_level = SOL_IPV6; cm->cmsg_type = IPV6_PKTINFO; cm->cmsg_len = CMSG_LEN(sizeof(*pi)); pi = (struct in6_pktinfo *) CMSG_DATA(cm); - set_inaddr(&pi->ipi6_addr, s->saddr); pi->ipi6_ifindex = s->iface ? 
s->iface->index : 0; + set_inaddr(&pi->ipi6_addr, s->saddr); msg->msg_controllen = cm->cmsg_len; - return; } -*/ + #endif static char * @@ -786,11 +825,6 @@ sk_set_ttl_int(sock *s) #else if (setsockopt(s->fd, SOL_IP, IP_TTL, &s->ttl, sizeof(s->ttl)) < 0) return "IP_TTL"; -#ifdef CONFIG_UNIX_DONTROUTE - int one = 1; - if (s->ttl == 1 && setsockopt(s->fd, SOL_SOCKET, SO_DONTROUTE, &one, sizeof(one)) < 0) - return "SO_DONTROUTE"; -#endif #endif return NULL; } @@ -801,6 +835,7 @@ sk_set_ttl_int(sock *s) static char * sk_setup(sock *s) { + int one = 1; int fd = s->fd; char *err = NULL; @@ -809,6 +844,41 @@ sk_setup(sock *s) if (s->type == SK_UNIX) return NULL; + if (ipa_nonzero(s->saddr) && !(s->flags & SKF_BIND)) + s->flags |= SKF_PKTINFO; + +#ifdef CONFIG_USE_HDRINCL + if ((s->type == SK_IP) && (s->flags & SKF_PKTINFO)) + { + s->flags &= ~SKF_PKTINFO; + s->flags |= SKF_HDRINCL; + if (setsockopt(fd, SOL_IP, IP_HDRINCL, &one, sizeof(one)) < 0) + ERR("IP_HDRINCL"); + } +#endif + + if (s->iface) + { +#ifdef SO_BINDTODEVICE + struct ifreq ifr; + strcpy(ifr.ifr_name, s->iface->name); + if (setsockopt(s->fd, SOL_SOCKET, SO_BINDTODEVICE, &ifr, sizeof(ifr)) < 0) + ERR("SO_BINDTODEVICE"); +#endif + +#ifdef CONFIG_UNIX_DONTROUTE + if (setsockopt(s->fd, SOL_SOCKET, SO_DONTROUTE, &one, sizeof(one)) < 0) + ERR("SO_DONTROUTE"); +#endif + } + + if ((s->ttl >= 0) && (err = sk_set_ttl_int(s))) + goto bad; + + if (err = sysio_register_cmsgs(s)) + goto bad; + + #ifdef IPV6 if ((s->tos >= 0) && setsockopt(fd, SOL_IPV6, IPV6_TCLASS, &s->tos, sizeof(s->tos)) < 0) WARN("IPV6_TCLASS"); @@ -821,15 +891,10 @@ sk_setup(sock *s) sk_set_priority(s, s->priority); #ifdef IPV6 - int v = 1; - if ((s->flags & SKF_V6ONLY) && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &v, sizeof(v)) < 0) + if ((s->flags & SKF_V6ONLY) && setsockopt(fd, SOL_IPV6, IPV6_V6ONLY, &one, sizeof(one)) < 0) WARN("IPV6_V6ONLY"); #endif - if ((s->ttl >= 0) && (err = sk_set_ttl_int(s))) - goto bad; - - err = sysio_register_cmsgs(s); 
bad: return err; } @@ -926,7 +991,7 @@ sk_set_broadcast(sock *s, int enable) int sk_set_ipv6_checksum(sock *s, int offset) { - if (setsockopt(s->fd, IPPROTO_IPV6, IPV6_CHECKSUM, &offset, sizeof(offset)) < 0) + if (setsockopt(s->fd, SOL_IPV6, IPV6_CHECKSUM, &offset, sizeof(offset)) < 0) { log(L_ERR "sk_set_ipv6_checksum: IPV6_CHECKSUM: %m"); return -1; @@ -945,7 +1010,7 @@ sk_set_icmp_filter(sock *s, int p1, int p2) ICMP6_FILTER_SETPASS(p1, &f); ICMP6_FILTER_SETPASS(p2, &f); - if (setsockopt(s->fd, IPPROTO_ICMPV6, ICMP6_FILTER, &f, sizeof(f)) < 0) + if (setsockopt(s->fd, SOL_ICMPV6, ICMP6_FILTER, &f, sizeof(f)) < 0) { log(L_ERR "sk_setup_icmp_filter: ICMP6_FILTER: %m"); return -1; @@ -961,7 +1026,7 @@ sk_setup_multicast(sock *s) int zero = 0; int index; - ASSERT(s->iface && s->iface->addr); + ASSERT(s->iface); index = s->iface->index; if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_HOPS, &s->ttl, sizeof(s->ttl)) < 0) @@ -971,9 +1036,6 @@ sk_setup_multicast(sock *s) if (setsockopt(s->fd, SOL_IPV6, IPV6_MULTICAST_IF, &index, sizeof(index)) < 0) ERR("IPV6_MULTICAST_IF"); - if (err = sysio_bind_to_iface(s)) - goto bad; - return 0; bad: @@ -981,18 +1043,17 @@ bad: return -1; } +#ifdef CONFIG_IPV6_GLIBC_20 +#define ipv6mr_interface ipv6mr_ifindex +#endif + int sk_join_group(sock *s, ip_addr maddr) { struct ipv6_mreq mreq; set_inaddr(&mreq.ipv6mr_multiaddr, maddr); - -#ifdef CONFIG_IPV6_GLIBC_20 - mreq.ipv6mr_ifindex = s->iface->index; -#else mreq.ipv6mr_interface = s->iface->index; -#endif if (setsockopt(s->fd, SOL_IPV6, IPV6_JOIN_GROUP, &mreq, sizeof(mreq)) < 0) { @@ -1009,12 +1070,7 @@ sk_leave_group(sock *s, ip_addr maddr) struct ipv6_mreq mreq; set_inaddr(&mreq.ipv6mr_multiaddr, maddr); - -#ifdef CONFIG_IPV6_GLIBC_20 - mreq.ipv6mr_ifindex = s->iface->index; -#else mreq.ipv6mr_interface = s->iface->index; -#endif if (setsockopt(s->fd, SOL_IPV6, IPV6_LEAVE_GROUP, &mreq, sizeof(mreq)) < 0) { @@ -1032,7 +1088,7 @@ sk_setup_multicast(sock *s) { char *err; - ASSERT(s->iface 
&& s->iface->addr); + ASSERT(s->iface); if (err = sysio_setup_multicast(s)) { @@ -1142,31 +1198,45 @@ int sk_open(sock *s) { int fd; - sockaddr sa; int one = 1; - int type = s->type; - int has_src = ipa_nonzero(s->saddr) || s->sport; + int do_bind = 0; + int bind_port = 0; + ip_addr bind_addr = IPA_NONE; + sockaddr sa; char *err; - switch (type) + switch (s->type) { case SK_TCP_ACTIVE: s->ttx = ""; /* Force s->ttx != s->tpos */ /* Fall thru */ case SK_TCP_PASSIVE: fd = socket(BIRD_PF, SOCK_STREAM, IPPROTO_TCP); + bind_port = s->sport; + bind_addr = s->saddr; + do_bind = bind_port || ipa_nonzero(bind_addr); break; + case SK_UDP: fd = socket(BIRD_PF, SOCK_DGRAM, IPPROTO_UDP); + bind_port = s->sport; + bind_addr = (s->flags & SKF_BIND) ? s->saddr : IPA_NONE; + do_bind = 1; break; + case SK_IP: fd = socket(BIRD_PF, SOCK_RAW, s->dport); + bind_port = 0; + bind_addr = (s->flags & SKF_BIND) ? s->saddr : IPA_NONE; + do_bind = ipa_nonzero(bind_addr); break; + case SK_MAGIC: fd = s->fd; break; + default: - bug("sk_open() called for invalid sock type %d", type); + bug("sk_open() called for invalid sock type %d", s->type); } if (fd < 0) die("sk_open: socket: %m"); @@ -1175,31 +1245,28 @@ sk_open(sock *s) if (err = sk_setup(s)) goto bad; - if (has_src) + if (do_bind) { - int port; - - if (type == SK_IP) - port = 0; - else + if (bind_port) { - port = s->sport; if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)) < 0) ERR("SO_REUSEADDR"); - + #ifdef CONFIG_NO_IFACE_BIND /* Workaround missing ability to bind to an iface */ - if ((type == SK_UDP) && s->iface && ipa_zero(s->saddr)) + if ((s->type == SK_UDP) && s->iface && ipa_zero(bind_addr)) { if (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one)) < 0) ERR("SO_REUSEPORT"); } #endif } - fill_in_sockaddr(&sa, s->saddr, s->iface, port); + + fill_in_sockaddr(&sa, bind_addr, s->iface, bind_port); if (bind(fd, (struct sockaddr *) &sa, sizeof(sa)) < 0) ERR("bind"); } + fill_in_sockaddr(&sa, s->daddr, s->iface, 
s->dport); if (s->password) @@ -1209,7 +1276,7 @@ sk_open(sock *s) goto bad_no_log; } - switch (type) + switch (s->type) { case SK_TCP_ACTIVE: if (connect(fd, (struct sockaddr *) &sa, sizeof(sa)) >= 0) @@ -1287,6 +1354,79 @@ sk_open_unix(sock *s, char *name) die("Unable to create control socket %s", name); } + +static inline int +sk_sendmsg(sock *s) +{ + struct iovec iov = {s->tbuf, s->tpos - s->tbuf}; + byte cmsg_buf[CMSG_TX_SPACE]; + sockaddr dst; + + fill_in_sockaddr(&dst, s->daddr, s->iface, s->dport); + + struct msghdr msg = { + .msg_name = &dst, + .msg_namelen = sizeof(dst), + .msg_iov = &iov, + .msg_iovlen = 1 + }; + +#ifdef CONFIG_USE_HDRINCL + byte hdr[20]; + struct iovec iov2[2] = { {hdr, 20}, iov }; + + if (s->flags & SKF_HDRINCL) + { + fill_ip_header(s, hdr, iov.iov_len); + msg.msg_iov = iov2; + msg.msg_iovlen = 2; + } +#endif + + if (s->flags & SKF_PKTINFO) + sysio_prepare_tx_cmsgs(s, &msg, cmsg_buf, sizeof(cmsg_buf)); + + return sendmsg(s->fd, &msg, 0); +} + +static inline int +sk_recvmsg(sock *s) +{ + struct iovec iov = {s->rbuf, s->rbsize}; + byte cmsg_buf[CMSG_RX_SPACE]; + sockaddr src; + + struct msghdr msg = { + .msg_name = &src, + .msg_namelen = sizeof(src), + .msg_iov = &iov, + .msg_iovlen = 1, + .msg_control = cmsg_buf, + .msg_controllen = sizeof(cmsg_buf), + .msg_flags = 0 + }; + + int rv = recvmsg(s->fd, &msg, 0); + if (rv < 0) + return rv; + + //ifdef IPV4 + // if (cf_type == SK_IP) + // rv = ipv4_skip_header(pbuf, rv); + //endif + + get_sockaddr(&src, &s->faddr, NULL, &s->fport, 1); + sysio_process_rx_cmsgs(s, &msg); + + if (msg.msg_flags & MSG_TRUNC) + s->flags |= SKF_TRUNCATED; + else + s->flags &= ~SKF_TRUNCATED; + + return rv; +} + + static inline void reset_tx_buffer(sock *s) { s->ttx = s->tpos = s->tbuf; } static int @@ -1323,20 +1463,7 @@ sk_maybe_write(sock *s) if (s->tbuf == s->tpos) return 1; - sockaddr sa; - fill_in_sockaddr(&sa, s->daddr, s->iface, s->dport); - - struct iovec iov = {s->tbuf, s->tpos - s->tbuf}; - // byte 
cmsg_buf[CMSG_TX_SPACE]; - - struct msghdr msg = { - .msg_name = &sa, - .msg_namelen = sizeof(sa), - .msg_iov = &iov, - .msg_iovlen = 1}; - - // sysio_prepare_tx_cmsgs(s, &msg, cmsg_buf, sizeof(cmsg_buf)); - e = sendmsg(s->fd, &msg, 0); + e = sk_sendmsg(s); if (e < 0) { @@ -1346,6 +1473,9 @@ sk_maybe_write(sock *s) s->err_hook(s, errno); return -1; } + + if (!s->tx_hook) + reset_tx_buffer(s); return 0; } reset_tx_buffer(s); @@ -1408,12 +1538,15 @@ sk_send(sock *s, unsigned len) * * This is a sk_send() replacement for connection-less packet sockets * which allows destination of the packet to be chosen dynamically. + * Raw IP sockets should use 0 for @port. */ int sk_send_to(sock *s, unsigned len, ip_addr addr, unsigned port) { s->daddr = addr; - s->dport = port; + if (port) + s->dport = port; + s->ttx = s->tbuf; s->tpos = s->tbuf + len; return sk_maybe_write(s); @@ -1480,22 +1613,9 @@ sk_read(sock *s) return s->rx_hook(s, 0); default: { - sockaddr sa; int e; - struct iovec iov = {s->rbuf, s->rbsize}; - byte cmsg_buf[CMSG_RX_SPACE]; - - struct msghdr msg = { - .msg_name = &sa, - .msg_namelen = sizeof(sa), - .msg_iov = &iov, - .msg_iovlen = 1, - .msg_control = cmsg_buf, - .msg_controllen = sizeof(cmsg_buf), - .msg_flags = 0}; - - e = recvmsg(s->fd, &msg, 0); + e = sk_recvmsg(s); if (e < 0) { @@ -1503,10 +1623,8 @@ sk_read(sock *s) s->err_hook(s, errno); return 0; } - s->rpos = s->rbuf + e; - get_sockaddr(&sa, &s->faddr, NULL, &s->fport, 1); - sysio_process_rx_cmsgs(s, &msg); + s->rpos = s->rbuf + e; s->rx_hook(s, e); return 1; }