Better packet priority and traffic class handling.

Implements support for IPv6 traffic class, sets higher priority for OSPF
and RIP outgoing packets by default and allows to configure ToS/DS/TClass
IP header field and the local priority of outgoing packets.
This commit is contained in:
Ondrej Zajicek 2013-06-24 16:37:30 +02:00
parent fad04c750c
commit ef4a50be10
13 changed files with 113 additions and 24 deletions

View file

@ -567,6 +567,22 @@ to zero to disable it. An empty <cf><m/switch/</cf> is equivalent to <cf/on/
<cf>interface "eth*" 192.168.1.0/24;</cf> - start the protocol on all
ethernet interfaces that have address from 192.168.1.0/24.
<tag><label id="dsc-prio">tx class|dscp <m/num/</tag>
This option specifies the value of ToS/DS/Class field in IP
headers of the outgoing protocol packets. This may affect how the
protocol packets are processed by the network relative to the
other network traffic. With <cf/class/ keyword, the value
(0-255) is used for the whole ToS/Class octet (but two bits
reserved for ECN are ignored). With <cf/dscp/ keyword, the
value (0-63) is used just for the DS field in the
octet. Default value is 0xc0 (DSCP 0x30 - CS6).
<tag>tx priority <m/num/</tag>
This option specifies the local packet priority. This may
affect how the protocol packets are processed in the local TX
queues. This option is Linux specific. Default value is 7
(highest priority, privileged traffic).
<tag><label id="dsc-pass">password "<m/password/" [ { id <m/num/; generate from <m/time/; generate to <m/time/; accept from <m/time/; accept to <m/time/; } ]</tag>
Specifies a password that can be used by the protocol. Password option can
be used more times to specify more passwords. If more passwords are
@ -2220,6 +2236,11 @@ protocol ospf &lt;name&gt; {
prefix) is propagated. It is possible that some hardware
drivers or platforms do not implement this feature. Default value is no.
<tag>tx class|dscp|priority <m/num/</tag>
These options specify the ToS/DiffServ/Traffic class/Priority
of the outgoing OSPF packets. See <ref id="dsc-prio" name="tx
class"> common option for detailed description.
<tag>ecmp weight <M>num</M></tag>
When ECMP (multipath) routes are allowed, this value specifies
a relative weight used for nexthops going through the iface.
@ -2748,13 +2769,26 @@ makes it pretty much obsolete. (It is still usable on very small networks.)
neighbors, that is not configurable. Default: never.
</descrip>
<p>There are two options that can be specified per-interface. First is <cf>metric</cf>, with
default one. Second is <cf>mode multicast|broadcast|quiet|nolisten|version1</cf>, it selects mode for
rip to work in. If nothing is specified, rip runs in multicast mode. <cf>version1</cf> is
currently equivalent to <cf>broadcast</cf>, and it makes RIP talk to a broadcast address even
though multicast mode is possible. <cf>quiet</cf> option means that RIP will not transmit
any periodic messages to this interface and <cf>nolisten</cf> means that RIP will send to this
interface but not listen to it.
<p>There are some options that can be specified per-interface:
<descrip>
<tag>metric <m/num/</tag>
This option specifies the metric of the interface. Default value is 1.
<tag>mode multicast|broadcast|quiet|nolisten|version1</tag>
This option selects the mode for RIP to work in. If nothing is
specified, RIP runs in multicast mode. <cf/version1/ is
currently equivalent to <cf/broadcast/, and it makes RIP talk
to a broadcast address even though multicast mode is
possible. <cf/quiet/ option means that RIP will not transmit
any periodic messages to this interface and <cf/nolisten/
means that RIP will send to this interface but not listen to it.
<tag>tx class|dscp|priority <m/num/</tag>
These options specify the ToS/DiffServ/Traffic class/Priority
of the outgoing RIP packets. See <ref id="dsc-prio" name="tx
class"> common option for detailed description.
</descrip>
<p>The following options generally override behavior specified in RFC. If you use any of these
options, BIRD will no longer be RFC-compliant, which means it will not be able to talk to anything

View file

@ -128,11 +128,6 @@ static inline byte * ipv6_put_addr(byte *buf, ip_addr a)
return buf+16;
}
/*
* RFC 1883 defines packet precedence, but RFC 2460 replaces it
* by generic Traffic Class ID with no defined semantics. Better
* not use it yet.
*/
#define IP_PREC_INTERNET_CONTROL -1
#define IP_PREC_INTERNET_CONTROL 0xc0
#endif

View file

@ -20,7 +20,8 @@ typedef struct birdsock {
void *data; /* User data */
ip_addr saddr, daddr; /* IPA_NONE = unspecified */
unsigned sport, dport; /* 0 = unspecified (for IP: protocol type) */
int tos; /* TOS and priority, -1 = default */
int tos; /* TOS / traffic class, -1 = default */
int priority; /* Local socket priority, -1 = default */
int ttl; /* Time To Live, -1 = default */
u32 flags;
struct iface *iface; /* Interface; specify this for broad/multicast sockets */
@ -81,6 +82,7 @@ sk_send_buffer_empty(sock *sk)
return sk->tbuf == sk->tpos;
}
extern int sk_priority_control; /* Suggested priority for control traffic, should be sysdep define */
/* Socket flags */

View file

@ -48,7 +48,7 @@ CF_KEYWORDS(RECEIVE, LIMIT, ACTION, WARN, BLOCK, RESTART, DISABLE, KEEP, FILTERE
CF_KEYWORDS(PASSWORD, FROM, PASSIVE, TO, ID, EVENTS, PACKETS, PROTOCOLS, INTERFACES)
CF_KEYWORDS(PRIMARY, STATS, COUNT, FOR, COMMANDS, PREEXPORT, GENERATE, ROA, MAX, FLUSH)
CF_KEYWORDS(LISTEN, BGP, V6ONLY, DUAL, ADDRESS, PORT, PASSWORDS, DESCRIPTION, SORTED)
CF_KEYWORDS(RELOAD, IN, OUT, MRTDUMP, MESSAGES, RESTRICT, MEMORY, IGP_METRIC)
CF_KEYWORDS(RELOAD, IN, OUT, MRTDUMP, MESSAGES, RESTRICT, MEMORY, IGP_METRIC, CLASS, DSCP)
CF_ENUM(T_ENUM_RTS, RTS_, DUMMY, STATIC, INHERIT, DEVICE, STATIC_DEVICE, REDIRECT,
RIP, OSPF, OSPF_IA, OSPF_EXT1, OSPF_EXT2, BGP, PIPE)
@ -65,7 +65,7 @@ CF_ENUM(T_ENUM_ROA, ROA_, UNKNOWN, VALID, INVALID)
%type <ro> roa_args
%type <rot> roa_table_arg
%type <sd> sym_args
%type <i> proto_start echo_mask echo_size debug_mask debug_list debug_flag mrtdump_mask mrtdump_list mrtdump_flag export_or_preexport roa_mode limit_action tab_sorted
%type <i> proto_start echo_mask echo_size debug_mask debug_list debug_flag mrtdump_mask mrtdump_list mrtdump_flag export_or_preexport roa_mode limit_action tab_sorted tos
%type <ps> proto_patt proto_patt2
%type <g> limit_spec
@ -277,6 +277,10 @@ iface_patt:
iface_patt_init iface_patt_list
;
/*
 * Value for the ToS / Traffic Class octet of outgoing packets.
 * CLASS takes the whole octet and masks off the two low-order ECN bits.
 * DSCP takes a six-bit value and shifts it into the upper six bits of the octet.
 * Out-of-range arguments are reported through cf_error().
 */
tos:
CLASS expr { $$ = $2 & 0xfc; if (($2 < 0) || ($2 > 255)) cf_error("TX class must be in range 0-255"); }
| DSCP expr { $$ = ($2 & 0x3f) << 2; if (($2 < 0) || ($2 > 63)) cf_error("TX DSCP must be in range 0-63"); }
;
/* Direct device route protocol */

View file

@ -131,7 +131,7 @@ CF_KEYWORDS(NONE, SIMPLE, AUTHENTICATION, STRICT, CRYPTOGRAPHIC)
CF_KEYWORDS(ELIGIBLE, POLL, NETWORKS, HIDDEN, VIRTUAL, CHECK, LINK)
CF_KEYWORDS(RX, BUFFER, LARGE, NORMAL, STUBNET, HIDDEN, SUMMARY, TAG, EXTERNAL)
CF_KEYWORDS(WAIT, DELAY, LSADB, ECMP, LIMIT, WEIGHT, NSSA, TRANSLATOR, STABILITY)
CF_KEYWORDS(GLOBAL, LSID, ROUTER, SELF, INSTANCE, REAL, NETMASK)
CF_KEYWORDS(GLOBAL, LSID, ROUTER, SELF, INSTANCE, REAL, NETMASK, TX, PRIORITY)
%type <t> opttext
%type <ld> lsadb_args
@ -305,6 +305,8 @@ ospf_iface_item:
| RX BUFFER LARGE { OSPF_PATT->rxbuf = OSPF_RXBUF_LARGE ; }
| RX BUFFER NORMAL { OSPF_PATT->rxbuf = OSPF_RXBUF_NORMAL ; }
| RX BUFFER expr { OSPF_PATT->rxbuf = $3 ; if (($3 < OSPF_RXBUF_MINSIZE) || ($3 > OSPF_MAX_PKT_SIZE)) cf_error("Buffer size must be in range 256-65535"); }
| TX tos { OSPF_PATT->tx_tos = $2; }
| TX PRIORITY expr { OSPF_PATT->tx_priority = $3; }
| password_list
;
@ -367,6 +369,8 @@ ospf_iface_start:
init_list(&OSPF_PATT->nbma_list);
OSPF_PATT->autype = OSPF_AUTH_NONE;
OSPF_PATT->ptp_netmask = 2; /* not specified */
OSPF_PATT->tx_tos = IP_PREC_INTERNET_CONTROL;
OSPF_PATT->tx_priority = sk_priority_control;
reset_passwords();
}
;

View file

@ -77,7 +77,8 @@ ospf_sk_open(struct ospf_iface *ifa)
sk->dport = OSPF_PROTO;
sk->saddr = IPA_NONE;
sk->tos = IP_PREC_INTERNET_CONTROL;
sk->tos = ifa->cf->tx_tos;
sk->priority = ifa->cf->tx_priority;
sk->rx_hook = ospf_rx_hook;
sk->tx_hook = ospf_tx_hook;
sk->err_hook = ospf_err_hook;
@ -659,7 +660,10 @@ ospf_iface_reconfigure(struct ospf_iface *ifa, struct ospf_iface_patt *new)
if (ifa->stub != new_stub)
return 0;
if (new->real_bcast != ifa->cf->real_bcast)
/* Change of these options would require to reset the iface socket */
if ((new->real_bcast != ifa->cf->real_bcast) ||
(new->tx_tos != ifa->cf->tx_tos) ||
(new->tx_priority != ifa->cf->tx_priority))
return 0;
ifa->cf = new;

View file

@ -800,6 +800,8 @@ struct ospf_iface_patt
u32 priority;
u32 voa;
u32 vid;
int tx_tos;
int tx_priority;
u16 rxbuf;
#define OSPF_RXBUF_NORMAL 0
#define OSPF_RXBUF_LARGE 1

View file

@ -27,7 +27,7 @@ CF_DECLS
CF_KEYWORDS(RIP, INFINITY, METRIC, PORT, PERIOD, GARBAGE, TIMEOUT,
MODE, BROADCAST, MULTICAST, QUIET, NOLISTEN, VERSION1,
AUTHENTICATION, NONE, PLAINTEXT, MD5,
HONOR, NEVER, NEIGHBOR, ALWAYS,
HONOR, NEVER, NEIGHBOR, ALWAYS, TX, PRIORITY,
RIP_METRIC, RIP_TAG)
%type <i> rip_mode rip_auth
@ -76,6 +76,8 @@ rip_mode:
rip_iface_item:
| METRIC expr { RIP_IPATT->metric = $2; }
| MODE rip_mode { RIP_IPATT->mode |= $2; }
| TX tos { RIP_IPATT->tx_tos = $2; }
| TX PRIORITY expr { RIP_IPATT->tx_priority = $3; }
;
rip_iface_opts:
@ -94,6 +96,8 @@ rip_iface_init:
add_tail(&RIP_CFG->iface_list, NODE this_ipatt);
init_list(&this_ipatt->ipn_list);
RIP_IPATT->metric = 1;
RIP_IPATT->tx_tos = IP_PREC_INTERNET_CONTROL;
RIP_IPATT->tx_priority = sk_priority_control;
}
;

View file

@ -707,7 +707,8 @@ new_iface(struct proto *p, struct iface *new, unsigned long flags, struct iface_
if (new)
{
rif->sock->ttl = 1;
rif->sock->tos = IP_PREC_INTERNET_CONTROL;
rif->sock->tos = PATT->tx_tos;
rif->sock->priority = PATT->tx_priority;
rif->sock->flags = SKF_LADDR_RX;
}
@ -1007,7 +1008,9 @@ static int
rip_pat_compare(struct rip_patt *a, struct rip_patt *b)
{
return ((a->metric == b->metric) &&
(a->mode == b->mode));
(a->mode == b->mode) &&
(a->tx_tos == b->tx_tos) &&
(a->tx_priority == b->tx_priority));
}
static int

View file

@ -128,6 +128,8 @@ struct rip_patt {
#define IM_QUIET 4
#define IM_NOLISTEN 8
#define IM_VERSION1 16
int tx_tos;
int tx_priority;
};
struct rip_proto_config {

View file

@ -284,3 +284,12 @@ sk_set_min_ttl6(sock *s, int ttl)
#endif
int sk_priority_control = -1;
/*
 * sk_set_priority - stub used where local socket priority is unavailable
 * @s: socket (not used)
 * @prio: requested priority (not used)
 *
 * Logs a warning and reports failure. Both arguments are ignored, so both
 * are marked UNUSED to keep the build free of unused-parameter warnings.
 *
 * Returns -1 always.
 */
static int
sk_set_priority(sock *s UNUSED, int prio UNUSED)
{
  log(L_WARN "Socket priority not supported");
  return -1;
}

View file

@ -310,3 +310,22 @@ sk_set_min_ttl6(sock *s, int ttl)
}
#endif
#ifndef IPV6_TCLASS
#define IPV6_TCLASS 67
#endif
int sk_priority_control = 7;
/*
 * sk_set_priority - apply a local priority to a socket
 * @s: socket whose descriptor (s->fd) receives the option
 * @prio: value passed to the SO_PRIORITY socket option
 *
 * Returns 0 when setsockopt() succeeds. Otherwise logs a warning
 * (including the system error text via %m) and returns -1.
 */
static int
sk_set_priority(sock *s, int prio)
{
  int rv = setsockopt(s->fd, SOL_SOCKET, SO_PRIORITY, &prio, sizeof(prio));

  if (rv >= 0)
    return 0;

  log(L_WARN "sk_set_priority: setsockopt: %m");
  return -1;
}

View file

@ -598,7 +598,7 @@ sock_new(pool *p)
sock *s = ralloc(p, &sk_class);
s->pool = p;
// s->saddr = s->daddr = IPA_NONE;
s->tos = s->ttl = -1;
s->tos = s->priority = s->ttl = -1;
s->fd = -1;
return s;
}
@ -783,11 +783,18 @@ sk_setup(sock *s)
ERR("fcntl(O_NONBLOCK)");
if (s->type == SK_UNIX)
return NULL;
#ifndef IPV6
#ifdef IPV6
if ((s->tos >= 0) && setsockopt(fd, SOL_IPV6, IPV6_TCLASS, &s->tos, sizeof(s->tos)) < 0)
WARN("IPV6_TCLASS");
#else
if ((s->tos >= 0) && setsockopt(fd, SOL_IP, IP_TOS, &s->tos, sizeof(s->tos)) < 0)
WARN("IP_TOS");
#endif
if (s->priority >= 0)
sk_set_priority(s, s->priority);
#ifdef IPV6
int v = 1;
if ((s->flags & SKF_V6ONLY) && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &v, sizeof(v)) < 0)