Skip to content

Commit

Permalink
userspace: Add SRv6 tunnel support.
Browse files Browse the repository at this point in the history
SRv6 (Segment Routing IPv6) tunnel vport is responsible
for encapsulating and decapsulating the inner packets with
an IPv6 header and an extension header called SRH
(Segment Routing Header). See spec in:

https://datatracker.ietf.org/doc/html/rfc8754

This patch implements SRv6 tunneling in userspace datapath.
It uses `remote_ip` and `local_ip` options as with existing
tunnel protocols. It also adds a dedicated `srv6_segs` option
to define a sequence of routers called segment list.

Signed-off-by: Nobuhiro MIKI <[email protected]>
Signed-off-by: Ilya Maximets <[email protected]>
  • Loading branch information
bobuhiro11 authored and igsilya committed Mar 29, 2023
1 parent 349112f commit 03fc1ad
Show file tree
Hide file tree
Showing 18 changed files with 450 additions and 1 deletion.
21 changes: 21 additions & 0 deletions Documentation/faq/configuration.rst
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,27 @@ Q: Does Open vSwitch support GTP-U?
set int gtpu0 type=gtpu options:key=<teid> \
options:remote_ip=172.31.1.1

Q: Does Open vSwitch support SRv6?

A: Yes. Starting with version 3.2, the Open vSwitch userspace
datapath supports SRv6 (Segment Routing over IPv6). The following
example shows tunneling to fc00:300::1 via fc00:100::1 and fc00:200::1.
In the current implementation, if "IPv6 in IPv6" or "IPv4 in IPv6" packets
are routed to this interface, and these packets are not SRv6 packets, they
may be dropped, so be careful in workloads with a mix of these tunnels.
Also note the following restrictions:

* Segment list length is limited to 6.
* SRv6 packets with segments_left other than 0 are simply dropped.

::

$ ovs-vsctl add-br br0
$ ovs-vsctl add-port br0 srv6_0 -- \
set int srv6_0 type=srv6 \
options:remote_ip=fc00:100::1 \
options:srv6_segs="fc00:100::1,fc00:200::1,fc00:300::1"

Q: How do I connect two bridges?

A: First, why do you want to do this? Two connected bridges are not much
Expand Down
1 change: 1 addition & 0 deletions Documentation/faq/releases.rst
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,7 @@ Q: Are all features available with all datapaths?
Tunnel - ERSPAN 4.18 2.10 2.10 NO
Tunnel - ERSPAN-IPv6 4.18 2.10 2.10 NO
Tunnel - GTP-U NO NO 2.14 NO
Tunnel - SRv6 NO NO 3.2 NO
Tunnel - Bareudp 5.7 NO NO NO
QoS - Policing YES 1.1 2.6 NO
QoS - Shaping YES 1.1 NO NO
Expand Down
2 changes: 2 additions & 0 deletions NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ Post-v3.1.0
* ovs-vswitchd will keep the CAP_SYS_RAWIO capability when started
with the --hw-rawio-access command line option. This allows the
process extra privileges when mapping physical interconnect memory.
- SRv6 Tunnel Protocol
* Added support for userspace datapath (only).


v3.1.0 - 16 Feb 2023
Expand Down
1 change: 1 addition & 0 deletions include/linux/openvswitch.h
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,7 @@ enum ovs_vport_type {
OVS_VPORT_TYPE_IP6GRE = 109,
OVS_VPORT_TYPE_GTPU = 110,
OVS_VPORT_TYPE_BAREUDP = 111, /* Bareudp tunnel. */
OVS_VPORT_TYPE_SRV6 = 112, /* SRv6 tunnel. */
__OVS_VPORT_TYPE_MAX
};

Expand Down
1 change: 1 addition & 0 deletions include/sparse/netinet/in.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ struct sockaddr_in6 {
#define IPPROTO_HOPOPTS 0
#define IPPROTO_ICMP 1
#define IPPROTO_IGMP 2
#define IPPROTO_IPIP 4
#define IPPROTO_TCP 6
#define IPPROTO_UDP 17
#define IPPROTO_ROUTING 43
Expand Down
5 changes: 5 additions & 0 deletions lib/dpif-netlink-rtnl.c
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,8 @@ vport_type_to_kind(enum ovs_vport_type type,
}
case OVS_VPORT_TYPE_GTPU:
return NULL;
case OVS_VPORT_TYPE_SRV6:
return "srv6";
case OVS_VPORT_TYPE_BAREUDP:
return "bareudp";
case OVS_VPORT_TYPE_NETDEV:
Expand Down Expand Up @@ -319,6 +321,7 @@ dpif_netlink_rtnl_verify(const struct netdev_tunnel_config *tnl_cfg,
case OVS_VPORT_TYPE_LISP:
case OVS_VPORT_TYPE_STT:
case OVS_VPORT_TYPE_GTPU:
case OVS_VPORT_TYPE_SRV6:
case OVS_VPORT_TYPE_UNSPEC:
case __OVS_VPORT_TYPE_MAX:
default:
Expand Down Expand Up @@ -411,6 +414,7 @@ dpif_netlink_rtnl_create(const struct netdev_tunnel_config *tnl_cfg,
case OVS_VPORT_TYPE_LISP:
case OVS_VPORT_TYPE_STT:
case OVS_VPORT_TYPE_GTPU:
case OVS_VPORT_TYPE_SRV6:
case OVS_VPORT_TYPE_UNSPEC:
case __OVS_VPORT_TYPE_MAX:
default:
Expand Down Expand Up @@ -519,6 +523,7 @@ dpif_netlink_rtnl_port_destroy(const char *name, const char *type)
case OVS_VPORT_TYPE_ERSPAN:
case OVS_VPORT_TYPE_IP6ERSPAN:
case OVS_VPORT_TYPE_IP6GRE:
case OVS_VPORT_TYPE_SRV6:
case OVS_VPORT_TYPE_BAREUDP:
return dpif_netlink_rtnl_destroy(name);
case OVS_VPORT_TYPE_NETDEV:
Expand Down
5 changes: 5 additions & 0 deletions lib/dpif-netlink.c
Original file line number Diff line number Diff line change
Expand Up @@ -919,6 +919,9 @@ get_vport_type(const struct dpif_netlink_vport *vport)
case OVS_VPORT_TYPE_GTPU:
return "gtpu";

case OVS_VPORT_TYPE_SRV6:
return "srv6";

case OVS_VPORT_TYPE_BAREUDP:
return "bareudp";

Expand Down Expand Up @@ -957,6 +960,8 @@ netdev_to_ovs_vport_type(const char *type)
return OVS_VPORT_TYPE_GRE;
} else if (!strcmp(type, "gtpu")) {
return OVS_VPORT_TYPE_GTPU;
} else if (!strcmp(type, "srv6")) {
return OVS_VPORT_TYPE_SRV6;
} else if (!strcmp(type, "bareudp")) {
return OVS_VPORT_TYPE_BAREUDP;
} else {
Expand Down
130 changes: 130 additions & 0 deletions lib/netdev-native-tnl.c
Original file line number Diff line number Diff line change
Expand Up @@ -845,6 +845,136 @@ netdev_gtpu_build_header(const struct netdev *netdev,
return 0;
}

int
netdev_srv6_build_header(const struct netdev *netdev,
struct ovs_action_push_tnl *data,
const struct netdev_tnl_build_header_params *params)
{
struct netdev_vport *dev = netdev_vport_cast(netdev);
struct netdev_tunnel_config *tnl_cfg;
const struct in6_addr *segs;
struct srv6_base_hdr *srh;
struct in6_addr *s;
ovs_be16 dl_type;
int err = 0;
int nr_segs;
int i;

ovs_mutex_lock(&dev->mutex);
tnl_cfg = &dev->tnl_cfg;

if (tnl_cfg->srv6_num_segs) {
nr_segs = tnl_cfg->srv6_num_segs;
segs = tnl_cfg->srv6_segs;
} else {
/*
* If explicit segment list setting is omitted, tunnel destination
* is considered to be the first segment list.
*/
nr_segs = 1;
segs = &params->flow->tunnel.ipv6_dst;
}

if (!ipv6_addr_equals(&segs[0], &params->flow->tunnel.ipv6_dst)) {
err = EINVAL;
goto out;
}

srh = netdev_tnl_ip_build_header(data, params, IPPROTO_ROUTING);
srh->rt_hdr.segments_left = nr_segs - 1;
srh->rt_hdr.type = IPV6_SRCRT_TYPE_4;
srh->rt_hdr.hdrlen = 2 * nr_segs;
srh->last_entry = nr_segs - 1;
srh->flags = 0;
srh->tag = 0;

dl_type = params->flow->dl_type;
if (dl_type == htons(ETH_TYPE_IP)) {
srh->rt_hdr.nexthdr = IPPROTO_IPIP;
} else if (dl_type == htons(ETH_TYPE_IPV6)) {
srh->rt_hdr.nexthdr = IPPROTO_IPV6;
} else {
err = EOPNOTSUPP;
goto out;
}

s = ALIGNED_CAST(struct in6_addr *,
(char *) srh + sizeof *srh);
for (i = 0; i < nr_segs; i++) {
/* Segment list is written to the header in reverse order. */
memcpy(s, &segs[nr_segs - i - 1], sizeof *s);
s++;
}

data->header_len += sizeof *srh + 8 * srh->rt_hdr.hdrlen;
data->tnl_type = OVS_VPORT_TYPE_SRV6;
out:
ovs_mutex_unlock(&dev->mutex);

return err;
}

/* Prepends the prebuilt SRv6 tunnel header stored in 'data' to 'packet' by
 * delegating to netdev_tnl_push_ip_header().  'netdev' is not used. */
void
netdev_srv6_push_header(const struct netdev *netdev OVS_UNUSED,
                        struct dp_packet *packet,
                        const struct ovs_action_push_tnl *data)
{
    /* Output argument required by the helper; its value is not needed
     * here. */
    int tot_size_unused;

    netdev_tnl_push_ip_header(packet,
                              data->header, data->header_len,
                              &tot_size_unused);
}

/* Decapsulates an SRv6 packet: validates the outer IPv6 header and its
 * Segment Routing Header, records the tunnel metadata in 'packet->md' and
 * strips the outer headers so that 'packet' starts at the inner IPv4 or IPv6
 * packet.
 *
 * Returns 'packet' on success.  On any validation failure the packet is
 * deleted and NULL is returned.  Packets are rejected when:
 *
 *   - the L3 area is missing or shorter than an IPv6 header,
 *   - the extension headers cannot be parsed or contain no routing header of
 *     type IPV6_SRCRT_TYPE_4,
 *   - segments_left is not 0,
 *   - the routing header's next header is neither IPv4 nor IPv6,
 *   - the tunnel metadata cannot be extracted. */
struct dp_packet *
netdev_srv6_pop_header(struct dp_packet *packet)
{
    const struct ovs_16aligned_ip6_hdr *nh = dp_packet_l3(packet);
    struct pkt_metadata *md = &packet->md;
    struct flow_tnl *tnl = &md->tunnel;
    const struct ip6_rt_hdr *rt_hdr = NULL;
    const void *data;
    uint8_t nw_frag = 0;
    uint8_t nw_proto;
    unsigned int hlen;
    size_t size;

    /* Make sure a full IPv6 header is present before touching it.  Without
     * this check the subtraction below would wrap around for short packets
     * and the extension header parser would be handed a bogus length. */
    size = dp_packet_l3_size(packet);
    if (!nh || size < IPV6_HEADER_LEN) {
        goto err;
    }
    size -= IPV6_HEADER_LEN;

    nw_proto = nh->ip6_nxt;
    data = nh + 1;

    /*
     * Verifies that the routing header is present in the IPv6
     * extension headers and that its type is SRv6.
     */
    if (!parse_ipv6_ext_hdrs(&data, &size, &nw_proto, &nw_frag,
                             NULL, &rt_hdr)) {
        goto err;
    }

    if (!rt_hdr || rt_hdr->type != IPV6_SRCRT_TYPE_4) {
        goto err;
    }

    /* Only packets that have reached their final segment are accepted. */
    if (rt_hdr->segments_left > 0) {
        VLOG_WARN_RL(&err_rl, "invalid srv6 segments_left=%d\n",
                     rt_hdr->segments_left);
        goto err;
    }

    if (rt_hdr->nexthdr == IPPROTO_IPIP) {
        packet->packet_type = htonl(PT_IPV4);
    } else if (rt_hdr->nexthdr == IPPROTO_IPV6) {
        packet->packet_type = htonl(PT_IPV6);
    } else {
        goto err;
    }

    pkt_metadata_init_tnl(md);
    /* 'hlen' is only meaningful when the metadata extraction succeeds, so
     * never strip headers based on it after a failure. */
    if (!netdev_tnl_ip_extract_tnl_md(packet, tnl, &hlen)) {
        goto err;
    }

    dp_packet_reset_packet(packet, hlen);

    return packet;
err:
    dp_packet_delete(packet);
    return NULL;
}

struct dp_packet *
netdev_vxlan_pop_header(struct dp_packet *packet)
{
Expand Down
10 changes: 10 additions & 0 deletions lib/netdev-native-tnl.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,16 @@ netdev_gtpu_build_header(const struct netdev *netdev,
struct ovs_action_push_tnl *data,
const struct netdev_tnl_build_header_params *p);

struct dp_packet *netdev_srv6_pop_header(struct dp_packet *);

void netdev_srv6_push_header(const struct netdev *,
struct dp_packet *,
const struct ovs_action_push_tnl *);

int netdev_srv6_build_header(const struct netdev *,
struct ovs_action_push_tnl *,
const struct netdev_tnl_build_header_params *);

void
netdev_tnl_push_udp_header(const struct netdev *netdev,
struct dp_packet *packet,
Expand Down
53 changes: 53 additions & 0 deletions lib/netdev-vport.c
Original file line number Diff line number Diff line change
Expand Up @@ -424,6 +424,35 @@ parse_tunnel_ip(const char *value, bool accept_mcast, bool *flow,
return 0;
}

/* Parses 's', a comma-separated list of IPv6 addresses, into 'segs' and
 * stores the number of parsed addresses in '*num_segs'.
 *
 * 'segs' must have room for SRV6_MAX_SEGS entries.  Empty tokens (leading,
 * trailing or repeated commas) are skipped.  's' is never modified, so the
 * caller can keep using the original option string, e.g. in error messages.
 *
 * Returns 0 on success.  Returns EINVAL if 's' is NULL, if it holds more than
 * SRV6_MAX_SEGS addresses, or if a token is not a valid IPv6 address; in the
 * last two cases '*num_segs' holds the number of addresses parsed before the
 * failure. */
static int
parse_srv6_segs(char *s, struct in6_addr *segs, uint8_t *num_segs)
{
    const char *p;

    if (!s) {
        return EINVAL;
    }

    *num_segs = 0;

    p = s;
    while (*p) {
        char addr[INET6_ADDRSTRLEN];
        size_t len = strcspn(p, ",");

        if (!len) {
            /* Empty token: step over the separator. */
            p++;
            continue;
        }

        if (*num_segs == SRV6_MAX_SEGS) {
            return EINVAL;
        }

        /* A token that does not fit into INET6_ADDRSTRLEN bytes cannot be a
         * valid textual IPv6 address. */
        if (len >= sizeof addr) {
            return EINVAL;
        }

        /* Parse a private, NUL-terminated copy of the token instead of
         * cutting the caller's string in place. */
        memcpy(addr, p, len);
        addr[len] = '\0';

        if (inet_pton(AF_INET6, addr, segs) != 1) {
            return EINVAL;
        }

        segs++;
        (*num_segs)++;
        p += len;
    }

    return 0;
}

enum tunnel_layers {
TNL_L2 = 1 << 0, /* 1 if a tunnel type can carry Ethernet traffic. */
TNL_L3 = 1 << 1 /* 1 if a tunnel type can carry L3 traffic. */
Expand All @@ -443,6 +472,8 @@ tunnel_supported_layers(const char *type,
return TNL_L3;
} else if (!strcmp(type, "bareudp")) {
return TNL_L3;
} else if (!strcmp(type, "srv6")) {
return TNL_L3;
} else {
return TNL_L2;
}
Expand Down Expand Up @@ -750,6 +781,17 @@ set_tunnel_config(struct netdev *dev_, const struct smap *args, char **errp)
goto out;
}
}
} else if (!strcmp(node->key, "srv6_segs")) {
err = parse_srv6_segs(node->value,
tnl_cfg.srv6_segs,
&tnl_cfg.srv6_num_segs);

switch (err) {
case EINVAL:
ds_put_format(&errors, "%s: bad %s 'srv6_segs'\n",
name, node->value);
break;
}
} else if (!strcmp(node->key, "payload_type")) {
if (!strcmp(node->value, "mpls")) {
tnl_cfg.payload_ethertype = htons(ETH_TYPE_MPLS);
Expand Down Expand Up @@ -1290,6 +1332,17 @@ netdev_vport_tunnel_register(void)
},
{{NULL, NULL, 0, 0}}
},
{ "srv6_sys",
{
TUNNEL_FUNCTIONS_COMMON,
.type = "srv6",
.build_header = netdev_srv6_build_header,
.push_header = netdev_srv6_push_header,
.pop_header = netdev_srv6_pop_header,
.get_ifindex = NETDEV_VPORT_GET_IFINDEX,
},
{{NULL, NULL, 0, 0}}
},

};
static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
Expand Down
4 changes: 4 additions & 0 deletions lib/netdev.h
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,10 @@ struct netdev_tunnel_config {
bool erspan_idx_flow;
bool erspan_dir_flow;
bool erspan_hwid_flow;

uint8_t srv6_num_segs;
#define SRV6_MAX_SEGS 6
struct in6_addr srv6_segs[SRV6_MAX_SEGS];
};

void netdev_run(void);
Expand Down
15 changes: 15 additions & 0 deletions lib/packets.h
Original file line number Diff line number Diff line change
Expand Up @@ -706,6 +706,10 @@ char *ip_parse_cidr_len(const char *s, int *n, ovs_be32 *ip,
#define IPPROTO_IGMP 2
#endif

#ifndef IPPROTO_IPIP
#define IPPROTO_IPIP 4
#endif

#ifndef IPPROTO_UDPLITE
#define IPPROTO_UDPLITE 136
#endif
Expand Down Expand Up @@ -1523,6 +1527,17 @@ BUILD_ASSERT_DECL(sizeof(struct vxlanhdr) == 8);
#define VXLAN_F_GPE 0x4000
#define VXLAN_HF_GPE 0x04000000

/* SRv6 protocol header.
 *
 * 'struct srv6_base_hdr' is the fixed-size leading part of the Segment
 * Routing Header (see RFC 8754).  The SRv6 header builder writes the segment
 * list, one 16-byte IPv6 address per segment, immediately after it. */
/* Routing header 'type' value that identifies a Segment Routing Header. */
#define IPV6_SRCRT_TYPE_4 4
/* Size in bytes of 'struct srv6_base_hdr', enforced by the assertion below. */
#define SRV6_BASE_HDR_LEN 8
struct srv6_base_hdr {
/* Generic IPv6 routing header: nexthdr, hdrlen, type, segments_left. */
struct ip6_rt_hdr rt_hdr;
/* Index of the last segment list entry; the header builder sets it to
 * the number of segments minus one. */
uint8_t last_entry;
/* Written as 0 by the header builder. */
uint8_t flags;
/* Written as 0 by the header builder. */
ovs_be16 tag;
};
BUILD_ASSERT_DECL(sizeof(struct srv6_base_hdr) == SRV6_BASE_HDR_LEN);

/* Input values for PACKET_TYPE macros have to be in host byte order.
* The _BE postfix indicates result is in network byte order. Otherwise result
* is in host byte order. */
Expand Down
Loading

0 comments on commit 03fc1ad

Please sign in to comment.