Skip to content

Commit

Permalink
Merge pull request #2 from Azure/master
Browse files Browse the repository at this point in the history
pull from origin
  • Loading branch information
shine4chen authored Mar 11, 2019
2 parents 9326978 + 5984e3a commit c0445f3
Show file tree
Hide file tree
Showing 21 changed files with 1,523 additions and 164 deletions.
19 changes: 13 additions & 6 deletions doc/swss-schema.md
Original file line number Diff line number Diff line change
Expand Up @@ -695,10 +695,6 @@ Stores information for physical switch ports managed by the switch chip. Ports t

key = WARM_RESTART:name ; name is the name of SONiC docker or "system" for global configuration.

enable = "true" / "false" ; Default value as false.
; If "system" warm start knob is true, docker level knob will be ignored.
; If "system" warm start knob is false, docker level knob takes effect.

neighsyncd_timer = 1*4DIGIT ; neighsyncd_timer is the timer used for neighsyncd during the warm restart.
; Timer is started after we restored the neighborTable to internal data structures.
; neighborsyncd then starts to read all linux kernel entries and mark the entries in
Expand All @@ -715,8 +711,8 @@ Stores information for physical switch ports managed by the switch chip. Ports t
; Supported range: 1-3600.

teamsyncd_timer = 1*4DIGIT ; teamsyncd_timer holds the time interval utilized by teamsyncd during warm-restart episodes.
; The timer is started when teamsyncd starts. During the timer interval teamsyncd
; will preserve all LAG interface changes, but it will not apply them. The changes
; The timer is started when teamsyncd starts. During the timer interval teamsyncd
; will preserve all LAG interface changes, but it will not apply them. The changes
; will only be applied when the timer expires. While the changes are being applied, the stale
; LAG entries will be removed and the new LAG entries will be created.
; Supported range: 1-9999. 0 is invalid
Expand Down Expand Up @@ -762,6 +758,17 @@ Stores information for physical switch ports managed by the switch chip. Ports t
key = MGMT_PORT_TABLE|ifname ; ifname must be unique across PORT,INTF,VLAN,LAG TABLES
oper_status = "down" / "up" ; oper status

### WARM\_RESTART\_ENABLE\_TABLE
;Stores system warm start and docker warm start enable/disable configuration
;The configuration is persistent across warm reboot but not cold reboot.
;Status: work in progress

key = WARM_RESTART_ENABLE_TABLE:name ; name is the name of SONiC docker or "system" for global configuration.

enable = "true" / "false" ; Default value is false.
; If "system" warm start knob is true, docker level knob will be ignored.
; If "system" warm start knob is false, docker level knob takes effect.

### WARM\_RESTART\_TABLE
;Stores application and orchdaemon warm start status
;Status: work in progress
Expand Down
297 changes: 247 additions & 50 deletions fpmsyncd/routesync.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,17 @@
#include "producerstatetable.h"
#include "fpmsyncd/fpmlink.h"
#include "fpmsyncd/routesync.h"
#include <string.h>

using namespace std;
using namespace swss;

#define VXLAN_IF_NAME_PREFIX "brvxlan"

RouteSync::RouteSync(RedisPipeline *pipeline) :
m_routeTable(pipeline, APP_ROUTE_TABLE_NAME, true),
m_routeTable(pipeline, APP_ROUTE_TABLE_NAME, true),
m_vnet_routeTable(pipeline, APP_VNET_RT_TABLE_NAME, true),
m_vnet_tunnelTable(pipeline, APP_VNET_RT_TUNNEL_TABLE_NAME, true),
m_warmStartHelper(pipeline, &m_routeTable, APP_ROUTE_TABLE_NAME, "bgp", "bgp")
{
m_nl_sock = nl_socket_alloc();
Expand All @@ -25,20 +30,41 @@ RouteSync::RouteSync(RedisPipeline *pipeline) :
/*
 * Entry point for a received route message: checks the address family and
 * hands the message to onRouteMsg() or onVnetRouteMsg() depending on the
 * routing-table index carried by the route object.
 * @arg nlmsg_type Message type, forwarded unchanged to the selected handler
 * @arg obj Netlink object, treated as a struct rtnl_route
 *
 * NOTE(review): this span appears to interleave old and new lines of a diff
 * (see the two consecutive SWSS_LOG_INFO calls below) - confirm against the
 * committed file before relying on it.
 */
void RouteSync::onMsg(int nlmsg_type, struct nl_object *obj)
{
struct rtnl_route *route_obj = (struct rtnl_route *)obj;
struct nl_addr *dip;
char destipprefix[MAX_ADDR_SIZE + 1] = {0};

/* Render the destination prefix as text; it is only used for logging here */
dip = rtnl_route_get_dst(route_obj);
nl_addr2str(dip, destipprefix, MAX_ADDR_SIZE);
SWSS_LOG_DEBUG("Receive new route message dest ip prefix: %s\n", destipprefix);

/* Supports IPv4 or IPv6 address, otherwise return immediately */
auto family = rtnl_route_get_family(route_obj);
if (family != AF_INET && family != AF_INET6)
{
/* NOTE(review): both statements log the same condition; presumably only one is meant to remain */
SWSS_LOG_INFO("Unknown route family support: %s (object: %s)\n", destipprefix, nl_object_get_type(obj));
SWSS_LOG_INFO("Unknown route family support (object: %s)\n", nl_object_get_type(obj));
return;
}

/* Get the index of routing table */
unsigned int table_index = rtnl_route_get_table(route_obj);

/*
 * Default routing table. This line may have problems.
 * NOTE(review): only RT_TABLE_UNSPEC is treated as the default table; every
 * other table index is handled as a VNET route - confirm this matches the
 * table IDs the sender actually uses.
 */
if (table_index == RT_TABLE_UNSPEC)
{
onRouteMsg(nlmsg_type, obj);
}
/* VNET route. We will handle VRF routes in the future. */
else
{
onVnetRouteMsg(nlmsg_type, obj);
}
}

/* Handle regular route (without vnet) */
void RouteSync::onRouteMsg(int nlmsg_type, struct nl_object *obj)
{
struct rtnl_route *route_obj = (struct rtnl_route *)obj;
struct nl_addr *dip;
char destipprefix[MAX_ADDR_SIZE + 1] = {0};

dip = rtnl_route_get_dst(route_obj);
nl_addr2str(dip, destipprefix, MAX_ADDR_SIZE);
SWSS_LOG_DEBUG("Receive new route message dest ip prefix: %s\n", destipprefix);

/*
* Upon arrival of a delete msg we could either push the change right away,
* or we could opt to defer it if we are going through a warm-reboot cycle.
Expand Down Expand Up @@ -74,13 +100,13 @@ void RouteSync::onMsg(int nlmsg_type, struct nl_object *obj)
switch (rtnl_route_get_type(route_obj))
{
case RTN_BLACKHOLE:
{
vector<FieldValueTuple> fvVector;
FieldValueTuple fv("blackhole", "true");
fvVector.push_back(fv);
m_routeTable.set(destipprefix, fvVector);
return;
}
{
vector<FieldValueTuple> fvVector;
FieldValueTuple fv("blackhole", "true");
fvVector.push_back(fv);
m_routeTable.set(destipprefix, fvVector);
return;
}
case RTN_UNICAST:
break;

Expand All @@ -94,48 +120,16 @@ void RouteSync::onMsg(int nlmsg_type, struct nl_object *obj)
return;
}

/* Geting nexthop lists */
string nexthops;
string ifnames;

struct nl_list_head *nhs = rtnl_route_get_nexthops(route_obj);
if (!nhs)
{
SWSS_LOG_INFO("Nexthop list is empty for %s\n", destipprefix);
return;
}

char ifname[IFNAMSIZ] = {0};
for (int i = 0; i < rtnl_route_get_nnexthops(route_obj); i++)
{
struct rtnl_nexthop *nexthop = rtnl_route_nexthop_n(route_obj, i);
struct nl_addr *addr = rtnl_route_nh_get_gateway(nexthop);
unsigned int ifindex = rtnl_route_nh_get_ifindex(nexthop);

if (addr != NULL)
{
char gwipprefix[MAX_ADDR_SIZE + 1] = {0};
nl_addr2str(addr, gwipprefix, MAX_ADDR_SIZE);
nexthops += gwipprefix;
}

rtnl_link_i2name(m_link_cache, ifindex, ifname, IFNAMSIZ);
/* Cannot get ifname. Possibly interfaces get re-created. */
if (!strlen(ifname))
{
rtnl_link_alloc_cache(m_nl_sock, AF_UNSPEC, &m_link_cache);
rtnl_link_i2name(m_link_cache, ifindex, ifname, IFNAMSIZ);
if (!strlen(ifname))
strcpy(ifname, "unknown");
}
ifnames += ifname;

if (i + 1 < rtnl_route_get_nnexthops(route_obj))
{
nexthops += string(",");
ifnames += string(",");
}
}
/* Get nexthop lists */
string nexthops = getNextHopGw(route_obj);
string ifnames = getNextHopIf(route_obj);

vector<FieldValueTuple> fvVector;
FieldValueTuple nh("nexthop", nexthops);
Expand Down Expand Up @@ -166,3 +160,206 @@ void RouteSync::onMsg(int nlmsg_type, struct nl_object *obj)
m_warmStartHelper.insertRefreshMap(kfv);
}
}

/*
 * Handle vnet route
 * @arg nlmsg_type  Netlink message type; only RTM_NEWROUTE and RTM_DELROUTE are acted on
 * @arg obj         Netlink object, treated as a struct rtnl_route
 *
 * The table key is "<vrf name>:<destination prefix>", where the VRF name is
 * resolved from the route's table index via getIfName().
 *  - RTM_DELROUTE removes the key from both the VNET route table and the
 *    VNET tunnel table.
 *  - RTM_NEWROUTE (unicast only) writes either an "endpoint" field to the
 *    tunnel table, when the next hop interface list starts with
 *    VXLAN_IF_NAME_PREFIX, or an "ifname" field (plus "nexthop" when at least
 *    one gateway is present) to the VNET route table.
 * Everything else is logged and ignored.
 */
void RouteSync::onVnetRouteMsg(int nlmsg_type, struct nl_object *obj)
{
    struct rtnl_route *route_obj = (struct rtnl_route *)obj;

    /* Get the destination IP prefix */
    struct nl_addr *dip = rtnl_route_get_dst(route_obj);
    char destipprefix[MAX_ADDR_SIZE + 1] = {0};
    nl_addr2str(dip, destipprefix, MAX_ADDR_SIZE);

    /* Get VRF index and VRF name */
    unsigned int vrf_index = rtnl_route_get_table(route_obj);
    char vrf_name[IFNAMSIZ] = {0};

    /* If we cannot get the VRF name */
    if (!getIfName(vrf_index, vrf_name, IFNAMSIZ))
    {
        SWSS_LOG_INFO("Fail to get the VRF name (table ID %u)\n", vrf_index);
        return;
    }

    /* vrf name = vnet name */
    string vnet_dip = vrf_name + string(":") + destipprefix;
    SWSS_LOG_DEBUG("Receive new vnet route message %s\n", vnet_dip.c_str());

    if (nlmsg_type == RTM_DELROUTE)
    {
        /* Duplicated delete as we do not know if it is a VXLAN tunnel route */
        m_vnet_routeTable.del(vnet_dip);
        m_vnet_tunnelTable.del(vnet_dip);
        return;
    }
    else if (nlmsg_type != RTM_NEWROUTE)
    {
        SWSS_LOG_INFO("Unknown message-type: %d for %s\n", nlmsg_type, vnet_dip.c_str());
        return;
    }

    switch (rtnl_route_get_type(route_obj))
    {
        case RTN_UNICAST:
            break;

        /* We may support blackhole in the future */
        case RTN_BLACKHOLE:
            SWSS_LOG_INFO("Blackhole route isn't supported yet (%s)\n", vnet_dip.c_str());
            return;

        case RTN_MULTICAST:
        case RTN_BROADCAST:
        case RTN_LOCAL:
            SWSS_LOG_INFO("BUM routes aren't supported yet (%s)\n", vnet_dip.c_str());
            return;

        default:
            return;
    }

    struct nl_list_head *nhs = rtnl_route_get_nexthops(route_obj);
    if (!nhs)
    {
        SWSS_LOG_INFO("Nexthop list is empty for %s\n", vnet_dip.c_str());
        return;
    }

    /* Get nexthop lists */
    string nexthops = getNextHopGw(route_obj);
    string ifnames = getNextHopIf(route_obj);

    /* If the first interface name starts with VXLAN_IF_NAME_PREFIX,
       the route is a VXLAN tunnel route. The comparison is anchored at
       position 0 so a non-matching list is not scanned to its end. */
    if (ifnames.compare(0, sizeof(VXLAN_IF_NAME_PREFIX) - 1, VXLAN_IF_NAME_PREFIX) == 0)
    {
        vector<FieldValueTuple> fvVector;
        FieldValueTuple ep("endpoint", nexthops);
        fvVector.push_back(ep);

        m_vnet_tunnelTable.set(vnet_dip, fvVector);
        SWSS_LOG_DEBUG("%s set msg: %s %s\n",
                       APP_VNET_RT_TUNNEL_TABLE_NAME, vnet_dip.c_str(), nexthops.c_str());
        return;
    }
    /* Regular VNET route */
    else
    {
        vector<FieldValueTuple> fvVector;
        FieldValueTuple idx("ifname", ifnames);
        fvVector.push_back(idx);

        /* The route has at least one next hop gateway only if the joined list
           contains something other than ',' separators. This also holds for a
           route with zero next hops, where the list is the empty string. */
        if (nexthops.find_first_not_of(',') != string::npos)
        {
            FieldValueTuple nh("nexthop", nexthops);
            fvVector.push_back(nh);
            SWSS_LOG_DEBUG("%s set msg: %s %s %s\n",
                           APP_VNET_RT_TABLE_NAME, vnet_dip.c_str(), ifnames.c_str(), nexthops.c_str());
        }
        else
        {
            SWSS_LOG_DEBUG("%s set msg: %s %s\n",
                           APP_VNET_RT_TABLE_NAME, vnet_dip.c_str(), ifnames.c_str());
        }

        m_vnet_routeTable.set(vnet_dip, fvVector);
    }
}

/*
* Get interface/VRF name based on interface/VRF index
* @arg if_index Interface/VRF index
* @arg if_name String to store interface name
* @arg name_len Length of destination string, including terminating zero byte
*
* Return true if we successfully gets the interface/VRF name.
*/
bool RouteSync::getIfName(int if_index, char *if_name, size_t name_len)
{
if (!if_name || name_len == 0)
{
return false;
}

memset(if_name, 0, name_len);

/* Cannot get interface name. Possibly the interface gets re-created. */
if (!rtnl_link_i2name(m_link_cache, if_index, if_name, name_len))
{
rtnl_link_alloc_cache(m_nl_sock, AF_UNSPEC, &m_link_cache);
if (!rtnl_link_i2name(m_link_cache, if_index, if_name, name_len))
{
return false;
}
}

return true;
}

/*
 * Build the comma-separated list of next hop gateway addresses
 * @arg route_obj   route object
 *
 * Return gw0 + "," + gw1 + .... + "," + gwN. A next hop without a gateway
 * contributes an empty entry, so the separators are always emitted.
 */
string RouteSync::getNextHopGw(struct rtnl_route *route_obj)
{
    const int hop_count = rtnl_route_get_nnexthops(route_obj);
    string gateways;

    for (int idx = 0; idx < hop_count; ++idx)
    {
        /* Separator goes in front of every entry except the first */
        if (idx > 0)
        {
            gateways += ",";
        }

        struct nl_addr *gw = rtnl_route_nh_get_gateway(rtnl_route_nexthop_n(route_obj, idx));
        if (gw == NULL)
        {
            /* No gateway on this next hop: leave its slot empty */
            continue;
        }

        char gw_text[MAX_ADDR_SIZE + 1] = {0};
        nl_addr2str(gw, gw_text, MAX_ADDR_SIZE);
        gateways += gw_text;
    }

    return gateways;
}

/*
 * Build the comma-separated list of next hop interface names
 * @arg route_obj   route object
 *
 * Return if0 + "," + if1 + .... + "," + ifN. An interface whose name cannot
 * be resolved is reported as "unknown".
 */
string RouteSync::getNextHopIf(struct rtnl_route *route_obj)
{
    const int hop_count = rtnl_route_get_nnexthops(route_obj);
    string names;

    for (int idx = 0; idx < hop_count; ++idx)
    {
        /* Separator goes in front of every entry except the first */
        if (idx > 0)
        {
            names += ",";
        }

        /* Get the ID of next hop interface */
        unsigned link_index = rtnl_route_nh_get_ifindex(rtnl_route_nexthop_n(route_obj, idx));
        char link_name[IFNAMSIZ] = {0};

        /* Fall back to a placeholder if the name lookup fails */
        if (getIfName(link_index, link_name, IFNAMSIZ))
        {
            names += link_name;
        }
        else
        {
            names += "unknown";
        }
    }

    return names;
}
Loading

0 comments on commit c0445f3

Please sign in to comment.