Skip to content

Commit

Permalink
Merge branch 'bridge-mdb-bulk-delete'
Browse files Browse the repository at this point in the history
Ido Schimmel says:

====================
Add MDB bulk deletion support

This patchset adds MDB bulk deletion support, allowing user space to
request the deletion of matching entries instead of dumping the entire
MDB and issuing a separate deletion request for each matching entry.
Support is added in both the bridge and VXLAN drivers in a similar
fashion to the existing FDB bulk deletion support.

The parameters according to which bulk deletion can be performed are
similar to the FDB ones, namely: Destination port, VLAN ID, state (e.g.,
"permanent"), routing protocol, source / destination VNI, destination IP
and UDP port. Flushing based on flags (e.g., "offload", "fast_leave",
"added_by_star_ex", "blocked") is not currently supported, but can be
added in the future, if a use case arises.

Patch #1 adds a new uAPI attribute to allow specifying the state mask
according to which bulk deletion will be performed, if any.

Patch #2 adds a new policy according to which bulk deletion requests
(with 'NLM_F_BULK' flag set) will be parsed.

Patches #3-#4 add a new NDO for MDB bulk deletion and invoke it from the
rtnetlink code when a bulk deletion request is made.

Patches #5-#6 implement the MDB bulk deletion NDO in the bridge and
VXLAN drivers, respectively.

Patch #7 allows user space to issue MDB bulk deletion requests by no
longer rejecting the 'NLM_F_BULK' flag when it is set in 'RTM_DELMDB'
requests.

Patches #8-grate-driver#9 add selftests for both drivers, for both good and bad
flows.

iproute2 changes can be found here [1].

https://github.com/idosch/iproute2/tree/submit/mdb_flush_v1
====================

Signed-off-by: David S. Miller <[email protected]>
  • Loading branch information
davem330 committed Dec 20, 2023
2 parents b6895d0 + c3e87a7 commit d7a39d3
Show file tree
Hide file tree
Showing 11 changed files with 749 additions and 31 deletions.
1 change: 1 addition & 0 deletions drivers/net/vxlan/vxlan_core.c
Original file line number Diff line number Diff line change
Expand Up @@ -3235,6 +3235,7 @@ static const struct net_device_ops vxlan_netdev_ether_ops = {
.ndo_fdb_get = vxlan_fdb_get,
.ndo_mdb_add = vxlan_mdb_add,
.ndo_mdb_del = vxlan_mdb_del,
.ndo_mdb_del_bulk = vxlan_mdb_del_bulk,
.ndo_mdb_dump = vxlan_mdb_dump,
.ndo_mdb_get = vxlan_mdb_get,
.ndo_fill_metadata_dst = vxlan_fill_metadata_dst,
Expand Down
174 changes: 150 additions & 24 deletions drivers/net/vxlan/vxlan_mdb.c
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,14 @@ struct vxlan_mdb_config {
u8 rt_protocol;
};

struct vxlan_mdb_flush_desc {
union vxlan_addr remote_ip;
__be32 src_vni;
__be32 remote_vni;
__be16 remote_port;
u8 rt_protocol;
};

static const struct rhashtable_params vxlan_mdb_rht_params = {
.head_offset = offsetof(struct vxlan_mdb_entry, rhnode),
.key_offset = offsetof(struct vxlan_mdb_entry, key),
Expand Down Expand Up @@ -1306,6 +1314,145 @@ int vxlan_mdb_del(struct net_device *dev, struct nlattr *tb[],
return err;
}

static const struct nla_policy
vxlan_mdbe_attrs_del_bulk_pol[MDBE_ATTR_MAX + 1] = {
[MDBE_ATTR_RTPROT] = NLA_POLICY_MIN(NLA_U8, RTPROT_STATIC),
[MDBE_ATTR_DST] = NLA_POLICY_RANGE(NLA_BINARY,
sizeof(struct in_addr),
sizeof(struct in6_addr)),
[MDBE_ATTR_DST_PORT] = { .type = NLA_U16 },
[MDBE_ATTR_VNI] = NLA_POLICY_FULL_RANGE(NLA_U32, &vni_range),
[MDBE_ATTR_SRC_VNI] = NLA_POLICY_FULL_RANGE(NLA_U32, &vni_range),
[MDBE_ATTR_STATE_MASK] = NLA_POLICY_MASK(NLA_U8, MDB_PERMANENT),
};

static int vxlan_mdb_flush_desc_init(struct vxlan_dev *vxlan,
struct vxlan_mdb_flush_desc *desc,
struct nlattr *tb[],
struct netlink_ext_ack *extack)
{
struct br_mdb_entry *entry = nla_data(tb[MDBA_SET_ENTRY]);
struct nlattr *mdbe_attrs[MDBE_ATTR_MAX + 1];
int err;

if (entry->ifindex && entry->ifindex != vxlan->dev->ifindex) {
NL_SET_ERR_MSG_MOD(extack, "Invalid port net device");
return -EINVAL;
}

if (entry->vid) {
NL_SET_ERR_MSG_MOD(extack, "VID must not be specified");
return -EINVAL;
}

if (!tb[MDBA_SET_ENTRY_ATTRS])
return 0;

err = nla_parse_nested(mdbe_attrs, MDBE_ATTR_MAX,
tb[MDBA_SET_ENTRY_ATTRS],
vxlan_mdbe_attrs_del_bulk_pol, extack);
if (err)
return err;

if (mdbe_attrs[MDBE_ATTR_STATE_MASK]) {
u8 state_mask = nla_get_u8(mdbe_attrs[MDBE_ATTR_STATE_MASK]);

if ((state_mask & MDB_PERMANENT) && !(entry->state & MDB_PERMANENT)) {
NL_SET_ERR_MSG_MOD(extack, "Only permanent MDB entries are supported");
return -EINVAL;
}
}

if (mdbe_attrs[MDBE_ATTR_RTPROT])
desc->rt_protocol = nla_get_u8(mdbe_attrs[MDBE_ATTR_RTPROT]);

if (mdbe_attrs[MDBE_ATTR_DST])
vxlan_nla_get_addr(&desc->remote_ip, mdbe_attrs[MDBE_ATTR_DST]);

if (mdbe_attrs[MDBE_ATTR_DST_PORT])
desc->remote_port =
cpu_to_be16(nla_get_u16(mdbe_attrs[MDBE_ATTR_DST_PORT]));

if (mdbe_attrs[MDBE_ATTR_VNI])
desc->remote_vni =
cpu_to_be32(nla_get_u32(mdbe_attrs[MDBE_ATTR_VNI]));

if (mdbe_attrs[MDBE_ATTR_SRC_VNI])
desc->src_vni =
cpu_to_be32(nla_get_u32(mdbe_attrs[MDBE_ATTR_SRC_VNI]));

return 0;
}

static void vxlan_mdb_remotes_flush(struct vxlan_dev *vxlan,
struct vxlan_mdb_entry *mdb_entry,
const struct vxlan_mdb_flush_desc *desc)
{
struct vxlan_mdb_remote *remote, *tmp;

list_for_each_entry_safe(remote, tmp, &mdb_entry->remotes, list) {
struct vxlan_rdst *rd = rtnl_dereference(remote->rd);
__be32 remote_vni;

if (desc->remote_ip.sa.sa_family &&
!vxlan_addr_equal(&desc->remote_ip, &rd->remote_ip))
continue;

/* Encapsulation is performed with source VNI if remote VNI
* is not set.
*/
remote_vni = rd->remote_vni ? : mdb_entry->key.vni;
if (desc->remote_vni && desc->remote_vni != remote_vni)
continue;

if (desc->remote_port && desc->remote_port != rd->remote_port)
continue;

if (desc->rt_protocol &&
desc->rt_protocol != remote->rt_protocol)
continue;

vxlan_mdb_remote_del(vxlan, mdb_entry, remote);
}
}

static void vxlan_mdb_flush(struct vxlan_dev *vxlan,
const struct vxlan_mdb_flush_desc *desc)
{
struct vxlan_mdb_entry *mdb_entry;
struct hlist_node *tmp;

/* The removal of an entry cannot trigger the removal of another entry
* since entries are always added to the head of the list.
*/
hlist_for_each_entry_safe(mdb_entry, tmp, &vxlan->mdb_list, mdb_node) {
if (desc->src_vni && desc->src_vni != mdb_entry->key.vni)
continue;

vxlan_mdb_remotes_flush(vxlan, mdb_entry, desc);
/* Entry will only be removed if its remotes list is empty. */
vxlan_mdb_entry_put(vxlan, mdb_entry);
}
}

int vxlan_mdb_del_bulk(struct net_device *dev, struct nlattr *tb[],
struct netlink_ext_ack *extack)
{
struct vxlan_dev *vxlan = netdev_priv(dev);
struct vxlan_mdb_flush_desc desc = {};
int err;

ASSERT_RTNL();

err = vxlan_mdb_flush_desc_init(vxlan, &desc, tb, extack);
if (err)
return err;

vxlan_mdb_flush(vxlan, &desc);

return 0;
}

static const struct nla_policy vxlan_mdbe_attrs_get_pol[MDBE_ATTR_MAX + 1] = {
[MDBE_ATTR_SOURCE] = NLA_POLICY_RANGE(NLA_BINARY,
sizeof(struct in_addr),
Expand Down Expand Up @@ -1575,29 +1722,6 @@ static void vxlan_mdb_check_empty(void *ptr, void *arg)
WARN_ON_ONCE(1);
}

static void vxlan_mdb_remotes_flush(struct vxlan_dev *vxlan,
struct vxlan_mdb_entry *mdb_entry)
{
struct vxlan_mdb_remote *remote, *tmp;

list_for_each_entry_safe(remote, tmp, &mdb_entry->remotes, list)
vxlan_mdb_remote_del(vxlan, mdb_entry, remote);
}

static void vxlan_mdb_entries_flush(struct vxlan_dev *vxlan)
{
struct vxlan_mdb_entry *mdb_entry;
struct hlist_node *tmp;

/* The removal of an entry cannot trigger the removal of another entry
* since entries are always added to the head of the list.
*/
hlist_for_each_entry_safe(mdb_entry, tmp, &vxlan->mdb_list, mdb_node) {
vxlan_mdb_remotes_flush(vxlan, mdb_entry);
vxlan_mdb_entry_put(vxlan, mdb_entry);
}
}

int vxlan_mdb_init(struct vxlan_dev *vxlan)
{
int err;
Expand All @@ -1613,7 +1737,9 @@ int vxlan_mdb_init(struct vxlan_dev *vxlan)

void vxlan_mdb_fini(struct vxlan_dev *vxlan)
{
vxlan_mdb_entries_flush(vxlan);
struct vxlan_mdb_flush_desc desc = {};

vxlan_mdb_flush(vxlan, &desc);
WARN_ON_ONCE(vxlan->cfg.flags & VXLAN_F_MDB);
rhashtable_free_and_destroy(&vxlan->mdb_tbl, vxlan_mdb_check_empty,
NULL);
Expand Down
2 changes: 2 additions & 0 deletions drivers/net/vxlan/vxlan_private.h
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,8 @@ int vxlan_mdb_add(struct net_device *dev, struct nlattr *tb[], u16 nlmsg_flags,
struct netlink_ext_ack *extack);
int vxlan_mdb_del(struct net_device *dev, struct nlattr *tb[],
struct netlink_ext_ack *extack);
int vxlan_mdb_del_bulk(struct net_device *dev, struct nlattr *tb[],
struct netlink_ext_ack *extack);
int vxlan_mdb_get(struct net_device *dev, struct nlattr *tb[], u32 portid,
u32 seq, struct netlink_ext_ack *extack);
struct vxlan_mdb_entry *vxlan_mdb_entry_skb_get(struct vxlan_dev *vxlan,
Expand Down
6 changes: 6 additions & 0 deletions include/linux/netdevice.h
Original file line number Diff line number Diff line change
Expand Up @@ -1329,6 +1329,9 @@ struct netdev_net_notifier {
* int (*ndo_mdb_del)(struct net_device *dev, struct nlattr *tb[],
* struct netlink_ext_ack *extack);
* Deletes the MDB entry from dev.
* int (*ndo_mdb_del_bulk)(struct net_device *dev, struct nlattr *tb[],
* struct netlink_ext_ack *extack);
* Bulk deletes MDB entries from dev.
* int (*ndo_mdb_dump)(struct net_device *dev, struct sk_buff *skb,
* struct netlink_callback *cb);
* Dumps MDB entries from dev. The first argument (marker) in the netlink
Expand Down Expand Up @@ -1611,6 +1614,9 @@ struct net_device_ops {
int (*ndo_mdb_del)(struct net_device *dev,
struct nlattr *tb[],
struct netlink_ext_ack *extack);
int (*ndo_mdb_del_bulk)(struct net_device *dev,
struct nlattr *tb[],
struct netlink_ext_ack *extack);
int (*ndo_mdb_dump)(struct net_device *dev,
struct sk_buff *skb,
struct netlink_callback *cb);
Expand Down
1 change: 1 addition & 0 deletions include/uapi/linux/if_bridge.h
Original file line number Diff line number Diff line change
Expand Up @@ -757,6 +757,7 @@ enum {
MDBE_ATTR_VNI,
MDBE_ATTR_IFINDEX,
MDBE_ATTR_SRC_VNI,
MDBE_ATTR_STATE_MASK,
__MDBE_ATTR_MAX,
};
#define MDBE_ATTR_MAX (__MDBE_ATTR_MAX - 1)
Expand Down
1 change: 1 addition & 0 deletions net/bridge/br_device.c
Original file line number Diff line number Diff line change
Expand Up @@ -471,6 +471,7 @@ static const struct net_device_ops br_netdev_ops = {
.ndo_fdb_get = br_fdb_get,
.ndo_mdb_add = br_mdb_add,
.ndo_mdb_del = br_mdb_del,
.ndo_mdb_del_bulk = br_mdb_del_bulk,
.ndo_mdb_dump = br_mdb_dump,
.ndo_mdb_get = br_mdb_get,
.ndo_bridge_getlink = br_getlink,
Expand Down
133 changes: 133 additions & 0 deletions net/bridge/br_mdb.c
Original file line number Diff line number Diff line change
Expand Up @@ -1412,6 +1412,139 @@ int br_mdb_del(struct net_device *dev, struct nlattr *tb[],
return err;
}

struct br_mdb_flush_desc {
u32 port_ifindex;
u16 vid;
u8 rt_protocol;
u8 state;
u8 state_mask;
};

static const struct nla_policy br_mdbe_attrs_del_bulk_pol[MDBE_ATTR_MAX + 1] = {
[MDBE_ATTR_RTPROT] = NLA_POLICY_MIN(NLA_U8, RTPROT_STATIC),
[MDBE_ATTR_STATE_MASK] = NLA_POLICY_MASK(NLA_U8, MDB_PERMANENT),
};

static int br_mdb_flush_desc_init(struct br_mdb_flush_desc *desc,
struct nlattr *tb[],
struct netlink_ext_ack *extack)
{
struct br_mdb_entry *entry = nla_data(tb[MDBA_SET_ENTRY]);
struct nlattr *mdbe_attrs[MDBE_ATTR_MAX + 1];
int err;

desc->port_ifindex = entry->ifindex;
desc->vid = entry->vid;
desc->state = entry->state;

if (!tb[MDBA_SET_ENTRY_ATTRS])
return 0;

err = nla_parse_nested(mdbe_attrs, MDBE_ATTR_MAX,
tb[MDBA_SET_ENTRY_ATTRS],
br_mdbe_attrs_del_bulk_pol, extack);
if (err)
return err;

if (mdbe_attrs[MDBE_ATTR_STATE_MASK])
desc->state_mask = nla_get_u8(mdbe_attrs[MDBE_ATTR_STATE_MASK]);

if (mdbe_attrs[MDBE_ATTR_RTPROT])
desc->rt_protocol = nla_get_u8(mdbe_attrs[MDBE_ATTR_RTPROT]);

return 0;
}

static void br_mdb_flush_host(struct net_bridge *br,
struct net_bridge_mdb_entry *mp,
const struct br_mdb_flush_desc *desc)
{
u8 state;

if (desc->port_ifindex && desc->port_ifindex != br->dev->ifindex)
return;

if (desc->rt_protocol)
return;

state = br_group_is_l2(&mp->addr) ? MDB_PERMANENT : 0;
if (desc->state_mask && (state & desc->state_mask) != desc->state)
return;

br_multicast_host_leave(mp, true);
if (!mp->ports && netif_running(br->dev))
mod_timer(&mp->timer, jiffies);
}

static void br_mdb_flush_pgs(struct net_bridge *br,
struct net_bridge_mdb_entry *mp,
const struct br_mdb_flush_desc *desc)
{
struct net_bridge_port_group __rcu **pp;
struct net_bridge_port_group *p;

for (pp = &mp->ports; (p = mlock_dereference(*pp, br)) != NULL;) {
u8 state;

if (desc->port_ifindex &&
desc->port_ifindex != p->key.port->dev->ifindex) {
pp = &p->next;
continue;
}

if (desc->rt_protocol && desc->rt_protocol != p->rt_protocol) {
pp = &p->next;
continue;
}

state = p->flags & MDB_PG_FLAGS_PERMANENT ? MDB_PERMANENT : 0;
if (desc->state_mask &&
(state & desc->state_mask) != desc->state) {
pp = &p->next;
continue;
}

br_multicast_del_pg(mp, p, pp);
}
}

static void br_mdb_flush(struct net_bridge *br,
const struct br_mdb_flush_desc *desc)
{
struct net_bridge_mdb_entry *mp;

spin_lock_bh(&br->multicast_lock);

/* Safe variant is not needed because entries are removed from the list
* upon group timer expiration or bridge deletion.
*/
hlist_for_each_entry(mp, &br->mdb_list, mdb_node) {
if (desc->vid && desc->vid != mp->addr.vid)
continue;

br_mdb_flush_host(br, mp, desc);
br_mdb_flush_pgs(br, mp, desc);
}

spin_unlock_bh(&br->multicast_lock);
}

int br_mdb_del_bulk(struct net_device *dev, struct nlattr *tb[],
struct netlink_ext_ack *extack)
{
struct net_bridge *br = netdev_priv(dev);
struct br_mdb_flush_desc desc = {};
int err;

err = br_mdb_flush_desc_init(&desc, tb, extack);
if (err)
return err;

br_mdb_flush(br, &desc);

return 0;
}

static const struct nla_policy br_mdbe_attrs_get_pol[MDBE_ATTR_MAX + 1] = {
[MDBE_ATTR_SOURCE] = NLA_POLICY_RANGE(NLA_BINARY,
sizeof(struct in_addr),
Expand Down
Loading

0 comments on commit d7a39d3

Please sign in to comment.