Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

xdp: add a new helper for dev map multicast support #13

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
sudo: required
language: bash
dist: bionic
services:
- docker

env:
global:
- PROJECT_NAME='libbpf'
- AUTHOR_EMAIL="$(git log -1 --pretty=\"%aE\")"
- REPO_ROOT="$TRAVIS_BUILD_DIR"
- CI_ROOT="$REPO_ROOT/travis-ci"
- VMTEST_ROOT="$CI_ROOT/vmtest"

addons:
apt:
packages:
- qemu-kvm
- zstd
- binutils-dev
- elfutils
- libcap-dev
- libelf-dev
- libdw-dev

jobs:
include:
- stage: Builds & Tests
name: Kernel LATEST + selftests
language: bash
env: KERNEL=LATEST
script: $CI_ROOT/vmtest/run_vmtest.sh || travis_terminate 1
21 changes: 21 additions & 0 deletions include/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -292,6 +292,7 @@ enum bpf_arg_type {
ARG_PTR_TO_ALLOC_MEM, /* pointer to dynamically allocated memory */
ARG_PTR_TO_ALLOC_MEM_OR_NULL, /* pointer to dynamically allocated memory or NULL */
ARG_CONST_ALLOC_SIZE_OR_ZERO, /* number of allocated bytes requested */
ARG_CONST_MAP_PTR_OR_NULL, /* const argument used as pointer to bpf_map or NULL */
};

/* type of values returned from helper functions */
Expand Down Expand Up @@ -1341,6 +1342,11 @@ int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
struct net_device *dev_rx);
int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
struct net_device *dev_rx);
bool dev_in_exclude_map(struct bpf_dtab_netdev *obj, struct bpf_map *map,
int exclude_ifindex);
int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx,
struct bpf_map *map, struct bpf_map *ex_map,
u32 flags);
int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
struct bpf_prog *xdp_prog);
bool dev_map_can_have_prog(struct bpf_map *map);
Expand Down Expand Up @@ -1516,6 +1522,21 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
return 0;
}

/* Stub variant of dev_in_exclude_map(): ignores all of its arguments and
 * always reports that the device is not excluded.
 * NOTE(review): presumably this is the fallback used when devmap support is
 * compiled out; the guarding #ifdef is not visible here, so confirm.
 */
static inline
bool dev_in_exclude_map(struct bpf_dtab_netdev *obj, struct bpf_map *map,
int exclude_ifindex)
{
return false;
}

/* Stub variant of dev_map_enqueue_multi(): performs no enqueueing, ignores
 * all of its arguments and unconditionally returns 0.
 * NOTE(review): presumably this is the fallback used when devmap support is
 * compiled out; the guarding #ifdef is not visible here, so confirm.
 */
static inline
int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx,
struct bpf_map *map, struct bpf_map *ex_map,
u32 flags)
{
return 0;
}

struct sk_buff;

static inline int dev_map_generic_redirect(struct bpf_dtab_netdev *dst,
Expand Down
1 change: 1 addition & 0 deletions include/linux/filter.h
Original file line number Diff line number Diff line change
Expand Up @@ -612,6 +612,7 @@ struct bpf_redirect_info {
u32 tgt_index;
void *tgt_value;
struct bpf_map *map;
struct bpf_map *ex_map;
u32 kern_flags;
};

Expand Down
1 change: 1 addition & 0 deletions include/net/xdp.h
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ void xdp_warn(const char *msg, const char *func, const int line);
#define XDP_WARN(msg) xdp_warn(msg, __func__, __LINE__)

struct xdp_frame *xdp_convert_zc_to_xdp_frame(struct xdp_buff *xdp);
struct xdp_frame *xdpf_clone(struct xdp_frame *xdpf);

static inline
void xdp_convert_frame_to_buff(struct xdp_frame *frame, struct xdp_buff *xdp)
Expand Down
27 changes: 27 additions & 0 deletions include/uapi/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -3579,6 +3579,27 @@ union bpf_attr {
* the data in *dst*. This is a wrapper of **copy_from_user**\ ().
* Return
* 0 on success, or a negative error in case of failure.
*
* long bpf_redirect_map_multi(struct bpf_map *map, struct bpf_map *ex_map, u64 flags)
* Description
* This is a multicast implementation for XDP redirect. It will
* redirect the packet to ALL the interfaces in *map*, but
* exclude the interfaces in *ex_map*.
*
* The forwarding *map* could be either BPF_MAP_TYPE_DEVMAP or
* BPF_MAP_TYPE_DEVMAP_HASH. But the *ex_map* must be
* BPF_MAP_TYPE_DEVMAP_HASH to get better performance.
*
* Currently *flags* supports only *BPF_F_EXCLUDE_INGRESS*,
* which additionally excludes the current ingress device.
*
* See also bpf_redirect_map() as a unicast implementation,
* which supports redirecting packet to a specific ifindex
* in the map. As both helpers use struct bpf_redirect_info
* to store the redirect info, we will use a NULL tgt_value
* to distinguish multicast and unicast redirecting.
* Return
* **XDP_REDIRECT** on success, or **XDP_ABORTED** on error.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
Expand Down Expand Up @@ -3730,6 +3751,7 @@ union bpf_attr {
FN(inode_storage_delete), \
FN(d_path), \
FN(copy_from_user), \
FN(redirect_map_multi), \
/* */

/* integer value in 'imm' field of BPF_CALL instruction selects which helper
Expand Down Expand Up @@ -3901,6 +3923,11 @@ enum bpf_lwt_encap_mode {
BPF_LWT_ENCAP_IP,
};

/* BPF_FUNC_redirect_map_multi flags.
 *
 * BPF_F_EXCLUDE_INGRESS: in addition to the devices listed in the exclude
 * map, also exclude the device the packet was received on.
 */
enum {
BPF_F_EXCLUDE_INGRESS = (1ULL << 0),
};

#define __bpf_md_ptr(type, name) \
union { \
type name; \
Expand Down
132 changes: 132 additions & 0 deletions kernel/bpf/devmap.c
Original file line number Diff line number Diff line change
Expand Up @@ -511,6 +511,138 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
return __xdp_enqueue(dev, xdp, dev_rx);
}

/* Fast-path replacement for map->ops->map_get_next_key(): call the
 * devmap-specific iterator directly, chosen by the map's type.
 *
 * Returns whatever the selected iterator returns, or -ENOENT when @map is
 * neither a BPF_MAP_TYPE_DEVMAP nor a BPF_MAP_TYPE_DEVMAP_HASH.
 */
static int devmap_get_next_key(struct bpf_map *map, void *key, void *next_key)
{
	if (map->map_type == BPF_MAP_TYPE_DEVMAP)
		return dev_map_get_next_key(map, key, next_key);

	if (map->map_type == BPF_MAP_TYPE_DEVMAP_HASH)
		return dev_map_hash_get_next_key(map, key, next_key);

	return -ENOENT;
}

bool dev_in_exclude_map(struct bpf_dtab_netdev *obj, struct bpf_map *map,
int exclude_ifindex)
{
if (obj->dev->ifindex == exclude_ifindex)
return true;

if (!map)
return false;

return __dev_map_hash_lookup_elem(map, obj->dev->ifindex) != NULL;
}

/* Find the next entry of @map, after @key, that may receive @xdp.
 *
 * @xdp:        packet being redirected; only its length is used here
 * @map:        forwarding map to iterate
 * @ex_map:     optional exclude map, may be NULL
 * @key:        key to continue from, or NULL to start from the beginning
 * @next_key:   output; holds the key of the returned entry on success
 * @ex_ifindex: ifindex to skip in addition to the entries of @ex_map
 *
 * An entry is skipped when the lookup yields no object, when
 * dev_in_exclude_map() reports it as excluded, when its device has no
 * ndo_xdp_xmit callback, or when xdp_ok_fwd_dev() rejects the packet
 * length for that device.
 *
 * Returns the matching entry, or NULL when the iteration ends or the
 * iteration bound is hit without finding one.
 */
static struct bpf_dtab_netdev *devmap_get_next_obj(struct xdp_buff *xdp, struct bpf_map *map,
						   struct bpf_map *ex_map, u32 *key,
						   u32 *next_key, int ex_ifindex)
{
	struct bpf_dtab_netdev *obj;
	struct net_device *dev;
	u32 index;
	int err;

	err = devmap_get_next_key(map, key, next_key);
	if (err)
		return NULL;

	/* When using dev map hash, we could restart the hashtab traversal
	 * in case the key has been updated/removed in the mean time.
	 * So we may end up potentially looping due to traversal restarts
	 * from first elem.
	 *
	 * Let's use map's max_entries to limit the loop number.
	 */
	for (index = 0; index < map->max_entries; index++) {
		switch (map->map_type) {
		case BPF_MAP_TYPE_DEVMAP:
			obj = __dev_map_lookup_elem(map, *next_key);
			break;
		case BPF_MAP_TYPE_DEVMAP_HASH:
			obj = __dev_map_hash_lookup_elem(map, *next_key);
			break;
		default:
			/* Not a devmap: there is nothing to look up. Bail
			 * out here so that 'obj' is never read while
			 * uninitialized.
			 */
			return NULL;
		}

		if (!obj || dev_in_exclude_map(obj, ex_map, ex_ifindex))
			goto find_next;

		dev = obj->dev;

		if (!dev->netdev_ops->ndo_xdp_xmit)
			goto find_next;

		err = xdp_ok_fwd_dev(dev, xdp->data_end - xdp->data);
		if (unlikely(err))
			goto find_next;

		return obj;

find_next:
		/* Continue from the key that was just examined. */
		err = devmap_get_next_key(map, next_key, next_key);
		if (err)
			break;
	}

	return NULL;
}

/* Enqueue @xdp to every eligible device in @map.
 *
 * @xdp:    packet to send
 * @dev_rx: device the packet arrived on
 * @map:    forwarding map to iterate
 * @ex_map: optional map of devices to leave out, may be NULL
 * @flags:  BPF_F_EXCLUDE_INGRESS additionally leaves out @dev_rx
 *
 * The iteration always looks one destination ahead: each destination that
 * is followed by another one gets a clone of the frame, and the final
 * destination gets the original frame.
 *
 * Returns 0 on success, and also when no eligible destination exists;
 * -EOVERFLOW if @xdp cannot be converted to a frame; -ENOMEM if cloning
 * fails (the original frame is returned in that case; clones already
 * enqueued stay enqueued).
 *
 * NOTE(review): on the "no destination" path @xdp is neither converted nor
 * released here; confirm the caller copes with a 0 return in that case.
 */
int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx,
			  struct bpf_map *map, struct bpf_map *ex_map,
			  u32 flags)
{
	struct bpf_dtab_netdev *cur, *upcoming;
	struct xdp_frame *frame, *copy;
	u32 cur_key, upcoming_key;
	int skip_ifindex = 0;

	if (flags & BPF_F_EXCLUDE_INGRESS)
		skip_ifindex = dev_rx->ifindex;

	/* Locate the first usable destination. */
	cur = devmap_get_next_obj(xdp, map, ex_map, NULL, &cur_key,
				  skip_ifindex);
	if (!cur)
		return 0;

	frame = xdp_convert_buff_to_frame(xdp);
	if (unlikely(!frame))
		return -EOVERFLOW;

	for (;;) {
		/* Peek ahead: is there another destination after 'cur'? */
		upcoming = devmap_get_next_obj(xdp, map, ex_map, &cur_key,
					       &upcoming_key, skip_ifindex);
		if (!upcoming)
			break;

		copy = xdpf_clone(frame);
		if (unlikely(!copy)) {
			xdp_return_frame_rx_napi(frame);
			return -ENOMEM;
		}

		bq_enqueue(cur->dev, copy, dev_rx);

		/* Advance to the destination found by the peek. */
		cur = upcoming;
		cur_key = upcoming_key;
	}

	/* Last destination: hand over the original frame. */
	bq_enqueue(cur->dev, frame, dev_rx);
	return 0;
}

int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
struct bpf_prog *xdp_prog)
{
Expand Down
20 changes: 15 additions & 5 deletions kernel/bpf/verifier.c
Original file line number Diff line number Diff line change
Expand Up @@ -3966,9 +3966,13 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
expected_type = SCALAR_VALUE;
if (type != expected_type)
goto err_type;
} else if (arg_type == ARG_CONST_MAP_PTR) {
} else if (arg_type == ARG_CONST_MAP_PTR ||
arg_type == ARG_CONST_MAP_PTR_OR_NULL) {
expected_type = CONST_PTR_TO_MAP;
if (type != expected_type)
if (register_is_null(reg) &&
arg_type == ARG_CONST_MAP_PTR_OR_NULL)
/* fall through to next check */;
else if (type != expected_type)
goto err_type;
} else if (arg_type == ARG_PTR_TO_CTX ||
arg_type == ARG_PTR_TO_CTX_OR_NULL) {
Expand Down Expand Up @@ -4085,9 +4089,9 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
return -EFAULT;
}

if (arg_type == ARG_CONST_MAP_PTR) {
/* bpf_map_xxx(map_ptr) call: remember that map_ptr */
meta->map_ptr = reg->map_ptr;
if (arg_type == ARG_CONST_MAP_PTR ||
arg_type == ARG_CONST_MAP_PTR_OR_NULL) {
meta->map_ptr = register_is_null(reg) ? NULL : reg->map_ptr;
} else if (arg_type == ARG_PTR_TO_MAP_KEY) {
/* bpf_map_xxx(..., map_ptr, ..., key) call:
* check that [key, key + map->key_size) are within
Expand Down Expand Up @@ -4269,6 +4273,7 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
case BPF_MAP_TYPE_DEVMAP:
case BPF_MAP_TYPE_DEVMAP_HASH:
if (func_id != BPF_FUNC_redirect_map &&
func_id != BPF_FUNC_redirect_map_multi &&
func_id != BPF_FUNC_map_lookup_elem)
goto error;
break;
Expand Down Expand Up @@ -4368,6 +4373,11 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
map->map_type != BPF_MAP_TYPE_XSKMAP)
goto error;
break;
case BPF_FUNC_redirect_map_multi:
if (map->map_type != BPF_MAP_TYPE_DEVMAP &&
map->map_type != BPF_MAP_TYPE_DEVMAP_HASH)
goto error;
break;
case BPF_FUNC_sk_redirect_map:
case BPF_FUNC_msg_redirect_map:
case BPF_FUNC_sock_map_update:
Expand Down
Loading