From 354b5672454086bf269e3c24dba640447f74b4c2 Mon Sep 17 00:00:00 2001 From: Adrian Moreno Date: Mon, 17 Jul 2023 10:08:17 +0200 Subject: [PATCH] netdev-linux: Support 64-bit rates in tc policing. Use TCA_POLICE_RATE64 if the rate cannot be expressed using 32 bits. This breaks the ~34Gbps barrier. The new barrier is ~4Tbps, caused by netdev's API expressing kbps rates using 32-bit integers. Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2137643 Acked-by: Eelco Chaudron Signed-off-by: Adrian Moreno Signed-off-by: Ilya Maximets --- NEWS | 2 ++ acinclude.m4 | 10 ++++++++++ lib/netdev-linux.c | 19 ++++++++++++------- lib/netdev-linux.h | 2 +- lib/tc.c | 2 ++ tests/atlocal.in | 1 + tests/system-traffic.at | 21 +++++++++++++++++++++ 7 files changed, 49 insertions(+), 8 deletions(-) diff --git a/NEWS b/NEWS index d798ec0a127..7284dedff62 100644 --- a/NEWS +++ b/NEWS @@ -36,6 +36,8 @@ Post-v3.1.0 - QoS: * Added new configuration option 'jitter' for a linux-netem QoS type. * 'linux-htb' QoS type now supports rates higher than 34 Gbps. + - Ingress Policing: + * Ingress policing byte rates can now be configured higher than 34 Gbps. - DPDK: * ovs-vswitchd will keep the CAP_SYS_RAWIO capability when started with the --hw-rawio-access command line option. 
This allows the diff --git a/acinclude.m4 b/acinclude.m4 index 28d028f371b..f1ba046c238 100644 --- a/acinclude.m4 +++ b/acinclude.m4 @@ -228,6 +228,16 @@ AC_DEFUN([OVS_CHECK_LINUX_TC], [ [Define to 1 if TCA_HTB_RATE64 is available.])], [AC_SUBST(HAVE_TCA_HTB_RATE64,no)] ) + + AC_COMPILE_IFELSE([ + AC_LANG_PROGRAM([#include <linux/pkt_cls.h>], [ + int x = TCA_POLICE_PKTRATE64; + ])], + [AC_SUBST(HAVE_TCA_POLICE_PKTRATE64,yes) + AC_DEFINE([HAVE_TCA_POLICE_PKTRATE64], [1], + [Define to 1 if TCA_POLICE_PKTRATE64 is available.])], + [AC_SUBST(HAVE_TCA_POLICE_PKTRATE64,no)] + ) ]) dnl OVS_CHECK_LINUX_SCTP_CT diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c index 759c98d33db..cca3408797e 100644 --- a/lib/netdev-linux.c +++ b/lib/netdev-linux.c @@ -494,7 +494,7 @@ static struct tcmsg *netdev_linux_tc_make_request(const struct netdev *, unsigned int flags, struct ofpbuf *); -static int tc_add_policer(struct netdev *, uint32_t kbits_rate, +static int tc_add_policer(struct netdev *, uint64_t kbits_rate, uint32_t kbits_burst, uint32_t kpkts_rate, uint32_t kpkts_burst); @@ -2694,6 +2694,7 @@ nl_msg_put_act_police(struct ofpbuf *request, uint32_t index, uint64_t pkts_rate, uint64_t pkts_burst, uint32_t notexceed_act, bool single_action) { + uint64_t bytes_rate = kbits_rate / 8 * 1000; size_t offset, act_offset; struct tc_police police; uint32_t prio = 0; @@ -2708,8 +2709,13 @@ nl_msg_put_act_police(struct ofpbuf *request, uint32_t index, nl_msg_act_police_start_nest(request, ++prio, &offset, &act_offset, single_action); if (police.rate.rate) { - tc_put_rtab(request, TCA_POLICE_RATE, &police.rate, 0); + tc_put_rtab(request, TCA_POLICE_RATE, &police.rate, bytes_rate); } +#ifdef HAVE_TCA_POLICE_PKTRATE64 + if (bytes_rate > UINT32_MAX) { + nl_msg_put_u64(request, TCA_POLICE_RATE64, bytes_rate); + } +#endif if (pkts_rate) { uint64_t pkt_burst_ticks; /* Here tc_bytes_to_ticks is used to convert packets rather than bytes @@ -2723,7 +2729,7 @@ nl_msg_put_act_police(struct ofpbuf *request, uint32_t 
index, } static int -tc_add_matchall_policer(struct netdev *netdev, uint32_t kbits_rate, +tc_add_matchall_policer(struct netdev *netdev, uint64_t kbits_rate, uint32_t kbits_burst, uint32_t kpkts_rate, uint32_t kpkts_burst) { @@ -5742,9 +5748,8 @@ tc_policer_init(struct tc_police *tc_police, uint64_t kbits_rate, * Returns 0 if successful, otherwise a positive errno value. */ static int -tc_add_policer(struct netdev *netdev, uint32_t kbits_rate, - uint32_t kbits_burst, uint32_t kpkts_rate, - uint32_t kpkts_burst) +tc_add_policer(struct netdev *netdev, uint64_t kbits_rate, + uint32_t kbits_burst, uint32_t kpkts_rate, uint32_t kpkts_burst) { size_t basic_offset, police_offset; struct ofpbuf request; @@ -5778,7 +5783,7 @@ tc_add_policer(struct netdev *netdev, uint32_t kbits_rate, } int -tc_add_policer_action(uint32_t index, uint32_t kbits_rate, +tc_add_policer_action(uint32_t index, uint64_t kbits_rate, uint32_t kbits_burst, uint32_t pkts_rate, uint32_t pkts_burst, bool update) { diff --git a/lib/netdev-linux.h b/lib/netdev-linux.h index 9a416ce505c..ec19b0dedc4 100644 --- a/lib/netdev-linux.h +++ b/lib/netdev-linux.h @@ -29,7 +29,7 @@ struct netdev; int netdev_linux_ethtool_set_flag(struct netdev *netdev, uint32_t flag, const char *flag_name, bool enable); int linux_get_ifindex(const char *netdev_name); -int tc_add_policer_action(uint32_t index, uint32_t kbits_rate, +int tc_add_policer_action(uint32_t index, uint64_t kbits_rate, uint32_t kbits_burst, uint32_t pkts_rate, uint32_t pkts_burst, bool update); int tc_del_policer_action(uint32_t index, struct ofputil_meter_stats *stats); diff --git a/lib/tc.c b/lib/tc.c index e34a1a5f090..f49048cdaba 100644 --- a/lib/tc.c +++ b/lib/tc.c @@ -1504,6 +1504,8 @@ static const struct nl_policy police_policy[] = { [TCA_POLICE_RATE] = { .type = NL_A_UNSPEC, .min_len = 1024, .optional = true, }, + [TCA_POLICE_RATE64] = { .type = NL_A_U32, + .optional = true, }, [TCA_POLICE_PEAKRATE] = { .type = NL_A_UNSPEC, .min_len = 1024, .optional 
= true, }, diff --git a/tests/atlocal.in b/tests/atlocal.in index ffdea5cc017..1013098a184 100644 --- a/tests/atlocal.in +++ b/tests/atlocal.in @@ -8,6 +8,7 @@ EGREP='@EGREP@' PYTHON3='@PYTHON3@' CFLAGS='@CFLAGS@' HAVE_TCA_HTB_RATE64='@HAVE_TCA_HTB_RATE64@' +HAVE_TCA_POLICE_PKTRATE64='@HAVE_TCA_POLICE_PKTRATE64@' # PYTHONCOERCECLOCALE=0 disables the Unicode compatibility warning on # stderr that breaks almost any Python3 test (PEP 0538) diff --git a/tests/system-traffic.at b/tests/system-traffic.at index ecb37303a0f..945037ec057 100644 --- a/tests/system-traffic.at +++ b/tests/system-traffic.at @@ -2382,6 +2382,27 @@ AT_CHECK([tc class show dev ovs-p1 | grep -q 'class htb .* HTB_CONF']) OVS_TRAFFIC_VSWITCHD_STOP AT_CLEANUP +AT_SETUP([Ingress Policing - 64-bit]) +AT_SKIP_IF([test $HAVE_TC = no]) +AT_SKIP_IF([test $HAVE_TCA_POLICE_PKTRATE64 = no]) +OVS_TRAFFIC_VSWITCHD_START() +ADD_NAMESPACES(ns0) +ADD_VETH(p0, ns0, br0, "10.1.1.1/24") + +AT_CHECK([ovs-vsctl set interface ovs-p0 ingress_policing_rate=50000000]) +AT_CHECK([ovs-vsctl set interface ovs-p0 ingress_policing_burst=400000]) + +AT_CHECK([tc -o -s -d filter show dev ovs-p0 ingress | + sed -n 's/.*\(rate [[0-9]]*[[a-zA-Z]]* burst [[0-9]]*[[a-zA-Z]]*\).*/\1/; T; p; q'], + [0],[dnl +rate 50Gbit burst 74500000b +]) + +AT_CHECK([tc -s -d filter show dev ovs-p0 ingress | + grep -E "basic|matchall" > /dev/null], [0]) +OVS_TRAFFIC_VSWITCHD_STOP +AT_CLEANUP + AT_BANNER([conntrack]) AT_SETUP([conntrack - controller])