Skip to content

Commit

Permalink
ice: Add support for AF_XDP
Browse files Browse the repository at this point in the history
Add zero copy AF_XDP support.  This patch adds zero copy support for
Tx and Rx; code for zero copy is added to ice_xsk.h and ice_xsk.c.

For Tx, implement ndo_xsk_wakeup. As with other drivers, reuse
existing XDP Tx queues for this task, since XDP_REDIRECT guarantees
mutual exclusion between different NAPI contexts based on CPU ID. In
turn, a netdev can XDP_REDIRECT to another netdev with a different
NAPI context, since the operation is bound to a specific core and each
core has its own hardware ring.

For Rx, allocate frames as MEM_TYPE_ZERO_COPY on queues that have
AF_XDP enabled.

Signed-off-by: Krzysztof Kazimierczak <[email protected]>
Co-developed-by: Maciej Fijalkowski <[email protected]>
Signed-off-by: Maciej Fijalkowski <[email protected]>
Signed-off-by: Tony Nguyen <[email protected]>
Signed-off-by: Jeff Kirsher <[email protected]>
  • Loading branch information
kkazimie authored and Jeff Kirsher committed Nov 4, 2019
1 parent 0891d6d commit 2d4238f
Show file tree
Hide file tree
Showing 11 changed files with 1,456 additions and 27 deletions.
1 change: 1 addition & 0 deletions drivers/net/ethernet/intel/ice/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,4 @@ ice-y := ice_main.o \
ice_ethtool.o
ice-$(CONFIG_PCI_IOV) += ice_virtchnl_pf.o ice_sriov.o
ice-$(CONFIG_DCB) += ice_dcb.o ice_dcb_lib.o
ice-$(CONFIG_XDP_SOCKETS) += ice_xsk.o
26 changes: 26 additions & 0 deletions drivers/net/ethernet/intel/ice/ice.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
#include <linux/bpf.h>
#include <linux/avf/virtchnl.h>
#include <net/ipv6.h>
#include <net/xdp_sock.h>
#include "ice_devids.h"
#include "ice_type.h"
#include "ice_txrx.h"
Expand All @@ -44,6 +45,7 @@
#include "ice_sched.h"
#include "ice_virtchnl_pf.h"
#include "ice_sriov.h"
#include "ice_xsk.h"

extern const char ice_drv_ver[];
#define ICE_BAR0 0
Expand Down Expand Up @@ -287,6 +289,9 @@ struct ice_vsi {
struct ice_ring **xdp_rings; /* XDP ring array */
u16 num_xdp_txq; /* Used XDP queues */
u8 xdp_mapping_mode; /* ICE_MAP_MODE_[CONTIG|SCATTER] */
struct xdp_umem **xsk_umems;
u16 num_xsk_umems_used;
u16 num_xsk_umems;
} ____cacheline_internodealigned_in_smp;

/* struct that defines an interrupt vector */
Expand Down Expand Up @@ -440,6 +445,27 @@ static inline void ice_set_ring_xdp(struct ice_ring *ring)
ring->flags |= ICE_TX_FLAGS_RING_XDP;
}

/**
* ice_xsk_umem - get XDP UMEM bound to a ring
* @ring - ring to use
*
* Returns a pointer to xdp_umem structure if there is an UMEM present,
* NULL otherwise.
*/
static inline struct xdp_umem *ice_xsk_umem(struct ice_ring *ring)
{
struct xdp_umem **umems = ring->vsi->xsk_umems;
int qid = ring->q_index;

if (ice_ring_is_xdp(ring))
qid -= ring->vsi->num_xdp_txq;

if (!umems || !umems[qid] || !ice_is_xdp_ena_vsi(ring->vsi))
return NULL;

return umems[qid];
}

/**
* ice_get_main_vsi - Get the PF VSI
* @pf: PF instance
Expand Down
53 changes: 46 additions & 7 deletions drivers/net/ethernet/intel/ice/ice_base.c
Original file line number Diff line number Diff line change
Expand Up @@ -276,14 +276,17 @@ ice_setup_tx_ctx(struct ice_ring *ring, struct ice_tlan_ctx *tlan_ctx, u16 pf_q)
*/
int ice_setup_rx_ctx(struct ice_ring *ring)
{
int chain_len = ICE_MAX_CHAINED_RX_BUFS;
struct ice_vsi *vsi = ring->vsi;
struct ice_hw *hw = &vsi->back->hw;
u32 rxdid = ICE_RXDID_FLEX_NIC;
struct ice_rlan_ctx rlan_ctx;
struct ice_hw *hw;
u32 regval;
u16 pf_q;
int err;

hw = &vsi->back->hw;

/* what is Rx queue number in global space of 2K Rx queues */
pf_q = vsi->rxq_map[ring->q_index];

Expand All @@ -297,10 +300,38 @@ int ice_setup_rx_ctx(struct ice_ring *ring)
xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev,
ring->q_index);

err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
MEM_TYPE_PAGE_SHARED, NULL);
if (err)
return err;
ring->xsk_umem = ice_xsk_umem(ring);
if (ring->xsk_umem) {
xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq);

ring->rx_buf_len = ring->xsk_umem->chunk_size_nohr -
XDP_PACKET_HEADROOM;
/* For AF_XDP ZC, we disallow packets to span on
* multiple buffers, thus letting us skip that
* handling in the fast-path.
*/
chain_len = 1;
ring->zca.free = ice_zca_free;
err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
MEM_TYPE_ZERO_COPY,
&ring->zca);
if (err)
return err;

dev_info(&vsi->back->pdev->dev, "Registered XDP mem model MEM_TYPE_ZERO_COPY on Rx ring %d\n",
ring->q_index);
} else {
if (!xdp_rxq_info_is_reg(&ring->xdp_rxq))
xdp_rxq_info_reg(&ring->xdp_rxq,
ring->netdev,
ring->q_index);

err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
MEM_TYPE_PAGE_SHARED,
NULL);
if (err)
return err;
}
}
/* Receive Queue Base Address.
* Indicates the starting address of the descriptor queue defined in
Expand Down Expand Up @@ -340,7 +371,7 @@ int ice_setup_rx_ctx(struct ice_ring *ring)
* than 5 x DBUF
*/
rlan_ctx.rxmax = min_t(u16, vsi->max_frame,
ICE_MAX_CHAINED_RX_BUFS * vsi->rx_buf_len);
chain_len * ring->rx_buf_len);

/* Rx queue threshold in units of 64 */
rlan_ctx.lrxqthresh = 1;
Expand Down Expand Up @@ -378,7 +409,15 @@ int ice_setup_rx_ctx(struct ice_ring *ring)
/* init queue specific tail register */
ring->tail = hw->hw_addr + QRX_TAIL(pf_q);
writel(0, ring->tail);
ice_alloc_rx_bufs(ring, ICE_DESC_UNUSED(ring));

err = ring->xsk_umem ?
ice_alloc_rx_bufs_slow_zc(ring, ICE_DESC_UNUSED(ring)) :
ice_alloc_rx_bufs(ring, ICE_DESC_UNUSED(ring));
if (err)
dev_info(&vsi->back->pdev->dev,
"Failed allocate some buffers on %sRx ring %d (pf_q %d)\n",
ring->xsk_umem ? "UMEM enabled " : "",
ring->q_index, pf_q);

return 0;
}
Expand Down
7 changes: 7 additions & 0 deletions drivers/net/ethernet/intel/ice/ice_ethtool.c
Original file line number Diff line number Diff line change
Expand Up @@ -2612,6 +2612,13 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring)
return 0;
}

/* If there is an AF_XDP UMEM attached to any of the Rx rings,
* disallow changing the number of descriptors -- regardless
* of whether the netdev is running or not.
*/
if (ice_xsk_any_rx_ring_ena(vsi))
return -EBUSY;

while (test_and_set_bit(__ICE_CFG_BUSY, pf->state)) {
timeout--;
if (!timeout)
Expand Down
57 changes: 56 additions & 1 deletion drivers/net/ethernet/intel/ice/ice_lib.c
Original file line number Diff line number Diff line change
Expand Up @@ -1283,7 +1283,17 @@ int ice_vsi_cfg_lan_txqs(struct ice_vsi *vsi)
*/
int ice_vsi_cfg_xdp_txqs(struct ice_vsi *vsi)
{
	int ret;
	int i;

	/* Configure the XDP Tx queues first; bail out on any failure so that
	 * no ring is associated with a UMEM on a half-configured VSI.
	 */
	ret = ice_vsi_cfg_txqs(vsi, vsi->xdp_rings);
	if (ret)
		return ret;

	/* Record on every XDP Tx ring the UMEM reported by ice_xsk_umem()
	 * for it (NULL when there is none).
	 */
	for (i = 0; i < vsi->num_xdp_txq; i++)
		vsi->xdp_rings[i]->xsk_umem = ice_xsk_umem(vsi->xdp_rings[i]);

	return 0;
}

/**
Expand Down Expand Up @@ -2514,6 +2524,51 @@ char *ice_nvm_version_str(struct ice_hw *hw)
return buf;
}

/**
 * ice_update_ring_stats - Accumulate packet and byte counts for a ring
 * @ring: ring whose stats.pkts and stats.bytes are incremented
 * @cont: ring container whose total_pkts and total_bytes are incremented
 * @pkts: number of processed packets to add
 * @bytes: number of processed bytes to add
 *
 * The caller is expected to have acquired a u64_stats_sync lock before
 * calling this function.
 */
static void
ice_update_ring_stats(struct ice_ring *ring, struct ice_ring_container *cont,
		      u64 pkts, u64 bytes)
{
	/* counters kept on the ring itself */
	ring->stats.pkts += pkts;
	ring->stats.bytes += bytes;

	/* running totals kept on the container */
	cont->total_pkts += pkts;
	cont->total_bytes += bytes;
}

/**
 * ice_update_tx_ring_stats - Account processed packets/bytes on a Tx ring
 * @tx_ring: Tx ring to update
 * @pkts: number of processed packets
 * @bytes: number of processed bytes
 *
 * Wraps the shared stats helper in the ring's u64_stats update section and
 * passes the Tx container of the ring's q_vector as the totals target.
 */
void ice_update_tx_ring_stats(struct ice_ring *tx_ring, u64 pkts, u64 bytes)
{
	struct ice_ring_container *tx_cont = &tx_ring->q_vector->tx;

	u64_stats_update_begin(&tx_ring->syncp);
	ice_update_ring_stats(tx_ring, tx_cont, pkts, bytes);
	u64_stats_update_end(&tx_ring->syncp);
}

/**
 * ice_update_rx_ring_stats - Account processed packets/bytes on an Rx ring
 * @rx_ring: Rx ring to update
 * @pkts: number of processed packets
 * @bytes: number of processed bytes
 *
 * Wraps the shared stats helper in the ring's u64_stats update section and
 * passes the Rx container of the ring's q_vector as the totals target.
 */
void ice_update_rx_ring_stats(struct ice_ring *rx_ring, u64 pkts, u64 bytes)
{
	struct ice_ring_container *rx_cont = &rx_ring->q_vector->rx;

	u64_stats_update_begin(&rx_ring->syncp);
	ice_update_ring_stats(rx_ring, rx_cont, pkts, bytes);
	u64_stats_update_end(&rx_ring->syncp);
}

/**
* ice_vsi_cfg_mac_fltr - Add or remove a MAC address filter for a VSI
* @vsi: the VSI being configured MAC filter
Expand Down
4 changes: 4 additions & 0 deletions drivers/net/ethernet/intel/ice/ice_lib.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,10 @@ void ice_vsi_free_tx_rings(struct ice_vsi *vsi);

int ice_vsi_manage_rss_lut(struct ice_vsi *vsi, bool ena);

void ice_update_tx_ring_stats(struct ice_ring *ring, u64 pkts, u64 bytes);

void ice_update_rx_ring_stats(struct ice_ring *ring, u64 pkts, u64 bytes);

void ice_vsi_cfg_frame_size(struct ice_vsi *vsi);

u32 ice_intrl_usec_to_reg(u8 intrl, u8 gran);
Expand Down
16 changes: 16 additions & 0 deletions drivers/net/ethernet/intel/ice/ice_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -1692,6 +1692,7 @@ static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi)
if (ice_setup_tx_ring(xdp_ring))
goto free_xdp_rings;
ice_set_ring_xdp(xdp_ring);
xdp_ring->xsk_umem = ice_xsk_umem(xdp_ring);
}

return 0;
Expand Down Expand Up @@ -1934,6 +1935,17 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog,
if (if_running)
ret = ice_up(vsi);

if (!ret && prog && vsi->xsk_umems) {
int i;

ice_for_each_rxq(vsi, i) {
struct ice_ring *rx_ring = vsi->rx_rings[i];

if (rx_ring->xsk_umem)
napi_schedule(&rx_ring->q_vector->napi);
}
}

return (ret || xdp_ring_err) ? -ENOMEM : 0;
}

Expand All @@ -1959,6 +1971,9 @@ static int ice_xdp(struct net_device *dev, struct netdev_bpf *xdp)
case XDP_QUERY_PROG:
xdp->prog_id = vsi->xdp_prog ? vsi->xdp_prog->aux->id : 0;
return 0;
case XDP_SETUP_XSK_UMEM:
return ice_xsk_umem_setup(vsi, xdp->xsk.umem,
xdp->xsk.queue_id);
default:
return -EINVAL;
}
Expand Down Expand Up @@ -5205,4 +5220,5 @@ static const struct net_device_ops ice_netdev_ops = {
.ndo_tx_timeout = ice_tx_timeout,
.ndo_bpf = ice_xdp,
.ndo_xdp_xmit = ice_xdp_xmit,
.ndo_xsk_wakeup = ice_xsk_wakeup,
};
46 changes: 30 additions & 16 deletions drivers/net/ethernet/intel/ice/ice_txrx.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include "ice_lib.h"
#include "ice.h"
#include "ice_dcb_lib.h"
#include "ice_xsk.h"

#define ICE_RX_HDR_SIZE 256

Expand Down Expand Up @@ -58,6 +59,11 @@ void ice_clean_tx_ring(struct ice_ring *tx_ring)
{
u16 i;

if (ice_ring_is_xdp(tx_ring) && tx_ring->xsk_umem) {
ice_xsk_clean_xdp_ring(tx_ring);
goto tx_skip_free;
}

/* ring already cleared, nothing to do */
if (!tx_ring->tx_buf)
return;
Expand All @@ -66,6 +72,7 @@ void ice_clean_tx_ring(struct ice_ring *tx_ring)
for (i = 0; i < tx_ring->count; i++)
ice_unmap_and_free_tx_buf(tx_ring, &tx_ring->tx_buf[i]);

tx_skip_free:
memset(tx_ring->tx_buf, 0, sizeof(*tx_ring->tx_buf) * tx_ring->count);

/* Zero out the descriptor ring */
Expand Down Expand Up @@ -198,12 +205,8 @@ static bool ice_clean_tx_irq(struct ice_ring *tx_ring, int napi_budget)

i += tx_ring->count;
tx_ring->next_to_clean = i;
u64_stats_update_begin(&tx_ring->syncp);
tx_ring->stats.bytes += total_bytes;
tx_ring->stats.pkts += total_pkts;
u64_stats_update_end(&tx_ring->syncp);
tx_ring->q_vector->tx.total_bytes += total_bytes;
tx_ring->q_vector->tx.total_pkts += total_pkts;

ice_update_tx_ring_stats(tx_ring, total_pkts, total_bytes);

if (ice_ring_is_xdp(tx_ring))
return !!budget;
Expand Down Expand Up @@ -286,6 +289,11 @@ void ice_clean_rx_ring(struct ice_ring *rx_ring)
if (!rx_ring->rx_buf)
return;

if (rx_ring->xsk_umem) {
ice_xsk_clean_rx_ring(rx_ring);
goto rx_skip_free;
}

/* Free all the Rx ring sk_buffs */
for (i = 0; i < rx_ring->count; i++) {
struct ice_rx_buf *rx_buf = &rx_ring->rx_buf[i];
Expand Down Expand Up @@ -313,6 +321,7 @@ void ice_clean_rx_ring(struct ice_ring *rx_ring)
rx_buf->page_offset = 0;
}

rx_skip_free:
memset(rx_ring->rx_buf, 0, sizeof(*rx_ring->rx_buf) * rx_ring->count);

/* Zero out the descriptor ring */
Expand Down Expand Up @@ -1073,13 +1082,7 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
if (xdp_prog)
ice_finalize_xdp_rx(rx_ring, xdp_xmit);

/* update queue and vector specific stats */
u64_stats_update_begin(&rx_ring->syncp);
rx_ring->stats.pkts += total_rx_pkts;
rx_ring->stats.bytes += total_rx_bytes;
u64_stats_update_end(&rx_ring->syncp);
rx_ring->q_vector->rx.total_pkts += total_rx_pkts;
rx_ring->q_vector->rx.total_bytes += total_rx_bytes;
ice_update_rx_ring_stats(rx_ring, total_rx_pkts, total_rx_bytes);

/* guarantee a trip back through this routine if there was a failure */
return failure ? budget : (int)total_rx_pkts;
Expand Down Expand Up @@ -1457,9 +1460,14 @@ int ice_napi_poll(struct napi_struct *napi, int budget)
/* Since the actual Tx work is minimal, we can give the Tx a larger
* budget and be more aggressive about cleaning up the Tx descriptors.
*/
ice_for_each_ring(ring, q_vector->tx)
if (!ice_clean_tx_irq(ring, budget))
ice_for_each_ring(ring, q_vector->tx) {
bool wd = ring->xsk_umem ?
ice_clean_tx_irq_zc(ring, budget) :
ice_clean_tx_irq(ring, budget);

if (!wd)
clean_complete = false;
}

/* Handle case where we are called by netpoll with a budget of 0 */
if (unlikely(budget <= 0))
Expand All @@ -1479,7 +1487,13 @@ int ice_napi_poll(struct napi_struct *napi, int budget)
ice_for_each_ring(ring, q_vector->rx) {
int cleaned;

cleaned = ice_clean_rx_irq(ring, budget_per_ring);
/* A dedicated path for zero-copy allows making a single
* comparison in the irq context instead of many inside the
* ice_clean_rx_irq function and makes the codebase cleaner.
*/
cleaned = ring->xsk_umem ?
ice_clean_rx_irq_zc(ring, budget_per_ring) :
ice_clean_rx_irq(ring, budget_per_ring);
work_done += cleaned;
/* if we clean as many as budgeted, we must not be done */
if (cleaned >= budget_per_ring)
Expand Down
Loading

0 comments on commit 2d4238f

Please sign in to comment.