mlxsw: pci: Initialize page pool per CQ
The next patch will use a page pool to allocate buffers for the RDQ. Initialize
a page pool for each CQ, which is mapped 1:1 to an RDQ. A page pool per Rx
queue improves Rx performance by recycling buffers back into each queue's
dedicated pool.

When only one NAPI instance is the consumer of pages from a page pool, it is
recommended to pass that instance as part of 'page_pool_params'; the page pool
APIs can then run without taking special locks. The mlxsw driver holds a NAPI
instance per CQ, so add a page pool per CQ and pass the existing NAPI instance.

For now, pages are not allocated from the pool; the next patch will start
using it.
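
To illustrate how such a pool is typically consumed, here is a sketch only:
page_pool_dev_alloc_pages(), page_pool_get_dma_addr() and
page_pool_put_full_page() are the generic helpers from
<net/page_pool/helpers.h>, while the refill function itself is hypothetical
and assumes the driver's existing mlxsw_pci_wqe_address_set() accessor.

/* Hypothetical sketch, not part of this patch: refill one RDQ element
 * from the per-CQ pool.  With PP_FLAG_DMA_MAP the pool has already
 * DMA-mapped the page, so only the address needs to be fetched.
 */
static int mlxsw_pci_rdq_page_refill(struct mlxsw_pci_queue *cq, char *wqe)
{
	struct page *page;

	page = page_pool_dev_alloc_pages(cq->u.cq.page_pool);
	if (!page)
		return -ENOMEM;

	mlxsw_pci_wqe_address_set(wqe, 0, page_pool_get_dma_addr(page));
	return 0;
}

/* On completion, recycling from the pool's own NAPI context with
 * allow_direct set is what enables the lock-free fast path:
 *
 *	page_pool_put_full_page(cq->u.cq.page_pool, page, true);
 */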

Some notes regarding 'page_pool_params':
* Use PP_FLAG_DMA_MAP to let the page pool handle DMA mapping. For now, do
  not use the sync flag, as only the device writes to this memory and the
  driver reads it only after the device has finished writing. This will
  probably change when XDP support is added.
* Define 'order' according to the maximum MTU, taking software overhead into
  account. Some rounding up is involved, which means that more pages are
  allocated than strictly needed; see the worked example after this list.
  This can be improved later by using fragmented buffers.
* Use pool_size = MLXSW_PCI_WQE_COUNT. This is the size of the 'ptr_ring' and
  should be the maximum number of packets that the page pool will allocate
  memory for. In our case, this is the queue size, defined as
  MLXSW_PCI_WQE_COUNT.
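
To make the rounding in the 'order' note concrete, a back-of-the-envelope
example (assuming 4 KiB pages, MLXSW_PORT_MAX_MTU of 10240 and typical
x86_64 values NET_SKB_PAD = 64, NET_IP_ALIGN = 0 and
SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) = 320):

  max_pkt_size = 10240 + 64 + 0 + 320 = 10624 bytes
  get_order(10624) = 2, i.e. 2^2 pages = 16 KiB per buffer

so about 5.6 KiB per buffer is lost to rounding, which is the waste that
fragmented buffers could later recover.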

Signed-off-by: Amit Cohen <[email protected]>
Reviewed-by: Petr Machata <[email protected]>
Reviewed-by: Ido Schimmel <[email protected]>
Signed-off-by: Petr Machata <[email protected]>
Reviewed-by: Jiri Pirko <[email protected]>
Link: https://lore.kernel.org/r/02e5856ae7c572d4293ce6bb92c286ee6cfec800.1718709196.git.petrm@nvidia.com
Signed-off-by: Jakub Kicinski <[email protected]>
Amit Cohen authored and kuba-moo committed Jun 20, 2024
1 parent 7555b7f commit 5642c6a
Showing 2 changed files with 60 additions and 1 deletion.
1 change: 1 addition & 0 deletions drivers/net/ethernet/mellanox/mlxsw/Kconfig
@@ -33,6 +33,7 @@ config MLXSW_CORE_THERMAL
 config MLXSW_PCI
 	tristate "PCI bus implementation for Mellanox Technologies Switch ASICs"
 	depends on PCI && HAS_IOMEM && MLXSW_CORE
+	select PAGE_POOL
 	default m
 	help
 	  This is PCI bus implementation for Mellanox Technologies Switch ASICs.
60 changes: 59 additions & 1 deletion drivers/net/ethernet/mellanox/mlxsw/pci.c
@@ -13,6 +13,7 @@
 #include <linux/if_vlan.h>
 #include <linux/log2.h>
 #include <linux/string.h>
+#include <net/page_pool/helpers.h>
 
 #include "pci_hw.h"
 #include "pci.h"
@@ -88,6 +89,7 @@ struct mlxsw_pci_queue {
 			enum mlxsw_pci_cqe_v v;
 			struct mlxsw_pci_queue *dq;
 			struct napi_struct napi;
+			struct page_pool *page_pool;
 		} cq;
 		struct {
 			struct tasklet_struct tasklet;
@@ -338,6 +340,12 @@ static void mlxsw_pci_sdq_fini(struct mlxsw_pci *mlxsw_pci,
 	mlxsw_cmd_hw2sw_sdq(mlxsw_pci->core, q->num);
 }
 
+#define MLXSW_PCI_SKB_HEADROOM (NET_SKB_PAD + NET_IP_ALIGN)
+
+#define MLXSW_PCI_RX_BUF_SW_OVERHEAD \
+	(MLXSW_PCI_SKB_HEADROOM + \
+	 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
+
 static int mlxsw_pci_wqe_frag_map(struct mlxsw_pci *mlxsw_pci, char *wqe,
 				  int index, char *frag_data, size_t frag_len,
 				  int direction)
@@ -844,9 +852,47 @@ static void mlxsw_pci_cq_napi_teardown(struct mlxsw_pci_queue *q)
 	netif_napi_del(&q->u.cq.napi);
 }
 
+static int mlxsw_pci_cq_page_pool_init(struct mlxsw_pci_queue *q,
+				       enum mlxsw_pci_cq_type cq_type)
+{
+	struct page_pool_params pp_params = {};
+	struct mlxsw_pci *mlxsw_pci = q->pci;
+	struct page_pool *page_pool;
+	u32 max_pkt_size;
+
+	if (cq_type != MLXSW_PCI_CQ_RDQ)
+		return 0;
+
+	max_pkt_size = MLXSW_PORT_MAX_MTU + MLXSW_PCI_RX_BUF_SW_OVERHEAD;
+	pp_params.order = get_order(max_pkt_size);
+	pp_params.flags = PP_FLAG_DMA_MAP;
+	pp_params.pool_size = MLXSW_PCI_WQE_COUNT;
+	pp_params.nid = dev_to_node(&mlxsw_pci->pdev->dev);
+	pp_params.dev = &mlxsw_pci->pdev->dev;
+	pp_params.napi = &q->u.cq.napi;
+	pp_params.dma_dir = DMA_FROM_DEVICE;
+
+	page_pool = page_pool_create(&pp_params);
+	if (IS_ERR(page_pool))
+		return PTR_ERR(page_pool);
+
+	q->u.cq.page_pool = page_pool;
+	return 0;
+}
+
+static void mlxsw_pci_cq_page_pool_fini(struct mlxsw_pci_queue *q,
+					enum mlxsw_pci_cq_type cq_type)
+{
+	if (cq_type != MLXSW_PCI_CQ_RDQ)
+		return;
+
+	page_pool_destroy(q->u.cq.page_pool);
+}
+
 static int mlxsw_pci_cq_init(struct mlxsw_pci *mlxsw_pci, char *mbox,
 			     struct mlxsw_pci_queue *q)
 {
+	enum mlxsw_pci_cq_type cq_type = mlxsw_pci_cq_type(mlxsw_pci, q);
 	int i;
 	int err;

@@ -876,17 +922,29 @@ static int mlxsw_pci_cq_init(struct mlxsw_pci *mlxsw_pci, char *mbox,
 	err = mlxsw_cmd_sw2hw_cq(mlxsw_pci->core, mbox, q->num);
 	if (err)
 		return err;
-	mlxsw_pci_cq_napi_setup(q, mlxsw_pci_cq_type(mlxsw_pci, q));
+	mlxsw_pci_cq_napi_setup(q, cq_type);
+
+	err = mlxsw_pci_cq_page_pool_init(q, cq_type);
+	if (err)
+		goto err_page_pool_init;
+
 	napi_enable(&q->u.cq.napi);
 	mlxsw_pci_queue_doorbell_consumer_ring(mlxsw_pci, q);
 	mlxsw_pci_queue_doorbell_arm_consumer_ring(mlxsw_pci, q);
 	return 0;
+
+err_page_pool_init:
+	mlxsw_pci_cq_napi_teardown(q);
+	return err;
 }
 
 static void mlxsw_pci_cq_fini(struct mlxsw_pci *mlxsw_pci,
 			      struct mlxsw_pci_queue *q)
 {
+	enum mlxsw_pci_cq_type cq_type = mlxsw_pci_cq_type(mlxsw_pci, q);
+
 	napi_disable(&q->u.cq.napi);
+	mlxsw_pci_cq_page_pool_fini(q, cq_type);
 	mlxsw_pci_cq_napi_teardown(q);
 	mlxsw_cmd_hw2sw_cq(mlxsw_pci->core, q->num);
 }