net: mvpp2: software tso support
This patch uses the kernel's generic TSO API (<net/tso.h>, sketched below) to
implement software TSO in the Marvell PPv2 driver.

With iperf on 10G ports, enabling TSO roughly doubles TX throughput, reaching
around 9.5 Gbps, while significantly reducing CPU usage (from 25% to 15%).

Signed-off-by: Antoine Tenart <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
atenart authored and davem330 committed Aug 24, 2017
1 parent 85affd7 commit 186cd4d
Showing 1 changed file with 157 additions and 14 deletions.
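For readers unfamiliar with the generic TSO helpers, here is a minimal sketch
of the <net/tso.h> flow this driver builds on (4.13-era API, where tso_start()
returns void). The xmit_hdr() and xmit_data() hooks and the hdr_buf parameter
are hypothetical stand-ins for driver-specific descriptor handling; in the
diff below those roles are played by mvpp2_tso_put_hdr() and
mvpp2_tso_put_data().

#include <linux/skbuff.h>
#include <net/tcp.h>
#include <net/tso.h>

/* Hypothetical driver hooks standing in for descriptor handling. */
void xmit_hdr(char *hdr, int hdr_len);
void xmit_data(void *data, int size);

static void sketch_tx_tso(struct sk_buff *skb, char *hdr_buf)
{
	int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
	int left = skb->len - hdr_len;	/* total payload to segment */
	struct tso_t tso;

	tso_start(skb, &tso);
	while (left > 0) {
		/* Each segment carries at most gso_size bytes of payload */
		int seg = min_t(int, skb_shinfo(skb)->gso_size, left);

		left -= seg;
		/* Rebuild the Ethernet/IP/TCP headers for this segment */
		tso_build_hdr(skb, hdr_buf, &tso, seg, left == 0);
		xmit_hdr(hdr_buf, hdr_len);

		while (seg > 0) {
			/* Payload may span skb frags; tso.data/tso.size
			 * track the current contiguous chunk.
			 */
			int sz = min_t(int, tso.size, seg);

			seg -= sz;
			xmit_data(tso.data, sz);
			tso_build_data(skb, &tso, sz);	/* advance cursor */
		}
	}
}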
171 changes: 157 additions & 14 deletions drivers/net/ethernet/marvell/mvpp2.c
@@ -35,6 +35,7 @@
#include <uapi/linux/ppp_defs.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/tso.h>

/* RX Fifo Registers */
#define MVPP2_RX_DATA_FIFO_SIZE_REG(port) (0x00 + 4 * (port))
@@ -1010,6 +1011,10 @@ struct mvpp2_txq_pcpu {

/* Index of the TX DMA descriptor to be cleaned up */
int txq_get_index;

/* DMA buffer for TSO headers */
char *tso_headers;
dma_addr_t tso_headers_dma;
};

struct mvpp2_tx_queue {
@@ -5494,13 +5499,26 @@ static int mvpp2_txq_init(struct mvpp2_port *port,
txq_pcpu->reserved_num = 0;
txq_pcpu->txq_put_index = 0;
txq_pcpu->txq_get_index = 0;

txq_pcpu->tso_headers =
dma_alloc_coherent(port->dev->dev.parent,
txq_pcpu->size * TSO_HEADER_SIZE,
&txq_pcpu->tso_headers_dma,
GFP_KERNEL);
if (!txq_pcpu->tso_headers)
goto cleanup;
}

return 0;
cleanup:
for_each_present_cpu(cpu) {
txq_pcpu = per_cpu_ptr(txq->pcpu, cpu);
kfree(txq_pcpu->buffs);

dma_free_coherent(port->dev->dev.parent,
txq_pcpu->size * TSO_HEADER_SIZE,
txq_pcpu->tso_headers,
txq_pcpu->tso_headers_dma);
}

dma_free_coherent(port->dev->dev.parent,
@@ -5520,6 +5538,11 @@ static void mvpp2_txq_deinit(struct mvpp2_port *port,
for_each_present_cpu(cpu) {
txq_pcpu = per_cpu_ptr(txq->pcpu, cpu);
kfree(txq_pcpu->buffs);

dma_free_coherent(port->dev->dev.parent,
txq_pcpu->size * TSO_HEADER_SIZE,
txq_pcpu->tso_headers,
txq_pcpu->tso_headers_dma);
}

if (txq->descs)
@@ -6049,6 +6072,123 @@ static int mvpp2_tx_frag_process(struct mvpp2_port *port, struct sk_buff *skb,
return -ENOMEM;
}

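/* Build the TX descriptor for a segment header; the header itself is
 * written by tso_build_hdr() into this CPU's TSO header buffer at the
 * current put index.
 */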
static inline void mvpp2_tso_put_hdr(struct sk_buff *skb,
struct net_device *dev,
struct mvpp2_tx_queue *txq,
struct mvpp2_tx_queue *aggr_txq,
struct mvpp2_txq_pcpu *txq_pcpu,
int hdr_sz)
{
struct mvpp2_port *port = netdev_priv(dev);
struct mvpp2_tx_desc *tx_desc = mvpp2_txq_next_desc_get(aggr_txq);
dma_addr_t addr;

mvpp2_txdesc_txq_set(port, tx_desc, txq->id);
mvpp2_txdesc_size_set(port, tx_desc, hdr_sz);

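	/* The descriptor takes an aligned buffer address plus a separate
	 * byte offset, so split the header slot address accordingly.
	 */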
addr = txq_pcpu->tso_headers_dma +
txq_pcpu->txq_put_index * TSO_HEADER_SIZE;
mvpp2_txdesc_offset_set(port, tx_desc, addr & MVPP2_TX_DESC_ALIGN);
mvpp2_txdesc_dma_addr_set(port, tx_desc, addr & ~MVPP2_TX_DESC_ALIGN);

mvpp2_txdesc_cmd_set(port, tx_desc, mvpp2_skb_tx_csum(port, skb) |
MVPP2_TXD_F_DESC |
MVPP2_TXD_PADDING_DISABLE);
mvpp2_txq_inc_put(port, txq_pcpu, NULL, tx_desc);
}

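/* Build a TX descriptor for one chunk of segment payload, DMA-mapped
 * straight from the skb data. The hardware LAST flag is set on the final
 * chunk of each segment; the skb pointer is only recorded with the final
 * chunk of the whole packet, so the skb is freed once on TX completion.
 */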
static inline int mvpp2_tso_put_data(struct sk_buff *skb,
struct net_device *dev, struct tso_t *tso,
struct mvpp2_tx_queue *txq,
struct mvpp2_tx_queue *aggr_txq,
struct mvpp2_txq_pcpu *txq_pcpu,
int sz, bool left, bool last)
{
struct mvpp2_port *port = netdev_priv(dev);
struct mvpp2_tx_desc *tx_desc = mvpp2_txq_next_desc_get(aggr_txq);
dma_addr_t buf_dma_addr;

mvpp2_txdesc_txq_set(port, tx_desc, txq->id);
mvpp2_txdesc_size_set(port, tx_desc, sz);

buf_dma_addr = dma_map_single(dev->dev.parent, tso->data, sz,
DMA_TO_DEVICE);
if (unlikely(dma_mapping_error(dev->dev.parent, buf_dma_addr))) {
mvpp2_txq_desc_put(txq);
return -ENOMEM;
}

mvpp2_txdesc_offset_set(port, tx_desc,
buf_dma_addr & MVPP2_TX_DESC_ALIGN);
mvpp2_txdesc_dma_addr_set(port, tx_desc,
buf_dma_addr & ~MVPP2_TX_DESC_ALIGN);

if (!left) {
mvpp2_txdesc_cmd_set(port, tx_desc, MVPP2_TXD_L_DESC);
if (last) {
mvpp2_txq_inc_put(port, txq_pcpu, skb, tx_desc);
return 0;
}
} else {
mvpp2_txdesc_cmd_set(port, tx_desc, 0);
}

mvpp2_txq_inc_put(port, txq_pcpu, NULL, tx_desc);
return 0;
}

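/* Transmit a GSO packet as a chain of per-segment header and payload
 * descriptors, using the generic TSO helpers. Returns the number of
 * descriptors used, or 0 on error.
 */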
static int mvpp2_tx_tso(struct sk_buff *skb, struct net_device *dev,
struct mvpp2_tx_queue *txq,
struct mvpp2_tx_queue *aggr_txq,
struct mvpp2_txq_pcpu *txq_pcpu)
{
struct mvpp2_port *port = netdev_priv(dev);
struct tso_t tso;
int hdr_sz = skb_transport_offset(skb) + tcp_hdrlen(skb);
int i, len, descs = 0;

/* Check number of available descriptors */
if (mvpp2_aggr_desc_num_check(port->priv, aggr_txq,
tso_count_descs(skb)) ||
mvpp2_txq_reserved_desc_num_proc(port->priv, txq, txq_pcpu,
tso_count_descs(skb)))
return 0;

tso_start(skb, &tso);
len = skb->len - hdr_sz;
while (len > 0) {
int left = min_t(int, skb_shinfo(skb)->gso_size, len);
char *hdr = txq_pcpu->tso_headers +
txq_pcpu->txq_put_index * TSO_HEADER_SIZE;

len -= left;
descs++;

tso_build_hdr(skb, hdr, &tso, left, len == 0);
mvpp2_tso_put_hdr(skb, dev, txq, aggr_txq, txq_pcpu, hdr_sz);

while (left > 0) {
int sz = min_t(int, tso.size, left);
left -= sz;
descs++;

if (mvpp2_tso_put_data(skb, dev, &tso, txq, aggr_txq,
txq_pcpu, sz, left, len == 0))
goto release;
tso_build_data(skb, &tso, sz);
}
}

return descs;

release:
for (i = descs - 1; i >= 0; i--) {
struct mvpp2_tx_desc *tx_desc = txq->descs + i;
tx_desc_unmap_put(port, txq, tx_desc);
}
return 0;
}

/* Main tx processing */
static int mvpp2_tx(struct sk_buff *skb, struct net_device *dev)
{
@@ -6066,6 +6206,10 @@ static int mvpp2_tx(struct sk_buff *skb, struct net_device *dev)
txq_pcpu = this_cpu_ptr(txq->pcpu);
aggr_txq = &port->priv->aggr_txqs[smp_processor_id()];

if (skb_is_gso(skb)) {
frags = mvpp2_tx_tso(skb, dev, txq, aggr_txq, txq_pcpu);
goto out;
}
frags = skb_shinfo(skb)->nr_frags + 1;

/* Check number of available descriptors */
@@ -6115,22 +6259,21 @@ static int mvpp2_tx(struct sk_buff *skb, struct net_device *dev)
}
}

-	txq_pcpu->reserved_num -= frags;
-	txq_pcpu->count += frags;
-	aggr_txq->count += frags;
-
-	/* Enable transmit */
-	wmb();
-	mvpp2_aggr_txq_pend_desc_add(port, frags);
-
-	if (txq_pcpu->size - txq_pcpu->count < MAX_SKB_FRAGS + 1) {
-		struct netdev_queue *nq = netdev_get_tx_queue(dev, txq_id);
-
-		netif_tx_stop_queue(nq);
-	}
out:
if (frags > 0) {
struct mvpp2_pcpu_stats *stats = this_cpu_ptr(port->stats);
+		struct netdev_queue *nq = netdev_get_tx_queue(dev, txq_id);
+
+		txq_pcpu->reserved_num -= frags;
+		txq_pcpu->count += frags;
+		aggr_txq->count += frags;
+
+		/* Enable transmit */
+		wmb();
+		mvpp2_aggr_txq_pend_desc_add(port, frags);
+
+		if (txq_pcpu->size - txq_pcpu->count < MAX_SKB_FRAGS + 1)
+			netif_tx_stop_queue(nq);

u64_stats_update_begin(&stats->syncp);
stats->tx_packets++;
@@ -7255,7 +7398,7 @@ static int mvpp2_port_probe(struct platform_device *pdev,
}
}

-	features = NETIF_F_SG | NETIF_F_IP_CSUM;
+	features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO;
dev->features = features | NETIF_F_RXCSUM;
dev->hw_features |= features | NETIF_F_RXCSUM | NETIF_F_GRO;
dev->vlan_features |= features;
