Skip to content

Commit

Permalink
spi: spi.c: Convert statistics to per-cpu u64_stats_t
Browse files Browse the repository at this point in the history
This change gives a dramatic performance improvement in the hot path,
since many costly spin_lock_irqsave() calls can be avoided.

On an i.MX8MM system with an MCP2518FD CAN controller connected via SPI,
the time the driver takes to handle interrupts (in other words, the time
the IRQ line of the CAN controller stays low) is dominated mainly by the
time it takes to do 3 relatively short sync SPI transfers. The effect of
this patch is a reduction of this time from 136us down to only 98us.

Suggested-by: Andrew Lunn <[email protected]>
Signed-off-by: David Jander <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Mark Brown <[email protected]>
  • Loading branch information
yope authored and Kaz205 committed Aug 14, 2022
1 parent 9f55ccf commit eb3ec43
Show file tree
Hide file tree
Showing 2 changed files with 127 additions and 68 deletions.
143 changes: 98 additions & 45 deletions drivers/spi/spi.c
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
#include <linux/idr.h>
#include <linux/platform_data/x86/apple.h>
#include <linux/ptp_clock_kernel.h>
#include <linux/percpu.h>

#define CREATE_TRACE_POINTS
#include <trace/events/spi.h>
Expand All @@ -49,6 +50,7 @@ static void spidev_release(struct device *dev)

spi_controller_put(spi->controller);
kfree(spi->driver_override);
free_percpu(spi->pcpu_statistics);
kfree(spi);
}

Expand Down Expand Up @@ -93,14 +95,55 @@ static ssize_t driver_override_show(struct device *dev,
}
static DEVICE_ATTR_RW(driver_override);

/**
 * spi_alloc_pcpu_stats - allocate and initialise per-CPU SPI statistics
 * @dev: device the allocation is tied to, or NULL
 *
 * With a non-NULL @dev the storage comes from devm_alloc_percpu(); with a
 * NULL @dev it comes from alloc_percpu_gfp(GFP_KERNEL), in which case the
 * owner has to hand it back with free_percpu() itself (spidev_release()
 * does so for SPI devices). Before returning, the u64_stats sync point of
 * every possible CPU's copy is initialised.
 *
 * The return type carries __percpu so that the address-space annotation is
 * not silently dropped between the allocator and the __percpu fields the
 * result is stored in.
 *
 * Return: the per-CPU pointer, or NULL if the allocation failed.
 */
static struct spi_statistics __percpu *spi_alloc_pcpu_stats(struct device *dev)
{
	struct spi_statistics __percpu *pcpu_stats;
	int cpu;

	if (dev)
		pcpu_stats = devm_alloc_percpu(dev, struct spi_statistics);
	else
		pcpu_stats = alloc_percpu_gfp(struct spi_statistics, GFP_KERNEL);

	if (!pcpu_stats)
		return NULL;

	for_each_possible_cpu(cpu) {
		struct spi_statistics *stat = per_cpu_ptr(pcpu_stats, cpu);

		u64_stats_init(&stat->syncp);
	}

	return pcpu_stats;
}

/*
 * spi_pcpu_stats_totalize - sum one statistics counter over all possible CPUs
 * @ret:   u64 lvalue that receives the total (it is reset to 0 first)
 * @in:    per-CPU pointer to struct spi_statistics
 * @field: member of struct spi_statistics to add up (may include an array
 *         subscript, e.g. transfer_bytes_histo[3])
 *
 * Each CPU's value is read between u64_stats_fetch_begin_irq() and
 * u64_stats_fetch_retry_irq() on that CPU's syncp, and is read again for as
 * long as the retry check reports that it must be.
 *
 * All locals are __-prefixed so that they cannot capture an identifier that
 * appears in one of the macro arguments.
 */
#define spi_pcpu_stats_totalize(ret, in, field)				\
do {									\
	int __cpu;							\
	(ret) = 0;							\
	for_each_possible_cpu(__cpu) {					\
		const struct spi_statistics *__pcpu =			\
			per_cpu_ptr((in), __cpu);			\
		unsigned int __start;					\
		u64 __val;						\
		do {							\
			__start = u64_stats_fetch_begin_irq(		\
					&__pcpu->syncp);		\
			__val = u64_stats_read(&__pcpu->field);		\
		} while (u64_stats_fetch_retry_irq(			\
					&__pcpu->syncp, __start));	\
		(ret) += __val;						\
	}								\
} while (0)

#define SPI_STATISTICS_ATTRS(field, file) \
static ssize_t spi_controller_##field##_show(struct device *dev, \
struct device_attribute *attr, \
char *buf) \
{ \
struct spi_controller *ctlr = container_of(dev, \
struct spi_controller, dev); \
return spi_statistics_##field##_show(&ctlr->statistics, buf); \
return spi_statistics_##field##_show(ctlr->pcpu_statistics, buf); \
} \
static struct device_attribute dev_attr_spi_controller_##field = { \
.attr = { .name = file, .mode = 0444 }, \
Expand All @@ -111,47 +154,46 @@ static ssize_t spi_device_##field##_show(struct device *dev, \
char *buf) \
{ \
struct spi_device *spi = to_spi_device(dev); \
return spi_statistics_##field##_show(&spi->statistics, buf); \
return spi_statistics_##field##_show(spi->pcpu_statistics, buf); \
} \
static struct device_attribute dev_attr_spi_device_##field = { \
.attr = { .name = file, .mode = 0444 }, \
.show = spi_device_##field##_show, \
}

#define SPI_STATISTICS_SHOW_NAME(name, file, field, format_string) \
#define SPI_STATISTICS_SHOW_NAME(name, file, field) \
static ssize_t spi_statistics_##name##_show(struct spi_statistics *stat, \
char *buf) \
{ \
unsigned long flags; \
ssize_t len; \
spin_lock_irqsave(&stat->lock, flags); \
len = sysfs_emit(buf, format_string "\n", stat->field); \
spin_unlock_irqrestore(&stat->lock, flags); \
u64 val; \
spi_pcpu_stats_totalize(val, stat, field); \
len = sysfs_emit(buf, "%llu\n", val); \
return len; \
} \
SPI_STATISTICS_ATTRS(name, file)

#define SPI_STATISTICS_SHOW(field, format_string) \
#define SPI_STATISTICS_SHOW(field) \
SPI_STATISTICS_SHOW_NAME(field, __stringify(field), \
field, format_string)
field)

SPI_STATISTICS_SHOW(messages, "%lu");
SPI_STATISTICS_SHOW(transfers, "%lu");
SPI_STATISTICS_SHOW(errors, "%lu");
SPI_STATISTICS_SHOW(timedout, "%lu");
SPI_STATISTICS_SHOW(messages);
SPI_STATISTICS_SHOW(transfers);
SPI_STATISTICS_SHOW(errors);
SPI_STATISTICS_SHOW(timedout);

SPI_STATISTICS_SHOW(spi_sync, "%lu");
SPI_STATISTICS_SHOW(spi_sync_immediate, "%lu");
SPI_STATISTICS_SHOW(spi_async, "%lu");
SPI_STATISTICS_SHOW(spi_sync);
SPI_STATISTICS_SHOW(spi_sync_immediate);
SPI_STATISTICS_SHOW(spi_async);

SPI_STATISTICS_SHOW(bytes, "%llu");
SPI_STATISTICS_SHOW(bytes_rx, "%llu");
SPI_STATISTICS_SHOW(bytes_tx, "%llu");
SPI_STATISTICS_SHOW(bytes);
SPI_STATISTICS_SHOW(bytes_rx);
SPI_STATISTICS_SHOW(bytes_tx);

#define SPI_STATISTICS_TRANSFER_BYTES_HISTO(index, number) \
SPI_STATISTICS_SHOW_NAME(transfer_bytes_histo##index, \
"transfer_bytes_histo_" number, \
transfer_bytes_histo[index], "%lu")
transfer_bytes_histo[index])
SPI_STATISTICS_TRANSFER_BYTES_HISTO(0, "0-1");
SPI_STATISTICS_TRANSFER_BYTES_HISTO(1, "2-3");
SPI_STATISTICS_TRANSFER_BYTES_HISTO(2, "4-7");
Expand All @@ -170,7 +212,7 @@ SPI_STATISTICS_TRANSFER_BYTES_HISTO(14, "16384-32767");
SPI_STATISTICS_TRANSFER_BYTES_HISTO(15, "32768-65535");
SPI_STATISTICS_TRANSFER_BYTES_HISTO(16, "65536+");

SPI_STATISTICS_SHOW(transfers_split_maxsize, "%lu");
SPI_STATISTICS_SHOW(transfers_split_maxsize);

static struct attribute *spi_dev_attrs[] = {
&dev_attr_modalias.attr,
Expand Down Expand Up @@ -267,30 +309,30 @@ static const struct attribute_group *spi_master_groups[] = {
NULL,
};

static void spi_statistics_add_transfer_stats(struct spi_statistics *stats,
static void spi_statistics_add_transfer_stats(struct spi_statistics *pcpu_stats,
struct spi_transfer *xfer,
struct spi_controller *ctlr)
{
unsigned long flags;
int l2len = min(fls(xfer->len), SPI_STATISTICS_HISTO_SIZE) - 1;
struct spi_statistics *stats = this_cpu_ptr(pcpu_stats);

if (l2len < 0)
l2len = 0;

spin_lock_irqsave(&stats->lock, flags);
u64_stats_update_begin(&stats->syncp);

stats->transfers++;
stats->transfer_bytes_histo[l2len]++;
u64_stats_inc(&stats->transfers);
u64_stats_inc(&stats->transfer_bytes_histo[l2len]);

stats->bytes += xfer->len;
u64_stats_add(&stats->bytes, xfer->len);
if ((xfer->tx_buf) &&
(xfer->tx_buf != ctlr->dummy_tx))
stats->bytes_tx += xfer->len;
u64_stats_add(&stats->bytes_tx, xfer->len);
if ((xfer->rx_buf) &&
(xfer->rx_buf != ctlr->dummy_rx))
stats->bytes_rx += xfer->len;
u64_stats_add(&stats->bytes_rx, xfer->len);

spin_unlock_irqrestore(&stats->lock, flags);
u64_stats_update_end(&stats->syncp);
}

/*
Expand Down Expand Up @@ -519,14 +561,19 @@ struct spi_device *spi_alloc_device(struct spi_controller *ctlr)
return NULL;
}

spi->pcpu_statistics = spi_alloc_pcpu_stats(NULL);
if (!spi->pcpu_statistics) {
kfree(spi);
spi_controller_put(ctlr);
return NULL;
}

spi->master = spi->controller = ctlr;
spi->dev.parent = &ctlr->dev;
spi->dev.bus = &spi_bus_type;
spi->dev.release = spidev_release;
spi->mode = ctlr->buswidth_override_bits;

spin_lock_init(&spi->statistics.lock);

device_initialize(&spi->dev);
return spi;
}
Expand Down Expand Up @@ -1225,8 +1272,8 @@ static int spi_transfer_wait(struct spi_controller *ctlr,
struct spi_message *msg,
struct spi_transfer *xfer)
{
struct spi_statistics *statm = &ctlr->statistics;
struct spi_statistics *stats = &msg->spi->statistics;
struct spi_statistics *statm = ctlr->pcpu_statistics;
struct spi_statistics *stats = msg->spi->pcpu_statistics;
u32 speed_hz = xfer->speed_hz;
unsigned long long ms;

Expand Down Expand Up @@ -1382,8 +1429,8 @@ static int spi_transfer_one_message(struct spi_controller *ctlr,
struct spi_transfer *xfer;
bool keep_cs = false;
int ret = 0;
struct spi_statistics *statm = &ctlr->statistics;
struct spi_statistics *stats = &msg->spi->statistics;
struct spi_statistics *statm = ctlr->pcpu_statistics;
struct spi_statistics *stats = msg->spi->pcpu_statistics;

spi_set_cs(msg->spi, true, false);

Expand Down Expand Up @@ -3050,7 +3097,11 @@ int spi_register_controller(struct spi_controller *ctlr)
}
}
/* add statistics */
spin_lock_init(&ctlr->statistics.lock);
ctlr->pcpu_statistics = spi_alloc_pcpu_stats(dev);
if (!ctlr->pcpu_statistics) {
dev_err(dev, "Error allocating per-cpu statistics\n");
goto destroy_queue;
}

mutex_lock(&board_lock);
list_add_tail(&ctlr->list, &spi_controller_list);
Expand All @@ -3063,6 +3114,8 @@ int spi_register_controller(struct spi_controller *ctlr)
acpi_register_spi_devices(ctlr);
return status;

destroy_queue:
spi_destroy_queue(ctlr);
free_bus_id:
mutex_lock(&board_lock);
idr_remove(&spi_master_idr, ctlr->bus_num);
Expand Down Expand Up @@ -3388,9 +3441,9 @@ static int __spi_split_transfer_maxsize(struct spi_controller *ctlr,
*xferp = &xfers[count - 1];

/* increment statistics counters */
SPI_STATISTICS_INCREMENT_FIELD(&ctlr->statistics,
SPI_STATISTICS_INCREMENT_FIELD(ctlr->pcpu_statistics,
transfers_split_maxsize);
SPI_STATISTICS_INCREMENT_FIELD(&msg->spi->statistics,
SPI_STATISTICS_INCREMENT_FIELD(msg->spi->pcpu_statistics,
transfers_split_maxsize);

return 0;
Expand Down Expand Up @@ -3781,8 +3834,8 @@ static int __spi_async(struct spi_device *spi, struct spi_message *message)

message->spi = spi;

SPI_STATISTICS_INCREMENT_FIELD(&ctlr->statistics, spi_async);
SPI_STATISTICS_INCREMENT_FIELD(&spi->statistics, spi_async);
SPI_STATISTICS_INCREMENT_FIELD(ctlr->pcpu_statistics, spi_async);
SPI_STATISTICS_INCREMENT_FIELD(spi->pcpu_statistics, spi_async);

trace_spi_message_submit(message);

Expand Down Expand Up @@ -3929,8 +3982,8 @@ static int __spi_sync(struct spi_device *spi, struct spi_message *message)
message->context = &done;
message->spi = spi;

SPI_STATISTICS_INCREMENT_FIELD(&ctlr->statistics, spi_sync);
SPI_STATISTICS_INCREMENT_FIELD(&spi->statistics, spi_sync);
SPI_STATISTICS_INCREMENT_FIELD(ctlr->pcpu_statistics, spi_sync);
SPI_STATISTICS_INCREMENT_FIELD(spi->pcpu_statistics, spi_sync);

/*
* If we're not using the legacy transfer method then we will
Expand All @@ -3953,9 +4006,9 @@ static int __spi_sync(struct spi_device *spi, struct spi_message *message)
if (status == 0) {
/* Push out the messages in the calling context if we can */
if (ctlr->transfer == spi_queued_transfer) {
SPI_STATISTICS_INCREMENT_FIELD(&ctlr->statistics,
SPI_STATISTICS_INCREMENT_FIELD(ctlr->pcpu_statistics,
spi_sync_immediate);
SPI_STATISTICS_INCREMENT_FIELD(&spi->statistics,
SPI_STATISTICS_INCREMENT_FIELD(spi->pcpu_statistics,
spi_sync_immediate);
__spi_pump_messages(ctlr, false);
}
Expand Down
52 changes: 29 additions & 23 deletions include/linux/spi/spi.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

#include <uapi/linux/spi/spi.h>
#include <linux/acpi.h>
#include <linux/u64_stats_sync.h>

struct dma_chan;
struct software_node;
Expand Down Expand Up @@ -59,37 +60,42 @@ extern struct bus_type spi_bus_type;
* maxsize limit
*/
struct spi_statistics {
spinlock_t lock; /* lock for the whole structure */
struct u64_stats_sync syncp;

unsigned long messages;
unsigned long transfers;
unsigned long errors;
unsigned long timedout;
u64_stats_t messages;
u64_stats_t transfers;
u64_stats_t errors;
u64_stats_t timedout;

unsigned long spi_sync;
unsigned long spi_sync_immediate;
unsigned long spi_async;
u64_stats_t spi_sync;
u64_stats_t spi_sync_immediate;
u64_stats_t spi_async;

unsigned long long bytes;
unsigned long long bytes_rx;
unsigned long long bytes_tx;
u64_stats_t bytes;
u64_stats_t bytes_rx;
u64_stats_t bytes_tx;

#define SPI_STATISTICS_HISTO_SIZE 17
unsigned long transfer_bytes_histo[SPI_STATISTICS_HISTO_SIZE];
u64_stats_t transfer_bytes_histo[SPI_STATISTICS_HISTO_SIZE];

unsigned long transfers_split_maxsize;
u64_stats_t transfers_split_maxsize;
};

#define SPI_STATISTICS_ADD_TO_FIELD(stats, field, count) \
do { \
unsigned long flags; \
spin_lock_irqsave(&(stats)->lock, flags); \
(stats)->field += count; \
spin_unlock_irqrestore(&(stats)->lock, flags); \
/*
 * SPI_STATISTICS_ADD_TO_FIELD - add @count to one per-CPU statistics counter
 * @pcpu_stats: per-CPU pointer to struct spi_statistics (evaluated twice,
 *              so it must be free of side effects)
 * @field:      name of the u64_stats_t member to update
 * @count:      amount to add
 *
 * The local CPU's copy is obtained with get_cpu_ptr(), which disables
 * preemption until the matching put_cpu_ptr(). The task therefore cannot
 * migrate to another CPU, or be preempted by another writer on this CPU,
 * between u64_stats_update_begin() and u64_stats_update_end().
 */
#define SPI_STATISTICS_ADD_TO_FIELD(pcpu_stats, field, count)		\
	do {								\
		struct spi_statistics *__lstats =			\
			get_cpu_ptr(pcpu_stats);			\
		u64_stats_update_begin(&__lstats->syncp);		\
		u64_stats_add(&__lstats->field, (count));		\
		u64_stats_update_end(&__lstats->syncp);			\
		put_cpu_ptr(pcpu_stats);				\
	} while (0)

#define SPI_STATISTICS_INCREMENT_FIELD(stats, field) \
SPI_STATISTICS_ADD_TO_FIELD(stats, field, 1)
/*
 * SPI_STATISTICS_INCREMENT_FIELD - add one to a per-CPU statistics counter
 * @pcpu_stats: per-CPU pointer to struct spi_statistics (evaluated twice,
 *              so it must be free of side effects)
 * @field:      name of the u64_stats_t member to update
 *
 * The local CPU's copy is obtained with get_cpu_ptr(), which disables
 * preemption until the matching put_cpu_ptr(). The task therefore cannot
 * migrate to another CPU, or be preempted by another writer on this CPU,
 * between u64_stats_update_begin() and u64_stats_update_end().
 */
#define SPI_STATISTICS_INCREMENT_FIELD(pcpu_stats, field)		\
	do {								\
		struct spi_statistics *__lstats =			\
			get_cpu_ptr(pcpu_stats);			\
		u64_stats_update_begin(&__lstats->syncp);		\
		u64_stats_inc(&__lstats->field);			\
		u64_stats_update_end(&__lstats->syncp);			\
		put_cpu_ptr(pcpu_stats);				\
	} while (0)

/**
* struct spi_delay - SPI delay information
Expand Down Expand Up @@ -194,7 +200,7 @@ struct spi_device {
struct spi_delay cs_inactive;

/* the statistics */
struct spi_statistics statistics;
struct spi_statistics __percpu *pcpu_statistics;

/*
* likely need more hooks for more protocol options affecting how
Expand Down Expand Up @@ -647,7 +653,7 @@ struct spi_controller {
s8 max_native_cs;

/* statistics */
struct spi_statistics statistics;
struct spi_statistics __percpu *pcpu_statistics;

/* DMA channels for use with core dmaengine helpers */
struct dma_chan *dma_tx;
Expand Down

0 comments on commit eb3ec43

Please sign in to comment.