Skip to content

Commit

Permalink
Optimise mbed_ticker_api.c
Browse files Browse the repository at this point in the history
The generic code in mbed_ticker_api.c uses run-time polymorphism to
handle different tickers, and has generic run-time calculations for
different ticker widths and frequencies, with a single special-case for
1MHz.

Extend the run-time special casing to handle any conversion cases where
either the multiply or divide can be done as a shift. This is a speed
optimisation for certain platforms.

Add a new option `target.custom-tickers`. If turned off, it promises
that only USTICKER and LPTICKER devices will be used. This then permits
elimination and/or simplification of runtime calculations, saving size
and speed. If USTICKER and LPTICKER have the same width, or the same
period numerator or denominator, or if only one of them exists, then the
corresponding operations can be hard-coded. This is a significant ROM space saving,
and a minor speed and RAM saving.

We get to optimise all the calculations, but the run-time polymorphism
is retained even if there is only one ticker, as it doesn't
significantly affect code size versus direct calls, and the existence of
lp_ticker_wrapper and various us_ticker optimisations requires it, even
if only LPTICKER is available.
  • Loading branch information
kjbracey committed May 4, 2020
1 parent 56396d6 commit ac06508
Show file tree
Hide file tree
Showing 3 changed files with 248 additions and 50 deletions.
239 changes: 192 additions & 47 deletions hal/mbed_ticker_api.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,95 @@
#include "platform/mbed_assert.h"
#include "platform/mbed_error.h"

#if !MBED_CONF_TARGET_CUSTOM_TICKERS
#include "us_ticker_api.h"
#include "lp_ticker_api.h"
#endif

// It's almost always worth avoiding division, but only worth avoiding
// multiplication on some cores.
#if defined(__CORTEX_M0) || defined(__CORTEX_M0PLUS) || defined(__CORTEX_M23)
#define SLOW_MULTIPLY 1
#else
#define SLOW_MULTIPLY 0
#endif

// Do we compute ratio from frequency, or can we always get it from defines?
#if MBED_CONF_TARGET_CUSTOM_TICKERS || (DEVICE_USTICKER && !defined US_TICKER_PERIOD_NUM) || (DEVICE_LPTICKER && !defined LP_TICKER_PERIOD_NUM)
#define COMPUTE_RATIO_FROM_FREQUENCY 1
#else
#define COMPUTE_RATIO_FROM_FREQUENCY 0
#endif

static void schedule_interrupt(const ticker_data_t *const ticker);
static void update_present_time(const ticker_data_t *const ticker);

/* Macros that either look up the info from mbed_ticker_queue_t, or give a constant.
* Some constants are defined during the definition of initialize, to keep the
* compile-time and run-time calculations alongside each other.
*/
#ifdef MBED_TICKER_CONSTANT_PERIOD_NUM
#define TICKER_PERIOD_NUM(queue) MBED_TICKER_CONSTANT_PERIOD_NUM
// don't bother doing this - rely on the compiler being able to convert "/ 2^k" to ">> k".
#define TICKER_PERIOD_NUM_SHIFTS(queue) (-1)
#else
#define TICKER_PERIOD_NUM(queue) ((queue)->period_num)
#define TICKER_PERIOD_NUM_SHIFTS(queue) ((queue)->period_num_shifts)
#endif

#ifdef MBED_TICKER_CONSTANT_PERIOD_DEN
#define TICKER_PERIOD_DEN(queue) MBED_TICKER_CONSTANT_PERIOD_DEN
#define TICKER_PERIOD_DEN_SHIFTS(queue) (-1)
#else
#define TICKER_PERIOD_DEN(queue) ((queue)->period_den)
#define TICKER_PERIOD_DEN_SHIFTS(queue) ((queue)->period_den_shifts)
#endif

// But the above can generate compiler warnings from `if (-1 >= 0) { x >>= -1; }`
#if defined ( __CC_ARM )
#pragma diag_suppress 62 // Shift count is negative
#elif defined ( __GNUC__ )
#pragma GCC diagnostic ignored "-Wshift-count-negative"
#elif defined (__ICCARM__)
#pragma diag_suppress=Pe062 // Shift count is negative
#endif

#ifdef MBED_TICKER_CONSTANT_MASK
#define TICKER_BITMASK(queue) MBED_TICKER_CONSTANT_MASK
#define TICKER_MAX_DELTA(queue) CONSTANT_MAX_DELTA
#else
#define TICKER_BITMASK(queue) ((queue)->bitmask)
#define TICKER_MAX_DELTA(queue) ((queue)->max_delta)
#endif

#if defined MBED_TICKER_CONSTANT_PERIOD && defined MBED_TICKER_CONSTANT_MASK
#define TICKER_MAX_DELTA_US(queue) CONSTANT_MAX_DELTA_US
#else
#define TICKER_MAX_DELTA_US(queue) ((queue)->max_delta_us)
#endif

#if COMPUTE_RATIO_FROM_FREQUENCY
/* Greatest common divisor of a and b (Euclid's algorithm).
 *
 * @param a  First operand.
 * @param b  Second operand.
 * @return   The largest value dividing both a and b. If one operand is 0 the
 *           other operand is returned; gcd(0, 0) yields 0.
 */
static inline uint32_t gcd(uint32_t a, uint32_t b)
{
    // Test b before taking the remainder: `a % 0` is undefined behaviour,
    // so a zero divisor must fall straight through and return a.
    while (b != 0) {
        uint32_t r = a % b;
        a = b;
        b = r;
    }
    return a;
}

/* Exact base-2 logarithm.
 *
 * @param n  Value to test.
 * @return   k if n == 2^k for some k in 0..31, otherwise -1
 *           (including n == 0).
 */
static int exact_log2(uint32_t n)
{
    // The loop must include i == 0 so that n == 1 maps to a shift of 0.
    // The tick conversion code compares the shift count against 0 to detect
    // a ratio term of 1 and skip the multiply/divide entirely.
    for (int i = 31; i >= 0; --i) {
        if (((uint32_t)1 << i) == n) {
            return i;
        }
    }
    return -1;
}
#endif

/*
* Initialize a ticker instance.
*/
Expand All @@ -40,9 +126,36 @@ static void initialize(const ticker_data_t *ticker)

ticker->interface->init();

#if MBED_TRAP_ERRORS_ENABLED || COMPUTE_RATIO_FROM_FREQUENCY || !defined MBED_TICKER_CONSTANT_MASK
const ticker_info_t *info = ticker->interface->get_info();
#endif

#if !MBED_CONF_TARGET_CUSTOM_TICKERS && MBED_TRAP_ERRORS_ENABLED
/* They must be passing us one of the well-known tickers. Check info
* rather than the data, to cope with the lp_ticker_wrapper. It doesn't count
* as a "custom ticker" for the purpose of this optimization.
*
* This check has the downside of potentially pulling in code for an unused ticker.
* This is minimized by using direct xxx_ticker_get_info() calls rather than
* `get_us_ticker_data()->interface->get_info()` which would pull in the entire system,
* and we wrap it in `MBED_TRAP_ERRORS_ENABLED`.
*/
#if DEVICE_USTICKER && DEVICE_LPTICKER
MBED_ASSERT(info == us_ticker_get_info() || info == lp_ticker_get_info());
#elif DEVICE_USTICKER
MBED_ASSERT(info == us_ticker_get_info());
#elif DEVICE_LPTICKER
MBED_ASSERT(info == lp_ticker_get_info());
#else
MBED_ASSERT(false);
#endif
#endif

#if COMPUTE_RATIO_FROM_FREQUENCY
// Will need to use frequency computation for at least some cases, so always do it
// to minimise code size.
uint32_t frequency = info->frequency;
if (info->frequency == 0) {
if (frequency == 0) {
#if MBED_TRAP_ERRORS_ENABLED
MBED_ERROR(
MBED_MAKE_ERROR(
Expand All @@ -56,16 +169,27 @@ static void initialize(const ticker_data_t *ticker)
#endif // MBED_TRAP_ERRORS_ENABLED
}

uint8_t frequency_shifts = 0;
for (uint8_t i = 31; i > 0; --i) {
if ((1U << i) == frequency) {
frequency_shifts = i;
break;
}
}

const uint32_t period_gcd = gcd(frequency, 1000000);
ticker->queue->period_num = 1000000 / period_gcd;
ticker->queue->period_num_shifts = exact_log2(ticker->queue->period_num);
ticker->queue->period_den = frequency / period_gcd;
ticker->queue->period_den_shifts = exact_log2(ticker->queue->period_den);
#elif !MBED_TICKER_CONSTANT_PERIOD
// Have ratio defines, but need to figure out which one applies.
// `runs_in_deep_sleep` is a viable proxy. (We have asserts above that
// check that they're only passing usticker or lpticker).
const bool is_usticker = !DEVICE_LPTICKER || !ticker->interface->runs_in_deep_sleep;
#ifndef MBED_TICKER_CONSTANT_PERIOD_NUM
ticker->queue->period_num = is_usticker ? US_TICKER_PERIOD_NUM : LP_TICKER_PERIOD_NUM;
#endif
#ifndef MBED_TICKER_CONSTANT_PERIOD_DEN
ticker->queue->period_den = is_usticker ? US_TICKER_PERIOD_DEN : LP_TICKER_PERIOD_DEN;
#endif
#endif // COMPUTE_RATIO_FROM_FREQUENCY / MBED_TICKER_CONSTANT_PERIOD

#ifndef MBED_TICKER_CONSTANT_MASK
uint32_t bits = info->bits;
if ((info->bits > 32) || (info->bits < 4)) {
if ((bits > 32) || (bits < 4)) {
#if MBED_TRAP_ERRORS_ENABLED
MBED_ERROR(
MBED_MAKE_ERROR(
Expand All @@ -78,19 +202,24 @@ static void initialize(const ticker_data_t *ticker)
bits = 32;
#endif // MBED_TRAP_ERRORS_ENABLED
}
uint32_t max_delta = 0x7 << (bits - 4); // 7/16th
uint64_t max_delta_us =
((uint64_t)max_delta * 1000000 + frequency - 1) / frequency;
ticker->queue->bitmask = bits == 32 ? 0xFFFFFFFF : (1U << bits) - 1;
ticker->queue->max_delta = 7 << (bits - 4); // 7/16th
#else // MBED_TICKER_CONSTANT_MASK
#define CONSTANT_MAX_DELTA (7 * ((MBED_TICKER_CONSTANT_MASK >> 4) + 1)) // 7/16th
#endif // MBED_TICKER_CONSTANT_MASK

#if !(defined MBED_TICKER_CONSTANT_PERIOD && defined MBED_TICKER_CONSTANT_MASK)
ticker->queue->max_delta_us =
((uint64_t)TICKER_MAX_DELTA(ticker->queue) * TICKER_PERIOD_NUM(ticker->queue) + TICKER_PERIOD_DEN(ticker->queue) - 1) / TICKER_PERIOD_DEN(ticker->queue);
#else
#define CONSTANT_MAX_DELTA_US \
(((uint64_t)CONSTANT_MAX_DELTA * MBED_TICKER_CONSTANT_PERIOD_NUM + MBED_TICKER_CONSTANT_PERIOD_DEN - 1) / MBED_TICKER_CONSTANT_PERIOD_DEN)
#endif

ticker->queue->event_handler = NULL;
ticker->queue->head = NULL;
ticker->queue->tick_last_read = ticker->interface->read();
ticker->queue->tick_remainder = 0;
ticker->queue->frequency = frequency;
ticker->queue->frequency_shifts = frequency_shifts;
ticker->queue->bitmask = ((uint64_t)1 << bits) - 1;
ticker->queue->max_delta = max_delta;
ticker->queue->max_delta_us = max_delta_us;
ticker->queue->present_time = 0;
ticker->queue->dispatching = false;
ticker->queue->suspended = false;
Expand Down Expand Up @@ -154,27 +283,31 @@ static void update_present_time(const ticker_data_t *const ticker)
return;
}

uint64_t elapsed_ticks = (ticker_time - queue->tick_last_read) & queue->bitmask;
uint32_t elapsed_ticks = (ticker_time - queue->tick_last_read) & TICKER_BITMASK(queue);
queue->tick_last_read = ticker_time;

// Convert elapsed_ticks to elapsed_us as (elapsed_ticks * period_num / period_den)
// adding in any remainder from the last division
uint64_t scaled_ticks;
if (SLOW_MULTIPLY && TICKER_PERIOD_NUM_SHIFTS(queue) >= 0) {
scaled_ticks = (uint64_t) elapsed_ticks << TICKER_PERIOD_NUM_SHIFTS(queue);
} else {
scaled_ticks = (uint64_t) elapsed_ticks * TICKER_PERIOD_NUM(queue);
}
uint64_t elapsed_us;
if (1000000 == queue->frequency) {
// Optimized for 1MHz

elapsed_us = elapsed_ticks;
if (TICKER_PERIOD_DEN_SHIFTS(queue) == 0) {
// Optimized for cases that don't need division
elapsed_us = scaled_ticks;
} else {
uint64_t us_x_ticks = elapsed_ticks * 1000000;
if (0 != queue->frequency_shifts) {
// Optimized for frequencies divisible by 2
elapsed_us = us_x_ticks >> queue->frequency_shifts;
queue->tick_remainder += us_x_ticks - (elapsed_us << queue->frequency_shifts);
scaled_ticks += queue->tick_remainder;
if (TICKER_PERIOD_DEN_SHIFTS(queue) >= 0) {
// Speed-optimised for shifts
elapsed_us = scaled_ticks >> TICKER_PERIOD_DEN_SHIFTS(queue);
queue->tick_remainder = scaled_ticks - (elapsed_us << TICKER_PERIOD_DEN_SHIFTS(queue));
} else {
elapsed_us = us_x_ticks / queue->frequency;
queue->tick_remainder += us_x_ticks - elapsed_us * queue->frequency;
}
if (queue->tick_remainder >= queue->frequency) {
elapsed_us += 1;
queue->tick_remainder -= queue->frequency;
// General case division
elapsed_us = scaled_ticks / TICKER_PERIOD_DEN(queue);
queue->tick_remainder = scaled_ticks - elapsed_us * TICKER_PERIOD_DEN(queue);
}
}

Expand All @@ -190,25 +323,37 @@ static timestamp_t compute_tick_round_up(const ticker_data_t *const ticker, us_t
ticker_event_queue_t *queue = ticker->queue;
us_timestamp_t delta_us = timestamp - queue->present_time;

timestamp_t delta = ticker->queue->max_delta;
if (delta_us <= ticker->queue->max_delta_us) {
timestamp_t delta = TICKER_MAX_DELTA(ticker->queue);
if (delta_us <= TICKER_MAX_DELTA_US(ticker->queue)) {
// Checking max_delta_us ensures the operation will not overflow

if (1000000 == queue->frequency) {
// Optimized for 1MHz
delta = delta_us;
} else if (0 != queue->frequency_shifts) {
// Optimized frequencies divisible by 2
delta = ((delta_us << ticker->queue->frequency_shifts) + 1000000 - 1) / 1000000;
// Convert delta_us to delta (ticks) as (delta_us * period_den / period_num)
// taking care to round up if num != 1
uint64_t scaled_delta;
if (SLOW_MULTIPLY && TICKER_PERIOD_DEN_SHIFTS(queue) >= 0) {
// Optimized denominators divisible by 2
scaled_delta = delta_us << TICKER_PERIOD_DEN_SHIFTS(queue);
} else {
// General case
delta = (delta_us * queue->frequency + 1000000 - 1) / 1000000;
scaled_delta = delta_us * TICKER_PERIOD_DEN(queue);
}
if (TICKER_PERIOD_NUM_SHIFTS(queue) == 0) {
delta = scaled_delta;
} else {
scaled_delta += TICKER_PERIOD_NUM(queue) - 1;
if (TICKER_PERIOD_NUM_SHIFTS(queue) >= 0) {
// Optimized numerators divisible by 2
delta = scaled_delta >> TICKER_PERIOD_NUM_SHIFTS(queue);
} else {
// General case
delta = scaled_delta / TICKER_PERIOD_NUM(queue);
}
}
if (delta > ticker->queue->max_delta) {
delta = ticker->queue->max_delta;
if (delta > TICKER_MAX_DELTA(queue)) {
delta = TICKER_MAX_DELTA(queue);
}
}
return (queue->tick_last_read + delta) & queue->bitmask;
return (queue->tick_last_read + delta) & TICKER_BITMASK(queue);
}

//NOTE: Must be called from critical section!
Expand Down Expand Up @@ -308,7 +453,7 @@ static void schedule_interrupt(const ticker_data_t *const ticker)
}
} else {
uint32_t match_tick =
(queue->tick_last_read + queue->max_delta) & queue->bitmask;
(queue->tick_last_read + TICKER_MAX_DELTA(queue)) & TICKER_BITMASK(queue);
ticker->interface->set_interrupt(match_tick);
}
}
Expand Down
55 changes: 52 additions & 3 deletions hal/ticker_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,22 +70,71 @@ typedef struct {
bool runs_in_deep_sleep; /**< Whether ticker operates in deep sleep */
} ticker_interface_t;

/* Optimizations to avoid run-time computation if custom ticker support is disabled and
* there is exactly one of USTICKER or LPTICKER available, or if they have the same
* parameter value(s).
*/
#define MBED_TICKER_JUST_US (!MBED_CONF_TARGET_CUSTOM_TICKERS && DEVICE_USTICKER && !DEVICE_LPTICKER)
#define MBED_TICKER_JUST_LP (!MBED_CONF_TARGET_CUSTOM_TICKERS && DEVICE_LPTICKER && !DEVICE_USTICKER)

#if (MBED_TICKER_JUST_US && defined US_TICKER_PERIOD_NUM) || \
(!MBED_CONF_TARGET_CUSTOM_TICKERS && defined US_TICKER_PERIOD_NUM && defined LP_TICKER_PERIOD_NUM && \
US_TICKER_PERIOD_NUM == LP_TICKER_PERIOD_NUM)
#define MBED_TICKER_CONSTANT_PERIOD_NUM US_TICKER_PERIOD_NUM
#elif MBED_TICKER_JUST_LP && defined LP_TICKER_PERIOD_NUM
#define MBED_TICKER_CONSTANT_PERIOD_NUM LP_TICKER_PERIOD_NUM
#endif

#if (MBED_TICKER_JUST_US && defined US_TICKER_PERIOD_DEN) || \
(!MBED_CONF_TARGET_CUSTOM_TICKERS && defined US_TICKER_PERIOD_DEN && defined LP_TICKER_PERIOD_DEN && \
US_TICKER_PERIOD_DEN == LP_TICKER_PERIOD_DEN)
#define MBED_TICKER_CONSTANT_PERIOD_DEN US_TICKER_PERIOD_DEN
#elif MBED_TICKER_JUST_LP && defined LP_TICKER_PERIOD_DEN
#define MBED_TICKER_CONSTANT_PERIOD_DEN LP_TICKER_PERIOD_DEN
#endif

#if defined MBED_TICKER_CONSTANT_PERIOD_NUM && defined MBED_TICKER_CONSTANT_PERIOD_DEN
#define MBED_TICKER_CONSTANT_PERIOD
#endif

#if (MBED_TICKER_JUST_US && defined US_TICKER_MASK) || \
(!MBED_CONF_TARGET_CUSTOM_TICKERS && defined US_TICKER_MASK && defined LP_TICKER_MASK && \
US_TICKER_MASK == LP_TICKER_MASK)
#define MBED_TICKER_CONSTANT_MASK US_TICKER_MASK
#elif MBED_TICKER_JUST_LP && defined LP_TICKER_MASK
#define MBED_TICKER_CONSTANT_MASK LP_TICKER_MASK
#endif

/** Ticker's event queue structure
*/
typedef struct {
ticker_event_handler event_handler; /**< Event handler */
ticker_event_t *head; /**< A pointer to head */
uint32_t frequency; /**< Frequency of the timer in Hz */
#ifndef MBED_TICKER_CONSTANT_PERIOD_NUM
uint32_t period_num; /**< Ratio of period to 1us, numerator */
#endif
#ifndef MBED_TICKER_CONSTANT_PERIOD_DEN
uint32_t period_den; /**< Ratio of period to 1us, denominator */
#endif
#ifndef MBED_TICKER_CONSTANT_MASK
uint32_t bitmask; /**< Mask to be applied to time values read */
uint32_t max_delta; /**< Largest delta in ticks that can be used when scheduling */
#endif
#if !(defined MBED_TICKER_CONSTANT_PERIOD && defined MBED_TICKER_CONSTANT_MASK)
uint64_t max_delta_us; /**< Largest delta in us that can be used when scheduling */
#endif
uint32_t tick_last_read; /**< Last tick read */
uint64_t tick_remainder; /**< Ticks that have not been added to base_time */
uint32_t tick_remainder; /**< Ticks that have not been added to base_time */
us_timestamp_t present_time; /**< Store the timestamp used for present time */
bool initialized; /**< Indicate if the instance is initialized */
bool dispatching; /**< The function ticker_irq_handler is dispatching */
bool suspended; /**< Indicate if the instance is suspended */
uint8_t frequency_shifts; /**< If frequency is a value of 2^n, this is n, otherwise 0 */
#ifndef MBED_TICKER_CONSTANT_PERIOD_NUM
int8_t period_num_shifts; /**< If numerator is a value of 2^n, this is n, otherwise -1 */
#endif
#ifndef MBED_TICKER_CONSTANT_PERIOD_DEN
int8_t period_den_shifts; /**< If denominator is a value of 2^n, this is n, otherwise -1 */
#endif
} ticker_event_queue_t;

/** Ticker's data structure
Expand Down
4 changes: 4 additions & 0 deletions targets/targets.json
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,10 @@
"help": "Initialize the microsecond ticker at boot rather than on first use, and leave it initialized. This speeds up wait_us in particular.",
"value": false
},
"custom-tickers": {
"help": "Support custom tickers in addition to USTICKER and LPTICKER. Turning this off can permit some space and speed optimisations, if characteristics of USTICKER and LPTICKER are known at compile time.",
"value": true
},
"xip-enable": {
"help": "Enable Execute In Place (XIP) on this target. Value is only significant if the board has executable external storage such as QSPIF. If this is enabled, customize the linker file to choose what text segments are placed on external storage",
"value": false
Expand Down

0 comments on commit ac06508

Please sign in to comment.