Skip to content

Commit

Permalink
Support off-CPU profiling
Browse files Browse the repository at this point in the history
When we encounter a perf data file that was recorded with

    perf record --switch-events -e sched:sched_switch --call-graph dwarf

then we now take the off-CPU time from the switch events and
the call stack from the sched_switch events to support off-CPU
profiling. I.e. we will show both the number of scheduler switches
in the sched:sched_switch column, as well as the off-CPU time in
a new column.

In the timeline, the off-CPU time is always highlighted in red and
can be hovered to inspect the sched switch delay.

Fixes #8
  • Loading branch information
milianw committed Dec 29, 2017
1 parent 9e148c9 commit 2a8de11
Show file tree
Hide file tree
Showing 5 changed files with 170 additions and 27 deletions.
1 change: 1 addition & 0 deletions src/models/data.h
Original file line number Diff line number Diff line change
Expand Up @@ -516,6 +516,7 @@ struct EventResults
QVector<ThreadEvents> threads;
QVector<QVector<qint32>> stacks;
QVector<CostSummary> totalCosts;
qint32 offCpuTimeCostId = -1;

ThreadEvents* findThread(qint32 pid, qint32 tid);
};
Expand Down
81 changes: 66 additions & 15 deletions src/models/timelinedelegate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,7 @@ void TimeLineDelegate::paint(QPainter* painter, const QStyleOptionViewItem& opti
const QModelIndex& index) const
{
const auto data = dataFromIndex(index, option.rect, m_zoomStack);
const auto offCpuCostId = index.data(EventModel::EventResultsRole).value<Data::EventResults>().offCpuTimeCostId;
const bool is_alternate = option.features & QStyleOptionViewItem::Alternate;
const auto& palette = option.palette;

Expand Down Expand Up @@ -192,9 +193,21 @@ void TimeLineDelegate::paint(QPainter* painter, const QStyleOptionViewItem& opti
QPen pen(scheme.foreground(KColorScheme::NeutralText), 1);
painter->setPen(pen);
painter->setBrush({});
auto offCpuColor = scheme.background(KColorScheme::NegativeBackground).color();

int last_x = -1;
if (offCpuCostId != -1) {
for (const auto& event : data.events) {
if (event.type != offCpuCostId) {
continue;
}

const auto x = data.mapTimeToX(event.time);
const auto x2 = data.mapTimeToX(event.time + event.cost);
painter->fillRect(x, 0, x2 - x, data.h, offCpuColor);
}
}

int last_x = -1;
// TODO: accumulate cost for events that fall to the same pixel somehow
// but how to then sync the y scale across different delegates?
// somehow deduce threshold via min time delta and max cost?
Expand Down Expand Up @@ -249,28 +262,66 @@ bool TimeLineDelegate::helpEvent(QHelpEvent* event, QAbstractItemView* view,
const auto localX = event->pos().x();
const auto mappedX = localX - option.rect.x() - data.padding;
const auto time = data.mapXToTime(mappedX);
auto it = findEvent(data.events.constBegin(), data.events.constEnd(), time);
const auto start = findEvent(data.events.constBegin(), data.events.constEnd(), time);
// find the maximum sample cost in the range spanned by one pixel
uint numSamples = 0;
quint64 maxCost = 0;
quint64 totalCost = 0;
while (it != data.events.constEnd() && data.mapTimeToX(it->time) == mappedX) {
if (it->type != m_eventType) {
struct FoundSamples {
uint numSamples = 0;
quint64 maxCost = 0;
quint64 totalCost = 0;
int type = -1;
};
auto findSamples = [&](int costType, bool contains) -> FoundSamples {
FoundSamples ret;
ret.type = costType;
auto it = start;
if (contains) {
// for a contains check, we must only include events for the correct type
// otherwise we might skip the sched switch e.g.
while (it->type != costType && it != data.events.constBegin()) {
--it;
}
}
while (it != data.events.constEnd()) {
if (it->type != costType) {
++it;
continue;
}
const auto timeX = data.mapTimeToX(it->time);
if (timeX > mappedX) {
// event lies to the right of the selected time
break;
} else if (contains && mappedX > data.mapTimeToX(it->time + it->cost)) {
// event lies to the left of the selected time
++it;
continue;
}
Q_ASSERT(contains || mappedX == timeX);
++ret.numSamples;
ret.maxCost = std::max(ret.maxCost, it->cost);
ret.totalCost += it->cost;
++it;
continue;
}
++numSamples;
maxCost = std::max(maxCost, it->cost);
totalCost += it->cost;
++it;
return ret;
};
auto found = findSamples(m_eventType, false);
const auto offCpuCostId = index.data(EventModel::EventResultsRole).value<Data::EventResults>().offCpuTimeCostId;
if (offCpuCostId != -1 && !found.numSamples) {
// check whether we are hovering an off-CPU area
found = findSamples(offCpuCostId, true);
}

const auto formattedTime = Util::formatTimeString(time - data.minTime);
const auto totalCosts = index.data(EventModel::TotalCostsRole).value<QVector<Data::CostSummary>>();
if (numSamples > 0) {
if (found.numSamples > 0 && found.type == offCpuCostId) {
QToolTip::showText(event->globalPos(), tr("time: %1\nsched switches: %2\ntotal off-CPU time: %3\nlongest sched switch: %4")
.arg(formattedTime, QString::number(found.numSamples),
Util::formatTimeString(found.totalCost),
Util::formatTimeString(found.maxCost)));
} else if (found.numSamples > 0) {
QToolTip::showText(event->globalPos(), tr("time: %1\n%5 samples: %2\ntotal sample cost: %3\nmax sample cost: %4")
.arg(formattedTime).arg(numSamples)
.arg(totalCost).arg(maxCost).arg(totalCosts.value(m_eventType).label));
.arg(formattedTime, QString::number(found.numSamples),
Util::formatCost(found.totalCost), Util::formatCost(found.maxCost),
totalCosts.value(found.type).label));
} else {
QToolTip::showText(event->globalPos(), tr("time: %1 (no %2 samples)")
.arg(formattedTime, totalCosts.value(m_eventType).label));
Expand Down
44 changes: 43 additions & 1 deletion src/parsers/perf/perfparser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1054,8 +1054,49 @@ struct PerfParserPrivate
}

if (!contextSwitch.switchOut && thread->state == Data::ThreadEvents::OffCpu) {
thread->offCpuTime += contextSwitch.time - thread->lastSwitchTime;
const auto switchTime = contextSwitch.time - thread->lastSwitchTime;
thread->offCpuTime += switchTime;

if (eventResult.offCpuTimeCostId == -1) {
eventResult.offCpuTimeCostId = addCostType(PerfParser::tr("off-CPU Time"));
}

auto& totalCost = summaryResult.costs[eventResult.offCpuTimeCostId];
totalCost.sampleCount++;
totalCost.totalPeriod += switchTime;

qint32 stackId = -1;
if (!thread->events.isEmpty() && m_schedSwitchCostId != -1) {
auto it = std::find_if(thread->events.rbegin(), thread->events.rend(),
[this](const Data::Event& event) {
return event.type == m_schedSwitchCostId;
});
if (it != thread->events.rend()) {
stackId = it->stackId;
}
}
if (stackId != -1) {
const auto& frames = eventResult.stacks[stackId];
QSet<Data::Symbol> recursionGuard;
auto frameCallback = [this, &recursionGuard, switchTime]
(const Data::Symbol& symbol, const Data::Location& location)
{
addCallerCalleeEvent(symbol, location,
eventResult.offCpuTimeCostId, switchTime,
&recursionGuard, &callerCalleeResult,
bottomUpResult.costs.numTypes());
};
bottomUpResult.addEvent(eventResult.offCpuTimeCostId, switchTime, frames, frameCallback);
}

Data::Event event;
event.time = thread->lastSwitchTime;
event.cost = switchTime;
event.type = eventResult.offCpuTimeCostId;
event.stackId = stackId;
thread->events.push_back(event);
}

thread->lastSwitchTime = contextSwitch.time;
thread->state = contextSwitch.switchOut
? Data::ThreadEvents::OffCpu
Expand Down Expand Up @@ -1151,6 +1192,7 @@ struct PerfParserPrivate
QHash<qint32, qint32> attributeIdsToCostIds;
QHash<int, qint32> attributeNameToCostIds;
qint32 m_nextCostId = 0;
qint32 m_schedSwitchCostId = -1;
};

PerfParser::PerfParser(QObject* parent)
Expand Down
2 changes: 1 addition & 1 deletion src/resultspage.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ ResultsPage::ResultsPage(PerfParser *parser, QWidget *parent)
int typeId = -1;
for (const auto& type : data.totalCosts) {
++typeId;
if (!type.sampleCount) {
if (!type.sampleCount || typeId == data.offCpuTimeCostId) {
continue;
}
ui->timeLineEventSource->addItem(type.label, typeId);
Expand Down
69 changes: 59 additions & 10 deletions tests/integrationtests/tst_perfparser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,37 +64,41 @@ do {\

namespace {
template<typename T>
bool searchForChildSymbol(const T& root, const QString& searchString)
bool searchForChildSymbol(const T& root, const QString& searchString, bool exact = true)
{
if (root.symbol.symbol == searchString) {
if (exact && root.symbol.symbol == searchString) {
return true;
} else if (!exact && root.symbol.symbol.contains(searchString)) {
return true;
} else {
for (auto entry : root.children) {
if (searchForChildSymbol(entry, searchString)) {
if (searchForChildSymbol(entry, searchString, exact)) {
return true;
}
}
}
return false;
}

bool compareCosts(const Data::TopDown &lhs, const Data::TopDown &rhs, const Data::TopDownResults &results)
bool compareCosts(const Data::TopDown &lhs, const Data::TopDown &rhs, const Data::TopDownResults &results,
int costIndex)
{
return results.inclusiveCosts.cost(0, lhs.id) < results.inclusiveCosts.cost(0, rhs.id);
return results.inclusiveCosts.cost(costIndex, lhs.id) < results.inclusiveCosts.cost(costIndex, rhs.id);
}

bool compareCosts(const Data::BottomUp &lhs, const Data::BottomUp &rhs, const Data::BottomUpResults &results)
bool compareCosts(const Data::BottomUp &lhs, const Data::BottomUp &rhs, const Data::BottomUpResults &results,
int costIndex)
{
return results.costs.cost(0, lhs.id) < results.costs.cost(0, rhs.id);
return results.costs.cost(costIndex, lhs.id) < results.costs.cost(costIndex, rhs.id);
}

template<typename Results>
int maxElementTopIndex(const Results &collection)
int maxElementTopIndex(const Results &collection, int costIndex = 0)
{
using DataType = decltype(*collection.root.children.begin());
auto topResult = std::max_element(collection.root.children.constBegin(), collection.root.children.constEnd(),
[collection](const DataType &lhs, const DataType &rhs) {
return compareCosts(lhs, rhs, collection);
[collection, costIndex](const DataType &lhs, const DataType &rhs) {
return compareCosts(lhs, rhs, collection, costIndex);
});
return std::distance(collection.root.children.begin(), topResult);
}
Expand Down Expand Up @@ -369,7 +373,52 @@ private slots:
QVERIFY((thread.timeEnd - thread.timeStart) > thread.offCpuTime);
}
}

void testOffCpu()
{
if (!canTrace("events/sched/sched_switch")) {
QSKIP("cannot access sched_switch trace points. execute the following to run this test:\n"
" sudo mount -o remount,mode=755 /sys/kernel/debug{,/tracing} with mode=755");
}

const QStringList perfOptions = {"--call-graph", "dwarf", "--switch-events", "-e", "cycles", "-e", "sched:sched_switch"};

const QString exePath = qApp->applicationDirPath() + "/../tests/test-clients/cpp-sleep/cpp-sleep";

QTemporaryFile tempFile;
tempFile.open();

perfRecord(perfOptions, exePath, {}, tempFile.fileName());
testPerfData(Data::Symbol{"hypot", "libm"}, Data::Symbol{"start", "cpp-sleep"}, tempFile.fileName(), false);

QCOMPARE(m_bottomUpData.costs.numTypes(), 3);
QCOMPARE(m_bottomUpData.costs.typeName(0), "cycles");
QCOMPARE(m_bottomUpData.costs.typeName(1), "sched:sched_switch");
QCOMPARE(m_bottomUpData.costs.typeName(2), "off-CPU Time");

// find sched switch hotspot
int bottomUpTopIndex = maxElementTopIndex(m_bottomUpData, 1);
QVERIFY(bottomUpTopIndex != -1);

// should be the same as off-cpu hotspot
QCOMPARE(bottomUpTopIndex, maxElementTopIndex(m_bottomUpData, 2));

const auto topBottomUp = m_bottomUpData.root.children[bottomUpTopIndex];
QVERIFY(topBottomUp.symbol.symbol.contains("schedule"));
QVERIFY(topBottomUp.symbol.binary.contains("kernel"));
QVERIFY(searchForChildSymbol(topBottomUp, "std::this_thread::sleep_for", false));

QVERIFY(m_bottomUpData.costs.cost(1, topBottomUp.id) >= 10); // at least 10 sched switches
QVERIFY(m_bottomUpData.costs.cost(2, topBottomUp.id) >= 1E9); // at least 1s sleep time
}

private:
static bool canTrace(const QString& path)
{
QFileInfo info("/sys/kernel/debug/tracing/" + path);
return info.isDir() && info.isReadable();
}

Data::Summary m_summaryData;
Data::BottomUpResults m_bottomUpData;
Data::TopDownResults m_topDownData;
Expand Down

0 comments on commit 2a8de11

Please sign in to comment.