Skip to content

Commit

Permalink
Analyze On-CPU and Off-CPU time based on scheduler switch events
Browse files Browse the repository at this point in the history
When we open a perf data file that contains scheduler switch events,
i.e. a data file generated via `perf record --switch-events ...`,
hotspot will now measure the per-thread and overall off-CPU and
on-CPU times. This already gives some interesting insight into
whether your application is CPU bound, or rather suffering from
I/O, locking or similar issues.

Note that this is just a first step towards solving issue #8. For
proper off-CPU profiling, we also need to get backtraces to figure
out where the sleeping comes from. Additionally, we will need to
extend the record page to add the `--switch-events` switch by
default.
  • Loading branch information
milianw committed Nov 16, 2017
1 parent 16d77af commit 7fd97d4
Show file tree
Hide file tree
Showing 6 changed files with 102 additions and 16 deletions.
12 changes: 11 additions & 1 deletion src/models/data.h
Original file line number Diff line number Diff line change
Expand Up @@ -450,14 +450,24 @@ struct Event

using Events = QVector<Event>;

constexpr auto MAX_TIME = std::numeric_limits<quint64>::max();

struct ThreadEvents
{
qint32 pid = 0;
qint32 tid = 0;
quint64 timeStart = 0;
quint64 timeEnd = std::numeric_limits<quint64>::max();
quint64 timeEnd = MAX_TIME;
Events events;
QString name;
quint64 lastSwitchTime = MAX_TIME;
quint64 offCpuTime = 0;
enum State {
Unknown,
OnCpu,
OffCpu
};
State state = Unknown;
};

struct EventResults
Expand Down
34 changes: 26 additions & 8 deletions src/models/eventmodel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -116,14 +116,28 @@ QVariant EventModel::data(const QModelIndex& index, int role) const
if (role == Qt::DisplayRole) {
return tr("%1 (#%2)").arg(thread.name, QString::number(thread.tid));
} else if (role == Qt::ToolTipRole) {
return tr("Thread %1 (tid=%2, pid=%3) ran for %4 (%5% of total runtime).\n"
"It produced %6 events (%7% of the total events).")
.arg(thread.name, QString::number(thread.tid), QString::number(thread.pid),
Util::formatTimeString(thread.timeEnd - thread.timeStart),
Util::formatCostRelative(thread.timeEnd - thread.timeStart,
m_maxTime - m_minTime),
QString::number(thread.events.size()),
Util::formatCostRelative(thread.events.size(), m_totalEvents));
QString tooltip = tr("Thread %1, tid = %2, pid = %3\n")
.arg(thread.name, QString::number(thread.tid), QString::number(thread.pid));
const auto runtime = thread.timeEnd - thread.timeStart;
const auto totalRuntime = m_maxTime - m_minTime;
tooltip += tr("Runtime: %1 (%2% of total runtime)\n")
.arg(Util::formatTimeString(runtime),
Util::formatCostRelative(runtime, totalRuntime));
if (m_totalOffCpuTime > 0) {
const auto onCpuTime = runtime - thread.offCpuTime;
tooltip += tr("On-CPU time: %1 (%2% of thread runtime, %3% of total On-CPU time)\n")
.arg(Util::formatTimeString(onCpuTime),
Util::formatCostRelative(onCpuTime, runtime),
Util::formatCostRelative(onCpuTime, m_totalOnCpuTime));
tooltip += tr("Off-CPU time: %1 (%2% of thread runtime, %3% of total Off-CPU time)\n")
.arg(Util::formatTimeString(thread.offCpuTime),
Util::formatCostRelative(thread.offCpuTime, runtime),
Util::formatCostRelative(thread.offCpuTime, m_totalOffCpuTime));
}
tooltip += tr("Number of Events: %1 (%2% of the total)")
.arg(QString::number(thread.events.size()),
Util::formatCostRelative(thread.events.size(), m_totalEvents));
return tooltip;
}
break;
case EventsColumn:
Expand All @@ -146,6 +160,8 @@ void EventModel::setData(const Data::EventResults& data)
m_maxCost = 0;
m_numProcesses = 0;
m_numThreads = 0;
m_totalOnCpuTime = 0;
m_totalOffCpuTime = 0;
if (data.threads.isEmpty()) {
m_minTime = 0;
m_maxTime = 0;
Expand All @@ -157,6 +173,8 @@ void EventModel::setData(const Data::EventResults& data)
for (const auto& thread : data.threads) {
m_minTime = std::min(thread.timeStart, m_minTime);
m_maxTime = std::max(thread.timeEnd, m_maxTime);
m_totalOffCpuTime += thread.offCpuTime;
m_totalOnCpuTime += thread.timeEnd - thread.timeStart - thread.offCpuTime;
m_totalEvents += thread.events.size();
processes.insert(thread.pid);
threads.insert(thread.tid);
Expand Down
2 changes: 2 additions & 0 deletions src/models/eventmodel.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,8 @@ class EventModel : public QAbstractTableModel
Data::EventResults m_data;
quint64 m_minTime = 0;
quint64 m_maxTime = 0;
quint64 m_totalOnCpuTime = 0;
quint64 m_totalOffCpuTime = 0;
quint64 m_totalEvents = 0;
quint64 m_maxCost = 0;
uint m_numProcesses = 0;
Expand Down
3 changes: 3 additions & 0 deletions src/models/summarydata.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,9 @@ struct SummaryData
QString cpuSiblingCores;
QString cpuSiblingThreads;
quint64 totalMemoryInKiB = 0;
// only non-zero when perf record --switch-events was used
quint64 onCpuTime = 0;
quint64 offCpuTime = 0;

// total number of samples
quint64 sampleCount = 0;
Expand Down
59 changes: 54 additions & 5 deletions src/parsers/perf/perfparser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,26 @@ QDebug operator<<(QDebug stream, const Sample& sample)
return stream;
}

struct ContextSwitchDefinition : Record
{
bool switchOut = false;
};

QDataStream& operator>>(QDataStream& stream, ContextSwitchDefinition& contextSwitch)
{
return stream >> static_cast<Record&>(contextSwitch)
>> contextSwitch.switchOut;
}

QDebug operator<<(QDebug stream, const ContextSwitchDefinition& contextSwitch)
{
stream.noquote().nospace() << "ContextSwitchDefinition{"
<< static_cast<const Record&>(contextSwitch) << ", "
<< "switchOut=" << contextSwitch.switchOut
<< "}";
return stream.space();
}

struct StringDefinition
{
qint32 id = 0;
Expand Down Expand Up @@ -722,6 +742,13 @@ struct PerfParserPrivate
addError(error);
break;
}
case EventType::ContextSwitchDefinition: {
ContextSwitchDefinition contextSwitch;
stream >> contextSwitch;
qCDebug(LOG_PERFPARSER) << "parsed:" << contextSwitch;
addContextSwitch(contextSwitch);
break;
}
case EventType::Progress: {
float progress = 0;
stream >> progress;
Expand All @@ -746,7 +773,9 @@ struct PerfParserPrivate
{
Data::BottomUp::initializeParents(&bottomUpResult.root);

calculateSummary();
summaryResult.applicationRunningTime = applicationEndTime - applicationStartTime;
summaryResult.threadCount = uniqueThreads.size();
summaryResult.processCount = uniqueProcess.size();

buildTopDownResult();
buildCallerCalleeResult();
Expand All @@ -764,6 +793,16 @@ struct PerfParserPrivate
if (thread.name.isEmpty()) {
thread.name = PerfParser::tr("#%1").arg(thread.tid);
}

// we may have been switched out before detaching perf, so increment
// the off-CPU time in this case
if (thread.state == Data::ThreadEvents::OffCpu) {
thread.offCpuTime += thread.timeEnd - thread.lastSwitchTime;
}

const auto runTime = thread.timeEnd - thread.timeStart;
summaryResult.offCpuTime += thread.offCpuTime;
summaryResult.onCpuTime += runTime - thread.offCpuTime;
}
}

Expand Down Expand Up @@ -947,11 +986,20 @@ struct PerfParserPrivate
}
}

void calculateSummary()
void addContextSwitch(const ContextSwitchDefinition& contextSwitch)
{
summaryResult.applicationRunningTime = applicationEndTime - applicationStartTime;
summaryResult.threadCount = uniqueThreads.size();
summaryResult.processCount = uniqueProcess.size();
auto* thread = eventResult.findThread(contextSwitch.pid, contextSwitch.tid);
if (!thread) {
return;
}

if (!contextSwitch.switchOut && thread->state == Data::ThreadEvents::OffCpu) {
thread->offCpuTime += contextSwitch.time - thread->lastSwitchTime;
}
thread->lastSwitchTime = contextSwitch.time;
thread->state = contextSwitch.switchOut
? Data::ThreadEvents::OffCpu
: Data::ThreadEvents::OnCpu;
}

void addLost(const LostDefinition& /*lost*/)
Expand Down Expand Up @@ -1012,6 +1060,7 @@ struct PerfParserPrivate
Error,
Sample,
Progress,
ContextSwitchDefinition,
InvalidType
};

Expand Down
8 changes: 6 additions & 2 deletions src/resultssummarypage.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,12 @@ ResultsSummaryPage::ResultsSummaryPage(PerfParser *parser, QWidget *parent)
QTextStream stream(&summaryText);
stream << "<qt><table>"
<< formatSummaryText(tr("Command"), QLatin1String("<tt>") + data.command.toHtmlEscaped() + QLatin1String("</tt>"))
<< formatSummaryText(tr("Run Time"), Util::formatTimeString(data.applicationRunningTime))
<< formatSummaryText(tr("Processes"), QString::number(data.processCount))
<< formatSummaryText(tr("Run Time"), Util::formatTimeString(data.applicationRunningTime));
if (data.offCpuTime > 0 || data.onCpuTime > 0) {
stream << formatSummaryText(tr("On CPU Time"), Util::formatTimeString(data.onCpuTime))
<< formatSummaryText(tr("Off CPU Time"), Util::formatTimeString(data.offCpuTime));
}
stream << formatSummaryText(tr("Processes"), QString::number(data.processCount))
<< formatSummaryText(tr("Threads"), QString::number(data.threadCount))
<< formatSummaryText(tr("Total Samples"), tr("%1 (%4)")
.arg(QString::number(data.sampleCount), Util::formatFrequency(data.sampleCount, data.applicationRunningTime)));
Expand Down

0 comments on commit 7fd97d4

Please sign in to comment.