From 7fd97d44a69ae42a495d988fd9f414ea7e82b62c Mon Sep 17 00:00:00 2001 From: Milian Wolff Date: Wed, 15 Nov 2017 22:26:47 +0100 Subject: [PATCH] Analyze On-CPU and Off-CPU time based on scheduler switch events When we open a perf data file that contains scheduler switch events, i.e. a data file generated via `perf record --switch-events ...`, hotspot will now measure the per-thread and overall off-CPU and on-CPU times. This already gives some interesting insight into whether your application is CPU bound, or rather suffering from I/O, locking or similar issues. Note that this is just a first step towards solving issue #8. For proper off-CPU profiling, we also need to get backtraces to figure out where the sleeping comes from. Additionally, we will need to extend the record page to add the `--switch-events` switch by default. --- src/models/data.h | 12 ++++++- src/models/eventmodel.cpp | 34 ++++++++++++++----- src/models/eventmodel.h | 2 ++ src/models/summarydata.h | 3 ++ src/parsers/perf/perfparser.cpp | 59 ++++++++++++++++++++++++++++++--- src/resultssummarypage.cpp | 8 +++-- 6 files changed, 102 insertions(+), 16 deletions(-) diff --git a/src/models/data.h b/src/models/data.h index e5f53ee3..cf653967 100644 --- a/src/models/data.h +++ b/src/models/data.h @@ -450,14 +450,24 @@ struct Event using Events = QVector; +constexpr auto MAX_TIME = std::numeric_limits::max(); + struct ThreadEvents { qint32 pid = 0; qint32 tid = 0; quint64 timeStart = 0; - quint64 timeEnd = std::numeric_limits::max(); + quint64 timeEnd = MAX_TIME; Events events; QString name; + quint64 lastSwitchTime = MAX_TIME; + quint64 offCpuTime = 0; + enum State { + Unknown, + OnCpu, + OffCpu + }; + State state = Unknown; }; struct EventResults diff --git a/src/models/eventmodel.cpp b/src/models/eventmodel.cpp index f2625553..71258357 100644 --- a/src/models/eventmodel.cpp +++ b/src/models/eventmodel.cpp @@ -116,14 +116,28 @@ QVariant EventModel::data(const QModelIndex& index, int role) const if (role == Qt::DisplayRole) { return tr("%1 (#%2)").arg(thread.name, QString::number(thread.tid)); } else if (role == Qt::ToolTipRole) { - return tr("Thread %1 (tid=%2, pid=%3) ran for %4 (%5% of total runtime).\n" - "It produced %6 events (%7% of the total events).") - .arg(thread.name, QString::number(thread.tid), QString::number(thread.pid), - Util::formatTimeString(thread.timeEnd - thread.timeStart), - Util::formatCostRelative(thread.timeEnd - thread.timeStart, - m_maxTime - m_minTime), - QString::number(thread.events.size()), - Util::formatCostRelative(thread.events.size(), m_totalEvents)); + QString tooltip = tr("Thread %1, tid = %2, pid = %3\n") + .arg(thread.name, QString::number(thread.tid), QString::number(thread.pid)); + const auto runtime = thread.timeEnd - thread.timeStart; + const auto totalRuntime = m_maxTime - m_minTime; + tooltip += tr("Runtime: %1 (%2% of total runtime)\n") + .arg(Util::formatTimeString(runtime), + Util::formatCostRelative(runtime, totalRuntime)); + if (m_totalOffCpuTime > 0) { + const auto onCpuTime = runtime - thread.offCpuTime; + tooltip += tr("On-CPU time: %1 (%2% of thread runtime, %3% of total On-CPU time)\n") + .arg(Util::formatTimeString(onCpuTime), + Util::formatCostRelative(onCpuTime, runtime), + Util::formatCostRelative(onCpuTime, m_totalOnCpuTime)); + tooltip += tr("Off-CPU time: %1 (%2% of thread runtime, %3% of total Off-CPU time)\n") + .arg(Util::formatTimeString(thread.offCpuTime), + Util::formatCostRelative(thread.offCpuTime, runtime), + Util::formatCostRelative(thread.offCpuTime, m_totalOffCpuTime)); + } + tooltip += tr("Number of Events: %1 (%2% of the total)") + .arg(QString::number(thread.events.size()), + Util::formatCostRelative(thread.events.size(), m_totalEvents)); + return tooltip; } break; case EventsColumn: @@ -146,6 +160,8 @@ void EventModel::setData(const Data::EventResults& data) m_maxCost = 0; m_numProcesses = 0; m_numThreads = 0; + m_totalOnCpuTime = 0; + m_totalOffCpuTime = 0; if (data.threads.isEmpty()) { m_minTime = 0; m_maxTime = 0; @@ -157,6 +173,8 @@ void EventModel::setData(const Data::EventResults& data) for (const auto& thread : data.threads) { m_minTime = std::min(thread.timeStart, m_minTime); m_maxTime = std::max(thread.timeEnd, m_maxTime); + m_totalOffCpuTime += thread.offCpuTime; + m_totalOnCpuTime += thread.timeEnd - thread.timeStart - thread.offCpuTime; m_totalEvents += thread.events.size(); processes.insert(thread.pid); threads.insert(thread.tid); diff --git a/src/models/eventmodel.h b/src/models/eventmodel.h index b3c7161c..906d114f 100644 --- a/src/models/eventmodel.h +++ b/src/models/eventmodel.h @@ -73,6 +73,8 @@ class EventModel : public QAbstractTableModel Data::EventResults m_data; quint64 m_minTime = 0; quint64 m_maxTime = 0; + quint64 m_totalOnCpuTime = 0; + quint64 m_totalOffCpuTime = 0; quint64 m_totalEvents = 0; quint64 m_maxCost = 0; uint m_numProcesses = 0; diff --git a/src/models/summarydata.h b/src/models/summarydata.h index d1abe0ce..b6ee092e 100644 --- a/src/models/summarydata.h +++ b/src/models/summarydata.h @@ -49,6 +49,9 @@ struct SummaryData QString cpuSiblingCores; QString cpuSiblingThreads; quint64 totalMemoryInKiB = 0; + // only non-zero when perf record --switch-events was used + quint64 onCpuTime = 0; + quint64 offCpuTime = 0; // total number of samples quint64 sampleCount = 0; diff --git a/src/parsers/perf/perfparser.cpp b/src/parsers/perf/perfparser.cpp index 75922e40..316f4ace 100644 --- a/src/parsers/perf/perfparser.cpp +++ b/src/parsers/perf/perfparser.cpp @@ -293,6 +293,26 @@ QDebug operator<<(QDebug stream, const Sample& sample) return stream; } +struct ContextSwitchDefinition : Record +{ + bool switchOut = false; +}; + +QDataStream& operator>>(QDataStream& stream, ContextSwitchDefinition& contextSwitch) +{ + return stream >> static_cast(contextSwitch) + >> contextSwitch.switchOut; +} + +QDebug operator<<(QDebug stream, const ContextSwitchDefinition& contextSwitch) +{ + stream.noquote().nospace() << "ContextSwitchDefinition{" + << static_cast(contextSwitch) << ", " + << "switchOut=" << contextSwitch.switchOut + << "}"; + return stream.space(); +} + struct StringDefinition { qint32 id = 0; @@ -722,6 +742,13 @@ struct PerfParserPrivate addError(error); break; } + case EventType::ContextSwitchDefinition: { + ContextSwitchDefinition contextSwitch; + stream >> contextSwitch; + qCDebug(LOG_PERFPARSER) << "parsed:" << contextSwitch; + addContextSwitch(contextSwitch); + break; + } case EventType::Progress: { float progress = 0; stream >> progress; @@ -746,7 +773,9 @@ struct PerfParserPrivate { Data::BottomUp::initializeParents(&bottomUpResult.root); - calculateSummary(); + summaryResult.applicationRunningTime = applicationEndTime - applicationStartTime; + summaryResult.threadCount = uniqueThreads.size(); + summaryResult.processCount = uniqueProcess.size(); buildTopDownResult(); buildCallerCalleeResult(); @@ -764,6 +793,16 @@ struct PerfParserPrivate if (thread.name.isEmpty()) { thread.name = PerfParser::tr("#%1").arg(thread.tid); } + + // we may have been switched out before detaching perf, so increment + // the off-CPU time in this case + if (thread.state == Data::ThreadEvents::OffCpu) { + thread.offCpuTime += thread.timeEnd - thread.lastSwitchTime; + } + + const auto runTime = thread.timeEnd - thread.timeStart; + summaryResult.offCpuTime += thread.offCpuTime; + summaryResult.onCpuTime += runTime - thread.offCpuTime; } } @@ -947,11 +986,20 @@ struct PerfParserPrivate } } - void calculateSummary() + void addContextSwitch(const ContextSwitchDefinition& contextSwitch) { - summaryResult.applicationRunningTime = applicationEndTime - applicationStartTime; - summaryResult.threadCount = uniqueThreads.size(); - summaryResult.processCount = uniqueProcess.size(); + auto* thread = eventResult.findThread(contextSwitch.pid, contextSwitch.tid); + if (!thread) { + return; + } + + if (!contextSwitch.switchOut && thread->state == Data::ThreadEvents::OffCpu) { + thread->offCpuTime += contextSwitch.time - thread->lastSwitchTime; + } + thread->lastSwitchTime = contextSwitch.time; + thread->state = contextSwitch.switchOut + ? Data::ThreadEvents::OffCpu + : Data::ThreadEvents::OnCpu; } void addLost(const LostDefinition& /*lost*/) @@ -1012,6 +1060,7 @@ struct PerfParserPrivate Error, Sample, Progress, + ContextSwitchDefinition, InvalidType }; diff --git a/src/resultssummarypage.cpp b/src/resultssummarypage.cpp index 7ad0ad0e..c63a51e3 100644 --- a/src/resultssummarypage.cpp +++ b/src/resultssummarypage.cpp @@ -87,8 +87,12 @@ ResultsSummaryPage::ResultsSummaryPage(PerfParser *parser, QWidget *parent) QTextStream stream(&summaryText); stream << "" << formatSummaryText(tr("Command"), QLatin1String("") + data.command.toHtmlEscaped() + QLatin1String("")) - << formatSummaryText(tr("Run Time"), Util::formatTimeString(data.applicationRunningTime)) - << formatSummaryText(tr("Processes"), QString::number(data.processCount)) + << formatSummaryText(tr("Run Time"), Util::formatTimeString(data.applicationRunningTime)); + if (data.offCpuTime > 0 || data.onCpuTime > 0) { + stream << formatSummaryText(tr("On CPU Time"), Util::formatTimeString(data.onCpuTime)) + << formatSummaryText(tr("Off CPU Time"), Util::formatTimeString(data.offCpuTime)); + } + stream << formatSummaryText(tr("Processes"), QString::number(data.processCount)) << formatSummaryText(tr("Threads"), QString::number(data.threadCount)) << formatSummaryText(tr("Total Samples"), tr("%1 (%4)") .arg(QString::number(data.sampleCount), Util::formatFrequency(data.sampleCount, data.applicationRunningTime)));