Skip to content

Commit

Permalink
Merge pull request #17843 from hrydgard/android-display-timing
Browse files Browse the repository at this point in the history
Android: Measure display timing using VK_GOOGLE_display_timing
  • Loading branch information
hrydgard authored Aug 3, 2023
2 parents 87569f8 + 0530dc5 commit 8ef781f
Show file tree
Hide file tree
Showing 15 changed files with 178 additions and 71 deletions.
5 changes: 5 additions & 0 deletions Common/GPU/MiscTypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,14 @@ typedef std::function<void(InvalidationCallbackFlags)> InvalidationCallback;
// These are separate from FrameData because we store some history of these.
// Also, this might be joined with more non-GPU timing information later.
struct FrameTimeData {
uint64_t frameId;
double frameBegin;
double afterFenceWait;
double firstSubmit;
double queuePresent;

double actualPresent;
double desiredPresentTime;
double earliestPresentTime;
double presentMargin;
};
6 changes: 4 additions & 2 deletions Common/GPU/Vulkan/VulkanContext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -692,9 +692,11 @@ VkResult VulkanContext::CreateDevice() {
extensionsLookup_.EXT_fragment_shader_interlock = EnableDeviceExtension(VK_EXT_FRAGMENT_SHADER_INTERLOCK_EXTENSION_NAME);
extensionsLookup_.ARM_rasterization_order_attachment_access = EnableDeviceExtension(VK_ARM_RASTERIZATION_ORDER_ATTACHMENT_ACCESS_EXTENSION_NAME);

extensionsLookup_.KHR_present_id = EnableDeviceExtension(VK_KHR_PRESENT_ID_EXTENSION_NAME);
extensionsLookup_.KHR_present_wait = EnableDeviceExtension(VK_KHR_PRESENT_WAIT_EXTENSION_NAME);
extensionsLookup_.GOOGLE_display_timing = EnableDeviceExtension(VK_GOOGLE_DISPLAY_TIMING_EXTENSION_NAME);
if (!extensionsLookup_.GOOGLE_display_timing) {
extensionsLookup_.KHR_present_id = EnableDeviceExtension(VK_KHR_PRESENT_ID_EXTENSION_NAME);
extensionsLookup_.KHR_present_wait = EnableDeviceExtension(VK_KHR_PRESENT_WAIT_EXTENSION_NAME);
}

VkPhysicalDeviceFeatures2 features2{ VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2 };

Expand Down
9 changes: 8 additions & 1 deletion Common/GPU/Vulkan/VulkanFrameData.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -110,11 +110,18 @@ VkResult FrameData::QueuePresent(VulkanContext *vulkan, FrameDataShared &shared)
present.pWaitSemaphores = &shared.renderingCompleteSemaphore;
present.waitSemaphoreCount = 1;

// Can't move these into the if.
VkPresentIdKHR presentID{ VK_STRUCTURE_TYPE_PRESENT_ID_KHR };
VkPresentTimesInfoGOOGLE presentGOOGLE{ VK_STRUCTURE_TYPE_PRESENT_TIMES_INFO_GOOGLE };
VkPresentTimeGOOGLE presentTimeGOOGLE{ (uint32_t)frameId, 0 }; // it's ok to truncate this. it'll wrap around and work (if we ever reach 4 billion frames..)
if (vulkan->Extensions().KHR_present_id && vulkan->GetDeviceFeatures().enabled.presentId.presentId) {
presentID.pPresentIds = &frameID;
presentID.pPresentIds = &frameId;
presentID.swapchainCount = 1;
present.pNext = &presentID;
} else if (vulkan->Extensions().GOOGLE_display_timing) {
presentGOOGLE.pTimes = &presentTimeGOOGLE;
presentGOOGLE.swapchainCount = 1;
present.pNext = &presentGOOGLE;
}

return vkQueuePresentKHR(vulkan->GetGraphicsQueue(), &present);
Expand Down
1 change: 0 additions & 1 deletion Common/GPU/Vulkan/VulkanFrameData.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,6 @@ struct FrameData {
std::mutex fenceMutex;
std::condition_variable fenceCondVar;
bool readyForFence = true;
uint64_t frameID; // always incrementing, set at the start of each frame.

VkFence fence = VK_NULL_HANDLE;

Expand Down
10 changes: 9 additions & 1 deletion Common/GPU/Vulkan/VulkanLoader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,8 @@ PFN_vkGetPhysicalDeviceProperties2KHR vkGetPhysicalDeviceProperties2KHR;
PFN_vkGetPhysicalDeviceFeatures2KHR vkGetPhysicalDeviceFeatures2KHR;
PFN_vkCreateRenderPass2KHR vkCreateRenderPass2KHR;
PFN_vkWaitForPresentKHR vkWaitForPresentKHR;
PFN_vkGetPastPresentationTimingGOOGLE vkGetPastPresentationTimingGOOGLE;
PFN_vkGetRefreshCycleDurationGOOGLE vkGetRefreshCycleDurationGOOGLE;

} // namespace PPSSPP_VK

Expand Down Expand Up @@ -728,8 +730,14 @@ void VulkanLoadDeviceFunctions(VkDevice device, const VulkanExtensions &enabledE
LOAD_DEVICE_FUNC(device, vkCmdNextSubpass);
LOAD_DEVICE_FUNC(device, vkCmdEndRenderPass);
LOAD_DEVICE_FUNC(device, vkCmdExecuteCommands);
LOAD_DEVICE_FUNC(device, vkWaitForPresentKHR);

if (enabledExtensions.KHR_present_wait) {
LOAD_DEVICE_FUNC(device, vkWaitForPresentKHR);
}
if (enabledExtensions.GOOGLE_display_timing) {
LOAD_DEVICE_FUNC(device, vkGetPastPresentationTimingGOOGLE);
LOAD_DEVICE_FUNC(device, vkGetRefreshCycleDurationGOOGLE);
}
if (enabledExtensions.KHR_dedicated_allocation) {
LOAD_DEVICE_FUNC(device, vkGetBufferMemoryRequirements2KHR);
LOAD_DEVICE_FUNC(device, vkGetImageMemoryRequirements2KHR);
Expand Down
2 changes: 2 additions & 0 deletions Common/GPU/Vulkan/VulkanLoader.h
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,8 @@ extern PFN_vkGetPhysicalDeviceProperties2KHR vkGetPhysicalDeviceProperties2KHR;
extern PFN_vkGetPhysicalDeviceFeatures2KHR vkGetPhysicalDeviceFeatures2KHR;
extern PFN_vkCreateRenderPass2KHR vkCreateRenderPass2KHR;
extern PFN_vkWaitForPresentKHR vkWaitForPresentKHR;
extern PFN_vkGetPastPresentationTimingGOOGLE vkGetPastPresentationTimingGOOGLE;
extern PFN_vkGetRefreshCycleDurationGOOGLE vkGetRefreshCycleDurationGOOGLE;
} // namespace PPSSPP_VK

// For fast extension-enabled checks.
Expand Down
42 changes: 35 additions & 7 deletions Common/GPU/Vulkan/VulkanRenderManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -546,16 +546,42 @@ void VulkanRenderManager::PresentWaitThreadFunc() {
uint64_t waitedId = frameIdGen_;
while (run_) {
const uint64_t timeout = 1000000000ULL; // 1 sec
vkWaitForPresentKHR(vulkan_->GetDevice(), vulkan_->GetSwapchain(), waitedId, timeout);
frameTimeData_[waitedId].actualPresent = time_now_d();
waitedId++;

if (VK_SUCCESS == vkWaitForPresentKHR(vulkan_->GetDevice(), vulkan_->GetSwapchain(), waitedId, timeout)) {
frameTimeData_[waitedId].actualPresent = time_now_d();
waitedId++;
}
_dbg_assert_(waitedId <= frameIdGen_);
}

INFO_LOG(G3D, "Leaving PresentWaitThreadFunc()");
}

void VulkanRenderManager::PollPresentTiming() {
// For VK_GOOGLE_display_timing, we need to poll.

// Poll for information about completed frames.
// NOTE: We seem to get the information pretty late! Like after 6 frames, which is quite weird.
// Tested on POCO F4.
if (vulkan_->Extensions().GOOGLE_display_timing) {
uint32_t count = 0;
vkGetPastPresentationTimingGOOGLE(vulkan_->GetDevice(), vulkan_->GetSwapchain(), &count, nullptr);
if (count > 0) {
VkPastPresentationTimingGOOGLE *timings = new VkPastPresentationTimingGOOGLE[count];
vkGetPastPresentationTimingGOOGLE(vulkan_->GetDevice(), vulkan_->GetSwapchain(), &count, timings);
for (uint32_t i = 0; i < count; i++) {
uint64_t presentId = timings[i].presentID;
frameTimeData_[presentId].actualPresent = from_time_raw(timings[i].actualPresentTime);
frameTimeData_[presentId].desiredPresentTime = from_time_raw(timings[i].desiredPresentTime);
frameTimeData_[presentId].earliestPresentTime = from_time_raw(timings[i].earliestPresentTime);
double presentMargin = from_time_raw_relative(timings[i].presentMargin);
frameTimeData_[presentId].presentMargin = presentMargin;
}
delete[] timings;
}
}
}


void VulkanRenderManager::BeginFrame(bool enableProfiling, bool enableLogProfiler) {
double frameBeginTime = time_now_d()
VLOG("BeginFrame");
Expand All @@ -582,17 +608,19 @@ void VulkanRenderManager::BeginFrame(bool enableProfiling, bool enableLogProfile
}
vkResetFences(device, 1, &frameData.fence);

uint64_t frameId = frameIdGen_++;

PollPresentTiming();

int validBits = vulkan_->GetQueueFamilyProperties(vulkan_->GetGraphicsQueueFamilyIndex()).timestampValidBits;

// Can't set this until after the fence.
frameData.profile.enabled = enableProfiling;
frameData.profile.timestampsEnabled = enableProfiling && validBits > 0;
frameData.frameID = frameIdGen_++;

uint64_t frameId = ++frameIdGen_;
frameData.frameId = frameId;

frameTimeData_[frameId] = {};
frameTimeData_[frameId].frameId = frameId;
frameTimeData_[frameId].frameBegin = frameBeginTime;
frameTimeData_[frameId].afterFenceWait = time_now_d();

Expand Down
1 change: 1 addition & 0 deletions Common/GPU/Vulkan/VulkanRenderManager.h
Original file line number Diff line number Diff line change
Expand Up @@ -481,6 +481,7 @@ class VulkanRenderManager {
void StopThread();

void PresentWaitThreadFunc();
void PollPresentTiming();

FrameDataShared frameDataShared_;

Expand Down
14 changes: 10 additions & 4 deletions Common/GPU/Vulkan/thin3d_vulkan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -517,7 +517,7 @@ class VKContext : public DrawContext {
VkDescriptorSet GetOrCreateDescriptorSet(VkBuffer buffer);

std::vector<std::string> GetFeatureList() const override;
std::vector<std::string> GetExtensionList() const override;
std::vector<std::string> GetExtensionList(bool enabledOnly) const override;

uint64_t GetNativeObject(NativeObject obj, void *srcObject) override;

Expand Down Expand Up @@ -1626,10 +1626,16 @@ std::vector<std::string> VKContext::GetFeatureList() const {
return features;
}

std::vector<std::string> VKContext::GetExtensionList() const {
std::vector<std::string> VKContext::GetExtensionList(bool enabledOnly) const {
std::vector<std::string> extensions;
for (auto &iter : vulkan_->GetDeviceExtensionsAvailable()) {
extensions.push_back(iter.extensionName);
if (enabledOnly) {
for (auto &iter : vulkan_->GetDeviceExtensionsEnabled()) {
extensions.push_back(iter);
}
} else {
for (auto &iter : vulkan_->GetDeviceExtensionsAvailable()) {
extensions.push_back(iter.extensionName);
}
}
return extensions;
}
Expand Down
2 changes: 1 addition & 1 deletion Common/GPU/thin3d.h
Original file line number Diff line number Diff line change
Expand Up @@ -691,7 +691,7 @@ class DrawContext {
virtual const DeviceCaps &GetDeviceCaps() const = 0;
virtual uint32_t GetDataFormatSupport(DataFormat fmt) const = 0;
virtual std::vector<std::string> GetFeatureList() const { return std::vector<std::string>(); }
virtual std::vector<std::string> GetExtensionList() const { return std::vector<std::string>(); }
virtual std::vector<std::string> GetExtensionList(bool enabledOnly) const { return std::vector<std::string>(); }
virtual std::vector<std::string> GetDeviceList() const { return std::vector<std::string>(); }

virtual PresentationMode GetPresentationMode() const = 0;
Expand Down
45 changes: 38 additions & 7 deletions Common/TimeUtil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@

// TODO: https://github.com/floooh/sokol/blob/9a6237fcdf213e6da48e4f9201f144bcb2dcb46f/sokol_time.h#L229-L248

static const double micros = 1000000.0;
static const double nanos = 1000000000.0;

#ifdef _WIN32

static LARGE_INTEGER frequency;
Expand All @@ -41,7 +44,18 @@ double time_now_d() {

// Fake, but usable in a pinch. Don't, though.
uint64_t time_now_raw() {
return (uint64_t)(time_now_d() * 1000000000.0);
return (uint64_t)(time_now_d() * nanos);
}

double from_time_raw(uint64_t raw_time) {
if (raw_time == 0) {
return 0.0; // invalid time
}
return (double)raw_time * (1.0 / nanos);
}

double from_time_raw_relative(uint64_t raw_time) {
return from_time_raw(raw_time);
}

#elif PPSSPP_PLATFORM(ANDROID) || PPSSPP_PLATFORM(LINUX) || PPSSPP_PLATFORM(MAC) || PPSSPP_PLATFORM(IOS)
Expand All @@ -53,13 +67,22 @@ uint64_t time_now_raw() {
return tp.tv_sec * 1000000000ULL + tp.tv_nsec;
}

static uint64_t g_startTime;

double from_time_raw(uint64_t raw_time) {
return (double)(raw_time - g_startTime) * (1.0 / nanos);
}

double time_now_d() {
static uint64_t start;
uint64_t raw_time = time_now_raw();
if (start == 0) {
start = raw_time;
if (g_startTime == 0) {
g_startTime = raw_time;
}
return (double)(raw_time - start) * (1.0 / 1000000000.0);
return from_time_raw(raw_time);
}

double from_time_raw_relative(uint64_t raw_time) {
return (double)raw_time * (1.0 / nanos);
}

#else
Expand All @@ -71,12 +94,20 @@ double time_now_d() {
if (start == 0) {
start = tv.tv_sec;
}
return (double)(tv.tv_sec - start) + (double)tv.tv_usec * (1.0 / 1000000.0);
return (double)(tv.tv_sec - start) + (double)tv.tv_usec * (1.0 / micros);
}

// Fake, but usable in a pinch. Don't, though.
uint64_t time_now_raw() {
return (uint64_t)(time_now_d() * 1000000000.0);
return (uint64_t)(time_now_d() * nanos);
}

double from_time_raw(uint64_t raw_time) {
return (double)raw_time * (1.0 / nanos);
}

double from_time_raw_relative(uint64_t raw_time) {
return from_time_raw(raw_time);
}

#endif
Expand Down
4 changes: 4 additions & 0 deletions Common/TimeUtil.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ double time_now_d();
// The only intended use is to match the timings from VK_GOOGLE_display_timing.
uint64_t time_now_raw();

// This is only interesting for Linux, in relation to VK_GOOGLE_display_timing.
double from_time_raw(uint64_t raw_time);
double from_time_raw_relative(uint64_t raw_time);

// Sleep. Does not necessarily have millisecond granularity, especially on Windows.
void sleep_ms(int ms);

Expand Down
69 changes: 38 additions & 31 deletions UI/DebugOverlay.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -104,40 +104,47 @@ static void DrawFrameTiming(UIContext *ctx, const Bounds &bounds) {

char statBuf[1024]{};

FrameTimeData data = ctx->GetDrawContext()->GetFrameTimeData(4);

if (data.frameBegin == 0.0) {
snprintf(statBuf, sizeof(statBuf), "(Frame timing collection not supported on this backend)");
} else {
double fenceLatency_s = data.afterFenceWait - data.frameBegin;
double submitLatency_s = data.firstSubmit - data.frameBegin;
double queuePresentLatency_s = data.queuePresent - data.frameBegin;
double actualPresentLatency_s = data.actualPresent - data.frameBegin;

char presentStats[256] = "";
if (data.actualPresent != 0.0) {
snprintf(presentStats, sizeof(presentStats),
"* Actual present: %0.1f ms\n",
actualPresentLatency_s * 1000.0);
}
snprintf(statBuf, sizeof(statBuf),
"Time from start of frame to event:\n"
"* Past the fence: %0.1f ms\n"
"* First submit: %0.1f ms\n"
"* Queue-present: %0.1f ms\n"
"%s",
fenceLatency_s * 1000.0,
submitLatency_s * 1000.0,
queuePresentLatency_s * 1000.0,
presentStats
);
}

ctx->Flush();
ctx->BindFontTexture();
ctx->Draw()->SetFontScale(0.5f, 0.5f);
ctx->Draw()->DrawTextRect(ubuntu24, statBuf, bounds.x + 11, bounds.y + 51, bounds.w - 20, bounds.h - 30, 0xc0000000, FLAG_DYNAMIC_ASCII);
ctx->Draw()->DrawTextRect(ubuntu24, statBuf, bounds.x + 10, bounds.y + 50, bounds.w - 20, bounds.h - 30, 0xFFFFFFFF, FLAG_DYNAMIC_ASCII);

for (int i = 0; i < 8; i++) {
FrameTimeData data = ctx->GetDrawContext()->GetFrameTimeData(6 + i);
if (data.frameBegin == 0.0) {
snprintf(statBuf, sizeof(statBuf), "(Frame timing collection not supported on this backend)");
} else {
double fenceLatency_s = data.afterFenceWait - data.frameBegin;
double submitLatency_s = data.firstSubmit - data.frameBegin;
double queuePresentLatency_s = data.queuePresent - data.frameBegin;
double actualPresentLatency_s = data.actualPresent - data.frameBegin;
double presentMargin = data.presentMargin;
double computedMargin = data.actualPresent - data.queuePresent;

char presentStats[256] = "";
if (data.actualPresent != 0.0) {
snprintf(presentStats, sizeof(presentStats),
"* Present: %0.1f ms\n"
"* Margin: %0.1f ms\n"
"* Margin(c): %0.1f ms\n",
actualPresentLatency_s * 1000.0,
presentMargin * 1000.0,
computedMargin * 1000.0);
}
snprintf(statBuf, sizeof(statBuf),
"%llu: From start of frame to event:\n"
"* Past fence: %0.1f ms\n"
"* Submit #1: %0.1f ms\n"
"* Queue-p: %0.1f ms\n"
"%s",
(long long)data.frameId,
fenceLatency_s * 1000.0,
submitLatency_s * 1000.0,
queuePresentLatency_s * 1000.0,
presentStats
);
}
ctx->Draw()->DrawTextRect(ubuntu24, statBuf, bounds.x + 10 + i * 150, bounds.y + 50, bounds.w - 20, bounds.h - 30, 0xFFFFFFFF, FLAG_DYNAMIC_ASCII);
}
ctx->Draw()->SetFontScale(1.0f, 1.0f);
ctx->Flush();
ctx->RebindTexture();
Expand Down
Loading

0 comments on commit 8ef781f

Please sign in to comment.