From 5648517d4fd4fa7b409c5b1ad10cdae413953c90 Mon Sep 17 00:00:00 2001 From: Doug Walker Date: Thu, 30 May 2024 00:42:26 -0400 Subject: [PATCH] Modify half-domain LUT1D GPU shader to improve zero handling Signed-off-by: Doug Walker --- src/OpenColorIO/ops/lut1d/Lut1DOpGPU.cpp | 12 +++--- tests/cpu/Processor_tests.cpp | 32 +++++++++++++++- tests/gpu/Lut1DOp_test.cpp | 47 ++++++++++++++++++++++++ 3 files changed, 84 insertions(+), 7 deletions(-) diff --git a/src/OpenColorIO/ops/lut1d/Lut1DOpGPU.cpp b/src/OpenColorIO/ops/lut1d/Lut1DOpGPU.cpp index 59091817da..26a5c7d712 100644 --- a/src/OpenColorIO/ops/lut1d/Lut1DOpGPU.cpp +++ b/src/OpenColorIO/ops/lut1d/Lut1DOpGPU.cpp @@ -234,7 +234,7 @@ void GetLut1DGPUShaderProgram(GpuShaderCreatorRcPtr & shaderCreator, { static const float NEG_MIN_EXP = 15.0f; static const float EXP_SCALE = 1024.0f; - static const float HALF_DENRM_MAX = 6.09755515e-05f; // e.g. 2^-14 - 2^-24 + static const float INV_DENRM_STEP = 16777216.0f; // 1 / 2^-24 ss.newLine() << "float dep;"; ss.newLine() << "float abs_f = abs(f);"; @@ -258,15 +258,15 @@ void GetLut1DGPUShaderProgram(GpuShaderCreatorRcPtr & shaderCreator, ss.newLine() << "else"; ss.newLine() << "{"; ss.indent(); - // Extract bits from denormalized values - ss.newLine() << "dep = abs_f * 1023.0 / " << HALF_DENRM_MAX << ";"; + // Extract bits from denormalized values. + ss.newLine() << "dep = abs_f * " << INV_DENRM_STEP << ";"; ss.dedent(); ss.newLine() << "}"; - // Adjust position for negative values - ss.newLine() << "dep += step(f, 0.0) * 32768.0;"; + // Adjust position for negative values. + ss.newLine() << "dep += (f < 0.) ? 32768.0 : 0.0;"; - // At this point 'dep' contains the raw half + // At this point 'dep' contains the raw half. // Note: Raw halfs for NaN floats cannot be computed using // floating-point operations. } diff --git a/tests/cpu/Processor_tests.cpp b/tests/cpu/Processor_tests.cpp index 7501e00d42..d2e0087251 100644 --- a/tests/cpu/Processor_tests.cpp +++ b/tests/cpu/Processor_tests.cpp @@ -12,7 +12,7 @@ namespace OCIO = OCIO_NAMESPACE; -OCIO_ADD_TEST(Processor, basic) +OCIO_ADD_TEST(Processor, basic_cache) { OCIO::ConfigRcPtr config = OCIO::Config::Create(); OCIO::GroupTransformRcPtr group = OCIO::GroupTransform::Create(); @@ -56,6 +56,36 @@ OCIO_ADD_TEST(Processor, basic) OCIO_CHECK_EQUAL(std::string(processorMat->getCacheID()), "1b1880136f7669351adb0dcae0f4f9fd"); } +OCIO_ADD_TEST(Processor, basic_cache_lut) +{ + OCIO::ConfigRcPtr config = OCIO::Config::Create(); + OCIO::GroupTransformRcPtr group = OCIO::GroupTransform::Create(); + + auto processorEmptyGroup = config->getProcessor(group); + OCIO_CHECK_EQUAL(processorEmptyGroup->getNumTransforms(), 0); + OCIO_CHECK_EQUAL(std::string(processorEmptyGroup->getCacheID()), ""); + + auto lut = OCIO::Lut3DTransform::Create(3); + // Make sure it's not an identity. + lut->setValue(2, 2, 2, 2.f, 3.f, 4.f); + + auto processorLut = config->getProcessor(lut); + OCIO_CHECK_EQUAL(processorLut->getNumTransforms(), 1); + OCIO_CHECK_EQUAL(std::string(processorLut->getCacheID()), "2b26d0097cdcf8f141fe3b3d6e21b5ec"); + + // Check behaviour of the cacheID + + // Change a value and check that the cacheID changes. + lut->setValue(2, 2, 2, 1.f, 3.f, 4.f); + processorLut = config->getProcessor(lut); + OCIO_CHECK_EQUAL(std::string(processorLut->getCacheID()), "288ec8ea132adaca5b5aed24a296a1a2"); + + // Restore the original value, check that the cache ID matches what it used to be. + lut->setValue(2, 2, 2, 2.f, 3.f, 4.f); + processorLut = config->getProcessor(lut); + OCIO_CHECK_EQUAL(std::string(processorLut->getCacheID()), "2b26d0097cdcf8f141fe3b3d6e21b5ec"); +} + OCIO_ADD_TEST(Processor, unique_dynamic_properties) { OCIO::TransformDirection direction = OCIO::TRANSFORM_DIR_FORWARD; diff --git a/tests/gpu/Lut1DOp_test.cpp b/tests/gpu/Lut1DOp_test.cpp index cff3e7402a..91d13446bb 100644 --- a/tests/gpu/Lut1DOp_test.cpp +++ b/tests/gpu/Lut1DOp_test.cpp @@ -283,6 +283,53 @@ OCIO_ADD_GPU_TEST(Lut1DOp, lut1d_half_domain_unequal_channels) test.setTestInfinity(false); } +OCIO_ADD_GPU_TEST(Lut1DOp, lut1d_half_domain_negative_zero) +{ + // This is an edge case, but this test documents that the behavior of CPU & GPU + // are different with respect to where in the LUT negative zero looks up at. + // This is only visible with half-domain LUTs that set different values for + // positive and negative zero, which really should be considered a bug in the LUT. + // Given that IEEE arithmetic specifies that -0 == +0 in comparisons, this does + // not seem to be worth fixing in OCIO at the cost of reduced performance. + + // Create a half-domain LUT1D. + const auto lut = OCIO::Lut1DTransform::Create(65536, true); + + // Set the positive and negative denorms to large values to make it easy + // to check that the processing is correct. + for (unsigned i=0; i<1024; i++) + { + const float x = static_cast(i); + // Positive denorms. + lut->setValue(0 + i, x, x, x); + // Negative denorms. Create a jump between +0 and -0. + lut->setValue(32768 + i, x + 10.f, x + 10.f, x + 10.f); + } + + test.setProcessor(lut); + + // TODO: Would like this to be lower. + test.setErrorThreshold(2e-3f); + + OCIOGPUTest::CustomValues values; + values.m_inputValues = + { + // Negative zero uses the positive 0 LUT value on the GPU, and negative 0 LUT on CPU. + // -0.00f, -0.00f, -0.000f, 0.0f, + 0.00f, 0.00f, 0.000f, 1.0f, + // Use values that fall in the middle of the first, second, and third LUT segments + // to test accuracy in the denormals. + 3e-8f, 9e-8f, 15e-8f, 0.0f, + -3e-8f, -9e-8f, -15e-8f, 0.0f, + // Throw in a more typical value. + 0.50f, 0.05f, 0.005f, 0.5f, + }; + test.setCustomValues(values); + + test.setTestNaN(false); + test.setTestInfinity(false); +} + OCIO_ADD_GPU_TEST(Lut1DOp, lut1d_file2_test) { OCIO::FileTransformRcPtr file = GetFileTransform("lut1d_green.ctf");