Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adsk Contrib - Modify half-domain LUT1D GPU shader to improve zero handling #1981

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions src/OpenColorIO/ops/lut1d/Lut1DOpGPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,7 @@ void GetLut1DGPUShaderProgram(GpuShaderCreatorRcPtr & shaderCreator,
{
static const float NEG_MIN_EXP = 15.0f;
static const float EXP_SCALE = 1024.0f;
static const float HALF_DENRM_MAX = 6.09755515e-05f; // e.g. 2^-14 - 2^-24
static const float INV_DENRM_STEP = 16777216.0f; // 1 / 2^-24

ss.newLine() << "float dep;";
ss.newLine() << "float abs_f = abs(f);";
Expand All @@ -258,15 +258,15 @@ void GetLut1DGPUShaderProgram(GpuShaderCreatorRcPtr & shaderCreator,
ss.newLine() << "else";
ss.newLine() << "{";
ss.indent();
// Extract bits from denormalized values
ss.newLine() << "dep = abs_f * 1023.0 / " << HALF_DENRM_MAX << ";";
// Extract bits from denormalized values.
ss.newLine() << "dep = abs_f * " << INV_DENRM_STEP << ";";
ss.dedent();
ss.newLine() << "}";

// Adjust position for negative values
ss.newLine() << "dep += step(f, 0.0) * 32768.0;";
// Adjust position for negative values.
ss.newLine() << "dep += (f < 0.) ? 32768.0 : 0.0;";

// At this point 'dep' contains the raw half
// At this point 'dep' contains the raw half.
// Note: Raw halfs for NaN floats cannot be computed using
// floating-point operations.
}
Expand Down
32 changes: 31 additions & 1 deletion tests/cpu/Processor_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
namespace OCIO = OCIO_NAMESPACE;


OCIO_ADD_TEST(Processor, basic)
OCIO_ADD_TEST(Processor, basic_cache)
{
OCIO::ConfigRcPtr config = OCIO::Config::Create();
OCIO::GroupTransformRcPtr group = OCIO::GroupTransform::Create();
Expand Down Expand Up @@ -56,6 +56,36 @@ OCIO_ADD_TEST(Processor, basic)
OCIO_CHECK_EQUAL(std::string(processorMat->getCacheID()), "1b1880136f7669351adb0dcae0f4f9fd");
}

OCIO_ADD_TEST(Processor, basic_cache_lut)
{
OCIO::ConfigRcPtr config = OCIO::Config::Create();
OCIO::GroupTransformRcPtr group = OCIO::GroupTransform::Create();

auto processorEmptyGroup = config->getProcessor(group);
OCIO_CHECK_EQUAL(processorEmptyGroup->getNumTransforms(), 0);
OCIO_CHECK_EQUAL(std::string(processorEmptyGroup->getCacheID()), "<NOOP>");

auto lut = OCIO::Lut3DTransform::Create(3);
// Make sure it's not an identity.
lut->setValue(2, 2, 2, 2.f, 3.f, 4.f);

auto processorLut = config->getProcessor(lut);
OCIO_CHECK_EQUAL(processorLut->getNumTransforms(), 1);
OCIO_CHECK_EQUAL(std::string(processorLut->getCacheID()), "2b26d0097cdcf8f141fe3b3d6e21b5ec");

// Check behaviour of the cacheID

// Change a value and check that the cacheID changes.
lut->setValue(2, 2, 2, 1.f, 3.f, 4.f);
processorLut = config->getProcessor(lut);
OCIO_CHECK_EQUAL(std::string(processorLut->getCacheID()), "288ec8ea132adaca5b5aed24a296a1a2");

// Restore the original value, check that the cache ID matches what it used to be.
lut->setValue(2, 2, 2, 2.f, 3.f, 4.f);
processorLut = config->getProcessor(lut);
OCIO_CHECK_EQUAL(std::string(processorLut->getCacheID()), "2b26d0097cdcf8f141fe3b3d6e21b5ec");
}

OCIO_ADD_TEST(Processor, unique_dynamic_properties)
{
OCIO::TransformDirection direction = OCIO::TRANSFORM_DIR_FORWARD;
Expand Down
47 changes: 47 additions & 0 deletions tests/gpu/Lut1DOp_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,53 @@ OCIO_ADD_GPU_TEST(Lut1DOp, lut1d_half_domain_unequal_channels)
test.setTestInfinity(false);
}

OCIO_ADD_GPU_TEST(Lut1DOp, lut1d_half_domain_negative_zero)
{
// This is an edge case, but this test documents that the behavior of CPU & GPU
// are different with respect to where in the LUT negative zero looks up at.
// This is only visible with half-domain LUTs that set different values for
// positive and negative zero, which really should be considered a bug in the LUT.
// Given that IEEE arithmetic specifies that -0 == +0 in comparisons, this does
// not seem to be worth fixing in OCIO at the cost of reduced performance.

// Create a half-domain LUT1D.
const auto lut = OCIO::Lut1DTransform::Create(65536, true);

// Set the positive and negative denorms to large values to make it easy
// to check that the processing is correct.
for (unsigned i=0; i<1024; i++)
{
const float x = static_cast<float>(i);
// Positive denorms.
lut->setValue(0 + i, x, x, x);
// Negative denorms. Create a jump between +0 and -0.
lut->setValue(32768 + i, x + 10.f, x + 10.f, x + 10.f);
}

test.setProcessor(lut);

// TODO: Would like this to be lower.
test.setErrorThreshold(2e-3f);

OCIOGPUTest::CustomValues values;
values.m_inputValues =
{
// Negative zero uses the positive 0 LUT value on the GPU, and negative 0 LUT on CPU.
// -0.00f, -0.00f, -0.000f, 0.0f,
0.00f, 0.00f, 0.000f, 1.0f,
// Use values that fall in the middle of the first, second, and third LUT segments
// to test accuracy in the denormals.
3e-8f, 9e-8f, 15e-8f, 0.0f,
-3e-8f, -9e-8f, -15e-8f, 0.0f,
// Throw in a more typical value.
0.50f, 0.05f, 0.005f, 0.5f,
};
test.setCustomValues(values);

test.setTestNaN(false);
test.setTestInfinity(false);
}

OCIO_ADD_GPU_TEST(Lut1DOp, lut1d_file2_test)
{
OCIO::FileTransformRcPtr file = GetFileTransform("lut1d_green.ctf");
Expand Down
Loading