Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve quantization utilities #598

Merged
merged 7 commits into from
Aug 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ set(SOURCES
src/indexgenerator.cpp
src/overdrawanalyzer.cpp
src/overdrawoptimizer.cpp
src/quantization.cpp
src/simplifier.cpp
src/spatialorder.cpp
src/stripifier.cpp
Expand Down
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,8 @@ unsigned short py = meshopt_quantizeHalf(v.y);
unsigned short pz = meshopt_quantizeHalf(v.z);
```

Since quantized vertex attributes often need to remain in their compact representations for efficient transfer and storage, they are usually dequantized during vertex processing by configuring the GPU vertex input to expect normalized integers or half precision floats, which often requires minimal or no changes to the shader code. When CPU dequantization is required instead, `meshopt_dequantizeHalf` can be used to convert half precision values back to single precision. For normalized integer formats, dequantization just requires dividing by 2^N-1 for unorm and 2^(N-1)-1 for snorm variants; for example, manually reversing `meshopt_quantizeUnorm(v, 10)` can be done by dividing by 1023.

## Vertex/index buffer compression

In case storage size or transmission bandwidth is of importance, you might want to additionally compress vertex and index data. While several mesh compression libraries, like Google Draco, are available, they typically are designed to maximize the compression ratio at the cost of disturbing the vertex/index order (which makes the meshes inefficient to render on GPU) or decompression performance. They also frequently don't support custom game-ready quantized vertex formats and thus require re-quantizing the data after loading it, introducing extra quantization errors and making decoding slower.
Expand Down
101 changes: 101 additions & 0 deletions demo/tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1222,6 +1222,103 @@ static void tessellation()
assert(memcmp(tessib, expected, sizeof(expected)) == 0);
}

static void quantizeFloat()
{
	// reading zero through a volatile keeps the compiler from folding the divisions below into div-by-zero warnings
	volatile float zero = 0.f;

	// 1.2345f rounded to progressively fewer mantissa bits; 23 bits must be lossless
	struct PrecisionCase
	{
		int bits;
		float expected;
	};

	const PrecisionCase cases[] = {
	    {23, 1.2345f},
	    {16, 1.2344971f},
	    {8, 1.2343750f},
	    {4, 1.25f},
	    {1, 1.0f},
	};

	for (unsigned int i = 0; i < sizeof(cases) / sizeof(cases[0]); ++i)
		assert(meshopt_quantizeFloat(1.2345f, cases[i].bits) == cases[i].expected);

	// an exactly representable value survives even with zero mantissa bits
	assert(meshopt_quantizeFloat(1.f, 0) == 1.0f);

	// infinities of either sign pass through unchanged
	assert(meshopt_quantizeFloat(1.f / zero, 0) == 1.f / zero);
	assert(meshopt_quantizeFloat(-1.f / zero, 0) == -1.f / zero);

	// NaN stays NaN (NaN is the only value that compares unequal to itself)
	float quantized_nan = meshopt_quantizeFloat(zero / zero, 8);
	assert(quantized_nan != quantized_nan);
}

static void quantizeHalf()
{
	// reading zero through a volatile keeps the compiler from folding the divisions below into div-by-zero warnings
	volatile float zero = 0.f;

	struct HalfCase
	{
		float input;
		unsigned short expected;
	};

	const HalfCase cases[] = {
	    // normal
	    {1.2345f, 0x3cf0},
	    // overflow: rounds past the largest finite half into infinity
	    {65535.f, 0x7c00},
	    {-65535.f, 0xfc00},
	    // large but still finite
	    {65000.f, 0x7bef},
	    {-65000.f, 0xfbef},
	    // small
	    {0.125f, 0x3000},
	    {-0.125f, 0xb000},
	    // very small
	    {1e-4f, 0x068e},
	    {-1e-4f, 0x868e},
	    // underflow: flushed to signed zero
	    {1e-5f, 0x0000},
	    {-1e-5f, 0x8000},
	    // exponent underflow
	    {1e-20f, 0x0000},
	    {-1e-20f, 0x8000},
	    // exponent overflow
	    {1e20f, 0x7c00},
	    {-1e20f, 0xfc00},
	};

	for (unsigned int i = 0; i < sizeof(cases) / sizeof(cases[0]); ++i)
		assert(meshopt_quantizeHalf(cases[i].input) == cases[i].expected);

	// inf
	assert(meshopt_quantizeHalf(1.f / zero) == 0x7c00);
	assert(meshopt_quantizeHalf(-1.f / zero) == 0xfc00);

	// nan: the sign of the generated NaN is not pinned down, so accept either quiet NaN encoding
	unsigned short nan_bits = meshopt_quantizeHalf(zero / zero);
	assert(nan_bits == 0x7e00 || nan_bits == 0xfe00);
}

static void dequantizeHalf()
{
	// reading zero through a volatile keeps the compiler from folding the divisions below into div-by-zero warnings
	volatile float zero = 0.f;

	struct HalfCase
	{
		unsigned short input;
		float expected;
	};

	const HalfCase cases[] = {
	    // normal
	    {0x3cf0, 1.234375f},
	    // large
	    {0x7bef, 64992.f},
	    {0xfbef, -64992.f},
	    // small
	    {0x3000, 0.125f},
	    {0xb000, -0.125f},
	    // very small
	    {0x068e, 1.00016594e-4f},
	    {0x868e, -1.00016594e-4f},
	    // denormal: flushed to zero (the negative one is really -0.f, which still compares equal to 0.f)
	    {0x00ff, 0.f},
	    {0x80ff, 0.f},
	};

	for (unsigned int i = 0; i < sizeof(cases) / sizeof(cases[0]); ++i)
		assert(meshopt_dequantizeHalf(cases[i].input) == cases[i].expected);

	// the flushed negative denormal keeps its sign: dividing by it yields -inf
	assert(1.f / meshopt_dequantizeHalf(0x80ff) == -1.f / zero);

	// inf
	assert(meshopt_dequantizeHalf(0x7c00) == 1.f / zero);
	assert(meshopt_dequantizeHalf(0xfc00) == -1.f / zero);

	// nan (NaN is the only value that compares unequal to itself)
	float decoded_nan = meshopt_dequantizeHalf(0x7e00);
	assert(decoded_nan != decoded_nan);
}

void runTests()
{
decodeIndexV0();
Expand Down Expand Up @@ -1284,4 +1381,8 @@ void runTests()

adjacency();
tessellation();

quantizeFloat();
quantizeHalf();
dequantizeHalf();
}
58 changes: 10 additions & 48 deletions src/meshoptimizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -577,19 +577,25 @@ inline int meshopt_quantizeUnorm(float v, int N);
inline int meshopt_quantizeSnorm(float v, int N);

/**
* Quantize a float into half-precision floating point value
* Quantize a float into half-precision (as defined by IEEE-754 fp16) floating point value
* Generates +-inf for overflow, preserves NaN, flushes denormals to zero, rounds to nearest
* Representable magnitude range: [6e-5; 65504]
* Maximum relative reconstruction error: 5e-4
*/
inline unsigned short meshopt_quantizeHalf(float v);
MESHOPTIMIZER_API unsigned short meshopt_quantizeHalf(float v);

/**
* Quantize a float into a floating point value with a limited number of significant mantissa bits
* Quantize a float into a floating point value with a limited number of significant mantissa bits, preserving the IEEE-754 fp32 binary representation
* Generates +-inf for overflow, preserves NaN, flushes denormals to zero, rounds to nearest
* Assumes N is in a valid mantissa precision range, which is 1..23
*/
inline float meshopt_quantizeFloat(float v, int N);
MESHOPTIMIZER_API float meshopt_quantizeFloat(float v, int N);

/**
* Reverse quantization of a half-precision (as defined by IEEE-754 fp16) floating point value
* Preserves Inf/NaN, flushes denormals to zero
*/
MESHOPTIMIZER_API float meshopt_dequantizeHalf(unsigned short h);
#endif

/**
Expand Down Expand Up @@ -684,50 +690,6 @@ inline int meshopt_quantizeSnorm(float v, int N)

return int(v * scale + round);
}

inline unsigned short meshopt_quantizeHalf(float v)
{
	union { float f; unsigned int ui; } bits = {v};

	/* sign moved to bit 15 of the half; magnitude is the fp32 exponent+mantissa without the sign */
	const int sign = (bits.ui >> 16) & 0x8000;
	const int mag = bits.ui & 0x7fffffff;

	/* NaN; note that all types of NaN are converted to qNaN */
	if (mag > (255 << 23))
		return (unsigned short)(sign | 0x7e00);

	/* overflow: infinity; 143 encodes exponent 16 */
	if (mag >= (143 << 23))
		return (unsigned short)(sign | 0x7c00);

	/* underflow: flush to zero; 113 encodes exponent -14 */
	if (mag < (113 << 23))
		return (unsigned short)sign;

	/* bias exponent and round to nearest; 112 is relative exponent bias (127-15) */
	return (unsigned short)(sign | ((mag - (112 << 23) + (1 << 12)) >> 13));
}

/*
 * Quantizes v to N significant mantissa bits while keeping the fp32 binary layout.
 * Rounds to nearest (which may carry into the exponent, up to +-inf), returns inf/NaN unchanged,
 * and flushes zero/denormal inputs to a zero that keeps the sign of v.
 * N is expected to be a valid mantissa bit count (no more than 23); it is not validated here.
 */
inline float meshopt_quantizeFloat(float v, int N)
{
	union { float f; unsigned int ui; } u = {v};
	unsigned int ui = u.ui;

	/* mask covers the mantissa bits that get dropped; round is half of the lowest kept bit */
	const int mask = (1 << (23 - N)) - 1;
	const int round = (1 << (23 - N)) >> 1;

	int e = ui & 0x7f800000;
	unsigned int rui = (ui + round) & ~mask;

	/* round all numbers except inf/nan; this is important to make sure nan doesn't overflow into -0 */
	ui = e == 0x7f800000 ? ui : rui;

	/* flush denormals to zero; keep the sign bit so negative inputs yield -0, matching the half conversions */
	ui = e == 0 ? (u.ui & 0x80000000u) : ui;

	u.ui = ui;
	return u.f;
}
#endif

/* Internal implementation helpers */
Expand Down
70 changes: 70 additions & 0 deletions src/quantization.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
#include "meshoptimizer.h"

#include <assert.h>

// Converts an fp32 value to its IEEE-754 fp16 bit pattern.
// Overflow becomes +-inf, every NaN becomes a quiet NaN, values below the smallest
// normal half are flushed to signed zero, everything else is rounded to nearest.
unsigned short meshopt_quantizeHalf(float v)
{
	union { float f; unsigned int ui; } bits = {v};

	// fp16 sign bit, already shifted into its final position (bit 15)
	const int sign = (bits.ui >> 16) & 0x8000;

	// fp32 exponent and mantissa with the sign stripped
	const int mag = bits.ui & 0x7fffffff;

	int half;

	if (mag > (255 << 23))
	{
		// NaN; note that all types of NaN are converted to qNaN
		half = 0x7e00;
	}
	else if (mag >= (143 << 23))
	{
		// overflow: infinity; 143 encodes exponent 16
		half = 0x7c00;
	}
	else if (mag < (113 << 23))
	{
		// underflow: flush to zero; 113 encodes exponent -14
		half = 0;
	}
	else
	{
		// bias exponent and round to nearest; 112 is relative exponent bias (127-15)
		half = (mag - (112 << 23) + (1 << 12)) >> 13;
	}

	return (unsigned short)(sign | half);
}

// Quantizes v to N significant mantissa bits while keeping the fp32 binary layout.
// Rounds to nearest (which may carry into the exponent, up to +-inf), returns inf/NaN unchanged,
// and flushes zero/denormal inputs to a zero that keeps the sign of v.
// N must be in [0, 23]; this is only enforced by the assert below.
float meshopt_quantizeFloat(float v, int N)
{
	assert(N >= 0 && N <= 23);

	union { float f; unsigned int ui; } u = {v};
	unsigned int ui = u.ui;

	// mask covers the mantissa bits that get dropped; round is half of the lowest kept bit
	const int mask = (1 << (23 - N)) - 1;
	const int round = (1 << (23 - N)) >> 1;

	int e = ui & 0x7f800000;
	unsigned int rui = (ui + round) & ~mask;

	// round all numbers except inf/nan; this is important to make sure nan doesn't overflow into -0
	ui = e == 0x7f800000 ? ui : rui;

	// flush denormals to zero; keep the sign bit so negative inputs yield -0, matching the half conversions
	ui = e == 0 ? (u.ui & 0x80000000u) : ui;

	u.ui = ui;
	return u.f;
}

// Expands an IEEE-754 fp16 bit pattern into an fp32 value.
// Inf and NaN are preserved (including the NaN payload); denormal halves are flushed to signed zero.
float meshopt_dequantizeHalf(unsigned short h)
{
	// fp32 sign bit (bit 31) and the fp16 exponent+mantissa with the sign stripped
	const unsigned int sign = (unsigned int)(h & 0x8000) << 16;
	const int mag = h & 0x7fff;

	// denormal (exponent field is 0): flush to zero
	int wide = 0;

	if (mag >= (1 << 10))
	{
		// bias exponent and pad mantissa with 0; 112 is relative exponent bias (127-15)
		wide = (mag + (112 << 10)) << 13;

		// infinity/NaN; note that we preserve NaN payload as a byproduct of unifying inf/nan cases
		// 112 is an exponent bias fixup; since we already applied it once, applying it twice converts 31 to 255
		if (mag >= (31 << 10))
			wide += 112 << 23;
	}

	union { float f; unsigned int ui; } bits;
	bits.ui = sign | wide;
	return bits.f;
}