diff --git a/dlls/d3dx9_36/d3dx9_36.spec b/dlls/d3dx9_36/d3dx9_36.spec index f91f962..28259bc 100644 --- a/dlls/d3dx9_36/d3dx9_36.spec +++ b/dlls/d3dx9_36/d3dx9_36.spec @@ -130,8 +130,8 @@ @ stub D3DXFillVolumeTextureTX @ stdcall D3DXFilterTexture(ptr ptr long long) @ stdcall D3DXFindShaderComment(ptr long ptr ptr) -@ stub D3DXFloat16To32Array -@ stub D3DXFloat32To16Array +@ stdcall D3DXFloat16To32Array(ptr ptr long) +@ stdcall D3DXFloat32To16Array(ptr ptr long) @ stub D3DXFrameAppendChild @ stub D3DXFrameCalculateBoundingSphere @ stub D3DXFrameDestroy diff --git a/dlls/d3dx9_36/math.c b/dlls/d3dx9_36/math.c index fdb5f92..6f8a71d 100644 --- a/dlls/d3dx9_36/math.c +++ b/dlls/d3dx9_36/math.c @@ -1769,3 +1769,123 @@ D3DXVECTOR4* WINAPI D3DXVec4TransformArray(D3DXVECTOR4* out, UINT outstride, CON } return out; } + /* Converts a single-precision float to the 16-bit half-float bit pattern assembled below: sign in bit 15, exponent (excess 15) in bits 10-14, mantissa in bits 0-9. Inf and NaN both return 0x7fff or 0xffff depending on the sign bit. */ +static inline unsigned short float_32_to_16(const float in) +{ + int exp = 0, origexp; + float tmp = fabs(in); + int sign = signbit(in); + unsigned int mantissa; + unsigned short ret; + + /* Deal with special numbers */ + if (isinf(in)) return (sign ? 0xffff : 0x7fff); + if (isnan(in)) return (sign ? 0xffff : 0x7fff); + if (in == 0.0f) return (sign ? 0x8000 : 0x0000); + /* Scale tmp by powers of two into [2^10, 2^11) so that its integer part is the 11-bit significand; exp records the compensating power of two (fabs(in) == tmp * 2^exp). */ + if (tmp < powf(2, 10)) + { + do + { + tmp = tmp * 2.0f; + exp--; + } while (tmp < powf(2, 10)); + } + else if (tmp >= powf(2, 11)) + { + do + { + tmp /= 2.0f; + exp++; + } while (tmp >= powf(2, 11)); + } + + exp += 10; /* Normalize the mantissa */ + exp += 15; /* Exponent is encoded with excess 15 */ + + origexp = exp; + + mantissa = (unsigned int) tmp; + if ((tmp - mantissa == 0.5f && mantissa % 2 == 1) || /* round half to even */ + (tmp - mantissa > 0.5f)) mantissa++; /* round to nearest, away from zero */ + if (mantissa == 2048) /* the rounding above carried out of the 11-bit range: renormalize */ + { + mantissa = 1024; + exp++; + } + + if (exp > 31) /* a biased exponent of exactly 31 is still encoded as an ordinary number by the final else branch */ + { + /* too big */ + ret = 0x7fff; /* INF */ + } + else if (exp <= 0) + { + unsigned int rounding = 0; + + exp = origexp; + + /* exp == 0: Non-normalized mantissa.
Returns 0x0000 (=0.0) for too small numbers */ + mantissa = (unsigned int) tmp; /* restarts from the truncated significand together with origexp; the rounding applied above is not carried into this branch */ + mantissa &= 0x3ff; + mantissa |= 0x400; /* explicit the first bit */ + while (exp <= 0) + { + rounding = mantissa & 1; /* overwritten on each pass: only the last bit shifted out is kept */ + mantissa >>= 1; + exp++; + } + ret = mantissa + rounding; + } + else + { + ret = (exp << 10) | (mantissa & 0x3ff); + } + + ret |= ((sign ? 1 : 0) << 15); /* Add the sign */ + return ret; +} + /* Converts n floats read from pin into half-floats stored in pout and returns pout. Neither pointer is checked for NULL here. */ +D3DXFLOAT16 *WINAPI D3DXFloat32To16Array(D3DXFLOAT16 *pout, CONST FLOAT *pin, UINT n) +{ + unsigned int i; + + for (i = 0; i < n; ++i) + { + pout[i].value = float_32_to_16(pin[i]); + } + + return pout; +} + +/* Native d3dx9's D3DXFloat16to32Array lacks support for NaN and Inf. Specifically, e = 16 is treated as a + * regular number - e.g., 0x7fff is converted to 131008.0 and 0xffff to -131008.0. */ +static inline float float_16_to_32(const unsigned short in) +{ + const unsigned short s = (in & 0x8000); + const unsigned short e = (in & 0x7C00) >> 10; /* 5-bit biased exponent field */ + const unsigned short m = in & 0x3FF; /* 10-bit mantissa field */ + const float sgn = (s ?
-1.0f : 1.0f); + + if (e == 0) + { + if (m == 0) return sgn * 0.0f; /* +0.0 or -0.0 */ + else return sgn * powf(2, -14.0f) * ((float)m / 1024.0f); /* denormal: no implicit leading one is added */ + } + else + { + return sgn * powf(2, (float)e - 15.0f) * (1.0f + ((float)m / 1024.0f)); /* also taken for e == 31, so those bit patterns decode as ordinary numbers */ + } +} + /* Converts n half-floats read from pin into floats stored in pout and returns pout. Neither pointer is checked for NULL here. */ +FLOAT *WINAPI D3DXFloat16To32Array(FLOAT *pout, CONST D3DXFLOAT16 *pin, UINT n) +{ + unsigned int i; + + for (i = 0; i < n; ++i) + { + pout[i] = float_16_to_32(pin[i].value); + } + + return pout; +} diff --git a/dlls/d3dx9_36/tests/math.c b/dlls/d3dx9_36/tests/math.c index 5ad1924..8a79893 100644 --- a/dlls/d3dx9_36/tests/math.c +++ b/dlls/d3dx9_36/tests/math.c @@ -21,6 +21,7 @@ #include "wine/test.h" #include "d3dx9.h" +#include /* NOTE(review): the header name is missing here, possibly lost in extraction; presumably math.h - confirm */ #define ARRAY_SIZE 5 @@ -2215,6 +2216,337 @@ static void test_D3DXVec_Array(void) compare_planes(exp_plane, out_plane); } +#define INT16_TYPE short +#define UINT16_TYPE unsigned short +#define INT32_TYPE long +#define UINT32_TYPE unsigned long + /* Alternative float to half converter working on raw bit patterns: reads numel UINT32_TYPE values from source and writes numel UINT16_TYPE values to target. Returns 0 on the conversion paths and 1 from the format checks. Its only call in this patch is commented out. */ +int singles2halfp(void *target, void *source, int numel) +{ + UINT16_TYPE *hp = (UINT16_TYPE *) target; // Type pun output as an unsigned 16-bit int + UINT32_TYPE *xp = (UINT32_TYPE *) source; // Type pun input as an unsigned 32-bit int + UINT16_TYPE hs, he, hm; + UINT32_TYPE x, xs, xe, xm; + int hes; + static int next; // Little Endian adjustment + static int checkieee = 0; // Flag to check for IEEE754, Endian, and word size /* NOTE(review): initialised to 0, so the check block below is never entered in the visible code - confirm whether 1 was intended */ + double one = 1.0; // Used for checking IEEE754 floating point format + UINT32_TYPE *ip; // Used for checking IEEE754 floating point format + + if( checkieee ) { // 1st call, so check for IEEE754, Endian, and word size + ip = (UINT32_TYPE *) &one; + if( *ip ) { // If Big Endian, then no adjustment + next = 0; + } else { // If Little Endian, then adjustment will be necessary + next = 1; + ip++; + } + if( *ip != 0x3FF00000u ) { // Check for exact IEEE 754 bit pattern of 1.0 + return 1; // Floating point bit pattern is not IEEE 754 + } + if( sizeof(INT16_TYPE) != 2 || sizeof(INT32_TYPE) != 4 ) { + return 1; // short is not 16-bits, or
long is not 32-bits. + } + checkieee = 0; // Everything checks out OK + } + + if( source == NULL || target == NULL ) { // Nothing to convert (e.g., imag part of pure real) + return 0; + } + + while( numel-- ) { + x = *xp++; + if( (x & 0x7FFFFFFFu) == 0 ) { // Signed zero + *hp++ = (UINT16_TYPE) (x >> 16); // Return the signed zero + } else { // Not zero + xs = x & 0x80000000u; // Pick off sign bit + xe = x & 0x7F800000u; // Pick off exponent bits + xm = x & 0x007FFFFFu; // Pick off mantissa bits + if( xe == 0 ) { // Denormal will underflow, return a signed zero + *hp++ = (UINT16_TYPE) (xs >> 16); + } else if( xe == 0x7F800000u ) { // Inf or NaN (all the exponent bits are set) + if( xm == 0 ) { // If mantissa is zero ... + *hp++ = (UINT16_TYPE) ((xs >> 16) | 0x7fffu); // Signed Inf + } else { + *hp++ = (UINT16_TYPE) 0xFfffu; // NaN, only 1st mantissa bit set /* NOTE(review): the value written is 0xffff, i.e. every bit set, which does not match the wording of this comment */ + } + } else { // Normalized number + hs = (UINT16_TYPE) (xs >> 16); // Sign bit + hes = ((int)(xe >> 23)) - 127 + 15; // Exponent unbias the single, then bias the halfp + if( hes >= 0x1F ) { // Overflow + *hp++ = (UINT16_TYPE) ((xs >> 16) | 0x7fffu); // Signed Inf + } else if( hes <= 0 ) { // Underflow + if( (14 - hes) > 24 ) { // Mantissa shifted all the way off & no rounding possibility + hm = (UINT16_TYPE) 0u; // Set mantissa to zero + } else { + xm |= 0x00800000u; // Add the hidden leading bit + hm = (UINT16_TYPE) (xm >> (14 - hes)); // Mantissa + if( (xm >> (13 - hes)) & 0x00000001u ) // Check for rounding + hm += (UINT16_TYPE) 1u; // Round, might overflow into exp bit, but this is OK + } + *hp++ = (hs | hm); // Combine sign bit and mantissa bits, biased exponent is zero + } else { + he = (UINT16_TYPE) (hes << 10); // Exponent + hm = (UINT16_TYPE) (xm >> 13); // Mantissa + if( xm & 0x00001000u ) // Check for rounding + *hp++ = (hs | he | hm) + (UINT16_TYPE) 1u; // Round, might overflow to inf, this is OK + else + *hp++ = (hs | he | hm); // No rounding + } + } + } + } + return 0; +} + /* Renders the low 16 bits of x as a string of 0 and 1 characters, most significant bit first, in a static buffer that every call overwrites. */ +const char
*int_to_binary +( + int x + ) +{ + static char b[100]; + int i = 0; /* incremented by the loop but not otherwise read in this function */ + b[0] = '\0'; + + int z; + for (z = 32768; z > 0; z >>= 1, i++) + { + strcat(b, ((x & z) == z) ? "1" : "0"); + } + + return b; +} + /* Test-local copy of the float to half conversion added to dlls/d3dx9_36/math.c by this patch, with printf tracing in the denormal branch; test_D3DXFloat_Array calls it only for that trace and ignores the return value. */ +static inline unsigned short float_32_to_16(const float in) +{ + int exp = 0, origexp; + float tmp = fabs(in); + int sign = signbit(in); + unsigned int mantissa; + unsigned short ret; + + /* Deal with special numbers */ + if (isinf(in)) return (sign ? 0xffff : 0x7fff); + if (isnan(in)) return (sign ? 0xffff : 0x7fff); + if (in == 0.0f) return (sign ? 0x8000 : 0x0000); + + if (tmp < powf(2, 10)) + { + do + { + tmp = tmp * 2.0f; + exp--; + } while (tmp < powf(2, 10)); + } + else if (tmp >= powf(2, 11)) + { + do + { + tmp /= 2.0f; + exp++; + } while (tmp >= powf(2, 11)); + } + + exp += 10; /* Normalize the mantissa */ + exp += 15; /* Exponent is encoded with excess 15 */ + + origexp = exp; + + mantissa = (unsigned int) tmp; + if ((tmp - mantissa == 0.5f && mantissa % 2 == 1) || /* round half to even */ + (tmp - mantissa > 0.5f)) mantissa++; /* round to nearest, away from zero */ + if (mantissa == 2048) + { + mantissa = 1024; + exp++; + } + + if (exp > 31) + { + /* too big */ + ret = 0x7fff; /* INF */ + } + else if (exp <= 0) + { + unsigned int rounding = 0; + + exp = origexp; + + /* exp == 0: Non-normalized mantissa. Returns 0x0000 (=0.0) for too small numbers */ + printf("\ttmp = %f\n", tmp); + mantissa = (unsigned int) tmp; + printf("\tmantissa = %u %s\n", mantissa, int_to_binary(mantissa)); + mantissa &= 0x3ff; + printf("\tmantissa & 0x3ff = %u %s\n", mantissa, int_to_binary(mantissa)); + mantissa |= 0x400; /* explicit the first bit */ + printf("\tmantissa | 0x400 = %u %s\n", mantissa, int_to_binary(mantissa)); + while (exp <= 0) + { + rounding = mantissa & 1; + mantissa >>= 1; + exp++; + } + printf("\trounding = %d\n", rounding); + ret = mantissa + rounding; + } + else + { + ret = (exp << 10) | (mantissa & 0x3ff); + } + + ret |= ((sign ?
1 : 0) << 15); /* Add the sign */ + return ret; +} + /* Renders the 32 bits of x as 0 and 1 characters, most significant bit first, with a space appended after the 1st and the 9th digit, in a static buffer that every call overwrites. */ +const char *single_to_binary +( + unsigned int x + ) +{ + static char b[100]; + int i = 0; + b[0] = '\0'; + + unsigned int z; + for (z = 32768*65536; z > 0; z >>= 1, i++) /* NOTE(review): 32768*65536 is an int multiplication whose value is 2^31, which does not fit a 32-bit int - confirm that 0x80000000 is the intended start mask */ + { + strcat(b, ((x & z) == z) ? "1" : "0"); + if (i == 0 || i == 8) strcat(b, " "); + } + + return b; +} + /* Renders the low 16 bits of x as 0 and 1 characters, most significant bit first, with a space appended after the 1st and the 6th digit, in a static buffer that every call overwrites. */ +const char *half_to_binary +( + int x + ) +{ + static char b[100]; + int i = 0; + b[0] = '\0'; + + int z; + for (z = 32768; z > 0; z >>= 1, i++) + { + strcat(b, ((x & z) == z) ? "1" : "0"); + if (i == 0 || i == 5) strcat(b, " "); + } + + return b; +} + /* Checks D3DXFloat32To16Array and D3DXFloat16To32Array against the table below (either half_ver1 or half_ver2 is accepted for the half result), then sweeps float bit patterns and prints every case where the local result differs from D3DXFloat32To16Array loaded from d3dx9_36_2.dll. */ +static void test_D3DXFloat_Array(void) +{ + unsigned int i; + void *out = NULL; + D3DXFLOAT16 half; + FLOAT single; + struct + { + FLOAT single_in; + + /* half_ver2 occurs on WXPPROSP3 (32 bit math), WVISTAADM (32 bit math), W7PRO (32 bit math) */ + WORD half_ver1, half_ver2; + + /* single_out_ver2 confirms that half -> single conversion is consistent across platforms */ + FLOAT single_out_ver1, single_out_ver2; + } testdata[] = { + { 80000.0f, 0x7c00, 0x7ce2, 65536.0f, 80000.0f }, + { 65503.0f, 0x7bff, 0x7bff, 65504.0f, 65504.0f }, + { 65504.0f, 0x7bff, 0x7bff, 65504.0f, 65504.0f }, + { 65520.0f, 0x7bff, 0x7c00, 65504.0f, 65536.0f }, + { 65521.0f, 0x7c00, 0x7c00, 65536.0f, 65536.0f }, + { 65534.0f, 0x7c00, 0x7c00, 65536.0f, 65536.0f }, + { 65535.0f, 0x7c00, 0x7c00, 65535.0f, 65536.0f }, + { 65536.0f, 0x7c00, 0x7c00, 65536.0f, 65536.0f }, + { -80000.0f, 0xfc00, 0xfce2, -65536.0f, -80000.0f }, + { -65503.0f, 0xfbff, 0xfbff, -65504.0f, -65504.0f }, + { -65504.0f, 0xfbff, 0xfbff, -65504.0f, -65504.0f }, + { -65520.0f, 0xfbff, 0xfc00, -65504.0f, -65536.0f }, + { -65521.0f, 0xfc00, 0xfc00, -65536.0f, -65536.0f }, + { -65534.0f, 0xfc00, 0xfc00, -65536.0f, -65536.0f }, + { -65535.0f, 0xfc00, 0xfc00, -65535.0f, -65536.0f }, + { -65536.0f, 0xfc00, 0xfc00, -65536.0f, -65536.0f }, + { INFINITY, 0x7c00, 0x7fff, 65536.0f, 131008.0f }, + { -INFINITY, 0xffff, 0xffff, -131008.0f, -131008.0f }, + { NAN,
0x7fff, 0x7fff, 131008.0f, 131008.0f }, + { -NAN, 0xffff, 0xffff, -131008.0f, -131008.0f }, + { 0.0f, 0x0, 0x0, 0.0f, 0.0f }, + { -0.0f, 0x8000, 0x8000, 0.0f, 0.0f } + }; + + /* exception on NULL out or in parameter */ + out = D3DXFloat32To16Array(&half, &single, 0); /* n == 0: nothing is converted, only the returned pointer is checked */ + ok(out == &half, "Got %p, expected %p.\n", out, &half); + + out = D3DXFloat16To32Array(&single, (D3DXFLOAT16 *)&half, 0); + ok(out == &single, "Got %p, expected %p.\n", out, &single); + + for (i = 0; i < sizeof(testdata)/sizeof(testdata[0]); i++) + { + out = D3DXFloat32To16Array(&half, &testdata[i].single_in, 1); + ok(out == &half, "Got %p, expected %p.\n", out, &half); + ok(half.value == testdata[i].half_ver1 || half.value == testdata[i].half_ver2, + "Got %x, expected %x or %x for index %d.\n", half.value, testdata[i].half_ver1, + testdata[i].half_ver2, i); + + out = D3DXFloat16To32Array(&single, (D3DXFLOAT16 *)&testdata[i].half_ver1, 1); + ok(out == &single, "Got %p, expected %p.\n", out, &single); + ok(relative_error(single, testdata[i].single_out_ver1) < admitted_error, + "Got %f, expected %f for index %d.\n", single, testdata[i].single_out_ver1, i); + + out = D3DXFloat16To32Array(&single, (D3DXFLOAT16 *)&testdata[i].half_ver2, 1); + ok(out == &single, "Got %p, expected %p.\n", out, &single); + ok(relative_error(single, testdata[i].single_out_ver2) < admitted_error, + "Got %f, expected %f for index %d.\n", single, testdata[i].single_out_ver2, i); + } + + { + HMODULE dll_handle = NULL; + D3DXFLOAT16 res, res2; + D3DXFLOAT16* (WINAPI * float32to16)(D3DXFLOAT16 *pout, CONST FLOAT *pin, UINT n); + union + { + float f; + DWORD d; + } x; + + dll_handle = LoadLibraryA("d3dx9_36_2.dll"); + if (!dll_handle) skip("init: Could not load d3dx9_36_2.dll.\n"); /* NOTE(review): no return follows, so GetProcAddress below is still reached with a NULL handle unless skip() leaves the function - confirm */ + + float32to16 = (void *)GetProcAddress(dll_handle, "D3DXFloat32To16Array"); + if (!float32to16) + { + FreeLibrary(dll_handle); + skip("init: Could not get function pointer (D3DXFloat32To16Array).\n"); /* NOTE(review): no return follows here either, so the loop below would call through a NULL pointer and dll_handle would be freed a second time - confirm */ + } + + for (i = 0; i < 0xfffff000; i += 0xff) /* i is reinterpreted as the bit pattern of a float through the union x */ + {
x.d = i; + float32to16(&res, &x.f, 1); + D3DXFloat32To16Array(&res2, &x.f, 1); + // singles2halfp(&res2, &x.f, 1); + + if (res.value != res2.value) + { + unsigned int *ptr = (unsigned int *)&x.f; + printf("%s: res2 - res = %d\n", single_to_binary(*ptr), res2.value - res.value); + float_32_to_16(x.f); + // ok(res.value == res2.value, "Failed i=%#x f=%f (%#x!=%#x)\n", i, x.f, res.value, res2.value); + /* + printf("\t%s: ", single_to_binary(*ptr)); + printf("%s !=", half_to_binary(res.value)); + printf(" %s\n", half_to_binary(res2.value)); + */ + } + //trace("i=%#x f=%f (%#x!=%#x)\n", i, x.f, res.value, res2.value); + } + FreeLibrary(dll_handle); + } +} + START_TEST(math) { D3DXColorTest(); @@ -2230,4 +2562,5 @@ START_TEST(math) test_Matrix_Decompose(); test_Matrix_Transformation2D(); test_D3DXVec_Array(); + test_D3DXFloat_Array(); } diff --git a/include/d3dx9math.h b/include/d3dx9math.h index f842e3e..cdb1deb 100644 --- a/include/d3dx9math.h +++ b/include/d3dx9math.h @@ -261,6 +261,21 @@ typedef struct D3DXCOLOR FLOAT r, g, b, a; } D3DXCOLOR, *LPD3DXCOLOR; +typedef struct D3DXFLOAT16 /* 16-bit half-float wrapper; the C++ members declared here are defined in d3dx9math.inl */ +{ +#ifdef __cplusplus + D3DXFLOAT16(); + D3DXFLOAT16(FLOAT f); + D3DXFLOAT16(CONST D3DXFLOAT16 &f); + + operator FLOAT (); + + BOOL operator == (CONST D3DXFLOAT16 &) const; + BOOL operator != (CONST D3DXFLOAT16 &) const; +#endif /* __cplusplus */ + WORD value; /* raw 16-bit half-float bit pattern written by D3DXFloat32To16Array */ +} D3DXFLOAT16, *LPD3DXFLOAT16; + #ifdef __cplusplus extern "C" { #endif @@ -358,6 +373,9 @@ D3DXVECTOR4* WINAPI D3DXVec4Normalize(D3DXVECTOR4 *pout, CONST D3DXVECTOR4 *pv); D3DXVECTOR4* WINAPI D3DXVec4Transform(D3DXVECTOR4 *pout, CONST D3DXVECTOR4 *pv, CONST D3DXMATRIX *pm); D3DXVECTOR4* WINAPI D3DXVec4TransformArray(D3DXVECTOR4 *pout, UINT outstride, CONST D3DXVECTOR4 *pv, UINT vstride, CONST D3DXMATRIX *pm, UINT n); +D3DXFLOAT16 *WINAPI D3DXFloat32To16Array(D3DXFLOAT16 *pout, CONST FLOAT *pin, UINT n); +FLOAT *WINAPI D3DXFloat16To32Array(FLOAT *pout, CONST D3DXFLOAT16 *pin, UINT n); + #ifdef __cplusplus } #endif diff --git
a/include/d3dx9math.inl b/include/d3dx9math.inl index 3cd078a..3f55aef 100644 --- a/include/d3dx9math.inl +++ b/include/d3dx9math.inl @@ -851,6 +851,37 @@ inline BOOL D3DXCOLOR::operator != (CONST D3DXCOLOR& col) const return r != col.r || g != col.g || b != col.b || a != col.a; } +inline D3DXFLOAT16::D3DXFLOAT16() /* empty body: value is left unassigned */ +{ +} + +inline D3DXFLOAT16::D3DXFLOAT16(FLOAT f) /* stores the half-float encoding of f via D3DXFloat32To16Array */ +{ + D3DXFloat32To16Array(this, &f, 1); +} + +inline D3DXFLOAT16::D3DXFLOAT16(CONST D3DXFLOAT16 &f) +{ + value = f.value; +} + +inline D3DXFLOAT16::operator FLOAT () /* decodes value via D3DXFloat16To32Array */ +{ + FLOAT f; + D3DXFloat16To32Array(&f, this, 1); + return f; +} + +inline BOOL D3DXFLOAT16::operator == (CONST D3DXFLOAT16 &f) const /* compares the raw bit patterns, not the decoded float values */ +{ + return value == f.value; +} + +inline BOOL D3DXFLOAT16::operator != (CONST D3DXFLOAT16 &f) const /* compares the raw bit patterns, not the decoded float values */ +{ + return value != f.value; +} + #endif /* __cplusplus */ /*_______________D3DXCOLOR_____________________*/