The correct constants are given to approximately 64-bit precision in "Code Generation and Factoring for Fast Evaluation of Low-order Spherical Harmonic Products and Squares" (2006) by John Snyder.
Signed-off-by: Alex Henrie alexhenrie24@gmail.com --- Fixes the test failure on 64-bit Linux.
https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/tr-2006-...
dlls/d3dx9_36/math.c | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-)
diff --git a/dlls/d3dx9_36/math.c b/dlls/d3dx9_36/math.c index 8f5f5b2413..856e987cb9 100644 --- a/dlls/d3dx9_36/math.c +++ b/dlls/d3dx9_36/math.c @@ -2573,27 +2573,27 @@ FLOAT * WINAPI D3DXSHMultiply3(FLOAT *out, const FLOAT *a, const FLOAT *b)
out[0] = 0.28209479f * a[0] * b[0];
- ta = 0.28209479f * a[0] - 0.12615662f * a[6] - 0.21850968f * a[8]; - tb = 0.28209479f * b[0] - 0.12615662f * b[6] - 0.21850968f * b[8]; + ta = 0.28209479f * a[0] - 0.12615663f * a[6] - 0.21850969f * a[8]; + tb = 0.28209479f * b[0] - 0.12615663f * b[6] - 0.21850969f * b[8]; out[1] = ta * b[1] + tb * a[1]; t = a[1] * b[1]; out[0] += 0.28209479f * t; - out[6] = -0.12615662f * t; - out[8] = -0.21850968f * t; + out[6] = -0.12615663f * t; + out[8] = -0.21850969f * t;
- ta = 0.21850968f * a[5]; - tb = 0.21850968f * b[5]; + ta = 0.21850969f * a[5]; + tb = 0.21850969f * b[5]; out[1] += ta * b[2] + tb * a[2]; out[2] = ta * b[1] + tb * a[1]; t = a[1] * b[2] +a[2] * b[1]; - out[5] = 0.21850968f * t; + out[5] = 0.21850969f * t;
- ta = 0.21850968f * a[4]; - tb = 0.21850968f * b[4]; + ta = 0.21850969f * a[4]; + tb = 0.21850969f * b[4]; out[1] += ta * b[3] + tb * a[3]; out[3] = ta * b[1] + tb * a[1]; t = a[1] * b[3] + a[3] * b[1]; - out[4] = 0.21850968f * t; + out[4] = 0.21850969f * t;
ta = 0.28209480f * a[0] + 0.25231326f * a[6]; tb = 0.28209480f * b[0] + 0.25231326f * b[6]; @@ -2629,14 +2629,14 @@ FLOAT * WINAPI D3DXSHMultiply3(FLOAT *out, const FLOAT *a, const FLOAT *b) out[4] += ta * b[5] + tb * a[5]; out[5] += ta * b[4] + tb * a[4]; t = a[4] * b[5] + a[5] * b[4]; - out[7] += 0.15607834f * t; + out[7] += 0.15607835f * t;
- ta = 0.28209479f * a[0] + 0.09011186f * a[6] - 0.15607835f * a[8]; - tb = 0.28209479f * b[0] + 0.09011186f * b[6] - 0.15607835f * b[8]; + ta = 0.28209479f * a[0] + 0.09011188f * a[6] - 0.15607835f * a[8]; + tb = 0.28209479f * b[0] + 0.09011188f * b[6] - 0.15607835f * b[8]; out[5] += ta * b[5] + tb * a[5]; t = a[5] * b[5]; out[0] += 0.28209479f * t; - out[6] += 0.09011186f * t; + out[6] += 0.09011188f * t; out[8] -= 0.15607835f * t;
ta = 0.28209480f * a[0]; @@ -2646,12 +2646,12 @@ FLOAT * WINAPI D3DXSHMultiply3(FLOAT *out, const FLOAT *a, const FLOAT *b) out[0] += 0.28209480f * t; out[6] += 0.18022376f * t;
- ta = 0.28209479f * a[0] + 0.09011186f * a[6] + 0.15607835f * a[8]; - tb = 0.28209479f * b[0] + 0.09011186f * b[6] + 0.15607835f * b[8]; + ta = 0.28209479f * a[0] + 0.09011188f * a[6] + 0.15607835f * a[8]; + tb = 0.28209479f * b[0] + 0.09011188f * b[6] + 0.15607835f * b[8]; out[7] += ta * b[7] + tb * a[7]; t = a[7] * b[7]; out[0] += 0.28209479f * t; - out[6] += 0.09011186f * t; + out[6] += 0.09011188f * t; out[8] += 0.15607835f * t;
ta = 0.28209479f * a[0] - 0.18022375f * a[6];
2018-02-06 4:47 GMT+01:00 Alex Henrie alexhenrie24@gmail.com:
The correct constants are given to approximately 64-bit precision in "Code Generation and Factoring for Fast Evaluation of Low-order Spherical Harmonic Products and Squares" (2006) by John Snyder.
Signed-off-by: Alex Henrie alexhenrie24@gmail.com
Fixes the test failure on 64-bit Linux.
https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/tr-2006-...
dlls/d3dx9_36/math.c | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-)
diff --git a/dlls/d3dx9_36/math.c b/dlls/d3dx9_36/math.c index 8f5f5b2413..856e987cb9 100644 --- a/dlls/d3dx9_36/math.c +++ b/dlls/d3dx9_36/math.c @@ -2573,27 +2573,27 @@ FLOAT * WINAPI D3DXSHMultiply3(FLOAT *out, const FLOAT *a, const FLOAT *b)
out[0] = 0.28209479f * a[0] * b[0];
- ta = 0.28209479f * a[0] - 0.12615662f * a[6] - 0.21850968f * a[8];
- tb = 0.28209479f * b[0] - 0.12615662f * b[6] - 0.21850968f * b[8];
+ ta = 0.28209479f * a[0] - 0.12615663f * a[6] - 0.21850969f * a[8];
+ tb = 0.28209479f * b[0] - 0.12615663f * b[6] - 0.21850969f * b[8]; out[1] = ta * b[1] + tb * a[1]; t = a[1] * b[1]; out[0] += 0.28209479f * t;
- out[6] = -0.12615662f * t;
- out[8] = -0.21850968f * t;
+ out[6] = -0.12615663f * t;
+ out[8] = -0.21850969f * t;
- ta = 0.21850968f * a[5];
- tb = 0.21850968f * b[5];
+ ta = 0.21850969f * a[5];
+ tb = 0.21850969f * b[5]; out[1] += ta * b[2] + tb * a[2]; out[2] = ta * b[1] + tb * a[1]; t = a[1] * b[2] +a[2] * b[1];
- out[5] = 0.21850968f * t;
+ out[5] = 0.21850969f * t;
- ta = 0.21850968f * a[4];
- tb = 0.21850968f * b[4];
+ ta = 0.21850969f * a[4];
+ tb = 0.21850969f * b[4]; out[1] += ta * b[3] + tb * a[3]; out[3] = ta * b[1] + tb * a[1]; t = a[1] * b[3] + a[3] * b[1];
- out[4] = 0.21850968f * t;
+ out[4] = 0.21850969f * t;
ta = 0.28209480f * a[0] + 0.25231326f * a[6]; tb = 0.28209480f * b[0] + 0.25231326f * b[6];
@@ -2629,14 +2629,14 @@ FLOAT * WINAPI D3DXSHMultiply3(FLOAT *out, const FLOAT *a, const FLOAT *b) out[4] += ta * b[5] + tb * a[5]; out[5] += ta * b[4] + tb * a[4]; t = a[4] * b[5] + a[5] * b[4];
- out[7] += 0.15607834f * t;
+ out[7] += 0.15607835f * t;
- ta = 0.28209479f * a[0] + 0.09011186f * a[6] - 0.15607835f * a[8];
- tb = 0.28209479f * b[0] + 0.09011186f * b[6] - 0.15607835f * b[8];
+ ta = 0.28209479f * a[0] + 0.09011188f * a[6] - 0.15607835f * a[8];
+ tb = 0.28209479f * b[0] + 0.09011188f * b[6] - 0.15607835f * b[8]; out[5] += ta * b[5] + tb * a[5]; t = a[5] * b[5]; out[0] += 0.28209479f * t;
- out[6] += 0.09011186f * t;
+ out[6] += 0.09011188f * t; out[8] -= 0.15607835f * t;
ta = 0.28209480f * a[0];
@@ -2646,12 +2646,12 @@ FLOAT * WINAPI D3DXSHMultiply3(FLOAT *out, const FLOAT *a, const FLOAT *b) out[0] += 0.28209480f * t; out[6] += 0.18022376f * t;
- ta = 0.28209479f * a[0] + 0.09011186f * a[6] + 0.15607835f * a[8];
- tb = 0.28209479f * b[0] + 0.09011186f * b[6] + 0.15607835f * b[8];
+ ta = 0.28209479f * a[0] + 0.09011188f * a[6] + 0.15607835f * a[8];
+ tb = 0.28209479f * b[0] + 0.09011188f * b[6] + 0.15607835f * b[8]; out[7] += ta * b[7] + tb * a[7]; t = a[7] * b[7]; out[0] += 0.28209479f * t;
- out[6] += 0.09011186f * t;
+ out[6] += 0.09011188f * t; out[8] += 0.15607835f * t;
ta = 0.28209479f * a[0] - 0.18022375f * a[6];
Nice job!
Not your fault but I think it would be nicer to define some constants (probably #defines, but actual const variables should also work) instead of replicating the same few coefficients many times. Unfortunately it looks like there is no standard naming for those constants. It's up to you, my thought at the moment is to go for something like:
#define SH_MULT_COEFF_0_28 0.28209479f
and so on.
2018-02-07 11:23 GMT-07:00 Matteo Bruni matteo.mystral@gmail.com:
Not your fault but I think it would be nicer to define some constants (probably #defines, but actual const variables should also work) instead of replicating the same few coefficients many times. Unfortunately it looks like there is no standard naming for those constants. It's up to you, my thought at the moment is to go for something like:
#define SH_MULT_COEFF_0_28 0.28209479f
and so on.
We have to distinguish between 0.28209479 and 0.28209480, so I don't think that using named constants is going to make this code more readable, unfortunately.
-Alex
Signed-off-by: Matteo Bruni mbruni@codeweavers.com --- I missed the multiple close-but-not-quite-identical constants in the paper. I'm not sure that they should actually differ but I don't feel like recomputing those coefficients AND it might well be that using more accurate coefficients would "break" our implementation by making it generate different results compared to native. So yeah, let's take your patch as-is.