From 0f290bd4c98b15e236dfeb30ecd72111dd91291e Mon Sep 17 00:00:00 2001
From: Stefan Doesinger <stefan@codeweavers.com>
Date: Fri, 31 Aug 2007 20:32:52 +0200
Subject: [PATCH] WineD3D: Fix the z range

D3D's output z range after transformation is [0;1], but opengl's is
[-1;1]. OpenGL scales [-1;1] to [0;1] before the Z test, so all our
geometry ends up in the range [0.5;1]. This causes severe precision loss
in the Z buffer, resulting in Z fighting, breaks the near clipping plane
and breaks Z comparison against values written into the z buffer using
clear or depthfill.

This patch adds a test for untransformed, transformed and vertex shader
paths and scales the z range from [0;1] to [-1;1] before passing it to
opengl. Also a comment is added for the clipping disabling workaround.

Fixing the depthout value from pixel shaders is not needed because it is
in the range [0;1] already. It runs after opengl's [-1;1] -> [0;1]
scaling.
---
 dlls/d3d9/tests/visual.c    |  188 +++++++++++++++++++++++++++++++++++++++----
 dlls/wined3d/state.c        |   33 +++++++-
 dlls/wined3d/vertexshader.c |   12 +++
 3 files changed, 213 insertions(+), 20 deletions(-)

diff --git a/dlls/d3d9/tests/visual.c b/dlls/d3d9/tests/visual.c
index 2c15d1c..16bb3d2 100644
--- a/dlls/d3d9/tests/visual.c
+++ b/dlls/d3d9/tests/visual.c
@@ -131,6 +131,12 @@ struct vertex
     DWORD diffuse;
 };
 
+struct tvertex
+{
+    float x, y, z, rhw;
+    DWORD diffuse;
+};
+
 struct nvertex
 {
     float x, y, z;
@@ -1315,18 +1321,52 @@ static void texbem_test(IDirect3DDevice9 *device)
     }
 }
 
-static void present_test(IDirect3DDevice9 *device)
+static void z_range_test(IDirect3DDevice9 *device)
 {
-    struct vertex quad[] =
+    const struct vertex quad[] =
+    {
+        {-1.0f,  0.0f,   1.1f,                          0xffff0000},
+        {-1.0f,  1.0f,   1.1f,                          0xffff0000},
+        { 1.0f,  0.0f,  -1.1f,                          0xffff0000},
+        { 1.0f,  1.0f,  -1.1f,                          0xffff0000},
+    };
+    const struct vertex quad2[] =
+    {
+        {-1.0f,  0.0f,   1.1f,                          0xff0000ff},
+        {-1.0f,  1.0f,   1.1f,                          0xff0000ff},
+        { 1.0f,  0.0f,  -1.1f,                          0xff0000ff},
+        { 1.0f,  1.0f,  -1.1f,                          0xff0000ff},
+    };
+
+    const struct tvertex quad3[] =
+    {
+        {    0,   240,   1.1f,  1.0,                    0xffffff00},
+        {    0,   480,   1.1f,  1.0,                    0xffffff00},
+        {  640,   240,  -1.1f,  1.0,                    0xffffff00},
+        {  640,   480,  -1.1f,  1.0,                    0xffffff00},
+    };
+    const struct tvertex quad4[] =
     {
-        {-1.0f, -1.0f,   0.9f,                          0xffff0000},
-        {-1.0f,  1.0f,   0.9f,                          0xffff0000},
-        { 1.0f, -1.0f,   0.1f,                          0xffff0000},
-        { 1.0f,  1.0f,   0.1f,                          0xffff0000},
+        {    0,   240,   1.1f,  1.0,                    0xff00ff00},
+        {    0,   480,   1.1f,  1.0,                    0xff00ff00},
+        {  640,   240,  -1.1f,  1.0,                    0xff00ff00},
+        {  640,   480,  -1.1f,  1.0,                    0xff00ff00},
     };
     HRESULT hr;
     DWORD color;
-
+    IDirect3DVertexShader9 *shader;
+    IDirect3DVertexDeclaration9 *decl;
+    const DWORD shader_code[] = {
+        0xfffe0101,                                     /* vs_1_1           */
+        0x0000001f, 0x80000000, 0x900f0000,             /* dcl_position v0  */
+        0x00000001, 0xc00f0000, 0x90e40000,             /* mov oPos, v0     */
+        0x00000001, 0xd00f0000, 0xa0e40000,             /* mov oD0, c0      */
+        0x0000ffff                                      /* end              */
+    };
+    static const D3DVERTEXELEMENT9 decl_elements[] = {
+        {0, 0,  D3DDECLTYPE_FLOAT3,   D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_POSITION, 0},
+        D3DDECL_END()
+    };
     /* Does the Present clear the depth stencil? Clear the depth buffer with some value != 0,
      * then call Present. Then clear the color buffer to make sure it has some defined content
      * after the Present with D3DSWAPEFFECT_DISCARD. After that draw a plane that is somewhere cut
@@ -1337,8 +1377,12 @@ static void present_test(IDirect3DDevice9 *device)
     hr = IDirect3DDevice9_Present(device, NULL, NULL, NULL, NULL);
     hr = IDirect3DDevice9_Clear(device, 0, NULL, D3DCLEAR_TARGET, 0xffffffff, 0.4, 0);
 
+    hr = IDirect3DDevice9_SetRenderState(device, D3DRS_CLIPPING, TRUE);
+    ok(hr == D3D_OK, "IDirect3DDevice9_SetRenderState returned %s\n", DXGetErrorString9(hr));
     hr = IDirect3DDevice9_SetRenderState(device, D3DRS_ZENABLE, D3DZB_TRUE);
     ok(hr == D3D_OK, "IDirect3DDevice9_SetRenderState returned %s\n", DXGetErrorString9(hr));
+    hr = IDirect3DDevice9_SetRenderState(device, D3DRS_ZWRITEENABLE, FALSE);
+    ok(hr == D3D_OK, "IDirect3DDevice9_SetRenderState returned %s\n", DXGetErrorString9(hr));
     hr = IDirect3DDevice9_SetRenderState(device, D3DRS_ZFUNC, D3DCMP_GREATER);
     ok(hr == D3D_OK, "IDirect3DDevice9_SetRenderState returned %s\n", DXGetErrorString9(hr));
     hr = IDirect3DDevice9_SetFVF(device, D3DFVF_XYZ | D3DFVF_DIFFUSE);
@@ -1348,23 +1392,137 @@ static void present_test(IDirect3DDevice9 *device)
     ok(hr == D3D_OK, "IDirect3DDevice9_BeginScene failed with %s\n", DXGetErrorString9(hr));
     if(hr == D3D_OK)
     {
-        /* No lights are defined... That means, lit vertices should be entirely black */
+        /* Test the untransformed vertex path */
         hr = IDirect3DDevice9_DrawPrimitiveUP(device, D3DPT_TRIANGLESTRIP, 2 /*PrimCount */, quad, sizeof(quad[0]));
         ok(hr == D3D_OK, "IDirect3DDevice9_DrawIndexedPrimitiveUP failed with %s\n", DXGetErrorString9(hr));
+        hr = IDirect3DDevice9_SetRenderState(device, D3DRS_ZFUNC, D3DCMP_LESS);
+        ok(hr == D3D_OK, "IDirect3DDevice9_SetRenderState returned %s\n", DXGetErrorString9(hr));
+        hr = IDirect3DDevice9_DrawPrimitiveUP(device, D3DPT_TRIANGLESTRIP, 2 /*PrimCount */, quad2, sizeof(quad2[0]));
+        ok(hr == D3D_OK, "IDirect3DDevice9_DrawIndexedPrimitiveUP failed with %s\n", DXGetErrorString9(hr));
+
+        /* Test the transformed vertex path */
+        hr = IDirect3DDevice9_SetFVF(device, D3DFVF_XYZRHW | D3DFVF_DIFFUSE);
+        ok(hr == D3D_OK, "IDirect3DDevice9_SetFVF returned %s\n", DXGetErrorString9(hr));
+
+        hr = IDirect3DDevice9_DrawPrimitiveUP(device, D3DPT_TRIANGLESTRIP, 2 /*PrimCount */, quad4, sizeof(quad4[0]));
+        ok(hr == D3D_OK, "IDirect3DDevice9_DrawIndexedPrimitiveUP failed with %s\n", DXGetErrorString9(hr));
+        hr = IDirect3DDevice9_SetRenderState(device, D3DRS_ZFUNC, D3DCMP_GREATER);
+        ok(hr == D3D_OK, "IDirect3DDevice9_SetRenderState returned %s\n", DXGetErrorString9(hr));
+        hr = IDirect3DDevice9_DrawPrimitiveUP(device, D3DPT_TRIANGLESTRIP, 2 /*PrimCount */, quad3, sizeof(quad3[0]));
+        ok(hr == D3D_OK, "IDirect3DDevice9_DrawIndexedPrimitiveUP failed with %s\n", DXGetErrorString9(hr));
 
         hr = IDirect3DDevice9_EndScene(device);
         ok(hr == D3D_OK, "IDirect3DDevice9_EndScene failed with %s\n", DXGetErrorString9(hr));
     }
 
-    hr = IDirect3DDevice9_SetRenderState(device, D3DRS_ZENABLE, D3DZB_FALSE);
-    ok(hr == D3D_OK, "IDirect3DDevice9_SetRenderState returned %s\n", DXGetErrorString9(hr));
+    hr = IDirect3DDevice9_Present(device, NULL, NULL, NULL, NULL);
+    ok(SUCCEEDED(hr), "Present failed (0x%08x)\n", hr);
+
+    /* Do not test the exact corner pixels, but go pretty close to them */
+
+    /* Clipped because z > 1.0 */
+    color = getPixelColor(device, 28, 238);
+    ok(color == 0x00ffffff, "Z range failed: Got color 0x%08x, expected 0x00ffffff.\n", color);
+    color = getPixelColor(device, 28, 241);
+    ok(color == 0x00ffffff, "Z range failed: Got color 0x%08x, expected 0x00ffffff.\n", color);
+
+    /* Not clipped, > z buffer clear value(0.75) */
+    color = getPixelColor(device, 31, 238);
+    ok(color == 0x00ff0000, "Z range failed: Got color 0x%08x, expected 0x00ff0000.\n", color);
+    color = getPixelColor(device, 31, 241);
+    ok(color == 0x00ffff00, "Z range failed: Got color 0x%08x, expected 0x00ffff00.\n", color);
+    color = getPixelColor(device, 100, 238);
+    ok(color == 0x00ff0000, "Z range failed: Got color 0x%08x, expected 0x00ff0000.\n", color);
+    color = getPixelColor(device, 100, 241);
+    ok(color == 0x00ffff00, "Z range failed: Got color 0x%08x, expected 0x00ffff00.\n", color);
+
+    /* Not clipped, < z buffer clear value */
+    color = getPixelColor(device, 104, 238);
+    ok(color == 0x000000ff, "Z range failed: Got color 0x%08x, expected 0x000000ff.\n", color);
+    color = getPixelColor(device, 104, 241);
+    ok(color == 0x0000ff00, "Z range failed: Got color 0x%08x, expected 0x0000ff00.\n", color);
+    color = getPixelColor(device, 318, 238);
+    ok(color == 0x000000ff, "Z range failed: Got color 0x%08x, expected 0x000000ff.\n", color);
+    color = getPixelColor(device, 318, 241);
+    ok(color == 0x0000ff00, "Z range failed: Got color 0x%08x, expected 0x0000ff00.\n", color);
+
+    /* Clipped because z < 0.0 */
+    color = getPixelColor(device, 321, 238);
+    ok(color == 0x00ffffff, "Z range failed: Got color 0x%08x, expected 0x00ffffff.\n", color);
+    color = getPixelColor(device, 321, 241);
+    ok(color == 0x00ffffff, "Z range failed: Got color 0x%08x, expected 0x00ffffff.\n", color);
+
+    /* Test the shader path */
+    hr = IDirect3DDevice9_CreateVertexShader(device, shader_code, &shader);
+    if(FAILED(hr)) {
+        skip("Can't create test vertex shader, most likely shaders not supported\n");
+        goto out;
+    }
+    hr = IDirect3DDevice9_CreateVertexDeclaration(device, decl_elements, &decl);
+
+    hr = IDirect3DDevice9_Clear(device, 0, NULL, D3DCLEAR_TARGET, 0xffffffff, 0.4, 0);
+
+    IDirect3DDevice9_SetVertexDeclaration(device, decl);
+    ok(hr == D3D_OK, "IDirect3DDevice9_SetVertexDeclaration returned %s\n", DXGetErrorString9(hr));
+    IDirect3DDevice9_SetVertexShader(device, shader);
+    ok(hr == D3D_OK, "IDirect3DDevice9_SetVertexShader returned %s\n", DXGetErrorString9(hr));
+
+    hr = IDirect3DDevice9_BeginScene(device);
+    ok(hr == D3D_OK, "IDirect3DDevice9_BeginScene failed with %s\n", DXGetErrorString9(hr));
+    if(hr == D3D_OK)
+    {
+        float colorf[] = {1.0, 0.0, 0.0, 1.0};
+        float colorf2[] = {0.0, 0.0, 1.0, 1.0};
+        IDirect3DDevice9_SetVertexShaderConstantF(device, 0, colorf, 1);
+        hr = IDirect3DDevice9_DrawPrimitiveUP(device, D3DPT_TRIANGLESTRIP, 2 /*PrimCount */, quad, sizeof(quad[0]));
+        ok(hr == D3D_OK, "IDirect3DDevice9_DrawIndexedPrimitiveUP failed with %s\n", DXGetErrorString9(hr));
+        hr = IDirect3DDevice9_SetRenderState(device, D3DRS_ZFUNC, D3DCMP_LESS);
+        ok(hr == D3D_OK, "IDirect3DDevice9_SetRenderState returned %s\n", DXGetErrorString9(hr));
+        IDirect3DDevice9_SetVertexShaderConstantF(device, 0, colorf2, 1);
+        hr = IDirect3DDevice9_DrawPrimitiveUP(device, D3DPT_TRIANGLESTRIP, 2 /*PrimCount */, quad2, sizeof(quad2[0]));
+        ok(hr == D3D_OK, "IDirect3DDevice9_DrawIndexedPrimitiveUP failed with %s\n", DXGetErrorString9(hr));
+
+        hr = IDirect3DDevice9_EndScene(device);
+        ok(hr == D3D_OK, "IDirect3DDevice9_EndScene failed with %s\n", DXGetErrorString9(hr));
+    }
+
+    IDirect3DDevice9_SetVertexDeclaration(device, NULL);
+    ok(hr == D3D_OK, "IDirect3DDevice9_SetVertexDeclaration returned %s\n", DXGetErrorString9(hr));
+    IDirect3DDevice9_SetVertexShader(device, NULL);
+    ok(hr == D3D_OK, "IDirect3DDevice9_SetVertexShader returned %s\n", DXGetErrorString9(hr));
+
+    IDirect3DVertexDeclaration9_Release(decl);
+    IDirect3DVertexShader9_Release(shader);
 
     hr = IDirect3DDevice9_Present(device, NULL, NULL, NULL, NULL);
     ok(SUCCEEDED(hr), "Present failed (0x%08x)\n", hr);
-    color = getPixelColor(device, 512, 240);
-    ok(color == 0x00ffffff, "Present failed: Got color 0x%08x, expected 0x00ffffff.\n", color);
-    color = getPixelColor(device, 64, 240);
-    ok(color == 0x00ff0000, "Present failed: Got color 0x%08x, expected 0x00ff0000.\n", color);
+    /* Clipped because z > 1.0 */
+    color = getPixelColor(device, 28, 238);
+    ok(color == 0x00ffffff, "Z range failed: Got color 0x%08x, expected 0x00ffffff.\n", color);
+
+    /* 1.0 > z > 0.75 */
+    color = getPixelColor(device, 31, 238);
+    ok(color == 0x00ff0000, "Z range failed: Got color 0x%08x, expected 0x00ff0000.\n", color);
+    color = getPixelColor(device, 100, 238);
+    ok(color == 0x00ff0000, "Z range failed: Got color 0x%08x, expected 0x00ff0000.\n", color);
+
+    /* 0.75 > z > 0.0 */
+    color = getPixelColor(device, 104, 238);
+    ok(color == 0x000000ff, "Z range failed: Got color 0x%08x, expected 0x000000ff.\n", color);
+    color = getPixelColor(device, 318, 238);
+    ok(color == 0x000000ff, "Z range failed: Got color 0x%08x, expected 0x000000ff.\n", color);
+
+    /* Clipped because z < 0.0 */
+    color = getPixelColor(device, 321, 238);
+    ok(color == 0x00ffffff, "Z range failed: Got color 0x%08x, expected 0x00ffffff.\n", color);
+
+    out:
+    hr = IDirect3DDevice9_SetRenderState(device, D3DRS_ZENABLE, D3DZB_FALSE);
+    ok(hr == D3D_OK, "IDirect3DDevice9_SetRenderState returned %s\n", DXGetErrorString9(hr));
+    hr = IDirect3DDevice9_SetRenderState(device, D3DRS_CLIPPING, FALSE);
+    ok(hr == D3D_OK, "IDirect3DDevice9_SetRenderState returned %s\n", DXGetErrorString9(hr));
+    hr = IDirect3DDevice9_SetRenderState(device, D3DRS_ZWRITEENABLE, TRUE);
+    ok(hr == D3D_OK, "IDirect3DDevice9_SetRenderState returned %s\n", DXGetErrorString9(hr));
 }
 
 static void fill_surface(IDirect3DSurface9 *surface, DWORD color)
@@ -2266,7 +2424,7 @@ START_TEST(visual)
     } else {
         skip("No cube texture support\n");
     }
-    present_test(device_ptr);
+    z_range_test(device_ptr);
     if(caps.TextureCaps & D3DPTEXTURECAPS_MIPMAP)
     {
         maxmip_test(device_ptr);
diff --git a/dlls/wined3d/state.c b/dlls/wined3d/state.c
index fe25e4b..2da7bd8 100644
--- a/dlls/wined3d/state.c
+++ b/dlls/wined3d/state.c
@@ -2615,7 +2615,23 @@ static void transform_projection(DWORD state, IWineD3DStateBlockImpl *stateblock
             /* Transformed vertices are supposed to bypass the whole transform pipeline including
              * frustum clipping. This can't be done in opengl, so this code adjusts the Z range to
              * suppress depth clipping. This can be done because it is an orthogonal projection and
-             * the Z coordinate does not affect the size of the primitives
+             * the Z coordinate does not affect the size of the primitives. Half Life 1 and Prince of
+             * Persia 3D need this.
+             *
+             * Note that using minZ and maxZ here doesn't entirely fix the problem, since view frustum
+             * clipping is still enabled, but it seems to fix it for all apps tested so far. A minor
+             * problem can be witnessed in half-life 1 engine based games, the weapon is clipped close
+             * to the viewer.
+             *
+             * Also note that this breaks z comparison against z values filled in with clear,
+             * but no app depending on that and disabled clipping has been found yet. Comparing
+             * primitives against themselves works, so the Z buffer is still intact for normal hidden
+             * surface removal.
+             *
+             * We could disable clipping entirely by setting the near to infinity and far to -infinity,
+             * but this would break Z buffer operation. Raising the range to something less than
+             * infinity would help a bit at the cost of Z precision, but it wouldn't eliminate the
+             * problem either.
              */
             TRACE("Calling glOrtho with %f, %f, %f, %f\n", width, height, -minZ, -maxZ);
             if(stateblock->wineD3DDevice->render_offscreen) {
@@ -2633,9 +2649,9 @@ static void transform_projection(DWORD state, IWineD3DStateBlockImpl *stateblock
              */
             TRACE("Calling glOrtho with %f, %f, %f, %f\n", width, height, 1.0, -1.0);
             if(stateblock->wineD3DDevice->render_offscreen) {
-                glOrtho(X, X + width, -Y, -Y - height, 1.0, -1.0);
+                glOrtho(X, X + width, -Y, -Y - height, 0.0, -1.0);
             } else {
-                glOrtho(X, X + width, Y + height, Y, 1.0, -1.0);
+                glOrtho(X, X + width, Y + height, Y, 0.0, -1.0);
             }
         }
         checkGLcall("glOrtho");
@@ -2661,9 +2677,16 @@ static void transform_projection(DWORD state, IWineD3DStateBlockImpl *stateblock
             1.0 / Width is used because the coord range goes from -1.0 to 1.0, then we
             divide by the Width/Height, so we need the half range(1.0) to translate by
             half a pixel.
+
+            The other fun is that d3d's output z range after the transformation is [0;1],
+            but opengl's is [-1;1]. Since the z buffer is in range [0;1] for both, gl
+            scales [-1;1] to [0;1]. This would mean that we end up in [0.5;1], lose a lot
+            of Z buffer precision, and the clear values do not match in the z test. Thus scale
+            [0;1] to [-1;1], so when gl undoes that we utilize the full z range.
          */
-        glTranslatef(1.0 / stateblock->viewport.Width, -1.0/ stateblock->viewport.Height, 0);
-        checkGLcall("glTranslatef (1.0 / width, -1.0 / height, 0)");
+        glTranslatef(1.0 / stateblock->viewport.Width, -1.0/ stateblock->viewport.Height, -1.0);
+        checkGLcall("glTranslatef (1.0 / width, -1.0 / height, -1.0)");
+        glScalef(1.0, 1.0, 2.0);
 
         /* D3D texture coordinates are flipped compared to OpenGL ones, so
             * render everything upside down when rendering offscreen. */
diff --git a/dlls/wined3d/vertexshader.c b/dlls/wined3d/vertexshader.c
index 8f4f1fd..b478b5c 100644
--- a/dlls/wined3d/vertexshader.c
+++ b/dlls/wined3d/vertexshader.c
@@ -353,6 +353,13 @@ static VOID IWineD3DVertexShaderImpl_GenerateShader(
          *  (this is handled in drawPrim() when it sets the MODELVIEW and PROJECTION matrices)
          */
         shader_addline(&buffer, "gl_Position.y = gl_Position.y * posFixup[1];\n");
+        /* Z coord [0;1]->[-1;1] mapping, see comment in transform_projection in state.c
+         *
+         * Basically we want (in homogeneous coordinates) z = z * 2 - 1. However, shaders are run
+         * before the homogeneous divide, so we have to take the w into account: z = ((z / w) * 2 - 1) * w,
+         * which is the same as z = z * 2 - w.
+         */
+        shader_addline(&buffer, "gl_Position.z = gl_Position.z * 2.0 - gl_Position.w;\n");
 
         shader_addline(&buffer, "}\n");
 
@@ -368,6 +375,7 @@ static VOID IWineD3DVertexShaderImpl_GenerateShader(
 
         /*  Create the hw ARB shader */
         shader_addline(&buffer, "!!ARBvp1.0\n");
+        shader_addline(&buffer, "PARAM zfixup = { 2.0, -1.0, 0.0, 0.0 };\n");
 
         /* Mesa supports only 95 constants */
         if (GL_VEND(MESA) || GL_VEND(WINE))
@@ -399,6 +407,10 @@ static VOID IWineD3DVertexShaderImpl_GenerateShader(
          *  (this is handled in drawPrim() when it sets the MODELVIEW and PROJECTION matrices)
          */
         shader_addline(&buffer, "MUL TMP_OUT.y, TMP_OUT.y, posFixup.y;\n");
+        /* Z coord [0;1]->[-1;1] mapping, see comment in transform_projection in state.c
+         * and the glsl equivalent
+         */
+        shader_addline(&buffer, "MAD TMP_OUT.z, TMP_OUT.z, zfixup.x, -TMP_OUT.w;\n");
 
         shader_addline(&buffer, "MOV result.position, TMP_OUT;\n");
         
-- 
1.5.2.2

