Signed-off-by: Daniel Ansorregui [email protected] --- include/d3d9types.h | 2 ++ 1 file changed, 2 insertions(+)
diff --git a/include/d3d9types.h b/include/d3d9types.h index 4891858b3d..2e5648a8f6 100644 --- a/include/d3d9types.h +++ b/include/d3d9types.h @@ -827,6 +827,8 @@ typedef enum _D3DFORMAT { D3DFMT_MULTI2_ARGB8 = MAKEFOURCC('M', 'E', 'T', '1'), D3DFMT_G8R8_G8B8 = MAKEFOURCC('G', 'R', 'G', 'B'), D3DFMT_R8G8_B8G8 = MAKEFOURCC('R', 'G', 'B', 'G'), + D3DFMT_DF16 = MAKEFOURCC('D', 'F', '1', '6'), + D3DFMT_DF24 = MAKEFOURCC('D', 'F', '2', '4'),
D3DFMT_D16_LOCKABLE = 70, D3DFMT_D32 = 71,
- Test texld/texldp/texldd/texldb/texldl in PS and FFP
- Test supported/unsupported texture formats on FFP/texld/texldp
- Test 3D textures (disabled due to a FIXME in Wine)
- Test depth textures on PS (FFP is broken on Windows)
Signed-off-by: Daniel Ansorregui [email protected] --- dlls/d3d9/tests/visual.c | 568 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 568 insertions(+)
diff --git a/dlls/d3d9/tests/visual.c b/dlls/d3d9/tests/visual.c index 51229f2b82..20c19f13f7 100644 --- a/dlls/d3d9/tests/visual.c +++ b/dlls/d3d9/tests/visual.c @@ -15104,6 +15104,573 @@ done: DestroyWindow(window); }
+/* Visual test for the FETCH4 sampler mode, which is requested by setting
+ * D3DSAMP_MIPMAPLODBIAS to the FOURCC 'GET4' (and reset with 'GET1').
+ *
+ * The test runs four groups of checks against the backbuffer:
+ *  1. A 4x4 L8 texture sampled through the fixed function pipeline and through
+ *     ps_3_0 texld/texldp/texldd/texldb/texldl; all are expected to produce the
+ *     same colors, each channel carrying one of the texture's texel values.
+ *  2. 2x2 textures of several formats, sampled with FFP/texld/texldp, to check
+ *     for which formats FETCH4 takes effect.
+ *  3. A 4x4x2 L8 volume texture (marked todo_wine, see the FIXME below).
+ *  4. Depth textures, sampled with FETCH4 off and on.
+ *
+ * The whole test is skipped when DF24 depth textures or pixel shader 3.0 are
+ * not available. */
+static void fetch4_test(void)
+{
+    static const DWORD vs_code[] =
+    {
+        0xfffe0300,                                     /* vs_3_0                 */
+        0x0200001f, 0x80000000, 0x900f0000,             /* dcl_position v0        */
+        0x0200001f, 0x80000005, 0x900f0001,             /* dcl_texcoord v1        */
+        0x0200001f, 0x80000000, 0xe00f0000,             /* dcl_position o0        */
+        0x0200001f, 0x80000005, 0xe00f0001,             /* dcl_texcoord o1        */
+        0x02000001, 0xe00f0000, 0x90e40000,             /* mov o0, v0             */
+        0x02000001, 0xe00f0001, 0x90e40001,             /* mov o1, v1             */
+        0x0000ffff
+    };
+    static const DWORD ps_code_texld[] =
+    {
+        /* Test texld */
+        0xffff0300,                                     /* ps_3_0                 */
+        0x0200001f, 0x80000005, 0x900f0000,             /* dcl_texcoord v0        */
+        0x0200001f, 0x90000000, 0xa00f0800,             /* dcl_2d s0              */
+        0x03000042, 0x800f0000, 0x90e40000, 0xa0e40800, /* texld r0, v0, s0       */
+        0x02000001, 0x800f0800, 0x80e40000,             /* mov oC0, r0            */
+        0x0000ffff                                      /* end                    */
+    };
+    static const DWORD ps_code_texldp[] =
+    {
+        /* Test texldp : It should not apply any projection with Fetch4. Same result as texld */
+        0xffff0300,                                     /* ps_3_0                 */
+        0x0200001f, 0x80000005, 0x900f0000,             /* dcl_texcoord v0        */
+        0x0200001f, 0x90000000, 0xa00f0800,             /* dcl_2d s0              */
+        0x05000051, 0xa00f0000, 0x00000000, 0x00000000, 0x3f000000, 0x3f000000, /* def c0, 0.0, 0.0, 0.5, 0.5 */
+        0x03000002, 0x800f0000, 0x90e40000, 0xa0e40000, /* add r0, v0, c0         */
+        0x03010042, 0x800f0000, 0x80e40000, 0xa0e40800, /* texldp r0, r0, s0      */
+        0x02000001, 0x800f0800, 0x80e40000,             /* mov oC0, r0            */
+        0x0000ffff,                                     /* end                    */
+    };
+    static const DWORD ps_code_texldd[] =
+    {
+        /* Test texldd : Fetch4 uses the same D3D state as LOD bias, therefore disables LOD.
+         * Sampling LOD gradient should be ignored. Same result as texld */
+        0xffff0300,                                     /* ps_3_0                 */
+        0x0200001f, 0x80000005, 0x900f0000,             /* dcl_texcoord v0        */
+        0x0200001f, 0x90000000, 0xa00f0800,             /* dcl_2d s0              */
+        0x05000051, 0xa00f0000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, /* def c0, 0.5, 0.5, 0.5, 0.5 */
+        0x05000051, 0xa00f0001, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, /* def c1, 1.0, 1.0, 1.0, 1.0 */
+        0x02000001, 0x800f0002, 0xa0e40000,             /* mov r2, c0             */
+        0x0500005d, 0x800f0000, 0x90e40000, 0xa0e40800, 0xa0e40000, 0x80e40002, /* texldd r0, v0, s0, c0, r2 */
+        0x02000001, 0x800f0800, 0x80e40000,             /* mov oC0, r0            */
+        0x0000ffff,                                     /* end                    */
+    };
+    static const DWORD ps_code_texldb[] =
+    {
+        /* Test texldb : Fetch4 uses the same D3D state as LOD bias, therefore disables LOD.
+         * Same result as texld */
+        0xffff0300,                                     /* ps_3_0                 */
+        0x0200001f, 0x80000005, 0x900f0000,             /* dcl_texcoord v0        */
+        0x0200001f, 0x90000000, 0xa00f0800,             /* dcl_2d s0              */
+        0x05000051, 0xa00f0000, 0x00000000, 0x00000000, 0x40a00000, 0x40a00000, /* def c0, 0.0, 0.0, 5.0, 5.0 */
+        0x03000002, 0x800f0000, 0x90e40000, 0xa0e40000, /* add r0, v0, c0         */
+        0x03020042, 0x800f0000, 0x80e40000, 0xa0e40800, /* texldb r0, r0, s0      */
+        0x02000001, 0x800f0800, 0x80e40000,             /* mov oC0, r0            */
+        0x0000ffff,                                     /* end                    */
+    };
+    static const DWORD ps_code_texldl[] =
+    {
+        /* Test texldl : Fetch4 uses the same D3D state as LOD bias, therefore disables LOD.
+         * The explicit LOD level is then ignored. Same result as texld */
+        0xffff0300,                                     /* ps_3_0                 */
+        0x0200001f, 0x80000005, 0x900f0000,             /* dcl_texcoord v0        */
+        0x0200001f, 0x90000000, 0xa00f0800,             /* dcl_2d s0              */
+        0x05000051, 0xa00f0000, 0x00000000, 0x00000000, 0x3f000000, 0x3f000000, /* def c0, 0.0, 0.0, 0.5, 0.5 */
+        0x03000002, 0x800f0000, 0x90e40000, 0xa0e40000, /* add r0, v0, c0         */
+        0x0300005f, 0x800f0000, 0x80e40000, 0xa0e40800, /* texldl r0, r0, s0      */
+        0x02000001, 0x800f0800, 0x80e40000,             /* mov oC0, r0            */
+        0x0000ffff,                                     /* end                    */
+    };
+    static const DWORD ps_code_3d[] =
+    {
+        0xffff0300,                                     /* ps_3_0                 */
+        0x0200001f, 0x80000005, 0x900f0000,             /* dcl_texcoord v0        */
+        0x0200001f, 0xa0000000, 0xa00f0800,             /* dcl_volume s0          */
+        0x03000042, 0x800f0000, 0x90e40000, 0xa0e40800, /* texld r0, v0, s0       */
+        0x02000001, 0x800f0800, 0x80e40000,             /* mov oC0, r0            */
+        0x0000ffff                                      /* end                    */
+    };
+
+    static const struct
+    {
+        struct vec3 position;
+        struct vec3 texcoord;
+    }
+    quad[] =
+    {
+        /* Tilted on Z axis to get a depth gradient in the depth test */
+        /* NOTE: Using 0.55f-0.6f to avoid rounding errors on depth tests */
+        {{-1.0f, 1.0f, 1.0f}, {0.0f,0.0f,0.6f} },
+        {{ 1.0f, 1.0f, 0.0f}, {1.0f,0.0f,0.6f} },
+        {{-1.0f,-1.0f, 0.0f}, {0.0f,1.0f,0.6f} },
+        {{ 1.0f,-1.0f, 0.0f}, {1.0f,1.0f,0.6f} }
+    };
+
+    /* Backbuffer positions and the color expected there when the 4x4 L8
+     * texture filled from fetch4_data is sampled with FETCH4 enabled. */
+    static const struct
+    {
+        UINT x, y;
+        D3DCOLOR color;
+    }
+    expected_colors[] =
+    {
+        { 40, 30, 0x23102013},{160, 30, 0x22132312},{320, 30, 0x21122211},
+        {480, 30, 0x20112110},{600, 30, 0x23102013},
+        { 40,120, 0x13011002},{160,120, 0x120213f2},{320,120, 0x11f212f1},
+        {480,120, 0x10f11101},{600,120, 0x13011002},
+        { 40,240, 0x02030104},{160,240, 0xf20402f4},{320,240, 0xf1f4f2f3},
+        {480,240, 0x01f3f103},{600,240, 0x02030104},
+        { 40,360, 0x04200323},{160,360, 0xf4230422},{320,360, 0xf322f421},
+        {480,360, 0x0321f320},{600,360, 0x04200323},
+        { 40,450, 0x23102013},{160,450, 0x22132312},{320,450, 0x21122211},
+        {480,450, 0x20112110},{600,450, 0x23102013},
+    };
+
+    /* One DWORD (four L8 texels) per texture row. */
+    static const DWORD fetch4_data[] = {0x10111213, 0x01f1f202, 0x03f3f404, 0x20212223};
+
+    /* Not const: the vs/ps members are filled in at runtime once the shaders
+     * have been created. Entry 0 keeps both NULL and thus selects the FFP. */
+    static struct
+    {
+        IDirect3DVertexShader9 *vs;
+        IDirect3DPixelShader9 *ps;
+        const DWORD *ps_code;
+        const char *name;
+    }
+    shaders[] =
+    {
+        {NULL, NULL, NULL,           "Fixed Function Pipeline"},
+        {NULL, NULL, ps_code_texld,  "texld"},
+        {NULL, NULL, ps_code_texldp, "texldp"},
+        {NULL, NULL, ps_code_texldd, "texldd"},
+        {NULL, NULL, ps_code_texldb, "texldb"},
+        {NULL, NULL, ps_code_texldl, "texldl"},
+    };
+
+    static const struct
+    {
+        D3DFORMAT format;           /* The format of the texture */
+        DWORD data;                 /* The data we will write to the first line */
+        D3DCOLOR expected_color[3]; /* Test FFP, texld and texldp as in "shaders" struct */
+        UINT x, y;                  /* Where we expect the color to be */
+        BOOL disable_alpha_wine;    /* Do not check alpha color on wine (A8 is known to be broken) */
+    }
+    format_tests[] =
+    {
+        /* Enabled formats */
+        {D3DFMT_L8,       0xff804010, {0x00400010, 0x00400010, 0x00400010}, 40, 30, FALSE},
+        {D3DFMT_L16,      0xff804010, {0x00ff0040, 0x00ff0040, 0x00ff0040}, 40, 30, FALSE},
+        {D3DFMT_R16F,     0x38003c00, {0x008000ff, 0x008000ff, 0x008000ff}, 40, 30, FALSE},
+        {D3DFMT_R32F,     0x3f000000, {0x00000080, 0x00000080, 0x00000080}, 40, 30, FALSE},
+
+        /* Disabled formats */
+        {D3DFMT_A8,       0xff804010, {0x00000000, 0x00000000, 0x00000000}, 40, 30, TRUE},
+        {D3DFMT_A8R8G8B8, 0xff804010, {0x64321906, 0x64321906, 0x562b1605}, 40, 30, FALSE},
+    };
+
+    /* Indexed by depth_tests[].index. */
+    static const struct
+    {
+        D3DCOLOR color[2]; /* FETCH4 off and on */
+        UINT x, y;
+    }
+    expected_depth[][4] =
+    {
+        {
+            /* This is the expected result for shadow samplers */
+            {{0x8f8f8f8f,0x8f8f8f8f}, 20, 15},
+            {{0xbfbfbfbf,0xbfbfbfbf},240, 15},
+            {{0x60606060,0x60606060}, 20,240},
+            {{0x40404040,0x40404040},240,120},
+        },
+        {
+            /* This is the expected result with DF16 */
+            {{0xff9b00ff,0x202000ff}, 20, 15},
+            {{0xff8300ff,0x00bf009f},240, 15},
+            {{0xff6c00ff,0x9f000080}, 20,240},
+            {{0xff8000ff,0x80809f60},240,120},
+        },
+        {
+            /* This is the expected result with DF24 */
+            {{0xff9bffff,0x202000ff}, 20, 15},
+            {{0xff83ffff,0x00bf009f},240, 15},
+            {{0xff6cffff,0x9f000080}, 20,240},
+            {{0xff80ffff,0x80809f60},240,120},
+        }
+    };
+
+    static const struct
+    {
+        D3DFORMAT format;
+        const char *name;
+        UINT index; /* Row of expected_depth to compare against */
+    }
+    depth_tests[] =
+    {
+        {D3DFMT_D16_LOCKABLE,  "D16_LOCKABLE",  0},
+        {D3DFMT_D32,           "D32",           0},
+        {D3DFMT_D15S1,         "D15S1",         0},
+        {D3DFMT_D24S8,         "D24S8",         0},
+        {D3DFMT_D24X8,         "D24X8",         0},
+        {D3DFMT_D24X4S4,       "D24X4S4",       0},
+        {D3DFMT_D16,           "D16",           0},
+        {D3DFMT_D32F_LOCKABLE, "D32F_LOCKABLE", 0},
+        {D3DFMT_D24FS8,        "D24FS8",        0},
+        {D3DFMT_DF16,          "DF16",          1},
+        {D3DFMT_DF24,          "DF24",          2},
+    };
+
+    IDirect3DSurface9 *original_ds, *original_rt, *rt;
+    IDirect3DVolumeTexture9 *texture3D;
+    IDirect3DPixelShader9 *ps_3d;
+    struct surface_readback rb;
+    IDirect3DVertexShader9 *vs;
+    IDirect3DTexture9 *texture;
+    IDirect3DDevice9 *device;
+    D3DLOCKED_RECT lr;
+    D3DLOCKED_BOX lb;
+    IDirect3D9 *d3d;
+    ULONG refcount;
+    D3DCAPS9 caps;
+    UINT i, j, k;
+    HWND window;
+    HRESULT hr;
+
+    window = create_window();
+    d3d = Direct3DCreate9(D3D_SDK_VERSION);
+    ok(!!d3d, "Failed to create a D3D object.\n");
+    if (FAILED(IDirect3D9_CheckDeviceFormat(d3d, D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL,
+            D3DFMT_X8R8G8B8, D3DUSAGE_DEPTHSTENCIL, D3DRTYPE_TEXTURE, MAKEFOURCC('D','F','2','4'))))
+    {
+        skip("No DF24 support, skipping FETCH4 test.\n");
+        goto done;
+    }
+    if (!(device = create_device(d3d, window, window, TRUE)))
+    {
+        skip("Failed to create a D3D device, skipping tests.\n");
+        goto done;
+    }
+
+    hr = IDirect3DDevice9_GetDeviceCaps(device, &caps);
+    ok(SUCCEEDED(hr), "GetDeviceCaps failed, hr %#x.\n", hr);
+    if (caps.PixelShaderVersion < D3DPS_VERSION(3, 0))
+    {
+        skip("No pixel shader 3.0 support, skipping FETCH4 test.\n");
+        IDirect3DDevice9_Release(device);
+        goto done;
+    }
+    hr = IDirect3DDevice9_GetRenderTarget(device, 0, &original_rt);
+    ok(SUCCEEDED(hr), "GetRenderTarget failed, hr %#x.\n", hr);
+    hr = IDirect3DDevice9_GetDepthStencilSurface(device, &original_ds);
+    ok(SUCCEEDED(hr), "GetDepthStencilSurface failed, hr %#x.\n", hr);
+    hr = IDirect3DDevice9_CreateRenderTarget(device, 8, 8, D3DFMT_A8R8G8B8,
+            D3DMULTISAMPLE_NONE, 0, FALSE, &rt, NULL);
+    ok(SUCCEEDED(hr), "CreateRenderTarget failed, hr %#x.\n", hr);
+
+    /* Create our texture for FETCH4 shader testing */
+    hr = IDirect3DDevice9_CreateTexture(device, 4, 4, 1, 0, D3DFMT_L8, D3DPOOL_MANAGED, &texture, NULL);
+    ok(hr == D3D_OK, "Failed to create texture, hr %#x.\n", hr);
+    hr = IDirect3DTexture9_LockRect(texture, 0, &lr, NULL, 0);
+    ok(hr == D3D_OK, "Failed to lock texture, hr %#x.\n", hr);
+    for (i = 0; i < 4; ++i)
+        memcpy((BYTE *)lr.pBits + i*lr.Pitch, &fetch4_data[i], sizeof(fetch4_data[i]));
+    hr = IDirect3DTexture9_UnlockRect(texture, 0);
+    ok(hr == D3D_OK, "Failed to unlock texture, hr %#x.\n", hr);
+
+    /* Create vertex shader */
+    hr = IDirect3DDevice9_CreateVertexShader(device, vs_code, &vs);
+    ok(hr == D3D_OK, "IDirect3DDevice9_CreateVertexShader returned %08x\n", hr);
+    /* Prepare the pixel shaders */
+    for (i = 0; i < ARRAY_SIZE(shaders); ++i)
+    {
+        if (shaders[i].ps_code)
+        {
+            hr = IDirect3DDevice9_CreatePixelShader(device, shaders[i].ps_code, &shaders[i].ps);
+            ok(SUCCEEDED(hr), "CreatePixelShader failed, hr %#x.\n", hr);
+            /* Copy vertex shader pointer if a PS is present */
+            shaders[i].vs = vs;
+        }
+    }
+    hr = IDirect3DDevice9_CreatePixelShader(device, ps_code_3d, &ps_3d);
+    ok(SUCCEEDED(hr), "CreatePixelShader failed, hr %#x.\n", hr);
+
+    hr = IDirect3DDevice9_SetFVF(device, D3DFVF_XYZ | D3DFVF_TEX1 | D3DFVF_TEXCOORDSIZE3(0));
+    ok(SUCCEEDED(hr), "SetFVF failed, hr %#x.\n", hr);
+    hr = IDirect3DDevice9_SetRenderState(device, D3DRS_ZENABLE, D3DZB_TRUE);
+    ok(SUCCEEDED(hr), "SetRenderState failed, hr %#x.\n", hr);
+    hr = IDirect3DDevice9_SetRenderState(device, D3DRS_ZFUNC, D3DCMP_ALWAYS);
+    ok(SUCCEEDED(hr), "SetRenderState failed, hr %#x.\n", hr);
+    hr = IDirect3DDevice9_SetRenderState(device, D3DRS_ZWRITEENABLE, TRUE);
+    ok(SUCCEEDED(hr), "SetRenderState failed, hr %#x.\n", hr);
+    hr = IDirect3DDevice9_SetRenderState(device, D3DRS_LIGHTING, FALSE);
+    ok(SUCCEEDED(hr), "SetRenderState failed, hr %#x.\n", hr);
+    hr = IDirect3DDevice9_SetTexture(device, 0, (IDirect3DBaseTexture9 *)texture);
+    ok(hr == D3D_OK, "Failed to set texture, hr %#x.\n", hr);
+
+    /* According to the spec, FETCH4 is enabled when D3DSAMP_MIPMAPLODBIAS == GET4
+     * and also D3DSAMP_MAGFILTER == D3DTEXF_POINT. But apparently only GET4 is needed,
+     * so the test deliberately sets other filters to exercise that GET4 alone is
+     * sufficient and that any other parameter will work. */
+    hr = IDirect3DDevice9_SetSamplerState(device, 0, D3DSAMP_MIPMAPLODBIAS, MAKEFOURCC('G','E','T','4'));
+    ok(SUCCEEDED(hr), "SetSamplerState failed, hr %#x.\n", hr);
+    hr = IDirect3DDevice9_SetSamplerState(device, 0, D3DSAMP_MAGFILTER, D3DTEXF_LINEAR);
+    ok(SUCCEEDED(hr), "SetSamplerState failed, hr %#x.\n", hr);
+    hr = IDirect3DDevice9_SetSamplerState(device, 0, D3DSAMP_MINFILTER, D3DTEXF_POINT);
+    ok(SUCCEEDED(hr), "SetSamplerState failed, hr %#x.\n", hr);
+    hr = IDirect3DDevice9_SetSamplerState(device, 0, D3DSAMP_MIPFILTER, D3DTEXF_LINEAR);
+    ok(SUCCEEDED(hr), "SetSamplerState failed, hr %#x.\n", hr);
+
+    /***********************************************************************
+     * Tests for FFP/PS correctness when using L8 texture with fetch4.     *
+     ***********************************************************************/
+
+    /* Render with fetch4 and test if we obtain proper results for all sampler FFP/PS instructions */
+    for (i = 0; i < ARRAY_SIZE(shaders); ++i)
+    {
+        hr = IDirect3DDevice9_SetVertexShader(device, shaders[i].vs);
+        ok(SUCCEEDED(hr), "Failed to set vertex shader, hr %#x.\n", hr);
+        hr = IDirect3DDevice9_SetPixelShader(device, shaders[i].ps);
+        ok(SUCCEEDED(hr), "SetPixelShader failed, hr %#x.\n", hr);
+
+        hr = IDirect3DDevice9_Clear(device, 0, NULL, D3DCLEAR_TARGET | D3DCLEAR_ZBUFFER, 0, 0.0f, 0);
+        ok(SUCCEEDED(hr), "Clear failed, hr %#x.\n", hr);
+        hr = IDirect3DDevice9_BeginScene(device);
+        ok(SUCCEEDED(hr), "BeginScene failed, hr %#x.\n", hr);
+        hr = IDirect3DDevice9_DrawPrimitiveUP(device, D3DPT_TRIANGLESTRIP, 2, quad, sizeof(*quad));
+        ok(SUCCEEDED(hr), "DrawPrimitiveUP failed, hr %#x.\n", hr);
+        hr = IDirect3DDevice9_EndScene(device);
+        ok(SUCCEEDED(hr), "EndScene failed, hr %#x.\n", hr);
+
+        get_rt_readback(original_rt, &rb);
+        for (j = 0; j < ARRAY_SIZE(expected_colors); ++j)
+        {
+            D3DCOLOR color = get_readback_color(&rb, expected_colors[j].x, expected_colors[j].y);
+            ok(color_match(color, expected_colors[j].color, 1),
+                    "Test %s Expected color 0x%08x at (%u, %u), got 0x%08x.\n", shaders[i].name,
+                    expected_colors[j].color, expected_colors[j].x, expected_colors[j].y, color);
+        }
+        release_surface_readback(&rb);
+
+        hr = IDirect3DDevice9_Present(device, NULL, NULL, NULL, NULL);
+        ok(SUCCEEDED(hr), "Present failed, hr %#x.\n", hr);
+    }
+
+    /***************************************************************************
+     * Tests for fetch4 enable/disable with different texture formats in FFP/PS. *
+     ***************************************************************************/
+
+    /* Create the textures to test FETCH4 does work/not work there as expected */
+    for (i = 0; i < ARRAY_SIZE(format_tests); ++i)
+    {
+        IDirect3DTexture9 *tex;
+
+        hr = IDirect3DDevice9_CreateTexture(device, 2, 2, 1, 0, format_tests[i].format,
+                D3DPOOL_MANAGED, &tex, NULL);
+        ok(hr == D3D_OK, "Failed to create texture, hr %#x.\n", hr);
+        hr = IDirect3DTexture9_LockRect(tex, 0, &lr, NULL, 0);
+        ok(hr == D3D_OK, "Failed to lock texture, hr %#x.\n", hr);
+        memcpy(lr.pBits, &format_tests[i].data, 4);
+        hr = IDirect3DTexture9_UnlockRect(tex, 0);
+        ok(hr == D3D_OK, "Failed to unlock texture, hr %#x.\n", hr);
+        hr = IDirect3DDevice9_SetTexture(device, 0, (IDirect3DBaseTexture9 *)tex);
+        ok(hr == D3D_OK, "Failed to set texture, hr %#x.\n", hr);
+
+        /* Test if FETCH4 is enabled/disabled when different textures are used with FFP/texld/texldp */
+        for (j = 0; j < 3; ++j)
+        {
+            /* Declared at the top of the block: no declarations after statements. */
+            D3DCOLOR expected_color = format_tests[i].expected_color[j];
+            D3DCOLOR color;
+
+            hr = IDirect3DDevice9_SetVertexShader(device, shaders[j].vs);
+            ok(SUCCEEDED(hr), "Failed to set vertex shader, hr %#x.\n", hr);
+            hr = IDirect3DDevice9_SetPixelShader(device, shaders[j].ps);
+            ok(SUCCEEDED(hr), "SetPixelShader failed, hr %#x.\n", hr);
+
+            hr = IDirect3DDevice9_Clear(device, 0, NULL, D3DCLEAR_TARGET | D3DCLEAR_ZBUFFER, 0, 0.0f, 0);
+            ok(SUCCEEDED(hr), "Clear failed, hr %#x.\n", hr);
+            hr = IDirect3DDevice9_BeginScene(device);
+            ok(SUCCEEDED(hr), "BeginScene failed, hr %#x.\n", hr);
+            hr = IDirect3DDevice9_DrawPrimitiveUP(device, D3DPT_TRIANGLESTRIP, 2, quad, sizeof(*quad));
+            ok(SUCCEEDED(hr), "DrawPrimitiveUP failed, hr %#x.\n", hr);
+            hr = IDirect3DDevice9_EndScene(device);
+            ok(SUCCEEDED(hr), "EndScene failed, hr %#x.\n", hr);
+
+            get_rt_readback(original_rt, &rb);
+            color = get_readback_color(&rb, format_tests[i].x, format_tests[i].y);
+            /* FIXME: A8 gives different results, therefore we do not test alpha channel
+             * Fetch4 affects all 4 channels, and the test will catch errors anyway */
+            ok(color_match(color & 0x00ffffff, expected_color & 0x00ffffff, 1),
+                    "Test %u,%s expected color 0x%08x at (%u, %u), got 0x%08x.\n", i, shaders[j].name,
+                    expected_color & 0x00ffffff, format_tests[i].x, format_tests[i].y, color & 0x00ffffff);
+            /* If the format gives proper alpha result, test the full color */
+            todo_wine_if(format_tests[i].disable_alpha_wine) ok(color_match(color, expected_color, 1),
+                    "Test %u,%s expected color 0x%08x at (%u, %u), got 0x%08x.\n", i, shaders[j].name,
+                    expected_color, format_tests[i].x, format_tests[i].y, color);
+            release_surface_readback(&rb);
+
+            hr = IDirect3DDevice9_Present(device, NULL, NULL, NULL, NULL);
+            ok(SUCCEEDED(hr), "Present failed, hr %#x.\n", hr);
+        }
+        IDirect3DTexture9_Release(tex);
+    }
+
+    /**************************************************
+     * Tests that fetch4 works with 3D textures.      *
+     **************************************************/
+
+    /* Create volume (3D) texture. The result must be stored in hr, otherwise the
+     * check below would test the stale result of the previous call. */
+    hr = IDirect3DDevice9_CreateVolumeTexture(device, 4, 4, 2, 1, 0, D3DFMT_L8,
+            D3DPOOL_MANAGED, &texture3D, NULL);
+    ok(hr == D3D_OK, "Failed to create volume texture, hr %#x.\n", hr);
+    hr = IDirect3DVolumeTexture9_LockBox(texture3D, 0, &lb, NULL, 0);
+    ok(hr == D3D_OK, "Failed to lock texture3D, hr %#x.\n", hr);
+    for (i = 0; i < 4; ++i)
+    {
+        memcpy((BYTE *)lb.pBits + i*lb.RowPitch, &fetch4_data[i], sizeof(fetch4_data[i]));
+        /* Shift the lower level, to keep it different */
+        memcpy((BYTE *)lb.pBits + i*lb.RowPitch + lb.SlicePitch, &fetch4_data[(i+1)%4], sizeof(fetch4_data[i]));
+    }
+    hr = IDirect3DVolumeTexture9_UnlockBox(texture3D, 0);
+    ok(hr == D3D_OK, "Failed to unlock texture3D, hr %#x.\n", hr);
+    hr = IDirect3DDevice9_SetTexture(device, 0, (IDirect3DBaseTexture9 *)texture3D);
+    ok(hr == D3D_OK, "Failed to set texture3D, hr %#x.\n", hr);
+
+    /* Test FFP and texld with dcl_volume (ps_3d) */
+    for (i = 0; i < 2; ++i)
+    {
+        hr = IDirect3DDevice9_SetVertexShader(device, i ? vs : NULL);
+        ok(SUCCEEDED(hr), "Failed to set vertex shader, hr %#x.\n", hr);
+        hr = IDirect3DDevice9_SetPixelShader(device, i ? ps_3d : NULL);
+        ok(SUCCEEDED(hr), "SetPixelShader failed, hr %#x.\n", hr);
+
+        hr = IDirect3DDevice9_Clear(device, 0, NULL, D3DCLEAR_TARGET | D3DCLEAR_ZBUFFER, 0, 0.0f, 0);
+        ok(SUCCEEDED(hr), "Clear failed, hr %#x.\n", hr);
+        hr = IDirect3DDevice9_BeginScene(device);
+        ok(SUCCEEDED(hr), "BeginScene failed, hr %#x.\n", hr);
+        hr = IDirect3DDevice9_DrawPrimitiveUP(device, D3DPT_TRIANGLESTRIP, 2, quad, sizeof(*quad));
+        ok(SUCCEEDED(hr), "DrawPrimitiveUP failed, hr %#x.\n", hr);
+        hr = IDirect3DDevice9_EndScene(device);
+        ok(SUCCEEDED(hr), "EndScene failed, hr %#x.\n", hr);
+        get_rt_readback(original_rt, &rb);
+        for (j = 0; j < ARRAY_SIZE(expected_colors); ++j)
+        {
+            D3DCOLOR color = get_readback_color(&rb, expected_colors[j].x, expected_colors[j].y);
+            /* FIXME: Fetch4 on 3D texture is like sampling a 2DArray at .xy0 with Fetch4 enabled
+             * Currently unimplemented on wine due to lack of GL functionality to cast 3D->2DArray
+             * Wine produces same results as if fetch4 is not enabled. Passes on w10 */
+            todo_wine ok(color_match(color, expected_colors[j].color, 1),
+                    "Expected color 0x%08x at (%u, %u), got 0x%08x.\n",
+                    expected_colors[j].color, expected_colors[j].x, expected_colors[j].y, color);
+        }
+        release_surface_readback(&rb);
+        hr = IDirect3DDevice9_Present(device, NULL, NULL, NULL, NULL);
+        ok(SUCCEEDED(hr), "Present failed, hr %#x.\n", hr);
+    }
+
+    /********************************************************
+     * Tests for fetch4 enable/disable with depth textures. *
+     ********************************************************/
+
+    for (i = 0; i < ARRAY_SIZE(depth_tests); ++i)
+    {
+        D3DFORMAT format = depth_tests[i].format;
+        IDirect3DTexture9 *depth_texture;
+        IDirect3DSurface9 *ds;
+
+        /* Formats the adapter cannot use as a depth texture are silently skipped. */
+        if (FAILED(IDirect3D9_CheckDeviceFormat(d3d, D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL,
+                D3DFMT_X8R8G8B8, D3DUSAGE_DEPTHSTENCIL, D3DRTYPE_TEXTURE, format)))
+            continue;
+
+        hr = IDirect3DDevice9_CreateTexture(device, 8, 8, 1,
+                D3DUSAGE_DEPTHSTENCIL, format, D3DPOOL_DEFAULT, &depth_texture, NULL);
+        ok(SUCCEEDED(hr), "CreateTexture failed, hr %#x.\n", hr);
+        hr = IDirect3DTexture9_GetSurfaceLevel(depth_texture, 0, &ds);
+        ok(SUCCEEDED(hr), "GetSurfaceLevel failed, hr %#x.\n", hr);
+        hr = IDirect3DDevice9_SetDepthStencilSurface(device, ds);
+        ok(SUCCEEDED(hr), "SetDepthStencilSurface failed, hr %#x.\n", hr);
+        hr = IDirect3DDevice9_SetRenderTarget(device, 0, rt);
+        ok(SUCCEEDED(hr), "SetRenderTarget failed, hr %#x.\n", hr);
+        hr = IDirect3DDevice9_SetVertexShader(device, NULL);
+        ok(SUCCEEDED(hr), "Failed to set vertex shader, hr %#x.\n", hr);
+        hr = IDirect3DDevice9_SetPixelShader(device, NULL);
+        ok(SUCCEEDED(hr), "SetPixelShader failed, hr %#x.\n", hr);
+        hr = IDirect3DDevice9_SetTexture(device, 0, (IDirect3DBaseTexture9 *)texture);
+        ok(hr == D3D_OK, "Failed to set texture, hr %#x.\n", hr);
+        hr = IDirect3DDevice9_SetSamplerState(device, 0,
+                D3DSAMP_MIPMAPLODBIAS, MAKEFOURCC('G','E','T','1'));
+        ok(SUCCEEDED(hr), "SetSamplerState failed, hr %#x.\n", hr);
+
+        /* Setup the depth/stencil surface. */
+        hr = IDirect3DDevice9_Clear(device, 0, NULL, D3DCLEAR_ZBUFFER, 0, 0.0f, 0);
+        ok(SUCCEEDED(hr), "Clear failed, hr %#x.\n", hr);
+
+        /* Render to the depth surface */
+        hr = IDirect3DDevice9_BeginScene(device);
+        ok(SUCCEEDED(hr), "BeginScene failed, hr %#x.\n", hr);
+        hr = IDirect3DDevice9_DrawPrimitiveUP(device, D3DPT_TRIANGLESTRIP, 2, quad, sizeof(*quad));
+        ok(SUCCEEDED(hr), "DrawPrimitiveUP failed, hr %#x.\n", hr);
+        hr = IDirect3DDevice9_EndScene(device);
+        ok(SUCCEEDED(hr), "EndScene failed, hr %#x.\n", hr);
+
+        hr = IDirect3DDevice9_SetDepthStencilSurface(device, NULL);
+        ok(SUCCEEDED(hr), "SetDepthStencilSurface failed, hr %#x.\n", hr);
+        IDirect3DSurface9_Release(ds);
+        hr = IDirect3DDevice9_SetRenderTarget(device, 0, original_rt);
+        ok(SUCCEEDED(hr), "SetRenderTarget failed, hr %#x.\n", hr);
+        hr = IDirect3DDevice9_SetTexture(device, 0, (IDirect3DBaseTexture9 *)depth_texture);
+        ok(SUCCEEDED(hr), "SetTexture failed, hr %#x.\n", hr);
+
+        /* Set a shader for depth sampling, otherwise windows does not show anything */
+        hr = IDirect3DDevice9_SetVertexShader(device, vs);
+        ok(SUCCEEDED(hr), "Failed to set vertex shader, hr %#x.\n", hr);
+        hr = IDirect3DDevice9_SetPixelShader(device, shaders[1].ps); /* same as texld */
+        ok(SUCCEEDED(hr), "SetPixelShader failed, hr %#x.\n", hr);
+
+        /* j == 0 samples with FETCH4 off (GET1), j == 1 with FETCH4 on (GET4). */
+        for (j = 0; j < 2; ++j)
+        {
+            hr = IDirect3DDevice9_SetSamplerState(device, 0,
+                    D3DSAMP_MIPMAPLODBIAS, MAKEFOURCC('G','E','T', j ? '4' : '1' ));
+            ok(SUCCEEDED(hr), "SetSamplerState failed, hr %#x.\n", hr);
+
+            /* Do the actual shadow mapping. */
+            hr = IDirect3DDevice9_BeginScene(device);
+            ok(SUCCEEDED(hr), "BeginScene failed, hr %#x.\n", hr);
+            hr = IDirect3DDevice9_DrawPrimitiveUP(device, D3DPT_TRIANGLESTRIP, 2, quad, sizeof(*quad));
+            ok(SUCCEEDED(hr), "DrawPrimitiveUP failed, hr %#x.\n", hr);
+            hr = IDirect3DDevice9_EndScene(device);
+            ok(SUCCEEDED(hr), "EndScene failed, hr %#x.\n", hr);
+
+            get_rt_readback(original_rt, &rb);
+            for (k = 0; k < ARRAY_SIZE(expected_depth[depth_tests[i].index]); ++k)
+            {
+                UINT x = expected_depth[depth_tests[i].index][k].x;
+                UINT y = expected_depth[depth_tests[i].index][k].y;
+                D3DCOLOR expected_color = expected_depth[depth_tests[i].index][k].color[j];
+                D3DCOLOR color = get_readback_color(&rb, x, y);
+                /* Geforce 7 on Windows returns 1.0 in alpha when the depth format is D24S8 or D24X8,
+                 * whereas other GPUs (all AMD, newer Nvidia) return the same value they return in .rgb.
+                 * Accept alpha mismatches as broken but make sure to check the color channels. */
+                ok(color_match(color, expected_color, 2)
+                        || broken(color_match(color & 0x00ffffff, expected_color & 0x00ffffff, 0)),
+                        "Expected color 0x%08x at (%u, %u) for format %s, got 0x%08x.\n",
+                        expected_color, x, y, depth_tests[i].name, color);
+            }
+            release_surface_readback(&rb);
+
+            hr = IDirect3DDevice9_Present(device, NULL, NULL, NULL, NULL);
+            ok(SUCCEEDED(hr), "Present failed, hr %#x.\n", hr);
+        }
+
+        hr = IDirect3DDevice9_SetTexture(device, 0, NULL);
+        ok(SUCCEEDED(hr), "SetTexture failed, hr %#x.\n", hr);
+        IDirect3DTexture9_Release(depth_texture);
+    }
+
+    IDirect3DVolumeTexture9_Release(texture3D);
+    IDirect3DTexture9_Release(texture);
+    for (i = 0; i < ARRAY_SIZE(shaders); ++i)
+        if (shaders[i].ps)
+            IDirect3DPixelShader9_Release(shaders[i].ps);
+    IDirect3DPixelShader9_Release(ps_3d);
+    IDirect3DVertexShader9_Release(vs);
+    IDirect3DSurface9_Release(rt);
+    IDirect3DSurface9_Release(original_ds);
+    IDirect3DSurface9_Release(original_rt);
+    refcount = IDirect3DDevice9_Release(device);
+    ok(!refcount, "Device has %u references left.\n", refcount);
+done:
+    IDirect3D9_Release(d3d);
+    DestroyWindow(window);
+}
+
 static void shadow_test(void)
 {
     static const DWORD ps_code[] =
@@ -24480,6 +25047,7 @@ START_TEST(visual)
     depth_buffer2_test();
     depth_blit_test();
     intz_test();
+    fetch4_test();
     shadow_test();
     fp_special_test();
     depth_bounds_test();
- Test texld/texldp/texldd/texldb/texldl in PS and FFP
- Test supported/unsupported texture formats on FFP/texld/texldp
- Test 3D textures (disabled due to a FIXME in Wine)
- Test depth textures on PS (FFP is broken on Windows)
Signed-off-by: Daniel Ansorregui [email protected] --- dlls/d3d9/tests/visual.c | 568 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 568 insertions(+)
diff --git a/dlls/d3d9/tests/visual.c b/dlls/d3d9/tests/visual.c index 51229f2b82..a3eb30725e 100644 --- a/dlls/d3d9/tests/visual.c +++ b/dlls/d3d9/tests/visual.c @@ -15104,6 +15104,573 @@ done: DestroyWindow(window); }
+static void fetch4_test(void) +{ + static const DWORD vs_code[] = + { + 0xfffe0300, /* vs_3_0 */ + 0x0200001f, 0x80000000, 0x900f0000, /* dcl_position v0 */ + 0x0200001f, 0x80000005, 0x900f0001, /* dcl_texcoord v1 */ + 0x0200001f, 0x80000000, 0xe00f0000, /* dcl_position o0 */ + 0x0200001f, 0x80000005, 0xe00f0001, /* dcl_texcoord o1 */ + 0x02000001, 0xe00f0000, 0x90e40000, /* mov o0, v0 */ + 0x02000001, 0xe00f0001, 0x90e40001, /* mov o1, v1 */ + 0x0000ffff + }; + static const DWORD ps_code_texld[] = + { + /* Test texld */ + 0xffff0300, /* ps_3_0 */ + 0x0200001f, 0x80000005, 0x900f0000, /* dcl_texcoord v0 */ + 0x0200001f, 0x90000000, 0xa00f0800, /* dcl_2d s0 */ + 0x03000042, 0x800f0000, 0x90e40000, 0xa0e40800, /* texld r0, v0, s0 */ + 0x02000001, 0x800f0800, 0x80e40000, /* mov oC0, r0 */ + 0x0000ffff /* end */ + }; + static const DWORD ps_code_texldp[] = + { + /* Test texldp : It should not apply any projection with Fetch4. Same result as texld */ + 0xffff0300, /* ps_3_0 */ + 0x0200001f, 0x80000005, 0x900f0000, /* dcl_texcoord v0 */ + 0x0200001f, 0x90000000, 0xa00f0800, /* dcl_2d s0 */ + 0x05000051, 0xa00f0000, 0x00000000, 0x00000000, 0x3f000000, 0x3f000000, /* def c0, 0.0, 0.0, 0.5, 0.5 */ + 0x03000002, 0x800f0000, 0x90e40000, 0xa0e40000, /* add r0, v0, c0 */ + 0x03010042, 0x800f0000, 0x80e40000, 0xa0e40800, /* texldp r0, r0, s0 */ + 0x02000001, 0x800f0800, 0x80e40000, /* mov oC0, r0 */ + 0x0000ffff, /* end */ + }; + static const DWORD ps_code_texldd[] = + { + /* Test texldd : Fetch4 uses the same D3D state as LOD bias, therefore disables LOD. + * Sampling LOD gradient should be ignored. 
Same result as texld */ + 0xffff0300, /* ps_3_0 */ + 0x0200001f, 0x80000005, 0x900f0000, /* dcl_texcoord v0 */ + 0x0200001f, 0x90000000, 0xa00f0800, /* dcl_2d s0 */ + 0x05000051, 0xa00f0000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, /* def c0, 0.5, 0.5, 0.5, 0.5 */ + 0x05000051, 0xa00f0001, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, /* def c0, 1.0, 1.0, 1.0, 1.0 */ + 0x02000001, 0x800f0002, 0xa0e40000, /* mov r2, c0 */ + 0x0500005d, 0x800f0000, 0x90e40000, 0xa0e40800, 0xa0e40000, 0x80e40002, /* texldd r0, v0, s0, c0, r2 */ + 0x02000001, 0x800f0800, 0x80e40000, /* mov oC0, r0 */ + 0x0000ffff, /* end */ + }; + static const DWORD ps_code_texldb[] = + { + /* Test texldb : Fetch4 uses the same D3D state as LOD bias, therefore disables LOD. + * Same result as texld */ + 0xffff0300, /* ps_3_0 */ + 0x0200001f, 0x80000005, 0x900f0000, /* dcl_texcoord v0 */ + 0x0200001f, 0x90000000, 0xa00f0800, /* dcl_2d s0 */ + 0x05000051, 0xa00f0000, 0x00000000, 0x00000000, 0x40a00000, 0x40a00000, /* def c0, 0.0, 0.0, 5.0, 5.0 */ + 0x03000002, 0x800f0000, 0x90e40000, 0xa0e40000, /* add r0, v0, c0 */ + 0x03020042, 0x800f0000, 0x80e40000, 0xa0e40800, /* texldb r0, r0, s0 */ + 0x02000001, 0x800f0800, 0x80e40000, /* mov oC0, r0 */ + 0x0000ffff, /* end */ + }; + static const DWORD ps_code_texldl[] = + { + /* Test texldl : Fetch4 uses the same D3D state as LOD bias, therefore disables LOD. + * The explicit LOD level is then ignored. 
Same result as texld */ + 0xffff0300, /* ps_3_0 */ + 0x0200001f, 0x80000005, 0x900f0000, /* dcl_texcoord v0 */ + 0x0200001f, 0x90000000, 0xa00f0800, /* dcl_2d s0 */ + 0x05000051, 0xa00f0000, 0x00000000, 0x00000000, 0x3f000000, 0x3f000000, /* def c0, 0.0, 0.0, 0.5, 0.5 */ + 0x03000002, 0x800f0000, 0x90e40000, 0xa0e40000, /* add r0, v0, c0 */ + 0x0300005f, 0x800f0000, 0x80e40000, 0xa0e40800, /* texldl r0, r0, s0 */ + 0x02000001, 0x800f0800, 0x80e40000, /* mov oC0, r0 */ + 0x0000ffff, /* end */ + }; + static const DWORD ps_code_3d[] = + { + 0xffff0300, /* ps_3_0 */ + 0x0200001f, 0x80000005, 0x900f0000, /* dcl_texcoord v0 */ + 0x0200001f, 0xa0000000, 0xa00f0800, /* dcl_volume s0 */ + 0x03000042, 0x800f0000, 0x90e40000, 0xa0e40800, /* texld r0, v0, s0 */ + 0x02000001, 0x800f0800, 0x80e40000, /* mov oC0, r0 */ + 0x0000ffff /* end */ + }; + + static const struct + { + struct vec3 position; + struct vec3 texcoord; + } + quad[] = + { + /* Tilted on Z axis to get a depth gradient in the depth test */ + /* NOTE: Using 0.55f-0.6f to avoid rounding errors on depth tests */ + {{-1.0f, 1.0f, 1.0f}, {0.0f,0.0f,0.6f} }, + {{ 1.0f, 1.0f, 0.0f}, {1.0f,0.0f,0.6f} }, + {{-1.0f,-1.0f, 0.0f}, {0.0f,1.0f,0.6f} }, + {{ 1.0f,-1.0f, 0.0f}, {1.0f,1.0f,0.6f} } + }; + + static const struct + { + UINT x, y; + D3DCOLOR color; + } + expected_colors[] = + { + { 40, 30, 0x23102013},{160, 30, 0x22132312},{320, 30, 0x21122211}, + {480, 30, 0x20112110},{600, 30, 0x23102013}, + { 40,120, 0x13011002},{160,120, 0x120213f2},{320,120, 0x11f212f1}, + {480,120, 0x10f11101},{600,120, 0x13011002}, + { 40,240, 0x02030104},{160,240, 0xf20402f4},{320,240, 0xf1f4f2f3}, + {480,240, 0x01f3f103},{600,240, 0x02030104}, + { 40,360, 0x04200323},{160,360, 0xf4230422},{320,360, 0xf322f421}, + {480,360, 0x0321f320},{600,360, 0x04200323}, + { 40,450, 0x23102013},{160,450, 0x22132312},{320,450, 0x21122211}, + {480,450, 0x20112110},{600,450, 0x23102013}, + }; + + static const DWORD fetch4_data[] = {0x10111213, 0x01f1f202, 
0x03f3f404, 0x20212223}; + + static struct + { + IDirect3DVertexShader9 *vs; + IDirect3DPixelShader9 *ps; + const DWORD *ps_code; + const char *name; + } + shaders[] = + { + {NULL, NULL, NULL, "Fixed Function Pipeline"}, + {NULL, NULL, ps_code_texld, "texld"}, + {NULL, NULL, ps_code_texldp, "texldp"}, + {NULL, NULL, ps_code_texldd, "texldd"}, + {NULL, NULL, ps_code_texldb, "texldb"}, + {NULL, NULL, ps_code_texldl, "texldl"}, + }; + + static const struct + { + D3DFORMAT format; /* The format of the texture */ + DWORD data; /* The data we will write to the first line */ + D3DCOLOR expected_color[3]; /* Test FFP, texld and texldp as in "shaders" struct */ + UINT x, y; /* Where we expect the color to be */ + BOOL disable_alpha_wine; /* Do not check alpha color on wine (A8 is known to be broken) */ + } + format_tests[] = + { + /* Enabled formats */ + {D3DFMT_L8, 0xff804010, {0x00400010, 0x00400010, 0x00400010}, 40, 30, FALSE}, + {D3DFMT_L16, 0xff804010, {0x00ff0040, 0x00ff0040, 0x00ff0040}, 40, 30, FALSE}, + {D3DFMT_R16F, 0x38003c00, {0x008000ff, 0x008000ff, 0x008000ff}, 40, 30, FALSE}, + {D3DFMT_R32F, 0x3f000000, {0x00000080, 0x00000080, 0x00000080}, 40, 30, FALSE}, + + /* Disabled formats */ + {D3DFMT_A8, 0xff804010, {0x00000000, 0x00000000, 0x00000000}, 40, 30, TRUE}, + {D3DFMT_A8R8G8B8, 0xff804010, {0x64321906, 0x64321906, 0x562b1605}, 40, 30, FALSE}, + }; + + static const struct + { + D3DCOLOR color[2]; /* FETCH4 off and on */ + UINT x, y; + } + expected_depth[][4] = + { + { + /* This is the expected result for shadow samplers */ + {{0x8f8f8f8f,0x8f8f8f8f}, 20, 15}, + {{0xbfbfbfbf,0xbfbfbfbf},240, 15}, + {{0x60606060,0x60606060}, 20,240}, + {{0x40404040,0x40404040},240,120}, + }, + { + /* This is the expected result with DF16 */ + {{0xff9b00ff,0x202000ff}, 20, 15}, + {{0xff8300ff,0x00bf009f},240, 15}, + {{0xff6c00ff,0x9f000080}, 20,240}, + {{0xff8000ff,0x80809f60},240,120}, + }, + { + /* This is the expected result with DF24 */ + {{0xff9bffff,0x202000ff}, 20, 15}, 
+ {{0xff83ffff,0x00bf009f},240, 15}, + {{0xff6cffff,0x9f000080}, 20,240}, + {{0xff80ffff,0x80809f60},240,120}, + } + }; + + static const struct + { + D3DFORMAT format; + const char *name; + UINT index; + } + depth_tests[] = + { + {D3DFMT_D16_LOCKABLE, "D16_LOCKABLE", 0}, + {D3DFMT_D32, "D32", 0}, + {D3DFMT_D15S1, "D15S1", 0}, + {D3DFMT_D24S8, "D24S8", 0}, + {D3DFMT_D24X8, "D24X8", 0}, + {D3DFMT_D24X4S4, "D24X4S4", 0}, + {D3DFMT_D16, "D16", 0}, + {D3DFMT_D32F_LOCKABLE, "D32F_LOCKABLE", 0}, + {D3DFMT_D24FS8, "D24FS8", 0}, + {MAKEFOURCC('D','F','1','6'), "DF16", 1}, + {MAKEFOURCC('D','F','2','4'), "DF24", 2}, + }; + + IDirect3DSurface9 *original_ds, *original_rt, *rt; + IDirect3DVolumeTexture9 *texture3D; + IDirect3DPixelShader9 *ps_3d; + struct surface_readback rb; + IDirect3DVertexShader9 *vs; + IDirect3DTexture9 *texture; + IDirect3DDevice9 *device; + D3DLOCKED_RECT lr; + D3DLOCKED_BOX lb; + IDirect3D9 *d3d; + ULONG refcount; + D3DCAPS9 caps; + UINT i, j, k; + HWND window; + HRESULT hr; + + window = create_window(); + d3d = Direct3DCreate9(D3D_SDK_VERSION); + ok(!!d3d, "Failed to create a D3D object.\n"); + if (FAILED(IDirect3D9_CheckDeviceFormat(d3d, D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL, + D3DFMT_X8R8G8B8, D3DUSAGE_DEPTHSTENCIL, D3DRTYPE_TEXTURE, MAKEFOURCC('D','F','2','4')))) + { + skip("No DF24 support, skipping FETCH4 test.\n"); + goto done; + } + if (!(device = create_device(d3d, window, window, TRUE))) + { + skip("Failed to create a D3D device, skipping tests.\n"); + goto done; + } + + hr = IDirect3DDevice9_GetDeviceCaps(device, &caps); + ok(SUCCEEDED(hr), "GetDeviceCaps failed, hr %#x.\n", hr); + if (caps.PixelShaderVersion < D3DPS_VERSION(3, 0)) + { + skip("No pixel shader 3.0 support, skipping FETCH4 test.\n"); + IDirect3DDevice9_Release(device); + goto done; + } + hr = IDirect3DDevice9_GetRenderTarget(device, 0, &original_rt); + ok(SUCCEEDED(hr), "GetRenderTarget failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_GetDepthStencilSurface(device, &original_ds); 
+ ok(SUCCEEDED(hr), "GetDepthStencilSurface failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_CreateRenderTarget(device, 8, 8, D3DFMT_A8R8G8B8, + D3DMULTISAMPLE_NONE, 0, FALSE, &rt, NULL); + ok(SUCCEEDED(hr), "CreateRenderTarget failed, hr %#x.\n", hr); + + /* Create our texture for FETCH4 shader testing */ + hr = IDirect3DDevice9_CreateTexture(device, 4, 4, 1, 0, D3DFMT_L8, D3DPOOL_MANAGED, &texture, NULL); + ok(hr == D3D_OK, "Failed to create texture, hr %#x.\n", hr); + hr = IDirect3DTexture9_LockRect(texture, 0, &lr, NULL, 0); + ok(hr == D3D_OK, "Failed to lock texture, hr %#x.\n", hr); + for (i = 0; i < ARRAY_SIZE(fetch4_data); ++i) + memcpy((BYTE *)lr.pBits + i*lr.Pitch, &fetch4_data[i], sizeof(fetch4_data[i])); + hr = IDirect3DTexture9_UnlockRect(texture, 0); + ok(hr == D3D_OK, "Failed to unlock texture, hr %#x.\n", hr); + + /* Create vertex shader */ + hr = IDirect3DDevice9_CreateVertexShader(device, vs_code, &vs); + ok(hr == D3D_OK, "IDirect3DDevice9_CreateVertexShader returned %08x\n", hr); + /* Prepare the pixel shaders */ + for (i = 0; i < ARRAY_SIZE(shaders); ++i) + { + if (shaders[i].ps_code) + { + hr = IDirect3DDevice9_CreatePixelShader(device, shaders[i].ps_code, &shaders[i].ps); + ok(SUCCEEDED(hr), "CreatePixelShader failed, hr %#x.\n", hr); + /* Copy vertex shader pointer if a PS is present */ + shaders[i].vs = vs; + } + } + hr = IDirect3DDevice9_CreatePixelShader(device, ps_code_3d, &ps_3d); + ok(SUCCEEDED(hr), "CreatePixelShader failed, hr %#x.\n", hr); + + hr = IDirect3DDevice9_SetFVF(device, D3DFVF_XYZ | D3DFVF_TEX1 | D3DFVF_TEXCOORDSIZE3(0)); + ok(SUCCEEDED(hr), "SetFVF failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetRenderState(device, D3DRS_ZENABLE, D3DZB_TRUE); + ok(SUCCEEDED(hr), "SetRenderState failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetRenderState(device, D3DRS_ZFUNC, D3DCMP_ALWAYS); + ok(SUCCEEDED(hr), "SetRenderState failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetRenderState(device, D3DRS_ZWRITEENABLE, TRUE); + 
ok(SUCCEEDED(hr), "SetRenderState failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetRenderState(device, D3DRS_LIGHTING, FALSE); + ok(SUCCEEDED(hr), "SetRenderState failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetTexture(device, 0, (IDirect3DBaseTexture9 *)texture); + ok(hr == D3D_OK, "Failed to set texture, hr %#x.\n", hr); + + /* According to the spec, FETCH4 is enabled when D3DSAMP_MIPMAPLODBIAS == GET4 + and also D3DSAMP_MAGFILTER == D3DTEXF_POINT. But apparently only GET4 is needed + So the tests exercices that only GET4 is required, and any other parameter will work */ + hr = IDirect3DDevice9_SetSamplerState(device, 0, D3DSAMP_MIPMAPLODBIAS, MAKEFOURCC('G','E','T','4')); + ok(SUCCEEDED(hr), "SetSamplerState failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetSamplerState(device, 0, D3DSAMP_MAGFILTER, D3DTEXF_LINEAR); + ok(SUCCEEDED(hr), "SetSamplerState failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetSamplerState(device, 0, D3DSAMP_MINFILTER, D3DTEXF_POINT); + ok(SUCCEEDED(hr), "SetSamplerState failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetSamplerState(device, 0, D3DSAMP_MIPFILTER, D3DTEXF_LINEAR); + ok(SUCCEEDED(hr), "SetSamplerState failed, hr %#x.\n", hr); + + /*********************************************************************** + * Tests for FFP/PS correctness when using L8 texture with fetch4. 
* + ***********************************************************************/ + + /* Render with fetch4 and test if we obtain proper results for all sampler FFP/PS instructions */ + for (i = 0; i < ARRAY_SIZE(shaders); ++i) + { + hr = IDirect3DDevice9_SetVertexShader(device, shaders[i].vs); + ok(SUCCEEDED(hr), "Failed to set vertex shader, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetPixelShader(device, shaders[i].ps); + ok(SUCCEEDED(hr), "SetPixelShader failed, hr %#x.\n", hr); + + hr = IDirect3DDevice9_Clear(device, 0, NULL, D3DCLEAR_TARGET | D3DCLEAR_ZBUFFER, 0, 0.0f, 0); + ok(SUCCEEDED(hr), "Clear failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_BeginScene(device); + ok(SUCCEEDED(hr), "BeginScene failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_DrawPrimitiveUP(device, D3DPT_TRIANGLESTRIP, 2, quad, sizeof(*quad)); + ok(SUCCEEDED(hr), "DrawPrimitiveUP failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_EndScene(device); + ok(SUCCEEDED(hr), "EndScene failed, hr %#x.\n", hr); + + get_rt_readback(original_rt, &rb); + for (j = 0; j < ARRAY_SIZE(expected_colors); ++j) + { + D3DCOLOR color = get_readback_color(&rb, expected_colors[j].x, expected_colors[j].y); + ok(color_match(color, expected_colors[j].color, 1), + "Test %s Expected color 0x%08x at (%u, %u), got 0x%08x.\n", shaders[i].name, + expected_colors[j].color, expected_colors[j].x, expected_colors[j].y, color); + } + release_surface_readback(&rb); + + hr = IDirect3DDevice9_Present(device, NULL, NULL, NULL, NULL); + ok(SUCCEEDED(hr), "Present failed, hr %#x.\n", hr); + } + + /*************************************************************************** + * Tests for fetch4 enable/disable with different texture formats in FFP/PS. 
* + ***************************************************************************/ + + /* Create the textures to test FETCH4 does work/not work there as expected */ + for (i = 0; i < ARRAY_SIZE(format_tests); ++i) + { + IDirect3DTexture9 *tex; + hr = IDirect3DDevice9_CreateTexture(device, 2, 2, 1, 0, format_tests[i].format, + D3DPOOL_MANAGED, &tex, NULL); + ok(hr == D3D_OK, "Failed to create texture, hr %#x.\n", hr); + hr = IDirect3DTexture9_LockRect(tex, 0, &lr, NULL, 0); + ok(hr == D3D_OK, "Failed to lock texture, hr %#x.\n", hr); + memcpy(lr.pBits, &format_tests[i].data, 4); + hr = IDirect3DTexture9_UnlockRect(tex, 0); + ok(hr == D3D_OK, "Failed to unlock texture, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetTexture(device, 0, (IDirect3DBaseTexture9 *)tex); + ok(hr == D3D_OK, "Failed to set texture, hr %#x.\n", hr); + + /* Test if FETCH4 is enabled/disabled when different textures are used with FFP/texld/texldp */ + for (j = 0; j < ARRAY_SIZE(format_tests[i].expected_color); ++j) + { + hr = IDirect3DDevice9_SetVertexShader(device, shaders[j].vs); + ok(SUCCEEDED(hr), "Failed to set vertex shader, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetPixelShader(device, shaders[j].ps); + ok(SUCCEEDED(hr), "SetPixelShader failed, hr %#x.\n", hr); + + hr = IDirect3DDevice9_Clear(device, 0, NULL, D3DCLEAR_TARGET | D3DCLEAR_ZBUFFER, 0, 0.0f, 0); + ok(SUCCEEDED(hr), "Clear failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_BeginScene(device); + ok(SUCCEEDED(hr), "BeginScene failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_DrawPrimitiveUP(device, D3DPT_TRIANGLESTRIP, 2, quad, sizeof(*quad)); + ok(SUCCEEDED(hr), "DrawPrimitiveUP failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_EndScene(device); + ok(SUCCEEDED(hr), "EndScene failed, hr %#x.\n", hr); + + get_rt_readback(original_rt, &rb); + D3DCOLOR color = get_readback_color(&rb, format_tests[i].x, format_tests[i].y); + D3DCOLOR expected_color = format_tests[i].expected_color[j]; + /* FIXME: A8 gives different results, therefore we do 
not test alpha channel + * Fetch4 affects all 4 channels, and the test will catch errors anyway */ + ok(color_match(color & 0x00ffffff, expected_color & 0x00ffffff, 1), + "Test %d,%s expected color 0x%08x at (%u, %u), got 0x%08x.\n", i, shaders[j].name, + expected_color & 0x00ffffff, format_tests[i].x, format_tests[i].y, color & 0x00ffffff); + /* If the format gives proper alpha result, test the full color */ + todo_wine_if(format_tests[i].disable_alpha_wine) ok(color_match(color, expected_color, 1), + "Test %d,%s expected color 0x%08x at (%u, %u), got 0x%08x.\n", i, shaders[j].name, + expected_color, format_tests[i].x, format_tests[i].y, color); + release_surface_readback(&rb); + + hr = IDirect3DDevice9_Present(device, NULL, NULL, NULL, NULL); + ok(SUCCEEDED(hr), "Present failed, hr %#x.\n", hr); + } + IDirect3DTexture9_Release(tex); + } + + /************************************************** + * Tests that fetch4 works with 3D textures. * + **************************************************/ + + /* Create volume (3D) texture */ + IDirect3DDevice9_CreateVolumeTexture(device, 4, 4, 2, 1, 0, D3DFMT_L8, D3DPOOL_MANAGED, &texture3D, NULL ); + ok(hr == D3D_OK, "Failed to create volume texture, hr %#x.\n", hr); + hr = IDirect3DVolumeTexture9_LockBox(texture3D, 0, &lb, NULL, 0); + ok(hr == D3D_OK, "Failed to lock texture3D, hr %#x.\n", hr); + for (i = 0; i < ARRAY_SIZE(fetch4_data); ++i) + { + memcpy((BYTE *)lb.pBits + i*lb.RowPitch, &fetch4_data[i], sizeof(fetch4_data[i])); + /* Shift the lower level, to keep it different */ + memcpy((BYTE *)lb.pBits + i*lb.RowPitch + lb.SlicePitch, &fetch4_data[(i+1)%4], sizeof(fetch4_data[i])); + } + hr = IDirect3DVolumeTexture9_UnlockBox(texture3D, 0); + ok(hr == D3D_OK, "Failed to unlock texture3D, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetTexture(device, 0, (IDirect3DBaseTexture9 *)texture3D); + ok(hr == D3D_OK, "Failed to set texture3D, hr %#x.\n", hr); + + /* Test FFP and texld with dcl_volume (ps_3d) */ + for (i = 0; i < 2; 
++i) + { + hr = IDirect3DDevice9_SetVertexShader(device, i ? vs : NULL); + ok(SUCCEEDED(hr), "Failed to set vertex shader, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetPixelShader(device, i ? ps_3d : NULL); + ok(SUCCEEDED(hr), "SetPixelShader failed, hr %#x.\n", hr); + + hr = IDirect3DDevice9_Clear(device, 0, NULL, D3DCLEAR_TARGET | D3DCLEAR_ZBUFFER, 0, 0.0f, 0); + ok(SUCCEEDED(hr), "Clear failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_BeginScene(device); + ok(SUCCEEDED(hr), "BeginScene failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_DrawPrimitiveUP(device, D3DPT_TRIANGLESTRIP, 2, quad, sizeof(*quad)); + ok(SUCCEEDED(hr), "DrawPrimitiveUP failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_EndScene(device); + ok(SUCCEEDED(hr), "EndScene failed, hr %#x.\n", hr); + get_rt_readback(original_rt, &rb); + for (j = 0; j < ARRAY_SIZE(expected_colors); ++j) + { + D3DCOLOR color = get_readback_color(&rb, expected_colors[j].x, expected_colors[j].y); + /* FIXME: Fetch4 on 3D texture is like sampling a 2DArray at .xy0 with Fetch4 enabled + * Currently unimplemented on wine due to lack of GL functionality to cast 3D->2DArray + * Wine produces same results as if fetch4 is not enabled. Passes on w10 */ + todo_wine ok(color_match(color, expected_colors[j].color, 1), + "Expected color 0x%08x at (%u, %u), got 0x%08x.\n", + expected_colors[j].color, expected_colors[j].x, expected_colors[j].y, color); + } + release_surface_readback(&rb); + hr = IDirect3DDevice9_Present(device, NULL, NULL, NULL, NULL); + ok(SUCCEEDED(hr), "Present failed, hr %#x.\n", hr); + } + + /******************************************************** + * Tests for fetch4 enable/disable with depth textures. 
* + ********************************************************/ + + for (i = 0; i < ARRAY_SIZE(depth_tests); ++i) + { + D3DFORMAT format = depth_tests[i].format; + IDirect3DTexture9 *depth_texture; + IDirect3DSurface9 *ds; + + if (FAILED(IDirect3D9_CheckDeviceFormat(d3d, D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL, + D3DFMT_X8R8G8B8, D3DUSAGE_DEPTHSTENCIL, D3DRTYPE_TEXTURE, format))) + continue; + + hr = IDirect3DDevice9_CreateTexture(device, 8, 8, 1, + D3DUSAGE_DEPTHSTENCIL, format, D3DPOOL_DEFAULT, &depth_texture, NULL); + ok(SUCCEEDED(hr), "CreateTexture failed, hr %#x.\n", hr); + hr = IDirect3DTexture9_GetSurfaceLevel(depth_texture, 0, &ds); + ok(SUCCEEDED(hr), "GetSurfaceLevel failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetDepthStencilSurface(device, ds); + ok(SUCCEEDED(hr), "SetDepthStencilSurface failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetRenderTarget(device, 0, rt); + ok(SUCCEEDED(hr), "SetRenderTarget failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetVertexShader(device, NULL); + ok(SUCCEEDED(hr), "Failed to set vertex shader, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetPixelShader(device, NULL); + ok(SUCCEEDED(hr), "SetPixelShader failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetTexture(device, 0, (IDirect3DBaseTexture9 *)texture); + ok(hr == D3D_OK, "Failed to set texture3D, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetSamplerState(device, 0, + D3DSAMP_MIPMAPLODBIAS, MAKEFOURCC('G','E','T','1')); + ok(SUCCEEDED(hr), "SetSamplerState failed, hr %#x.\n", hr); + + /* Setup the depth/stencil surface. 
*/ + hr = IDirect3DDevice9_Clear(device, 0, NULL, D3DCLEAR_ZBUFFER, 0, 0.0f, 0); + ok(SUCCEEDED(hr), "Clear failed, hr %#x.\n", hr); + + /* Render to the depth surface */ + hr = IDirect3DDevice9_BeginScene(device); + ok(SUCCEEDED(hr), "BeginScene failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_DrawPrimitiveUP(device, D3DPT_TRIANGLESTRIP, 2, quad, sizeof(*quad)); + ok(SUCCEEDED(hr), "DrawPrimitiveUP failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_EndScene(device); + ok(SUCCEEDED(hr), "EndScene failed, hr %#x.\n", hr); + + hr = IDirect3DDevice9_SetDepthStencilSurface(device, NULL); + ok(SUCCEEDED(hr), "SetDepthStencilSurface failed, hr %#x.\n", hr); + IDirect3DSurface9_Release(ds); + hr = IDirect3DDevice9_SetRenderTarget(device, 0, original_rt); + ok(SUCCEEDED(hr), "SetRenderTarget failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetTexture(device, 0, (IDirect3DBaseTexture9 *)depth_texture); + ok(SUCCEEDED(hr), "SetTexture failed, hr %#x.\n", hr); + + /* Set a shader for depth sampling, otherwise windows does not show anything */ + hr = IDirect3DDevice9_SetVertexShader(device, vs); + ok(SUCCEEDED(hr), "Failed to set vertex shader, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetPixelShader(device, shaders[1].ps); /* same as texld */ + ok(SUCCEEDED(hr), "SetPixelShader failed, hr %#x.\n", hr); + + for (j = 0; j < 2; ++j){ + hr = IDirect3DDevice9_SetSamplerState(device, 0, + D3DSAMP_MIPMAPLODBIAS, MAKEFOURCC('G','E','T', j ? '4' : '1' )); + ok(SUCCEEDED(hr), "SetSamplerState failed, hr %#x.\n", hr); + + /* Do the actual shadow mapping. 
*/ + hr = IDirect3DDevice9_BeginScene(device); + ok(SUCCEEDED(hr), "BeginScene failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_DrawPrimitiveUP(device, D3DPT_TRIANGLESTRIP, 2, quad, sizeof(*quad)); + ok(SUCCEEDED(hr), "DrawPrimitiveUP failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_EndScene(device); + ok(SUCCEEDED(hr), "EndScene failed, hr %#x.\n", hr); + + get_rt_readback(original_rt, &rb); + for (k = 0; k < ARRAY_SIZE(expected_depth[depth_tests[i].index]); ++k) + { + UINT x = expected_depth[depth_tests[i].index][k].x; + UINT y = expected_depth[depth_tests[i].index][k].y; + D3DCOLOR expected_color = expected_depth[depth_tests[i].index][k].color[j]; + D3DCOLOR color = get_readback_color(&rb, x, y); + /* Geforce 7 on Windows returns 1.0 in alpha when the depth format is D24S8 or D24X8, + * whereas other GPUs (all AMD, newer Nvidia) return the same value they return in .rgb. + * Accept alpha mismatches as broken but make sure to check the color channels. */ + ok(color_match(color, expected_color, 2) + || broken(color_match(color & 0x00ffffff, expected_color & 0x00ffffff, 0)), + "Expected color 0x%08x at (%u, %u) for format %s, got 0x%08x.\n", + expected_color, x, y, depth_tests[i].name, color); + } + release_surface_readback(&rb); + + hr = IDirect3DDevice9_Present(device, NULL, NULL, NULL, NULL); + ok(SUCCEEDED(hr), "Present failed, hr %#x.\n", hr); + } + + hr = IDirect3DDevice9_SetTexture(device, 0, NULL); + ok(SUCCEEDED(hr), "SetTexture failed, hr %#x.\n", hr); + IDirect3DTexture9_Release(depth_texture); + } + + IDirect3DVolumeTexture9_Release(texture3D); + IDirect3DTexture9_Release(texture); + for (i = 0; i < ARRAY_SIZE(shaders); ++i) + if (shaders[i].ps) + IDirect3DPixelShader9_Release(shaders[i].ps); + IDirect3DPixelShader9_Release(ps_3d); + IDirect3DVertexShader9_Release(vs); + IDirect3DSurface9_Release(rt); + IDirect3DSurface9_Release(original_ds); + IDirect3DSurface9_Release(original_rt); + refcount = IDirect3DDevice9_Release(device); + ok(!refcount, "Device 
has %u references left.\n", refcount); +done: + IDirect3D9_Release(d3d); + DestroyWindow(window); +} + static void shadow_test(void) { static const DWORD ps_code[] = @@ -24480,6 +25047,7 @@ START_TEST(visual) depth_buffer2_test(); depth_blit_test(); intz_test(); + fetch4_test(); shadow_test(); fp_special_test(); depth_bounds_test();
- Add a format flag to indicate FETCH4 support in textures
- FIXME: FETCH4 on 3D textures is not implemented
- Tested under W10+Intel: when FETCH4 is enabled, projection is ignored
- The swizzle fix has been checked against Windows, since the FETCH4 swizzle does not match the one used by gather4
Signed-off-by: Daniel Ansorregui [email protected] --- dlls/wined3d/glsl_shader.c | 25 ++++++++++++++++++++++++- dlls/wined3d/utils.c | 9 +++++++++ dlls/wined3d/wined3d_private.h | 4 +++- 3 files changed, 36 insertions(+), 2 deletions(-)
diff --git a/dlls/wined3d/glsl_shader.c b/dlls/wined3d/glsl_shader.c index 5f1a86608b..d9eb26762c 100644 --- a/dlls/wined3d/glsl_shader.c +++ b/dlls/wined3d/glsl_shader.c @@ -9703,6 +9703,8 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * shader_addline(buffer, "#extension GL_ARB_shading_language_420pack : enable\n"); if (gl_info->supported[ARB_TEXTURE_RECTANGLE]) shader_addline(buffer, "#extension GL_ARB_texture_rectangle : enable\n"); + if (gl_info->supported[ARB_TEXTURE_GATHER]) + shader_addline(buffer, "#extension GL_ARB_texture_gather : enable\n");
if (!needs_legacy_glsl_syntax(gl_info)) { @@ -9843,6 +9845,7 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * for (stage = 0; stage < MAX_TEXTURES && settings->op[stage].cop != WINED3D_TOP_DISABLE; ++stage) { const char *texture_function, *coord_mask; + BOOL fetch4 = settings->op[stage].fetch4; BOOL proj;
if (!(tex_map & (1u << stage))) @@ -9862,7 +9865,6 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * FIXME("Unexpected projection mode %d\n", settings->op[stage].projected); proj = TRUE; } - if (settings->op[stage].tex_type == WINED3D_GL_RES_TYPE_TEX_CUBE) proj = FALSE;
@@ -9871,6 +9873,7 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * case WINED3D_GL_RES_TYPE_TEX_1D: texture_function = "texture1D"; coord_mask = "x"; + fetch4 = FALSE; break; case WINED3D_GL_RES_TYPE_TEX_2D: texture_function = "texture2D"; @@ -9879,6 +9882,11 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * case WINED3D_GL_RES_TYPE_TEX_3D: texture_function = "texture3D"; coord_mask = "xyz"; + /* 3D + Fetch4 should return textureGather(sampler2DArray, t.xy0) + unfortunately, we cant convert 3D to 2DArray */ + if (fetch4) + FIXME("Unsupported Fetch4 and texture3D sampling"); + fetch4 = FALSE; break; case WINED3D_GL_RES_TYPE_TEX_CUBE: texture_function = "textureCube"; @@ -9887,17 +9895,28 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * case WINED3D_GL_RES_TYPE_TEX_RECT: texture_function = "texture2DRect"; coord_mask = "xy"; + if (fetch4) + FIXME("Unsupported Fetch4 and texture2DRect sampling"); + fetch4 = FALSE; break; default: FIXME("Unhandled texture type %#x.\n", settings->op[stage].tex_type); texture_function = ""; coord_mask = "xyzw"; proj = FALSE; + fetch4 = FALSE; break; } if (!legacy_syntax) texture_function = "texture";
+ if (fetch4) + { + texture_function = "textureGather"; + /* Tested on W10+Intel, fetch4 enabled disables projection */ + proj = FALSE; + } + if (stage > 0 && (settings->op[stage - 1].cop == WINED3D_TOP_BUMPENVMAP || settings->op[stage - 1].cop == WINED3D_TOP_BUMPENVMAP_LUMINANCE)) @@ -9946,6 +9965,10 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * stage, texture_function, proj ? "Proj" : "", stage, stage, coord_mask, proj ? "w" : ""); }
+ /* Match FETCH4 swizzle with textureGather swizzle */ + if (fetch4) + shader_addline(buffer, "tex%u = tex%u.xwyz;\n", stage, stage); + string_buffer_sprintf(tex_reg_name, "tex%u", stage); shader_glsl_color_correction_ext(buffer, tex_reg_name->buffer, WINED3DSP_WRITEMASK_ALL, settings->op[stage].color_fixup); diff --git a/dlls/wined3d/utils.c b/dlls/wined3d/utils.c index f8da256b07..24e08df377 100644 --- a/dlls/wined3d/utils.c +++ b/dlls/wined3d/utils.c @@ -340,6 +340,11 @@ static const struct wined3d_format_base_flags format_base_flags[] = {WINED3DFMT_NULL, WINED3DFMT_FLAG_EXTENSION}, {WINED3DFMT_NVDB, WINED3DFMT_FLAG_EXTENSION}, {WINED3DFMT_RESZ, WINED3DFMT_FLAG_EXTENSION}, + {WINED3DFMT_L8_UNORM, WINED3DFMT_FLAG_ALLOW_FETCH4}, + {WINED3DFMT_L16_UNORM, WINED3DFMT_FLAG_ALLOW_FETCH4}, + {WINED3DFMT_R16_FLOAT, WINED3DFMT_FLAG_ALLOW_FETCH4}, + {WINED3DFMT_R16, WINED3DFMT_FLAG_ALLOW_FETCH4}, + {WINED3DFMT_R32_FLOAT, WINED3DFMT_FLAG_ALLOW_FETCH4}, };
static void rgb888_from_rgb565(WORD rgb565, BYTE *r, BYTE *g, BYTE *b) @@ -5783,6 +5788,7 @@ void gen_ffp_frag_op(const struct wined3d_context *context, const struct wined3d settings->op[i].tmp_dst = 0; settings->op[i].tex_type = WINED3D_GL_RES_TYPE_TEX_1D; settings->op[i].projected = WINED3D_PROJECTION_NONE; + settings->op[i].fetch4 = FALSE; i++; break; } @@ -5926,6 +5932,9 @@ void gen_ffp_frag_op(const struct wined3d_context *context, const struct wined3d settings->op[i].aarg1 = aarg1; settings->op[i].aarg2 = aarg2; settings->op[i].tmp_dst = state->texture_states[i][WINED3D_TSS_RESULT_ARG] == WINED3DTA_TEMP; + settings->op[i].fetch4 = (state->textures[i] && gl_info->supported[ARB_TEXTURE_GATHER] && + state->textures[i]->resource.format_flags & WINED3DFMT_FLAG_ALLOW_FETCH4 && + state->sampler_states[i][WINED3D_SAMP_MIPMAP_LOD_BIAS] == MAKEFOURCC('G','E','T','4')); }
/* Clear unsupported stages */ diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h index 0df3f2ab2a..60edb9bac1 100644 --- a/dlls/wined3d/wined3d_private.h +++ b/dlls/wined3d/wined3d_private.h @@ -2747,7 +2747,8 @@ struct texture_stage_op unsigned tex_type : 3; unsigned tmp_dst : 1; unsigned projected : 2; - unsigned padding : 10; + unsigned fetch4 : 1; + unsigned padding : 9; };
struct ffp_frag_settings @@ -4412,6 +4413,7 @@ extern enum wined3d_format_id pixelformat_for_depth(DWORD depth) DECLSPEC_HIDDEN #define WINED3DFMT_FLAG_VERTEX_ATTRIBUTE 0x01000000 #define WINED3DFMT_FLAG_BLIT 0x02000000 #define WINED3DFMT_FLAG_MAPPABLE 0x04000000 +#define WINED3DFMT_FLAG_ALLOW_FETCH4 0x08000000
struct wined3d_rational {
Am 08.01.2019 um 22:46 schrieb Daniel Ansorregui [email protected]:
@@ -340,6 +340,11 @@ static const struct wined3d_format_base_flags format_base_flags[] = {WINED3DFMT_NULL, WINED3DFMT_FLAG_EXTENSION}, {WINED3DFMT_NVDB, WINED3DFMT_FLAG_EXTENSION}, {WINED3DFMT_RESZ, WINED3DFMT_FLAG_EXTENSION},
+ {WINED3DFMT_L8_UNORM, WINED3DFMT_FLAG_ALLOW_FETCH4},
+ {WINED3DFMT_L16_UNORM, WINED3DFMT_FLAG_ALLOW_FETCH4},
+ {WINED3DFMT_R16_FLOAT, WINED3DFMT_FLAG_ALLOW_FETCH4},
+ {WINED3DFMT_R16, WINED3DFMT_FLAG_ALLOW_FETCH4},
+ {WINED3DFMT_R32_FLOAT, WINED3DFMT_FLAG_ALLOW_FETCH4},
};
...
@@ -5926,6 +5932,9 @@ void gen_ffp_frag_op(const struct wined3d_context *context, const struct wined3d settings->op[i].aarg1 = aarg1; settings->op[i].aarg2 = aarg2; settings->op[i].tmp_dst = state->texture_states[i][WINED3D_TSS_RESULT_ARG] == WINED3DTA_TEMP;
settings->op[i].fetch4 = (state->textures[i] && gl_info->supported[ARB_TEXTURE_GATHER] &&
state->textures[i]->resource.format_flags & WINED3DFMT_FLAG_ALLOW_FETCH4 &&
state->sampler_states[i][WINED3D_SAMP_MIPMAP_LOD_BIAS] == MAKEFOURCC('G','E','T','4')); }
You could remove WINED3DFMT_FLAG_ALLOW_FETCH4 from all formats during format table initialization if ARB_TEXTURE_GATHER is not supported. That way you can avoid checking for the GL extension in this performance-critical place.
I'm not sure myself what is supposed to go into format_base_flags[] versus format_texture_info[], but I think the former is for intrinsic properties of the format, whereas the latter is for things that depend, at least to some extent, on the OpenGL capabilities. Since you need ARB_texture_gather for fetch4, I'd put it into format_texture_info[], but wait for confirmation from Henri before changing it.
- Implement shader generation for texld/texldp/texldd/texldb/texldl
- FIXME: Vertex shader texldl is not implemented yet, since it is not possible to access ps_compile_args there. Maybe move it to another place. It probably does not work on Windows anyway
- FIXME: 3D textures and fetch4
- Trigger PS regeneration on FETCH4 state changes by storing a context flag
- Add a ps_compile_args flag for fetch4
Signed-off-by: Daniel Ansorregui [email protected] --- dlls/wined3d/cs.c | 5 +- dlls/wined3d/device.c | 4 + dlls/wined3d/glsl_shader.c | 160 ++++++++++++++++++++++++++------- dlls/wined3d/shader.c | 12 +++ dlls/wined3d/state.c | 13 +++ dlls/wined3d/wined3d_private.h | 7 +- 6 files changed, 164 insertions(+), 37 deletions(-)
diff --git a/dlls/wined3d/cs.c b/dlls/wined3d/cs.c index 5ea25e992a..355286f346 100644 --- a/dlls/wined3d/cs.c +++ b/dlls/wined3d/cs.c @@ -1384,7 +1384,10 @@ static void wined3d_cs_exec_set_texture(struct wined3d_cs *cs, const void *data) if (!prev || wined3d_texture_gl(op->texture)->target != wined3d_texture_gl(prev)->target || (!is_same_fixup(new_format->color_fixup, old_format->color_fixup) && !(can_use_texture_swizzle(gl_info, new_format) && can_use_texture_swizzle(gl_info, old_format))) - || (new_fmt_flags & WINED3DFMT_FLAG_SHADOW) != (old_fmt_flags & WINED3DFMT_FLAG_SHADOW)) + || (new_fmt_flags & WINED3DFMT_FLAG_SHADOW) != (old_fmt_flags & WINED3DFMT_FLAG_SHADOW) + || ((new_fmt_flags & WINED3DFMT_FLAG_ALLOW_FETCH4) != (old_fmt_flags & WINED3DFMT_FLAG_ALLOW_FETCH4) + && cs->state.sampler_states[op->texture->sampler][WINED3D_SAMP_MIPMAP_LOD_BIAS] + == MAKEFOURCC('G','E','T','4'))) device_invalidate_state(cs->device, STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL));
if (!prev && op->stage < d3d_info->limits.ffp_blend_stages) diff --git a/dlls/wined3d/device.c b/dlls/wined3d/device.c index d03a982e74..876f34fc2c 100644 --- a/dlls/wined3d/device.c +++ b/dlls/wined3d/device.c @@ -2087,7 +2087,11 @@ void CDECL wined3d_device_set_sampler_state(struct wined3d_device *device, device, sampler_idx, debug_d3dsamplerstate(state), value);
if (sampler_idx >= WINED3DVERTEXTEXTURESAMPLER0 && sampler_idx <= WINED3DVERTEXTEXTURESAMPLER3) + { sampler_idx -= (WINED3DVERTEXTEXTURESAMPLER0 - MAX_FRAGMENT_SAMPLERS); + if (state == WINED3D_SAMP_MIPMAP_LOD_BIAS && value == MAKEFOURCC('G','E','T','4')) + FIXME("Unsupported FETCH4 and Vertex Texture Sampler"); + }
if (sampler_idx >= ARRAY_SIZE(device->state.sampler_states)) { diff --git a/dlls/wined3d/glsl_shader.c b/dlls/wined3d/glsl_shader.c index d9eb26762c..1bd94c8271 100644 --- a/dlls/wined3d/glsl_shader.c +++ b/dlls/wined3d/glsl_shader.c @@ -49,6 +49,7 @@ WINE_DECLARE_DEBUG_CHANNEL(winediag); #define WINED3D_GLSL_SAMPLE_GRAD 0x04 #define WINED3D_GLSL_SAMPLE_LOAD 0x08 #define WINED3D_GLSL_SAMPLE_OFFSET 0x10 +#define WINED3D_GLSL_SAMPLE_GATHER 0x20
static const struct { @@ -3613,6 +3614,7 @@ static void shader_glsl_get_sample_function(const struct wined3d_shader_context BOOL lod = flags & WINED3D_GLSL_SAMPLE_LOD; BOOL grad = flags & WINED3D_GLSL_SAMPLE_GRAD; BOOL offset = flags & WINED3D_GLSL_SAMPLE_OFFSET; + BOOL gather = !shadow && flags & WINED3D_GLSL_SAMPLE_GATHER; const char *base = "texture", *type_part = "", *suffix = ""; unsigned int coord_size, deriv_size;
@@ -3658,6 +3660,19 @@ static void shader_glsl_get_sample_function(const struct wined3d_shader_context type_part = ""; }
+ if (gather) + { + if (resource_type == WINED3D_SHADER_RESOURCE_TEXTURE_3D) + FIXME("Unsupported Fetch4 and texture3D sampling"); + else + { + base = "texture"; + type_part = "Gather"; + suffix = ""; + projected = lod = grad = offset = FALSE; + } + } + sample_function->name = string_buffer_get(priv->string_buffers); string_buffer_sprintf(sample_function->name, "%s%s%s%s%s%s", base, type_part, projected ? "Proj" : "", lod ? "Lod" : grad ? "Grad" : "", offset ? "Offset" : "", suffix); @@ -5397,11 +5412,25 @@ static void shader_glsl_tex(const struct wined3d_shader_instruction *ins) } }
+ /* Fetch4 overwrites the other texture flags */ + if (priv->cur_ps_args->fetch4 & (1u << resource_idx)){ + /* 3D + Fetch4 should return textureGather(sampler2DArray, t.xy0) + unfortunately, we cant convert 3D to 2DArray */ + if (ins->ctx->reg_maps->resource_info[resource_idx].type == WINED3D_SHADER_RESOURCE_TEXTURE_3D) + FIXME("Unsupported Fetch4 and texture3D sampling"); + else + { + sample_flags = WINED3D_GLSL_SAMPLE_GATHER; + mask = 0; + } + } + shader_glsl_get_sample_function(ins->ctx, resource_idx, resource_idx, sample_flags, &sample_function); mask |= sample_function.coord_mask; sample_function.coord_mask = mask;
if (shader_version < WINED3D_SHADER_VERSION(2,0)) swizzle = WINED3DSP_NOSWIZZLE; + else if (sample_flags == WINED3D_GLSL_SAMPLE_GATHER) swizzle = WINED3DSP_FETCH4_SWIZZLE; else swizzle = ins->src[1].swizzle;
/* 1.0-1.3: Use destination register as coordinate source. @@ -5417,7 +5446,7 @@ static void shader_glsl_tex(const struct wined3d_shader_instruction *ins) { struct glsl_src_param coord_param; shader_glsl_add_src_param(ins, &ins->src[0], mask, &coord_param); - if (ins->flags & WINED3DSI_TEXLD_BIAS) + if (ins->flags & WINED3DSI_TEXLD_BIAS && sample_flags != WINED3D_GLSL_SAMPLE_GATHER) { struct glsl_src_param bias; shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_3, &bias); @@ -5433,10 +5462,11 @@ static void shader_glsl_tex(const struct wined3d_shader_instruction *ins)
static void shader_glsl_texldd(const struct wined3d_shader_instruction *ins) { + const struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data; const struct wined3d_gl_info *gl_info = ins->ctx->gl_info; struct glsl_src_param coord_param, dx_param, dy_param; struct glsl_sample_function sample_function; - DWORD sampler_idx; + DWORD sampler_idx = ins->src[1].reg.idx[0].offset; DWORD swizzle = ins->src[1].swizzle;
if (!shader_glsl_has_core_grad(gl_info) && !gl_info->supported[ARB_SHADER_TEXTURE_LOD]) @@ -5446,7 +5476,24 @@ static void shader_glsl_texldd(const struct wined3d_shader_instruction *ins) return; }
- sampler_idx = ins->src[1].reg.idx[0].offset; + /* Fetch4 overwrites the other texture flags */ + if (priv->cur_ps_args->fetch4 & (1u << sampler_idx)){ + /* 3D + Fetch4 should return textureGather(sampler2DArray, t.xy0) + unfortunately, we cant convert 3D to 2DArray */ + if (ins->ctx->reg_maps->resource_info[sampler_idx].type != WINED3D_SHADER_RESOURCE_TEXTURE_3D) + { + + shader_glsl_get_sample_function(ins->ctx, sampler_idx, sampler_idx, WINED3D_GLSL_SAMPLE_GATHER, &sample_function); + swizzle = WINED3DSP_FETCH4_SWIZZLE; + shader_glsl_add_src_param(ins, &ins->src[0], sample_function.coord_mask, &coord_param); + shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, swizzle, NULL, NULL, NULL, NULL, + "%s", coord_param.param_str); + shader_glsl_release_sample_function(ins->ctx, &sample_function); + return; + } + else + FIXME("Unsupported Fetch4 and texture3D sampling"); + }
shader_glsl_get_sample_function(ins->ctx, sampler_idx, sampler_idx, WINED3D_GLSL_SAMPLE_GRAD, &sample_function); shader_glsl_add_src_param(ins, &ins->src[0], sample_function.coord_mask, &coord_param); @@ -5461,19 +5508,28 @@ static void shader_glsl_texldd(const struct wined3d_shader_instruction *ins) static void shader_glsl_texldl(const struct wined3d_shader_instruction *ins) { const struct wined3d_shader_version *shader_version = &ins->ctx->reg_maps->shader_version; + const struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data; const struct wined3d_gl_info *gl_info = ins->ctx->gl_info; struct glsl_src_param coord_param, lod_param; struct glsl_sample_function sample_function; + DWORD sampler_idx = ins->src[1].reg.idx[0].offset; DWORD swizzle = ins->src[1].swizzle; - DWORD sampler_idx; + DWORD flags = WINED3D_GLSL_SAMPLE_LOD;
- sampler_idx = ins->src[1].reg.idx[0].offset; + /* This call can be used in vertex shader, without cur_ps_args */ + if(priv->cur_ps_args && priv->cur_ps_args->fetch4 & (1u << sampler_idx)) + { + /* 3D + Fetch4 should return textureGather(sampler2DArray, t.xy0) + unfortunately, we cant convert 3D to 2DArray */ + if (ins->ctx->reg_maps->resource_info[sampler_idx].type == WINED3D_SHADER_RESOURCE_TEXTURE_3D) + FIXME("Unsupported Fetch4 and texture3D sampling"); + else + flags = WINED3D_GLSL_SAMPLE_GATHER; + }
- shader_glsl_get_sample_function(ins->ctx, sampler_idx, sampler_idx, WINED3D_GLSL_SAMPLE_LOD, &sample_function); + shader_glsl_get_sample_function(ins->ctx, sampler_idx, sampler_idx, flags, &sample_function); shader_glsl_add_src_param(ins, &ins->src[0], sample_function.coord_mask, &coord_param);
- shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_3, &lod_param); - if (shader_version->type == WINED3D_SHADER_TYPE_PIXEL && !shader_glsl_has_core_grad(gl_info) && !gl_info->supported[ARB_SHADER_TEXTURE_LOD]) { @@ -5482,6 +5538,18 @@ static void shader_glsl_texldl(const struct wined3d_shader_instruction *ins) * even without the appropriate extension. */ WARN("Using %s in fragment shader.\n", sample_function.name->buffer); } + + if (flags == WINED3D_GLSL_SAMPLE_GATHER){ + swizzle = WINED3DSP_FETCH4_SWIZZLE; + shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, swizzle, NULL, NULL, NULL, NULL, + "%s", coord_param.param_str); + shader_glsl_release_sample_function(ins->ctx, &sample_function); + return; + } + + shader_glsl_get_sample_function(ins->ctx, sampler_idx, sampler_idx, WINED3D_GLSL_SAMPLE_LOD, &sample_function); + shader_glsl_add_src_param(ins, &ins->src[0], sample_function.coord_mask, &coord_param); + shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_3, &lod_param); shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, swizzle, NULL, NULL, lod_param.param_str, NULL, "%s", coord_param.param_str); shader_glsl_release_sample_function(ins->ctx, &sample_function); @@ -6175,6 +6243,7 @@ static void shader_glsl_ld(const struct wined3d_shader_instruction *ins) struct glsl_sample_function sample_function; DWORD flags = WINED3D_GLSL_SAMPLE_LOAD; BOOL has_lod_param; + const struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data;
if (wined3d_shader_instruction_has_texel_offset(ins)) flags |= WINED3D_GLSL_SAMPLE_OFFSET; @@ -6189,6 +6258,10 @@ static void shader_glsl_ld(const struct wined3d_shader_instruction *ins) } has_lod_param = is_mipmapped(reg_maps->resource_info[resource_idx].type);
+ if (priv->cur_ps_args && priv->cur_ps_args->fetch4 & (1u << resource_idx)){ + FIXME("Unsupported FETCH4 and LD Sampling SM 5.0"); + } + shader_glsl_get_sample_function(ins->ctx, resource_idx, sampler_idx, flags, &sample_function); shader_glsl_add_src_param(ins, &ins->src[0], sample_function.coord_mask, &coord_param); shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_3, &lod_param); @@ -6214,46 +6287,64 @@ static void shader_glsl_sample(const struct wined3d_shader_instruction *ins) struct glsl_src_param coord_param, lod_param, dx_param, dy_param; unsigned int resource_idx, sampler_idx, sampler_bind_idx; struct glsl_sample_function sample_function; + const struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data; + DWORD swizzle = ins->src[1].swizzle; DWORD flags = 0;
+ resource_idx = ins->src[1].reg.idx[0].offset; + sampler_idx = ins->src[2].reg.idx[0].offset; + if (ins->handler_idx == WINED3DSIH_SAMPLE_GRAD) flags |= WINED3D_GLSL_SAMPLE_GRAD; if (ins->handler_idx == WINED3DSIH_SAMPLE_LOD) flags |= WINED3D_GLSL_SAMPLE_LOD; if (wined3d_shader_instruction_has_texel_offset(ins)) flags |= WINED3D_GLSL_SAMPLE_OFFSET; - - resource_idx = ins->src[1].reg.idx[0].offset; - sampler_idx = ins->src[2].reg.idx[0].offset; + if (priv->cur_ps_args->fetch4 & (1u << sampler_idx)) + { + /* 3D + Fetch4 should return textureGather(sampler2DArray, t.xy0) + unfortunately, we cant convert 3D to 2DArray */ + if (ins->ctx->reg_maps->resource_info[sampler_idx].type == WINED3D_SHADER_RESOURCE_TEXTURE_3D) + FIXME("Unsupported Fetch4 and texture3D sampling"); + else + { + flags = WINED3D_GLSL_SAMPLE_GATHER; + swizzle = WINED3DSP_FETCH4_SWIZZLE; + } + }
shader_glsl_get_sample_function(ins->ctx, resource_idx, sampler_idx, flags, &sample_function); shader_glsl_add_src_param(ins, &ins->src[0], sample_function.coord_mask, &coord_param);
- switch (ins->handler_idx) + /* Fetch4 overwrites the other texture flags */ + if (flags != WINED3D_GLSL_SAMPLE_GATHER) { - case WINED3DSIH_SAMPLE: - break; - case WINED3DSIH_SAMPLE_B: - shader_glsl_add_src_param(ins, &ins->src[3], WINED3DSP_WRITEMASK_0, &lod_param); - lod_param_str = lod_param.param_str; - break; - case WINED3DSIH_SAMPLE_GRAD: - shader_glsl_add_src_param(ins, &ins->src[3], sample_function.deriv_mask, &dx_param); - shader_glsl_add_src_param(ins, &ins->src[4], sample_function.deriv_mask, &dy_param); - dx_param_str = dx_param.param_str; - dy_param_str = dy_param.param_str; - break; - case WINED3DSIH_SAMPLE_LOD: - shader_glsl_add_src_param(ins, &ins->src[3], WINED3DSP_WRITEMASK_0, &lod_param); - lod_param_str = lod_param.param_str; - break; - default: - ERR("Unhandled opcode %s.\n", debug_d3dshaderinstructionhandler(ins->handler_idx)); - break; + switch (ins->handler_idx) + { + case WINED3DSIH_SAMPLE: + break; + case WINED3DSIH_SAMPLE_B: + shader_glsl_add_src_param(ins, &ins->src[3], WINED3DSP_WRITEMASK_0, &lod_param); + lod_param_str = lod_param.param_str; + break; + case WINED3DSIH_SAMPLE_GRAD: + shader_glsl_add_src_param(ins, &ins->src[3], sample_function.deriv_mask, &dx_param); + shader_glsl_add_src_param(ins, &ins->src[4], sample_function.deriv_mask, &dy_param); + dx_param_str = dx_param.param_str; + dy_param_str = dy_param.param_str; + break; + case WINED3DSIH_SAMPLE_LOD: + shader_glsl_add_src_param(ins, &ins->src[3], WINED3DSP_WRITEMASK_0, &lod_param); + lod_param_str = lod_param.param_str; + break; + default: + ERR("Unhandled opcode %s.\n", debug_d3dshaderinstructionhandler(ins->handler_idx)); + break; + } }
sampler_bind_idx = shader_glsl_find_sampler(&ins->ctx->reg_maps->sampler_map, resource_idx, sampler_idx); - shader_glsl_gen_sample_code(ins, sampler_bind_idx, &sample_function, ins->src[1].swizzle, + shader_glsl_gen_sample_code(ins, sampler_bind_idx, &sample_function, swizzle, dx_param_str, dy_param_str, lod_param_str, &ins->texel_offset, "%s", coord_param.param_str); shader_glsl_release_sample_function(ins->ctx, &sample_function); } @@ -6299,6 +6390,9 @@ static void shader_glsl_sample_c(const struct wined3d_shader_instruction *ins) unsigned int coord_size; DWORD flags = 0;
+ resource_idx = ins->src[1].reg.idx[0].offset; + sampler_idx = ins->src[2].reg.idx[0].offset; + if (ins->handler_idx == WINED3DSIH_SAMPLE_C_LZ) { lod_param = "0"; @@ -6310,8 +6404,6 @@ static void shader_glsl_sample_c(const struct wined3d_shader_instruction *ins)
if (!(resource_info = shader_glsl_get_resource_info(ins, &ins->src[1].reg))) return; - resource_idx = ins->src[1].reg.idx[0].offset; - sampler_idx = ins->src[2].reg.idx[0].offset;
shader_glsl_get_sample_function(ins->ctx, resource_idx, sampler_idx, flags, &sample_function); coord_size = shader_glsl_get_write_mask_size(sample_function.coord_mask); diff --git a/dlls/wined3d/shader.c b/dlls/wined3d/shader.c index e11a37cf07..f07804de63 100644 --- a/dlls/wined3d/shader.c +++ b/dlls/wined3d/shader.c @@ -4027,6 +4027,18 @@ void find_ps_compile_args(const struct wined3d_state *state, const struct wined3 } }
+ if (gl_info->supported[ARB_TEXTURE_GATHER]) + { + for (i = 0; i < MAX_FRAGMENT_SAMPLERS; ++i) + { + if (state->sampler_states[i][WINED3D_SAMP_MIPMAP_LOD_BIAS] == MAKEFOURCC('G','E','T','4') + && state->textures[i]->resource.format_flags & WINED3DFMT_FLAG_ALLOW_FETCH4) + args->fetch4 |= 1 << i; + else + args->fetch4 &= ~(1 << i); + } + } + if (context->d3d_info->limits.varying_count < wined3d_max_compat_varyings(context->gl_info)) { const struct wined3d_shader *vs = state->shader[WINED3D_SHADER_TYPE_VERTEX]; diff --git a/dlls/wined3d/state.c b/dlls/wined3d/state.c index 8708aa09b3..dc69e935c5 100644 --- a/dlls/wined3d/state.c +++ b/dlls/wined3d/state.c @@ -3616,6 +3616,8 @@ static void sampler(struct wined3d_context *context, const struct wined3d_state { struct wined3d_texture_gl *texture_gl = wined3d_texture_gl(state->textures[sampler_idx]); BOOL srgb = state->sampler_states[sampler_idx][WINED3D_SAMP_SRGB_TEXTURE]; + BOOL fetch4 = state->sampler_states[sampler_idx][WINED3D_SAMP_MIPMAP_LOD_BIAS] + == MAKEFOURCC('G','E','T','4'); const DWORD *sampler_states = state->sampler_states[sampler_idx]; struct wined3d_device *device = context->device; struct wined3d_sampler_desc desc; @@ -3650,6 +3652,17 @@ static void sampler(struct wined3d_context *context, const struct wined3d_state /* Trigger shader constant reloading (for NP2 texcoord fixup) */ if (!(texture_gl->t.flags & WINED3D_TEXTURE_POW2_MAT_IDENT)) context->constant_update_mask |= WINED3D_SHADER_CONST_PS_NP2_FIXUP; + + /* Trigger pixel shader recompilation for FETCH4 changes */ + if(gl_info->supported[ARB_TEXTURE_GATHER] && + ((context->last_was_fetch4 >> sampler_idx) & 0x1) ^ fetch4) + { + if (fetch4) + context->last_was_fetch4 |= 1 << sampler_idx; + else + context->last_was_fetch4 &= ~(1 << sampler_idx); + context->shader_update_mask = (1u << WINED3D_SHADER_TYPE_PIXEL); + } } else { diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h index 60edb9bac1..79cc638ae6 100644 --- 
a/dlls/wined3d/wined3d_private.h +++ b/dlls/wined3d/wined3d_private.h @@ -521,7 +521,8 @@ enum wined3d_immconst_type WINED3D_IMMCONST_VEC4, };
-#define WINED3DSP_NOSWIZZLE (0u | (1u << 2) | (2u << 4) | (3u << 6)) +#define WINED3DSP_NOSWIZZLE (0u | (1u << 2) | (2u << 4) | (3u << 6)) +#define WINED3DSP_FETCH4_SWIZZLE (0u | (3u << 2) | (1u << 4) | (2u << 6)) /* xwyz */
enum wined3d_shader_src_modifier { @@ -1358,7 +1359,8 @@ struct ps_compile_args DWORD alpha_test_func : 3; DWORD render_offscreen : 1; DWORD rt_alpha_swizzle : 8; /* MAX_RENDER_TARGET_VIEWS, 8 */ - DWORD padding : 18; + WORD fetch4 : 16; /* MAX_FRAGMENT_SAMPLERS, 16 */ + DWORD padding : 2; };
enum fog_src_type @@ -1893,6 +1895,7 @@ struct wined3d_context DWORD last_was_ffp_blit : 1; DWORD last_was_blit : 1; DWORD last_was_ckey : 1; + DWORD last_was_fetch4 : 16; /* MAX_FRAGMENT_SAMPLERS */ DWORD fog_coord : 1; DWORD fog_enabled : 1; DWORD current : 1;
- This assumes FETCH4 is already supported by Wine, and checks for ARB_texture_gather support before exposing DF24.
Signed-off-by: Daniel Ansorregui [email protected] --- dlls/wined3d/directx.c | 6 ++++++ dlls/wined3d/utils.c | 30 ++++++++++++++++++++++++++++++ include/wine/wined3d.h | 2 ++ 3 files changed, 38 insertions(+)
diff --git a/dlls/wined3d/directx.c b/dlls/wined3d/directx.c index 75622be80c..be0c514a78 100644 --- a/dlls/wined3d/directx.c +++ b/dlls/wined3d/directx.c @@ -1723,6 +1723,12 @@ HRESULT CDECL wined3d_check_device_format(const struct wined3d *wined3d, UINT ad return WINED3DOK_NOMIPGEN; }
+ if ((check_format_id == WINED3DFMT_DF24) && !adapter->gl_info.supported[ARB_TEXTURE_GATHER]) + { + TRACE("No Support for Fetch4 disabling DF24 support.\n"); + return WINED3DERR_NOTAVAILABLE; + } + return WINED3D_OK; }
diff --git a/dlls/wined3d/utils.c b/dlls/wined3d/utils.c index 24e08df377..02667870cb 100644 --- a/dlls/wined3d/utils.c +++ b/dlls/wined3d/utils.c @@ -65,6 +65,8 @@ format_index_remap[] = {WINED3DFMT_R16, WINED3D_FORMAT_FOURCC_BASE + 20}, {WINED3DFMT_AL16, WINED3D_FORMAT_FOURCC_BASE + 21}, {WINED3DFMT_NV12, WINED3D_FORMAT_FOURCC_BASE + 22}, + {WINED3DFMT_DF16, WINED3D_FORMAT_FOURCC_BASE + 23}, + {WINED3DFMT_DF24, WINED3D_FORMAT_FOURCC_BASE + 24}, };
#define WINED3D_FORMAT_COUNT (WINED3D_FORMAT_FOURCC_BASE + ARRAY_SIZE(format_index_remap)) @@ -141,6 +143,8 @@ static const struct wined3d_format_channels formats[] = {WINED3DFMT_NVHU, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0}, {WINED3DFMT_NVHS, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0}, {WINED3DFMT_NULL, 8, 8, 8, 8, 0, 8, 16, 24, 4, 0, 0}, + {WINED3DFMT_DF16, 0, 0, 0, 0, 0, 0, 0, 0, 2, 16, 0}, + {WINED3DFMT_DF24, 0, 0, 0, 0, 0, 0, 0, 0, 4, 24, 0}, /* Unsure about them, could not find a Windows driver that supports them */ {WINED3DFMT_R16, 16, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0}, {WINED3DFMT_AL16, 0, 0, 0, 16, 0, 0, 0, 16, 4, 0, 0}, @@ -345,6 +349,8 @@ static const struct wined3d_format_base_flags format_base_flags[] = {WINED3DFMT_R16_FLOAT, WINED3DFMT_FLAG_ALLOW_FETCH4}, {WINED3DFMT_R16, WINED3DFMT_FLAG_ALLOW_FETCH4}, {WINED3DFMT_R32_FLOAT, WINED3DFMT_FLAG_ALLOW_FETCH4}, + {WINED3DFMT_DF16, WINED3DFMT_FLAG_ALLOW_FETCH4}, + {WINED3DFMT_DF24, WINED3DFMT_FLAG_ALLOW_FETCH4}, };
static void rgb888_from_rgb565(WORD rgb565, BYTE *r, BYTE *g, BYTE *b) @@ -1893,6 +1899,25 @@ static const struct wined3d_format_texture_info format_texture_info[] = WINED3DFMT_FLAG_TEXTURE | WINED3DFMT_FLAG_POSTPIXELSHADER_BLENDING | WINED3DFMT_FLAG_FILTERING | WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL, ARB_FRAMEBUFFER_OBJECT, NULL}, + {WINED3DFMT_DF16, GL_DEPTH_COMPONENT, GL_DEPTH_COMPONENT, 0, + GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, 0, + WINED3DFMT_FLAG_DEPTH, + WINED3D_GL_EXT_NONE, NULL}, + {WINED3DFMT_DF16, GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT16, 0, + GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, 0, + WINED3DFMT_FLAG_TEXTURE | WINED3DFMT_FLAG_POSTPIXELSHADER_BLENDING | WINED3DFMT_FLAG_FILTERING + | WINED3DFMT_FLAG_DEPTH, + ARB_DEPTH_TEXTURE, NULL}, + {WINED3DFMT_DF24, GL_DEPTH24_STENCIL8_EXT, GL_DEPTH24_STENCIL8_EXT, 0, + GL_DEPTH_STENCIL_EXT, GL_UNSIGNED_INT_24_8_EXT, 0, + WINED3DFMT_FLAG_TEXTURE | WINED3DFMT_FLAG_POSTPIXELSHADER_BLENDING | WINED3DFMT_FLAG_FILTERING + | WINED3DFMT_FLAG_DEPTH, + EXT_PACKED_DEPTH_STENCIL, NULL}, + {WINED3DFMT_DF24, GL_DEPTH24_STENCIL8, GL_DEPTH24_STENCIL8, 0, + GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, 0, + WINED3DFMT_FLAG_TEXTURE | WINED3DFMT_FLAG_POSTPIXELSHADER_BLENDING | WINED3DFMT_FLAG_FILTERING + | WINED3DFMT_FLAG_DEPTH, + ARB_FRAMEBUFFER_OBJECT, NULL}, {WINED3DFMT_NULL, 0, 0, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, 0, WINED3DFMT_FLAG_TEXTURE | WINED3DFMT_FLAG_RENDERTARGET | WINED3DFMT_FLAG_FBO_ATTACHABLE, @@ -3534,6 +3559,9 @@ static void apply_format_fixups(struct wined3d_adapter *adapter, struct wined3d_ {WINED3DFMT_INTZ, "XXXX", FALSE, WINED3D_GL_EXT_NONE}, {WINED3DFMT_INTZ, "XYZW", FALSE, WINED3D_GL_LEGACY_CONTEXT},
+ {WINED3DFMT_DF16, "X011", FALSE, WINED3D_GL_EXT_NONE}, + {WINED3DFMT_DF24, "X111", FALSE, WINED3D_GL_EXT_NONE}, + {WINED3DFMT_L8_UNORM, "XXX1", FALSE, ARB_TEXTURE_RG}, };
@@ -4388,6 +4416,8 @@ const char *debug_d3dformat(enum wined3d_format_id format_id) FMT_TO_STR(WINED3DFMT_R16); FMT_TO_STR(WINED3DFMT_AL16); FMT_TO_STR(WINED3DFMT_NV12); + FMT_TO_STR(WINED3DFMT_DF16); + FMT_TO_STR(WINED3DFMT_DF24); #undef FMT_TO_STR default: { diff --git a/include/wine/wined3d.h b/include/wine/wined3d.h index b0b1abc508..abf103f395 100644 --- a/include/wine/wined3d.h +++ b/include/wine/wined3d.h @@ -271,6 +271,8 @@ enum wined3d_format_id WINED3DFMT_R16 = WINEMAKEFOURCC(' ','R','1','6'), WINED3DFMT_AL16 = WINEMAKEFOURCC('A','L','1','6'), WINED3DFMT_NV12 = WINEMAKEFOURCC('N','V','1','2'), + WINED3DFMT_DF16 = WINEMAKEFOURCC('D','F','1','6'), + WINED3DFMT_DF24 = WINEMAKEFOURCC('D','F','2','4'),
WINED3DFMT_FORCE_DWORD = 0xffffffff };
Hi,
While running your changed tests on Windows, I think I found new failures. Being a bot and all I'm not very good at pattern recognition, so I might be wrong, but could you please double-check?
Full results can be found at: https://testbot.winehq.org/JobDetails.pl?Key=46186
Your paranoid android.
=== w8 (32 bit report) ===
d3d9: visual.c:8620: Test failed: Got unexpected color 0x00007580 for quad 2 (different colors).
=== w1064 (64 bit report) ===
d3d9: visual.c:8572: Test failed: Input test: Quad 3(2crd-wrongidx) returned color 0x00ff00ff, expected 0x00ff0080 visual.c:8620: Test failed: Got unexpected color 0x00007580 for quad 2 (different colors).
Hi,
I tested the tests on Windows 7 with a Radeon HD 5770. There are plenty of failures, for some of which I have an explanation, but others I don't.
Visual2.txt is the output of the test. I also attached various screenshots
*) ffp_texld is the first test loop. The fixed function and texld output looks the same. *) texldp is the texldp shader. It appears that AMD still does the projected read at the same time it does the fetch4 lookup. *) texldl and texldd suggest that AMD drivers do not apply FETCH4 for those instructions *) texldb however behaves like texld. Presumably it also does that with fetch4 off on the single-level texture :-)
What I can't explain is why the colors even in the FFP and texld case are wrong. Maybe intel applies a wrong swizzle?
Please ignore the white border on some of the PNGs. I was sloppy cutting off the irrelevant parts of the screenshots, and somehow windows didn't want to screenshot the individual window.
I'll look into the remaining failures and send more screenshots to avoid overshooting the mailing list size limit.
Where can we go from here? Because applications apparently cannot rely on a specific GPU behavior, we have more freedom. Generally I recommend sticking with AMD's behavior, but if it is too cumbersome in a corner case we can write a WARN or FIXME if an application even uses it. If the different colors are a result of a different swizzle and nobody seems to have noticed yet, it is even more likely that nobody actually uses FETCH4. In that case we could make the entire FETCH4 thing a FIXME, enable the DF24 depth format and see if any application ever hits the FIXME. It'd be a pity to have your work just gathering dust in the mailing list archives though.
We could also implement FETCH4 the AMD way, and mark all intel results broken(). Or we even remove the more corner case tests that produce contrary results, like texldp and texldd, but keep the baseline tests like ffp and texld. The important thing is that we preserve the knowledge the tests have found in a comment.
Stefan
Another pile of screenshots, this time the format tests. Only FFP to reduce my work. I also assigned the names manually and might have screwed up.
Two observations: *) AMD enables fetch4 on A8 and ARGB8 too. *) Comparing the expected and actual results reinforces my suspicion that the output swizzle differs.
I don't know how it picks the color in the multi-channel textures. The A8 results suggest it doesn't just pick the red channel, and the R16F and R32F results suggest it doesn't just pick the alpha channel. I guess we're safe picking the red channel and can drop the fetch4 enable / disable based on texture formats.
Am 10.01.2019 um 18:30 schrieb Stefan Dösinger [email protected]:
Hi,
I tested the tests on Windows 7 with a Radeon HD 5770. There are plenty of failures, for some of which I have an explanation, but others I don't.
Visual2.txt is the output of the test. I also attached various screenshots
*) ffp_texld is the first test loop. The fixed function and texld output looks the same. *) texldp is the texldp shader. It appears that AMD still does the projected read at the same time it does the fetch4 lookup. *) texldl and texldd suggest that AMD drivers do not apply FETCH4 for those instructions *) texldb however behaves like texld. Presumably it also does that with fetch4 off on the single-level texture :-)
What I can't explain is why the colors even in the FFP and texld case are wrong. Maybe intel applies a wrong swizzle?
Please ignore the white border on some of the PNGs. I was sloppy cutting off the irrelevant parts of the screenshots, and somehow windows didn't want to screenshot the individual window.
I'll look into the remaining failures and send more screenshots to avoid overshooting the mailing list size limit.
Where can we go from here? Because apparently applications cannot rely on a specific GPU behavior we have more freedom. Generally I recommend to stick with AMDs behavior, but if it is too cumbersome in a corner case we can write a WARN or FIXME if an application even uses it. If the different colors are a result of a different swizzle and nobody seems to have noticed yet, it is even more likely that nobody actually uses FETCH4. In that case we could make the entire FETCH4 thing a FIXME, enable the DF24 depth format and see if any application ever hits the FIXME. It'd be a pity to have your work just gathering dust on the mailing list archives though.
We could also implement FETCH4 the AMD way, and mark all intel results broken(). Or we even remove the more corner case tests that produce contrary results, like texldp and texldd, but keep the baseline tests like ffp and texld. The important thing is that we preserve the knowledge the tests have found in a comment.
Stefan
<texldb.png> <texldd_texldl.png> <visual2.txt><texldp.png><ffp_texld.png>
More quick testing, this time without screenshots: It seems AMD does do fetch4 on volume textures. Colors mismatch, presumably the same swizzle difference.
There is something going on with the depth format tests, but I can't quite interpret the results. I'll look at it closer once we have answers to the swizzle question.
Am 10.01.2019 um 18:52 schrieb Stefan Dösinger [email protected]:
Another pile of screenshots, this time the format tests. Only FFP to reduce my work. I also assigned the names manually and might have screwed up.
Two observations: *) AMD enables fetch4 on A8 and ARGB8 too. *) Comparing the expected and actual results reinforce my suspicion that the output swizzle differs
I don't know how it picks the color in the multi-channel textures. The A8 results suggest it doesn't just pick the red channel, and the R16F and R32F results suggest it doesn't just pick the alpha channel. I guess we're safe picking the red channel and can drop the fetch4 enable / disable based on texture formats.
<A8R8G8B8_ffp.png><A8_ffp.png><L8_ffp.png><L16_ffp.png><R16F_ffp.png>
Am 10.01.2019 um 18:30 schrieb Stefan Dösinger <[email protected]>:
Hi,
I tested the tests on Windows 7 with a Radeon HD 5770. There are plenty of failures, for some of which I have an explanation, but others I don't.
Visual2.txt is the output of the test. I also attached various screenshots
*) ffp_texld is the first test loop. The fixed function and texld output looks the same. *) texldp is the texldp shader. It appears that AMD still does the projected read at the same time it does the fetch4 lookup. *) texldl and texldd suggest that AMD drivers do not apply FETCH4 for those instructions *) texldb however behaves like texld. Presumably it also does that with fetch4 off on the single-level texture :-)
What I can't explain is why the colors even in the FFP and texld case are wrong. Maybe intel applies a wrong swizzle?
Please ignore the white border on some of the PNGs. I was sloppy cutting off the irrelevant parts of the screenshots, and somehow windows didn't want to screenshot the individual window.
I'll look into the remaining failures and send more screenshots to avoid overshooting the mailing list size limit.
Where can we go from here? Because apparently applications cannot rely on a specific GPU behavior we have more freedom. Generally I recommend to stick with AMDs behavior, but if it is too cumbersome in a corner case we can write a WARN or FIXME if an application even uses it. If the different colors are a result of a different swizzle and nobody seems to have noticed yet, it is even more likely that nobody actually uses FETCH4. In that case we could make the entire FETCH4 thing a FIXME, enable the DF24 depth format and see if any application ever hits the FIXME. It'd be a pity to have your work just gathering dust on the mailing list archives though.
We could also implement FETCH4 the AMD way, and mark all intel results broken(). Or we even remove the more corner case tests that produce contrary results, like texldp and texldd, but keep the baseline tests like ffp and texld. The important thing is that we preserve the knowledge the tests have found in a comment.
Stefan
<texldb.png> <texldd_texldl.png> <visual2.txt><texldp.png><ffp_texld.png>
And one last mail, here is a log from Windows 7 on my Radeon X1600 (r500) machine. This was the GPU where AMD originally introduced fetch4, and the result looks identical to the newer HD 5770 (r700 I think). 6 fewer failures, I assume a depth format is not supported.
https://www.khronos.org/registry/OpenGL/extensions/AMD/AMD_texture_texture4.txt may also be an interesting read. The other interesting side-note is that r500 does not support unconditional NP2 textures, so we'd have a case where FETCH4 could be used with GL_ARB_texture_rectangle, but I seriously doubt that this is worth the effort because this hardware is more than 10 years old. I doubt Mesa exposes AMD_texture_texture4 on the r300g driver.
Am 10.01.2019 um 18:52 schrieb Stefan Dösinger [email protected]:
Another pile of screenshots, this time the format tests. Only FFP to reduce my work. I also assigned the names manually and might have screwed up.
Two observations: *) AMD enables fetch4 on A8 and ARGB8 too. *) Comparing the expected and actual results reinforce my suspicion that the output swizzle differs
I don't know how it picks the color in the multi-channel textures. The A8 results suggest it doesn't just pick the red channel, and the R16F and R32F results suggest it doesn't just pick the alpha channel. I guess we're safe picking the red channel and can drop the fetch4 enable / disable based on texture formats.
<A8R8G8B8_ffp.png><A8_ffp.png><L8_ffp.png><L16_ffp.png><R16F_ffp.png>
Am 10.01.2019 um 18:30 schrieb Stefan Dösinger <[email protected]>:
Hi,
I tested the tests on Windows 7 with a Radeon HD 5770. There are plenty of failures, for some of which I have an explanation, but others I don't.
Visual2.txt is the output of the test. I also attached various screenshots
*) ffp_texld is the first test loop. The fixed function and texld output looks the same. *) texldp is the texldp shader. It appears that AMD still does the projected read at the same time it does the fetch4 lookup. *) texldl and texldd suggest that AMD drivers do not apply FETCH4 for those instructions *) texldb however behaves like texld. Presumably it also does that with fetch4 off on the single-level texture :-)
What I can't explain is why the colors even in the FFP and texld case are wrong. Maybe intel applies a wrong swizzle?
Please ignore the white border on some of the PNGs. I was sloppy cutting off the irrelevant parts of the screenshots, and somehow windows didn't want to screenshot the individual window.
I'll look into the remaining failures and send more screenshots to avoid overshooting the mailing list size limit.
Where can we go from here? Because apparently applications cannot rely on a specific GPU behavior we have more freedom. Generally I recommend to stick with AMDs behavior, but if it is too cumbersome in a corner case we can write a WARN or FIXME if an application even uses it. If the different colors are a result of a different swizzle and nobody seems to have noticed yet, it is even more likely that nobody actually uses FETCH4. In that case we could make the entire FETCH4 thing a FIXME, enable the DF24 depth format and see if any application ever hits the FIXME. It'd be a pity to have your work just gathering dust on the mailing list archives though.
We could also implement FETCH4 the AMD way, and mark all intel results broken(). Or we even remove the more corner case tests that produce contrary results, like texldp and texldd, but keep the baseline tests like ffp and texld. The important thing is that we preserve the knowledge the tests have found in a comment.
Stefan
<texldb.png> <texldd_texldl.png> <visual2.txt><texldp.png><ffp_texld.png>
Hi Stefan,
Let me summarize my discoveries here. First, it is good that the tests properly exercise the whole fetch4 implementation. We can resolve the differences in results based on which platform we believe is right.
*Differences:* - *Fetch4 offset*. It looks to me like my results match the spec, while your results are different. The spec states that fetch4 "*allows the fetching of four unfiltered neighboring texels (2x2 texel block) in a single texture instruction.*" It also points to DirectX 11 Gather as a similar instruction with a different swizzle. Gather: "*Gets the four samples (red component only) that would be used for bilinear interpolation when sampling a texture*." So if we assume that is true, then the results need to have an offset, like in the newest Intel implementation (see the screenshots with OFF/ON; I highlighted the texel locations in linear filtering): [image: image.png] [image: image.png]
- *Swizzle*. Checking the spec, it looks like yours is what the spec says and mine has completely different values. The spec says that it should be: A R G B. But mine are: G A R B.
My theory is that this might be an error in the Intel implementation, simply because they implemented it with the same swizzle as in Gather/textureGather. Anyway, if someone can shed some light on why the offset appears with fetch4, that would be helpful. @Henri
BR, Daniel
El jue., 10 ene. 2019 a las 18:36, Stefan Dösinger (<[email protected]>) escribió:
And one last mail, here is a log from Windows 7 on my Radeon X1600 (r500) machine. This was the GPU where AMD originally introduced fetch4, and the result looks identical to the newer HD 5770 (r700 I think). 6 fewer failures, I assume a depth format is not supported.
https://www.khronos.org/registry/OpenGL/extensions/AMD/AMD_texture_texture4.txt may also be an interesting read. The other interesting side-note is that r500 does not support unconditional NP2 textures, so we'd have a case where FETCH4 could be used with GL_ARB_texture_rectangle, but I seriously doubt that this is worth the effort because this hardware is more than 10 years old. I doubt Mesa exposes AMD_texture_texture4 on the r300g driver.
Am 10.01.2019 um 18:52 schrieb Stefan Dösinger <[email protected]>:
Another pile of screenshots, this time the format tests. Only FFP to reduce my work. I also assigned the names manually and might have screwed up.
Two observations: *) AMD enables fetch4 on A8 and ARGB8 too. *) Comparing the expected and actual results reinforce my suspicion that the output swizzle differs
I don't know how it picks the color in the multi-channel textures. The A8 results suggest it doesn't just pick the red channel, and the R16F and R32F results suggest it doesn't just pick the alpha channel. I guess we're safe picking the red channel and can drop the fetch4 enable / disable based on texture formats.
<A8R8G8B8_ffp.png><A8_ffp.png><L8_ffp.png><L16_ffp.png><R16F_ffp.png>
Am 10.01.2019 um 18:30 schrieb Stefan Dösinger <[email protected]>:
Hi,
I tested the tests on Windows 7 with a Radeon HD 5770. There are plenty of failures, for some of which I have an explanation, but others I don't.
Visual2.txt is the output of the test. I also attached various screenshots
*) ffp_texld is the first test loop. The fixed function and texld output looks the same. *) texldp is the texldp shader. It appears that AMD still does the projected read at the same time it does the fetch4 lookup. *) texldl and texldd suggest that AMD drivers do not apply FETCH4 for those instructions *) texldb however behaves like texld. Presumably it also does that with fetch4 off on the single-level texture :-)
What I can't explain is why the colors even in the FFP and texld case are wrong. Maybe intel applies a wrong swizzle?
Please ignore the white border on some of the PNGs. I was sloppy cutting off the irrelevant parts of the screenshots, and somehow windows didn't want to screenshot the individual window.
I'll look into the remaining failures and send more screenshots to avoid overshooting the mailing list size limit.
Where can we go from here? Because apparently applications cannot rely on a specific GPU behavior we have more freedom. Generally I recommend to stick with AMDs behavior, but if it is too cumbersome in a corner case we can write a WARN or FIXME if an application even uses it. If the different colors are a result of a different swizzle and nobody seems to have noticed yet, it is even more likely that nobody actually uses FETCH4. In that case we could make the entire FETCH4 thing a FIXME, enable the DF24 depth format and see if any application ever hits the FIXME. It'd be a pity to have your work just gathering dust on the mailing list archives though.
We could also implement FETCH4 the AMD way, and mark all intel results broken(). Or we even remove the more corner case tests that produce contrary results, like texldp and texldd, but keep the baseline tests like ffp and texld. The important thing is that we preserve the knowledge the tests have found in a comment.
Stefan
<texldb.png> <texldd_texldl.png> <visual2.txt><texldp.png><ffp_texld.png>
Am 14.01.19 um 23:22 schrieb DarkZeros:
*_Differences_:*
- *Fetch4 offset*. It looks to me like my results match the spec,
while your results are different. The spec states that fetch4 "/allows the fetching of four unfiltered neighboring texels (2x2 texel block) in a single texture instruction./" Also points to DirectX11 Gather as a similar instruction with different swizzle. Gather: "/Gets the four samples (red component only) that would be used for bilinear interpolation when sampling a texture/." So if we assume that is true then the results need to have an offset, like in the newest Intel implementation (see screenshot with OFF/ON, I highlighted the texels locations in linear filtering ):
Yes, I think you are right, the results you get on your Intel GPU make more sense than the ones I get on my slightly older Intel and the AMD GPU.
At the edges of the output image you'd expect a wrap-around to the other side of the texture. That this does not happen on AMD suggests that they add a 0.5x0.5 texel offset when fetch4 is used. Why they would do that is beyond me though.
I haven't found any Microsoft documents that explicitly state that the texture coordinates in d3d9 are at the center of the texel (like they are in GL and d3d10), unlike the viewport coordinates, that are in the corner of the framebuffer pixels. However, a number of third party documentation suggests this. That would also explain why we never had to worry about a half texel offset with textures.
I guess AMD incorrectly tried to correct the d3d9 weirdness.
Since AMD invented fetch4 we should stick to their behavior, if possible.
- *Swizzle*. Checking the spec, it looks like yours is what the spec
says and mine have completely different values. The spec says that it should be: A R G B, but mine are: G A R B
My theory is that this might be an error in the Intel implementation, simply because they implemented it with the same swizzle as in Gather/textureGather.
If it were G B A R
I could try to explain it away with a top left vs bottom left coordinate origin, but the swizzle you actually see I have no idea...
Hi Stefan,
Instead of keeping the discussion private I prefer to send it to the same thread.
I have a new file for you to test: https://www.dropbox.com/s/9a3t1j2nsy3nlm0/d3d9_crosstest.exe?dl=0 I made some big mistakes in the first one (e.g. the Wine and Windows flags were inverted). This new one will hopefully only fail in the middle of the projection (due to rounding, which I will fix tomorrow) and on DFXX.
The screenshot of texldp and DF16/24 are the only ones I need to finalize the implementation. For reference, these are my screenshots of DF16/DF24 on wine. [image: image.png]
BTW, according to your data, the R500 and HD5700 behave differently on the texldl/texldd/texldb. The "HD 5700" has fetch4 off, while the R500 has fetch4 ON and behaves like "texld". I am making wine behave like the R500 / Intel and keep those ON with Fetch4. I have a suspicion that this may have been affected due to the change of D3DTEXF_LINEAR / D3DTEXF_POINT. But I don't think so....
BR, Daniel
El mar., 15 ene. 2019 a las 10:45, Stefan Dösinger (< [email protected]>) escribió:
Am 14.01.19 um 23:22 schrieb DarkZeros:
*_Differences_:*
- *Fetch4 offset*. It looks to me like my results match the spec,
while your results are different. The spec states that fetch4 "/allows the fetching of four unfiltered neighboring texels (2x2 texel block) in a single texture instruction./" Also points to DirectX11 Gather as a similar instruction with different swizzle. Gather: "/Gets the four samples (red component only) that would be used for bilinear interpolation when sampling a texture/." So if we assume that is true then the results need to have an offset, like in the newest Intel implementation (see screenshot with OFF/ON, I highlighted the texels locations in linear filtering ):
Yes, I think you are right, the results you get on your Intel GPU make more sense than the ones I get on my slightly older Intel and the AMD GPU.
At the edges of the output image you'd expect a wrap-around to the other side of the texture. That this does not happen on AMD suggests that they add a 0.5x0.5 texel offset when fetch4 is used. Why they would do that is beyond me though.
I haven't found any Microsoft documents that explicitly state that the texture coordinates in d3d9 are at the center of the texel (like they are in GL and d3d10), unlike the viewport coordinates, that are in the corner of the framebuffer pixels. However, a number of third party documentation suggests this. That would also explain why we never had to worry about a half texel offset with textures.
I guess AMD incorrectly tried to correct the d3d9 weirdness.
Since AMD invented fetch4 we should stick to their behavior, if possible.
- *Swizzle*. Checking the spec, it looks like yours is what the spec
says and mine have completely different values. The spec says that it should be: A R G B, but mine are: G A R B
My theory is that this might be an error in the Intel implementation, simply because they implemented it with the same swizzle as in Gather/textureGather.
If it were G B A R
I could try to explain it away with a top left vs bottom left coordinate origin, but the swizzle you actually see I have no idea...
Hi,
Am 22.01.2019 um 01:25 schrieb DarkZeros [email protected]:
Hi Stefan,
Instead of keeping the discussion private I prefer to send it to the same thread.
Good idea :-)
I have a new file for you to test: https://www.dropbox.com/s/9a3t1j2nsy3nlm0/d3d9_crosstest.exe?dl=0 I made some big mistakes in the first one (e.g. the Wine and Windows flags were inverted). This new one will hopefully only fail in the middle of the projection (due to rounding, which I will fix tomorrow) and on DFXX.
Can you send me a diff for the tests? I have no problem compiling the crosstests myself, but a compiled .exe severely limits my abilities to experiment with it.
Hi,
Please find attached (in compressed format) the set of patches. Hopefully the testbot will not compile and run them.
I would say the Wine implementation is almost finished; projection and the 0.5 texel offset are working. 3D textures are not, but they don't work on AMD anyway. I just need the AMD values for texldp and DF16/24 in order to adjust the expected test results (and maybe correct some minor deviations).
BR, Daniel
El mar., 22 ene. 2019 a las 20:33, Stefan Dösinger (< [email protected]>) escribió:
Hi,
Am 22.01.2019 um 01:25 schrieb DarkZeros [email protected]:
Hi Stefan,
Instead of keeping the discussion private I prefer to send it to the same thread.
Good idea :-)
I got a new file for you to test. https://www.dropbox.com/s/9a3t1j2nsy3nlm0/d3d9_crosstest.exe?dl=0 I made some big mistakes on the first one (like wine and windows flags were inverted). This new one will hopefully only fail on the middle of the projection (due to rounding i will fix tomorrow), and DFXX.
Can you send me a diff for the tests? I have no problem compiling the crosstests myself, but a compiled .exe severely limits my abilities to experiment with it.
If you test it, test this new set, some results are now corrected.
El mié., 23 ene. 2019 a las 1:29, DarkZeros ([email protected]) escribió:
Hi,
Please find attached (compressed format) the set of patches, Hopefully the testbot will not compile and run them.
I would say the implementation on Wine is almost finished, projection and 0.5 texel offset are working. 3D textures are not, but don't work on AMD anyway. I just need the values for texldp on AMD, DF16/24 in order to adjust the test results expected (and maybe correct some minor deviation)
BR, Daniel
El mar., 22 ene. 2019 a las 20:33, Stefan Dösinger (< [email protected]>) escribió:
Hi,
Am 22.01.2019 um 01:25 schrieb DarkZeros [email protected]:
Hi Stefan,
Instead of keeping the discussion private I prefer to send it to the same thread.
Good idea :-)
I got a new file for you to test. https://www.dropbox.com/s/9a3t1j2nsy3nlm0/d3d9_crosstest.exe?dl=0 I made some big mistakes on the first one (like wine and windows flags were inverted). This new one will hopefully only fail on the middle of the projection (due to rounding i will fix tomorrow), and DFXX.
Can you send me a diff for the tests? I have no problem compiling the crosstests myself, but a compiled .exe severely limits my abilities to experiment with it.
Hi,
I got around to running your tests on Windows. On my Radeon HD 5770 the texldp results match what I get on your Wine implementation. The DF16 and DF24 results differ in the green and blue channels - it seems that this particular GPU uses "X001" for both of them. See df24_off_evergreen.png, texldp_evergreen.png and evergreen.txt. The attached screenshots show the output with your filtering settings (mag = linear, min = point, mip = linear).
Testing on the r500 GPU reveals that the weird 0.5 pixel offset is gone. This is a result of the linear mag filter; the min and mip filters appear to have no effect. I recommend setting the filters in the way AMD mandates and continuing to go for the weird 0.5 texel offset.
I don't think the difference on the r500 card is a result of clamping at the border. The entire image is shifted, including the pixels that sample from the center texels. See r500_ffp_point.png and r500_ffp_lin.png.
Switching to linear mag filtering obviously causes test failures in the tests that have fetch4 off. I'll let you handle the fallout of that and then re-run the tests on my two machines.
Maybe I'll get around to installing Windows 10 on my new Mac with an AMD Polaris GPU. That should give some clues how the behavior developed since then.
Stefan
Am 24.01.19 um 02:47 schrieb DarkZeros:
If you test it, test this new set, some results are now corrected.
El mié., 23 ene. 2019 a las 1:29, DarkZeros (<[email protected] mailto:[email protected]>) escribió:
Hi, Please find attached (compressed format) the set of patches, Hopefully the testbot will not compile and run them. I would say the implementation on Wine is almost finished, projection and 0.5 texel offset are working. 3D textures are not, but don't work on AMD anyway. I just need the values for texldp on AMD, DF16/24 in order to adjust the test results expected (and maybe correct some minor deviation) BR, Daniel El mar., 22 ene. 2019 a las 20:33, Stefan Dösinger (<[email protected] <mailto:[email protected]>>) escribió: Hi,
Am 22.01.2019 um 01:25 schrieb DarkZeros <[email protected] <mailto:[email protected]>>: Hi Stefan, Instead of keeping the discussion private I prefer to send it to the same thread.
Good idea :-)
I got a new file for you to test. https://www.dropbox.com/s/9a3t1j2nsy3nlm0/d3d9_crosstest.exe?dl=0 I made some big mistakes on the first one (like wine and windows flags were inverted). This new one will hopefully only fail on the middle of the projection (due to rounding i will fix tomorrow), and DFXX.
Can you send me a diff for the tests? I have no problem compiling the crosstests myself, but a compiled .exe severely limits my abilities to experiment with it.
Am 25.01.19 um 17:57 schrieb Stefan Dösinger:
Maybe I'll get around to installing Windows 10 on my new Mac with an AMD Polaris GPU. That should give some clues how the behavior developed since then.
I totally forgot that I had a desktop box with a Radeon RX 580 card (Polaris I think) and Windows 10 under my desk. Attached are results with your filtering settings and point filtering everywhere.
On a quick look it seems that it also has the 0.5 texel offset, but has some precision issues with the colors it writes. You may have to increase the allowed slop in the color comparisons. Maybe you can also add or subtract one from the expected results and keep everything happy with the current slop.
Hi,
I'm trying to implement FETCH4 as well for nine. Unfortunately, my windows test system died, thus I rely on your experiments.
I experimented with 3DMark06, disabling support for D24X8 texturing to force FETCH4. I noticed a few things: . FETCH4 is used for DF24 sampling. A small -0.0002 offset is added to the coordinates. . For some calls it sets FETCH4 on DXT1 cube textures. . The texturing support for D24X8 is queried with usage=0x20002, i.e. with D3DUSAGE_QUERY_FILTER. I guess this checks for PCF support.
My initial patch just let FETCH4 trigger gather4 for the red channel for any texture. This seems to be wrong though as the game renders black and white as a result.
Clearly looking at how the shader is using the tex output, it was expecting the FETCH4 on the DXT1 cube texture to be a normal tex instruction. There may also be other weird usages I missed.
My understanding of the proposed Wine patchset is that you shouldn't hit the same problem, as you restrict FETCH4 support to only a small subset of textures. That raises another question, though: does FETCH4 on a non-FETCH4 texture really do nothing? I mean, maybe, just like for the FETCH4 formats, it's possible that projection, LOD, etc. get ignored.
Axel
On 25/01/2019 19:24, Stefan Dösinger wrote:
Am 25.01.19 um 17:57 schrieb Stefan Dösinger:
Maybe I'll get around to installing Windows 10 on my new Mac with an AMD Polaris GPU. That should give some clues how the behavior developed since then.
I totally forgot that I had a desktop box with a Radeon RX 580 card (Polaris I think) and Windows 10 under my desk. Attached are results with your filtering settings and point filtering everywhere.
On a quick look it seems that it also has the 0.5 texel offset, but has some precision issues with the colors it writes. You may have to increase the allowed slop in the color comparisons. Maybe you can also add or subtract one from the expected results and keep everything happy with the current slop.
Hi,
Another piece of information about the 0.5 offset is the following comment in the r600 gallium driver: /* Gather4 should follow the same rules as bilinear filtering, but the hardware * incorrectly forces nearest filtering if the texture format is integer. * The only effect it has on Gather4, which always returns 4 texels for * bilinear filtering, is that the final coordinates are off by 0.5 of * the texel size. * * The workaround is to subtract 0.5 from the unnormalized coordinates, * or (0.5 / size) from the normalized coordinates. */ The driver implements the workaround.
r600's GATHER4 is likely to be the original instruction meant for FETCH4, since, except for Cayman, the driver reorders the result to match the GL version.
On 27/01/2019 00:55, Axel Davy wrote:
Hi,
I'm trying to implement FETCH4 as well for nine. Unfortunately, my windows test system died, thus I rely on your experiments.
I experimented with 3DMark06, disabling support for D24X8 texturing to force FETCH4. I noticed a few things: . FETCH4 is used for DF24 sampling. A small -0.0002 offset is added to the coordinates. . For some calls it sets FETCH4 on DXT1 cube textures. . The texturing support for D24X8 is fetched with usage=0x20002, ie with D3DUSAGE_QUERY_FILTER. I guess this checks PCF support.
My initial patch just let FETCH4 trigger gather4 for the red channel for any texture. This seems to be wrong though as the game renders black and white as a result.
Clearly looking at how the shader is using the tex output, it was expecting the FETCH4 on the DXT1 cube texture to be a normal tex instruction. There may also be other weird usages I missed.
My understanding of the wine proposed patchset is that you shouldn't hit the same problem as you restrict FETCH4 support for only a small subset of textures. That raises some other questions, though: Do FETCH4 on some non-FETCH4 texture do really nothing ? I mean, maybe just like for the FETCH4 formats, it's possible projection, lod, etc get ignored.
Axel
On 25/01/2019 19:24, Stefan Dösinger wrote:
Am 25.01.19 um 17:57 schrieb Stefan Dösinger:
Maybe I'll get around to installing Windows 10 on my new Mac with an AMD Polaris GPU. That should give some clues how the behavior developed since then.
I totally forgot that I had a desktop box with a Radeon RX 580 card (Polaris I think) and Windows 10 under my desk. Attached are results with your filtering settings and point filtering everywhere.
On a quick look it seems that it also has the 0.5 texel offset, but has some precision issues with the colors it writes. You may have to increase the allowed slop in the color comparisons. Maybe you can also add or subtract one from the expected results and keep everything happy with the current slop.
On 27/01/2019 01:04, Axel Davy wrote:
Hi,
Another info about the 0.5 offset is the following comments in the r600 gallium driver: /* Gather4 should follow the same rules as bilinear filtering, but the hardware * incorrectly forces nearest filtering if the texture format is integer. * The only effect it has on Gather4, which always returns 4 texels for * bilinear filtering, is that the final coordinates are off by 0.5 of * the texel size.
This is interesting, and I guess it explains why I saw this behavior on r500 only when point mag filtering was enabled, but not when linear mag filters were set.
Does that also apply for minification filters?
r600's GATHER4 is likely to be the original instruction meant for FETCH4, as except for Cayman, the driver reorders the result to match the gl's version.
Ya, that was my impression from GL_AMD_texture_texture4. I guess this extension could in theory be implemented in r300g for r500 cards but currently isn't.
I experimented with 3DMark06, disabling support for D24X8 texturing to force FETCH4.
Do you know any application that uses fetch4 without having an alternative codepath, or insisting on using it on AMD cards even though an alternative codepath like PCF is supported by the application and used on Nvidia cards? For us, the reason to implement fetch4 is because DF24 implies it, and there are games like CS:GO that insist on using DF24 on AMD cards even though it happily uses INTZ on Nvidia cards.
On 28/01/2019 11:16, Stefan Dösinger wrote:
On 27/01/2019 01:04, Axel Davy wrote:
Hi,
Another info about the 0.5 offset is the following comments in the r600 gallium driver: /* Gather4 should follow the same rules as bilinear filtering, but the hardware * incorrectly forces nearest filtering if the texture format is integer. * The only effect it has on Gather4, which always returns 4 texels for * bilinear filtering, is that the final coordinates are off by 0.5 of * the texel size.
This is interesting, and I guess it explains why I saw this behavior on r500 only when point mag filtering was enabled, but not when linear mag filters were set.
Does that also apply for minification filters?
I'm not able to say, I guess you'd have to ask an AMD dev.
I experimented with 3DMark06, disabling support for D24X8 texturing to force FETCH4.
Do you know any application that uses fetch4 without having an alternative codepath, or insisting on using it on AMD cards even though an alternative codepath like PCF is supported by the application and used on Nvidia cards? For us, the reason to implement fetch4 is because DF24 implies it, and there are games like CS:GO that insist on using DF24 on AMD cards even though it happily uses INTZ on Nvidia cards.
Well I think it makes sense to use DF24 over INTZ if one doesn't need stencil.
As for the apps, apparently some old AMD demos are supposed to use it, but I haven't tested.
Axel
Hi,
I drafted what I think is a final implementation. This one should pass on all the HW you have. After that, I will clean the patches for submission.
Wine results are set to emulate the results with * +0.5 offset (unconditionally) * AMD swizzle, * 3d textures off, * Fetch4 on for all texldXX instructions. (texldp projected).
- Some AMD HW decides not to enable fetch4 on texldl, texldb, texldd. But I think it makes more sense to have it on, since some AMD devices have it on, and Intel as well. - 3D textures are a mess: some enable fetch4, some do not, some round the z axis to the nearest texel, and some set it to 0. The best and simplest thing to do, in my opinion, is to consider it totally broken and leave it disabled, like some AMD HW does. Also because it is quite hard to implement in GL.
In the end I increased the test range to 2, to overcome rounding issues.
PS: Thanks, Axel, for the comment on the R500 bug. That is really helpful and explains why we are seeing the results we have. In the end, it looks like Intel is indeed following the spec, and AMD is the one that introduced the bug. It is funny, though, that AMD never amended the spec to clarify what they considered to be the default fetch4 behavior on their devices.
BR, Daniel
El lun., 28 ene. 2019 a las 22:42, Axel Davy ([email protected]) escribió:
On 28/01/2019 11:16, Stefan Dösinger wrote:
On 27/01/2019 01:04, Axel Davy wrote:
Hi,
Another info about the 0.5 offset is the following comments in the r600 gallium driver: /* Gather4 should follow the same rules as bilinear filtering, but the hardware * incorrectly forces nearest filtering if the texture format is integer. * The only effect it has on Gather4, which always returns 4 texels for * bilinear filtering, is that the final coordinates are off by 0.5 of * the texel size.
This is interesting, and I guess it explains why I saw this behavior on r500 only when point mag filtering was enabled, but not when linear mag filters were set.
Does that also apply for minification filters?
I'm not able to say, I guess you'd have to ask an AMD dev.
I experimented with 3DMark06, disabling support for D24X8 texturing to force FETCH4.
Do you know any application that uses fetch4 without having an alternative codepath, or insisting on using it on AMD cards even though an alternative codepath like PCF is supported by the application and used on Nvidia cards? For us, the reason to implement fetch4 is because DF24 implies it, and there are games like CS:GO that insist on using DF24 on AMD cards even though it happily uses INTZ on Nvidia cards.
Well I think it makes sense to use DF24 over INTZ if one doesn't need stencil.
As for the apps, apparently some old AMD demos are supposed to use it, but I haven't tested.
Axel
Hi,
Those tests look pretty good. One of the tests appears to expect a wrong swizzle (evergreen & r500), and the DF16/DF24 tests fail on r500 due to green / blue handling that is yet again different from what we've seen elsewhere (XXX1).
I'd say just AND the OFF output color with 0xffff0000 and add a comment that green and blue are unreliable.
Am 31.01.2019 um 02:30 schrieb DarkZeros [email protected]:
Hi,
I drafted what I think is a final implementation. This one should pass on all the HW you have. After that, I will clean the patches for submission.
Wine results are set to emulate the results with
- +0.5 offset (unconditionally)
- AMD swizzle,
- 3d textures off,
- Fetch4 on for all texldXX instructions. (texldp projected).
- Some AMD HW decides not to enable fetch4 on texldl, texldb, texldd. But I think it makes more sense to have it on, since some AMD devices have it on, and intel as well.
- 3D textures are a mess: some enable fetch4, some do not, some round the z axis to the nearest texel, and some set it to 0. The best and simplest thing to do, in my opinion, is to consider it totally broken and leave it disabled, like some AMD HW does. Also because it is quite hard to implement in GL.
In the end I increased the test range to 2, to overcome rounding issues.
PS: Thanks, Axel, for the comment on the R500 bug. That is really helpful and explains why we are seeing the results we have. In the end, it looks like Intel is indeed following the spec, and AMD is the one that introduced the bug. It is funny, though, that AMD never amended the spec to clarify what they considered to be the default fetch4 behavior on their devices.
BR, Daniel
El lun., 28 ene. 2019 a las 22:42, Axel Davy (<[email protected] mailto:[email protected]>) escribió: On 28/01/2019 11:16, Stefan Dösinger wrote:
On 27/01/2019 01:04, Axel Davy wrote:
Hi,
Another info about the 0.5 offset is the following comments in the r600 gallium driver: /* Gather4 should follow the same rules as bilinear filtering, but the hardware * incorrectly forces nearest filtering if the texture format is integer. * The only effect it has on Gather4, which always returns 4 texels for * bilinear filtering, is that the final coordinates are off by 0.5 of * the texel size.
This is interesting, and I guess it explains why I saw this behavior on r500 only when point mag filtering was enabled, but not when linear mag filters were set.
Does that also apply for minification filters?
I'm not able to say, I guess you'd have to ask an AMD dev.
I experimented with 3DMark06, disabling support for D24X8 texturing to force FETCH4.
Do you know any application that uses fetch4 without having an alternative codepath, or insisting on using it on AMD cards even though an alternative codepath like PCF is supported by the application and used on Nvidia cards? For us, the reason to implement fetch4 is because DF24 implies it, and there are games like CS:GO that insist on using DF24 on AMD cards even though it happily uses INTZ on Nvidia cards.
Well I think it makes sense to use DF24 over INTZ if one doesn't need stencil.
As for the apps, apparently some old AMD demos are supposed to use it, but I haven't tested.
Axel
<patches_v3.7z>
- Test texld/texldp/texldd/texldb/texldl in PS and FFP - Test supported/unsupported texture formats on FFP/texld/texldp - Test 3dtextures (Disabled, each platform has different results) - Test depth textures DF16/DF24 with fetch4 on PS (FFP is broken on windows)
Signed-off-by: Daniel Ansorregui [email protected] --- dlls/d3d9/tests/visual.c | 644 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 644 insertions(+)
diff --git a/dlls/d3d9/tests/visual.c b/dlls/d3d9/tests/visual.c index c06acb77d4..028fc23078 100644 --- a/dlls/d3d9/tests/visual.c +++ b/dlls/d3d9/tests/visual.c @@ -15234,6 +15234,649 @@ done: DestroyWindow(window); }
+static void fetch4_test(void) +{ + static const DWORD vs_code[] = + { + 0xfffe0300, /* vs_3_0 */ + 0x0200001f, 0x80000000, 0x900f0000, /* dcl_position v0 */ + 0x0200001f, 0x80000005, 0x900f0001, /* dcl_texcoord v1 */ + 0x0200001f, 0x80000000, 0xe00f0000, /* dcl_position o0 */ + 0x0200001f, 0x80000005, 0xe00f0001, /* dcl_texcoord o1 */ + 0x02000001, 0xe00f0000, 0x90e40000, /* mov o0, v0 */ + 0x02000001, 0xe00f0001, 0x90e40001, /* mov o1, v1 */ + 0x0000ffff + }; + static const DWORD ps_code_texld[] = + { + /* Test texld */ + 0xffff0300, /* ps_3_0 */ + 0x0200001f, 0x80000005, 0x900f0000, /* dcl_texcoord v0 */ + 0x0200001f, 0x90000000, 0xa00f0800, /* dcl_2d s0 */ + 0x03000042, 0x800f0000, 0x90e40000, 0xa0e40800, /* texld r0, v0, s0 */ + 0x02000001, 0x800f0800, 0x80e40000, /* mov oC0, r0 */ + 0x0000ffff /* end */ + }; + static const DWORD ps_code_texldp[] = + { + /* Test texldp : AMD and Wine uses the projection on Fetch4, Intel UHD 620 does not apply it */ + 0xffff0300, /* ps_3_0 */ + 0x0200001f, 0x80000005, 0x900f0000, /* dcl_texcoord v0 */ + 0x0200001f, 0x90000000, 0xa00f0800, /* dcl_2d s0 */ + 0x05000051, 0xa00f0000, 0x00000000, 0x00000000, 0x00000000, 0x40000000, /* def c0, 0.0, 0.0, 0.0, 2.0 */ + 0x02000001, 0x80030000, 0x90540000, /* mov r0.xy, v0.xyyy */ + 0x02000001, 0x800c0000, 0xa0fe0000, /* mov r0.zw, c0.zwww */ + 0x03010042, 0x800f0000, 0x80e40000, 0xa0e40800, /* texldp r0, r0, s0 */ + 0x02000001, 0x800f0800, 0x80e40000, /* mov oC0, r0 */ + 0x0000ffff, /* end */ + }; + static const DWORD ps_code_texldd[] = + { + /* Test texldd : Fetch4 uses the same D3D state as LOD bias, therefore disables LOD. + * Sampling LOD gradient should be ignored. 
Same result as texld */ + /* NOTE: Radeon HD 5700 driver 8.17.10.1404 disables Fetch4 on texldb */ + 0xffff0300, /* ps_3_0 */ + 0x0200001f, 0x80000005, 0x900f0000, /* dcl_texcoord v0 */ + 0x0200001f, 0x90000000, 0xa00f0800, /* dcl_2d s0 */ + 0x05000051, 0xa00f0000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, /* def c0, 0.5, 0.5, 0.5, 0.5 */ + 0x05000051, 0xa00f0001, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, /* def c0, 1.0, 1.0, 1.0, 1.0 */ + 0x02000001, 0x800f0002, 0xa0e40000, /* mov r2, c0 */ + 0x0500005d, 0x800f0000, 0x90e40000, 0xa0e40800, 0xa0e40000, 0x80e40002, /* texldd r0, v0, s0, c0, r2 */ + 0x02000001, 0x800f0800, 0x80e40000, /* mov oC0, r0 */ + 0x0000ffff, /* end */ + }; + static const DWORD ps_code_texldb[] = + { + /* Test texldb : Fetch4 uses the same D3D state as LOD bias, therefore disables LOD. + * Same result as texld */ + /* NOTE: Radeon HD 5700 driver 8.17.10.1404 disables Fetch4 on texldb */ + 0xffff0300, /* ps_3_0 */ + 0x0200001f, 0x80000005, 0x900f0000, /* dcl_texcoord v0 */ + 0x0200001f, 0x90000000, 0xa00f0800, /* dcl_2d s0 */ + 0x05000051, 0xa00f0000, 0x00000000, 0x00000000, 0x40a00000, 0x40a00000, /* def c0, 0.0, 0.0, 5.0, 5.0 */ + 0x03000002, 0x800f0000, 0x90e40000, 0xa0e40000, /* add r0, v0, c0 */ + 0x03020042, 0x800f0000, 0x80e40000, 0xa0e40800, /* texldb r0, r0, s0 */ + 0x02000001, 0x800f0800, 0x80e40000, /* mov oC0, r0 */ + 0x0000ffff, /* end */ + }; + static const DWORD ps_code_texldl[] = + { + /* Test texldl : Fetch4 uses the same D3D state as LOD bias, therefore disables LOD. + * The explicit LOD level is then ignored. 
Same result as texld */ + /* NOTE: Radeon HD 5700 driver 8.17.10.1404 disables Fetch4 on texldl */ + 0xffff0300, /* ps_3_0 */ + 0x0200001f, 0x80000005, 0x900f0000, /* dcl_texcoord v0 */ + 0x0200001f, 0x90000000, 0xa00f0800, /* dcl_2d s0 */ + 0x05000051, 0xa00f0000, 0x00000000, 0x00000000, 0x3f000000, 0x3f000000, /* def c0, 0.0, 0.0, 0.5, 0.5 */ + 0x03000002, 0x800f0000, 0x90e40000, 0xa0e40000, /* add r0, v0, c0 */ + 0x0300005f, 0x800f0000, 0x80e40000, 0xa0e40800, /* texldl r0, r0, s0 */ + 0x02000001, 0x800f0800, 0x80e40000, /* mov oC0, r0 */ + 0x0000ffff, /* end */ + }; + static const DWORD ps_code_3d[] = + { + 0xffff0300, /* ps_3_0 */ + 0x0200001f, 0x80000005, 0x900f0000, /* dcl_texcoord v0 */ + 0x0200001f, 0xa0000000, 0xa00f0800, /* dcl_volume s0 */ + 0x03000042, 0x800f0000, 0x90e40000, 0xa0e40800, /* texld r0, v0, s0 */ + 0x02000001, 0x800f0800, 0x80e40000, /* mov oC0, r0 */ + 0x0000ffff /* end */ + }; + + static const struct + { + struct vec3 position; + struct vec3 texcoord; + } + quad[] = + { + /* Tilted on Z axis to get a depth gradient in the depth test */ + /* NOTE: Using 0.55f-0.6f to avoid rounding errors on depth tests */ + {{-1.0f, 1.0f, 1.0f}, {0.0f,0.0f,0.6f} }, + {{ 1.0f, 1.0f, 0.0f}, {1.0f,0.0f,0.6f} }, + {{-1.0f,-1.0f, 0.0f}, {0.0f,1.0f,0.6f} }, + {{ 1.0f,-1.0f, 0.0f}, {1.0f,1.0f,0.6f} } + }; + + static const struct + { + UINT x[4], y[4]; /* Matrix Sampling positions */ + D3DCOLOR color_amd[16]; /* AMD original implementation swizzle with -0.5 texel coord */ + D3DCOLOR color_intel[16]; /* Intel UHD 620 implementation swizzle with no texel coord correction */ + /* Wine follows the AMD immplementation, and consider an error the Intel one results + * However, the test will accept as valid the intel only if running on windows */ + D3DCOLOR color_3d_fetch4_off[16]; + D3DCOLOR color_fetch4_off[16]; + } + expected_colors = + { + { 40, 200, 360, 520}, + { 30, 150, 270, 390}, + /* AMD implementation - Wine implementation */ + {0x131202f2, 0x1211f2f1, 
0x1110f101, 0x10130102, + 0x02f204f4, 0xf2f1f4f3, 0xf101f303, 0x01020304, + 0x04f42322, 0xf4f32221, 0xf3032120, 0x03042023, + 0x23221312, 0x22211211, 0x21201110, 0x20231013}, + /* Intel UHD 620 implementation */ + {0x23102013, 0x22132312, 0x21122211, 0x20112110, + 0x13011002, 0x120213f2, 0x11f212f1, 0x10f11101, + 0x02030104, 0xf20402f4, 0xf1f4f2f3, 0x01f3f103, + 0x04200323, 0xf4230422, 0xf322f421, 0x0321f320}, + /* Fetch4 off on 3D textures */ + {0xff020202, 0xfff2f2f2, 0xfff1f1f1, 0xff010101, + 0xff050505, 0xfff4f4f4, 0xfff3f3f3, 0xff030303, + 0xff232323, 0xff222222, 0xff212121, 0xff202020, + 0xff131313, 0xff121212, 0xff111111, 0xff101010}, + /* Fetch4 off on 2D texture */ + {0x13131313, 0x12121212, 0x11111111, 0x10101010, + 0x02020202, 0xf2f2f2f2, 0xf1f1f1f1, 0x01010101, + 0x04040404, 0xf4f4f4f4, 0xf3f3f3f3, 0x03030303, + 0x23232323, 0x22222222, 0x21212121, 0x20202020} + }; + + static const DWORD fetch4_data[] = {0x10111213, + 0x01f1f202, + 0x03f3f404, + 0x20212223}; + + static struct + { + IDirect3DVertexShader9 *vs; + IDirect3DPixelShader9 *ps; + const DWORD *ps_code; + const char *name; + BOOL projection; /* The results should be projected (zoomed by 2) */ + BOOL allow_off; /* Do not enforce Fetch4 enabled on this one on Windows */ + } + shaders[] = + { + {NULL, NULL, NULL, "FFP", FALSE, FALSE}, + {NULL, NULL, ps_code_texld, "texld", FALSE, FALSE}, + {NULL, NULL, ps_code_texldp, "texldp", TRUE, FALSE}, + {NULL, NULL, ps_code_texldd, "texldd", FALSE, TRUE}, + {NULL, NULL, ps_code_texldb, "texldb", FALSE, TRUE}, + {NULL, NULL, ps_code_texldl, "texldl", FALSE, TRUE}, + }; + + static const struct + { + D3DFORMAT format; /* The format of the texture */ + DWORD data; /* The data we will write to the first line */ + UINT x, y; /* Where we expect the color to be */ + BOOL broken_wine; /* Do not check it on wine because is known ot be broken */ + D3DCOLOR color_amd[3]; /* Wine results. 
Results on AMD swizzle + texture offset */ + D3DCOLOR color_intel[3]; /* Results with intel UHD 620, intel swizzle + no texel offset */ + } + format_tests[] = + { + /* Enabled formats */ + {D3DFMT_L8, 0xff804010, 360, 270, FALSE, + {0x00004010, 0x00004010, 0x10400000}, + {0x40001000, 0x40001000, 0x40001000} + }, + {D3DFMT_L16, 0xff804010, 360, 270, FALSE, + {0x0000ff40, 0x0000ff40, 0x40ff0000}, + {0xff004000, 0xff004000, 0xff004000} + }, + {D3DFMT_R16F, 0x38003c00, 360, 270, FALSE, + {0x000080ff, 0x000080ff, 0xff800000}, + {0x8000ff00, 0x8000ff00, 0x8000ff00} + }, + {D3DFMT_R32F, 0x3f000000, 360, 270, FALSE, + {0x00000080, 0x00000080, 0x80000000}, + {0x00008000, 0x00008000, 0x00008000} + }, + + /* Disabled format on Intel, enabled on AMD, broken on wine + * since it is implemented with GL_ALPHA, and fetch4 will fetch RED value */ + {D3DFMT_A8, 0xff804010, 360, 270, TRUE, + {0x00004010, 0x00004010, 0x10400000}, + {0x00000000, 0x00000000, 0x00000000} + }, + + /* Disabled format */ + {D3DFMT_A8R8G8B8, 0xff804010, 360, 270, FALSE, + {0x00000000, 0x00000000, 0xff804010}, + {0x00000000, 0x00000000, 0xff804010} + }, + }; + + static const struct + { + D3DCOLOR color_off, color_amd, color_intel; + UINT x, y; + } + expected_depth[][4] = + { + { + /* This is the expected result for shadow samplers */ + {0xffffffff,0xffffffff,0xffffffff, 20, 15}, + {0xffffffff,0xffffffff,0xffffffff,260, 15}, + {0x00000000,0x00000000,0x00000000, 20,255}, + {0x00000000,0x00000000,0x00000000,260,135}, + }, + { + /* This is the expected result with DF16 */ + {0xfffe0000,0xfedfdfbf,0x202000ff, 20, 15}, + {0xff9f0000,0x9f7f7f5f,0x00bf009f,260, 15}, + {0xff800000,0x7f5f5f3f,0x9f000080, 20,255}, + {0xff600000,0x5f3f3f1f,0x80809f60,260,135}, + }, + { + /* This is the expected result with DF24 */ + {0xffff0000,0xffdfdfbf,0x202000ff, 20, 15}, + {0xff9f0000,0x9f7f7f5f,0x00bf009f,260, 15}, + {0xff800000,0x7f5f5f3f,0x9f000080, 20,255}, + {0xff600000,0x5f3f3f1f,0x80809f60,260,135}, + } + }; + + static const 
struct + { + D3DFORMAT format; + const char *name; + UINT index; + } + depth_tests[] = + { + {D3DFMT_D16_LOCKABLE, "D16_LOCKABLE", 0}, + {D3DFMT_D32, "D32", 0}, + {D3DFMT_D15S1, "D15S1", 0}, + {D3DFMT_D24S8, "D24S8", 0}, + {D3DFMT_D24X8, "D24X8", 0}, + {D3DFMT_D24X4S4, "D24X4S4", 0}, + {D3DFMT_D16, "D16", 0}, + {D3DFMT_D32F_LOCKABLE, "D32F_LOCKABLE", 0}, + {D3DFMT_D24FS8, "D24FS8", 0}, + {MAKEFOURCC('D','F','1','6'), "DF16", 1}, + {MAKEFOURCC('D','F','2','4'), "DF24", 2}, + }; + + const BOOL isWin = strcmp(winetest_platform, "wine"); + + IDirect3DSurface9 *original_ds, *original_rt, *rt; + IDirect3DVolumeTexture9 *texture3D; + IDirect3DPixelShader9 *ps_3d; + struct surface_readback rb; + IDirect3DVertexShader9 *vs; + IDirect3DTexture9 *texture; + IDirect3DDevice9 *device; + D3DLOCKED_RECT lr; + D3DLOCKED_BOX lb; + IDirect3D9 *d3d; + ULONG refcount; + D3DCAPS9 caps; + UINT i, j, k; + HWND window; + HRESULT hr; + + window = create_window(); + d3d = Direct3DCreate9(D3D_SDK_VERSION); + ok(!!d3d, "Failed to create a D3D object.\n"); + if (FAILED(IDirect3D9_CheckDeviceFormat(d3d, D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL, + D3DFMT_X8R8G8B8, D3DUSAGE_DEPTHSTENCIL, D3DRTYPE_TEXTURE, MAKEFOURCC('D','F','2','4')))) + { + skip("No DF24 support, skipping FETCH4 test.\n"); + goto done; + } + if (!(device = create_device(d3d, window, window, TRUE))) + { + skip("Failed to create a D3D device, skipping tests.\n"); + goto done; + } + + hr = IDirect3DDevice9_GetDeviceCaps(device, &caps); + ok(SUCCEEDED(hr), "GetDeviceCaps failed, hr %#x.\n", hr); + if (caps.PixelShaderVersion < D3DPS_VERSION(3, 0)) + { + skip("No pixel shader 3.0 support, skipping FETCH4 test.\n"); + IDirect3DDevice9_Release(device); + goto done; + } + hr = IDirect3DDevice9_GetRenderTarget(device, 0, &original_rt); + ok(SUCCEEDED(hr), "GetRenderTarget failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_GetDepthStencilSurface(device, &original_ds); + ok(SUCCEEDED(hr), "GetDepthStencilSurface failed, hr %#x.\n", hr); + hr = 
IDirect3DDevice9_CreateRenderTarget(device, 8, 8, D3DFMT_A8R8G8B8, + D3DMULTISAMPLE_NONE, 0, FALSE, &rt, NULL); + ok(SUCCEEDED(hr), "CreateRenderTarget failed, hr %#x.\n", hr); + + /* Create our texture for FETCH4 shader testing */ + hr = IDirect3DDevice9_CreateTexture(device, 4, 4, 1, 0, D3DFMT_L8, D3DPOOL_MANAGED, &texture, NULL); + ok(hr == D3D_OK, "Failed to create texture, hr %#x.\n", hr); + hr = IDirect3DTexture9_LockRect(texture, 0, &lr, NULL, 0); + ok(hr == D3D_OK, "Failed to lock texture, hr %#x.\n", hr); + for (i = 0; i < ARRAY_SIZE(fetch4_data); ++i) + memcpy((BYTE *)lr.pBits + i*lr.Pitch, &fetch4_data[i], sizeof(fetch4_data[i])); + hr = IDirect3DTexture9_UnlockRect(texture, 0); + ok(hr == D3D_OK, "Failed to unlock texture, hr %#x.\n", hr); + + /* Create vertex shader */ + hr = IDirect3DDevice9_CreateVertexShader(device, vs_code, &vs); + ok(hr == D3D_OK, "IDirect3DDevice9_CreateVertexShader returned %08x\n", hr); + /* Prepare the pixel shaders */ + for (i = 0; i < ARRAY_SIZE(shaders); ++i) + { + if (shaders[i].ps_code) + { + hr = IDirect3DDevice9_CreatePixelShader(device, shaders[i].ps_code, &shaders[i].ps); + ok(SUCCEEDED(hr), "CreatePixelShader failed, hr %#x.\n", hr); + /* Copy vertex shader pointer if a PS is present */ + shaders[i].vs = vs; + } + } + hr = IDirect3DDevice9_CreatePixelShader(device, ps_code_3d, &ps_3d); + ok(SUCCEEDED(hr), "CreatePixelShader failed, hr %#x.\n", hr); + + hr = IDirect3DDevice9_SetFVF(device, D3DFVF_XYZ | D3DFVF_TEX1 | D3DFVF_TEXCOORDSIZE3(0)); + ok(SUCCEEDED(hr), "SetFVF failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetRenderState(device, D3DRS_ZENABLE, D3DZB_TRUE); + ok(SUCCEEDED(hr), "SetRenderState failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetRenderState(device, D3DRS_ZFUNC, D3DCMP_ALWAYS); + ok(SUCCEEDED(hr), "SetRenderState failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetRenderState(device, D3DRS_ZWRITEENABLE, TRUE); + ok(SUCCEEDED(hr), "SetRenderState failed, hr %#x.\n", hr); + hr = 
IDirect3DDevice9_SetRenderState(device, D3DRS_LIGHTING, FALSE); + ok(SUCCEEDED(hr), "SetRenderState failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetTexture(device, 0, (IDirect3DBaseTexture9 *)texture); + ok(hr == D3D_OK, "Failed to set texture, hr %#x.\n", hr); + + /* According to the spec, FETCH4 is enabled when D3DSAMP_MIPMAPLODBIAS == GET4 and also + * D3DSAMP_MAGFILTER == D3DTEXF_POINT. But apparently only GET4 is needed for it to get active. + * However, AMD HW r500 samples always as if POINT (nearest filtering) is selected with FETCH4 + * the driver later on corrected this by adding -0.5 texel coord. */ + hr = IDirect3DDevice9_SetSamplerState(device, 0, D3DSAMP_MIPMAPLODBIAS, MAKEFOURCC('G','E','T','4')); + ok(SUCCEEDED(hr), "SetSamplerState failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetSamplerState(device, 0, D3DSAMP_MAGFILTER, D3DTEXF_POINT); + ok(SUCCEEDED(hr), "SetSamplerState failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetSamplerState(device, 0, D3DSAMP_MINFILTER, D3DTEXF_POINT); + ok(SUCCEEDED(hr), "SetSamplerState failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetSamplerState(device, 0, D3DSAMP_MIPFILTER, D3DTEXF_POINT); + ok(SUCCEEDED(hr), "SetSamplerState failed, hr %#x.\n", hr); + + /*********************************************************************** + * Tests for FFP/PS correctness when using L8 texture with fetch4. 
* + ***********************************************************************/ + + /* Render with fetch4 and test if we obtain proper results for all sampler FFP/PS instructions */ + for (i = 0; i < ARRAY_SIZE(shaders); ++i) + { + hr = IDirect3DDevice9_SetVertexShader(device, shaders[i].vs); + ok(SUCCEEDED(hr), "Failed to set vertex shader, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetPixelShader(device, shaders[i].ps); + ok(SUCCEEDED(hr), "SetPixelShader failed, hr %#x.\n", hr); + + hr = IDirect3DDevice9_Clear(device, 0, NULL, D3DCLEAR_TARGET | D3DCLEAR_ZBUFFER, 0, 0.0f, 0); + ok(SUCCEEDED(hr), "Clear failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_BeginScene(device); + ok(SUCCEEDED(hr), "BeginScene failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_DrawPrimitiveUP(device, D3DPT_TRIANGLESTRIP, 2, quad, sizeof(*quad)); + ok(SUCCEEDED(hr), "DrawPrimitiveUP failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_EndScene(device); + ok(SUCCEEDED(hr), "EndScene failed, hr %#x.\n", hr); + + get_rt_readback(original_rt, &rb); + for (j = 0; j < ARRAY_SIZE(expected_colors.color_amd); ++j) + { + UINT x = expected_colors.x[j % 4]; + UINT y = expected_colors.y[j / 4]; + D3DCOLOR color = get_readback_color(&rb, x, y); + D3DCOLOR color_amd = expected_colors.color_amd[shaders[i].projection ? (j/4/2*4 + (j%4)/2) : j]; + D3DCOLOR color_intel = expected_colors.color_intel[j]; + ok(color_match(color, color_amd, 1) + || (isWin && (color_match(color, color_intel, 1) || shaders[i].allow_off)), + "Test %s Expected color 0x%08x at (%u, %u), got 0x%08x.\n", shaders[i].name, + color_amd, x, y, color); + } + release_surface_readback(&rb); + + hr = IDirect3DDevice9_Present(device, NULL, NULL, NULL, NULL); + ok(SUCCEEDED(hr), "Present failed, hr %#x.\n", hr); + } + + /*************************************************************************** + * Tests for fetch4 enable/disable with different texture formats in FFP/PS. 
* + ***************************************************************************/ + + /* Create the textures to test FETCH4 does work/not work there as expected */ + for (i = 0; i < ARRAY_SIZE(format_tests); ++i) + { + IDirect3DTexture9 *tex; + hr = IDirect3DDevice9_CreateTexture(device, 2, 2, 1, 0, format_tests[i].format, + D3DPOOL_MANAGED, &tex, NULL); + ok(hr == D3D_OK, "Failed to create texture, hr %#x.\n", hr); + hr = IDirect3DTexture9_LockRect(tex, 0, &lr, NULL, 0); + ok(hr == D3D_OK, "Failed to lock texture, hr %#x.\n", hr); + memcpy(lr.pBits, &format_tests[i].data, 4); + hr = IDirect3DTexture9_UnlockRect(tex, 0); + ok(hr == D3D_OK, "Failed to unlock texture, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetTexture(device, 0, (IDirect3DBaseTexture9 *)tex); + ok(hr == D3D_OK, "Failed to set texture, hr %#x.\n", hr); + + /* Test if FETCH4 is enabled/disabled when different textures are used with FFP/texld/texldp */ + for (j = 0; j < ARRAY_SIZE(format_tests[i].color_amd); ++j) + { + hr = IDirect3DDevice9_SetVertexShader(device, shaders[j].vs); + ok(SUCCEEDED(hr), "Failed to set vertex shader, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetPixelShader(device, shaders[j].ps); + ok(SUCCEEDED(hr), "SetPixelShader failed, hr %#x.\n", hr); + + hr = IDirect3DDevice9_Clear(device, 0, NULL, D3DCLEAR_TARGET | D3DCLEAR_ZBUFFER, 0, 0.0f, 0); + ok(SUCCEEDED(hr), "Clear failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_BeginScene(device); + ok(SUCCEEDED(hr), "BeginScene failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_DrawPrimitiveUP(device, D3DPT_TRIANGLESTRIP, 2, quad, sizeof(*quad)); + ok(SUCCEEDED(hr), "DrawPrimitiveUP failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_EndScene(device); + ok(SUCCEEDED(hr), "EndScene failed, hr %#x.\n", hr); + + get_rt_readback(original_rt, &rb); + D3DCOLOR color = get_readback_color(&rb, format_tests[i].x, format_tests[i].y); + D3DCOLOR color_amd = format_tests[i].color_amd[j]; + D3DCOLOR color_intel = format_tests[i].color_intel[j]; + 
todo_wine_if(format_tests[i].broken_wine) ok(color_match(color, color_amd, 1) + || (isWin && color_match(color, color_intel, 1)), + "Test %d,%s expected color 0x%08x at (%u, %u), got 0x%08x.\n", i, shaders[j].name, + color_amd, format_tests[i].x, format_tests[i].y, color); + release_surface_readback(&rb); + + hr = IDirect3DDevice9_Present(device, NULL, NULL, NULL, NULL); + ok(SUCCEEDED(hr), "Present failed, hr %#x.\n", hr); + } + IDirect3DTexture9_Release(tex); + } + + /************************************************** + * Tests that fetch4 works with 3D textures. * + **************************************************/ + + /* Create volume (3D) texture */ + IDirect3DDevice9_CreateVolumeTexture(device, 4, 4, 2, 1, 0, D3DFMT_L8, D3DPOOL_MANAGED, &texture3D, NULL ); + ok(hr == D3D_OK, "Failed to create volume texture, hr %#x.\n", hr); + hr = IDirect3DVolumeTexture9_LockBox(texture3D, 0, &lb, NULL, 0); + ok(hr == D3D_OK, "Failed to lock texture3D, hr %#x.\n", hr); + for (i = 0; i < ARRAY_SIZE(fetch4_data); ++i) + { + memcpy((BYTE *)lb.pBits + i*lb.RowPitch, &fetch4_data[i], sizeof(fetch4_data[i])); + /* Shift the lower level, to keep it different */ + memcpy((BYTE *)lb.pBits + i*lb.RowPitch + lb.SlicePitch, &fetch4_data[(i+1)%4], sizeof(fetch4_data[i])); + } + hr = IDirect3DVolumeTexture9_UnlockBox(texture3D, 0); + ok(hr == D3D_OK, "Failed to unlock texture3D, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetTexture(device, 0, (IDirect3DBaseTexture9 *)texture3D); + ok(hr == D3D_OK, "Failed to set texture3D, hr %#x.\n", hr); + + /* Test FFP and texld with dcl_volume (ps_3d) */ + for (i = 0; i < 2; ++i) + { + hr = IDirect3DDevice9_SetVertexShader(device, i ? vs : NULL); + ok(SUCCEEDED(hr), "Failed to set vertex shader, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetPixelShader(device, i ? 
ps_3d : NULL); + ok(SUCCEEDED(hr), "SetPixelShader failed, hr %#x.\n", hr); + + hr = IDirect3DDevice9_Clear(device, 0, NULL, D3DCLEAR_TARGET | D3DCLEAR_ZBUFFER, 0, 0.0f, 0); + ok(SUCCEEDED(hr), "Clear failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_BeginScene(device); + ok(SUCCEEDED(hr), "BeginScene failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_DrawPrimitiveUP(device, D3DPT_TRIANGLESTRIP, 2, quad, sizeof(*quad)); + ok(SUCCEEDED(hr), "DrawPrimitiveUP failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_EndScene(device); + ok(SUCCEEDED(hr), "EndScene failed, hr %#x.\n", hr); + get_rt_readback(original_rt, &rb); + for (j = 0; j < ARRAY_SIZE(expected_colors.color_amd); ++j) + { + UINT x = expected_colors.x[j % 4]; + UINT y = expected_colors.y[j / 4]; + D3DCOLOR color = get_readback_color(&rb, x, y); + D3DCOLOR color_amd = expected_colors.color_amd[j]; + D3DCOLOR color_intel = expected_colors.color_intel[j]; + D3DCOLOR color_off = expected_colors.color_3d_fetch4_off[j]; + D3DCOLOR color_zround = expected_colors.color_amd[(j+4) % ARRAY_SIZE(expected_colors.color_amd)]; + /* FIXME: Fetch4 on 3D textures have different results based on the vendor/driver + * - AMD "HD 5700" rounds to nearest "z" texel, and does fetch4 normally on .xy + * - AMD "R500" has fetch4 disabled + * - AMD "R580" has fetch4 enabled sampling at .xy0 + * - Intel UHD 620 sample with fetch4 at .xy0 + * Currently unimplemented on wine due to lack of GL functionality to cast 3D->2DArray + * Wine produces same results as if fetch4 is not enabled (which probably is better) + * Test will pass on windows if either one of the allowed results is returned */ + if(isWin) + ok(color_match(color, color_zround, 2) || color_match(color, color_off, 2) + || color_match(color, color_intel, 2) || color_match(color, color_amd, 2), + "Test 3D %s Expected colors 0x%08x || 0x%08x || 0x%08x || 0x%08x at (%u, %u), got 0x%08x.\n", + shaders[i].name, color_amd, color_zround, color_off, color_intel, x, y, color); + else + 
ok(color_match(color, color_off, 2), + "Test 3D %s Expected color 0x%08x at (%u, %u), got 0x%08x.\n", shaders[i].name, + color_off, x, y, color); + } + release_surface_readback(&rb); + hr = IDirect3DDevice9_Present(device, NULL, NULL, NULL, NULL); + ok(SUCCEEDED(hr), "Present failed, hr %#x.\n", hr); + } + + /******************************************************** + * Tests for fetch4 enable/disable with depth textures. * + ********************************************************/ + + for (i = 0; i < ARRAY_SIZE(depth_tests); ++i) + { + D3DFORMAT format = depth_tests[i].format; + IDirect3DTexture9 *depth_texture; + IDirect3DSurface9 *ds; + + if (FAILED(IDirect3D9_CheckDeviceFormat(d3d, D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL, + D3DFMT_X8R8G8B8, D3DUSAGE_DEPTHSTENCIL, D3DRTYPE_TEXTURE, format))) + continue; + + hr = IDirect3DDevice9_CreateTexture(device, 8, 8, 1, + D3DUSAGE_DEPTHSTENCIL, format, D3DPOOL_DEFAULT, &depth_texture, NULL); + ok(SUCCEEDED(hr), "CreateTexture failed, hr %#x.\n", hr); + hr = IDirect3DTexture9_GetSurfaceLevel(depth_texture, 0, &ds); + ok(SUCCEEDED(hr), "GetSurfaceLevel failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetDepthStencilSurface(device, ds); + ok(SUCCEEDED(hr), "SetDepthStencilSurface failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetRenderTarget(device, 0, rt); + ok(SUCCEEDED(hr), "SetRenderTarget failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetVertexShader(device, NULL); + ok(SUCCEEDED(hr), "Failed to set vertex shader, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetPixelShader(device, NULL); + ok(SUCCEEDED(hr), "SetPixelShader failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetTexture(device, 0, (IDirect3DBaseTexture9 *)texture); + ok(hr == D3D_OK, "Failed to set texture3D, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetSamplerState(device, 0, + D3DSAMP_MIPMAPLODBIAS, MAKEFOURCC('G','E','T','1')); + ok(SUCCEEDED(hr), "SetSamplerState failed, hr %#x.\n", hr); + + /* Setup the depth/stencil surface. 
*/ + hr = IDirect3DDevice9_Clear(device, 0, NULL, D3DCLEAR_ZBUFFER, 0, 0.0f, 0); + ok(SUCCEEDED(hr), "Clear failed, hr %#x.\n", hr); + + /* Render to the depth surface */ + hr = IDirect3DDevice9_BeginScene(device); + ok(SUCCEEDED(hr), "BeginScene failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_DrawPrimitiveUP(device, D3DPT_TRIANGLESTRIP, 2, quad, sizeof(*quad)); + ok(SUCCEEDED(hr), "DrawPrimitiveUP failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_EndScene(device); + ok(SUCCEEDED(hr), "EndScene failed, hr %#x.\n", hr); + + hr = IDirect3DDevice9_SetDepthStencilSurface(device, NULL); + ok(SUCCEEDED(hr), "SetDepthStencilSurface failed, hr %#x.\n", hr); + IDirect3DSurface9_Release(ds); + hr = IDirect3DDevice9_SetRenderTarget(device, 0, original_rt); + ok(SUCCEEDED(hr), "SetRenderTarget failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetTexture(device, 0, (IDirect3DBaseTexture9 *)depth_texture); + ok(SUCCEEDED(hr), "SetTexture failed, hr %#x.\n", hr); + + /* Set a shader for depth sampling, otherwise windows does not show anything */ + hr = IDirect3DDevice9_SetVertexShader(device, vs); + ok(SUCCEEDED(hr), "Failed to set vertex shader, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetPixelShader(device, shaders[1].ps); /* same as texld */ + ok(SUCCEEDED(hr), "SetPixelShader failed, hr %#x.\n", hr); + + for (j = 0; j < 2; ++j){ + hr = IDirect3DDevice9_SetSamplerState(device, 0, + D3DSAMP_MIPMAPLODBIAS, MAKEFOURCC('G','E','T', j ? '4' : '1' )); + ok(SUCCEEDED(hr), "SetSamplerState failed, hr %#x.\n", hr); + + /* Do the actual shadow mapping. 
*/ + hr = IDirect3DDevice9_BeginScene(device); + ok(SUCCEEDED(hr), "BeginScene failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_DrawPrimitiveUP(device, D3DPT_TRIANGLESTRIP, 2, quad, sizeof(*quad)); + ok(SUCCEEDED(hr), "DrawPrimitiveUP failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_EndScene(device); + ok(SUCCEEDED(hr), "EndScene failed, hr %#x.\n", hr); + + get_rt_readback(original_rt, &rb); + for (k = 0; k < ARRAY_SIZE(expected_depth[depth_tests[i].index]); ++k) + { + UINT x = expected_depth[depth_tests[i].index][k].x; + UINT y = expected_depth[depth_tests[i].index][k].y; + D3DCOLOR color_off = expected_depth[depth_tests[i].index][k].color_off; + D3DCOLOR color_amd = expected_depth[depth_tests[i].index][k].color_amd; + D3DCOLOR color_intel = expected_depth[depth_tests[i].index][k].color_intel; + D3DCOLOR color = get_readback_color(&rb, x, y); + /* When Fetch4 is OFF, ignore G and B channels on windows. + * Some implementations will copy R=G=B, some will set them to 0 */ + if(j == 0) + ok((isWin && color_match(color & 0xffff0000, color_off & 0xffff0000, 2)) + || color_match(color, color_off, 2), + "Test OFF Expected color 0x%08x at (%u, %u) for format %s, got 0x%08x.\n", + color_off, x, y, depth_tests[i].name, color); + else + ok(color_match(color, color_amd, 2) + || (isWin && color_match(color, color_intel, 2)), + "Test ON Expected colors 0x%08x || 0x%08x at (%u, %u) for format %s, got 0x%08x.\n", + color_amd, color_intel, x, y, depth_tests[i].name, color); + } + release_surface_readback(&rb); + + hr = IDirect3DDevice9_Present(device, NULL, NULL, NULL, NULL); + ok(SUCCEEDED(hr), "Present failed, hr %#x.\n", hr); + } + + hr = IDirect3DDevice9_SetTexture(device, 0, NULL); + ok(SUCCEEDED(hr), "SetTexture failed, hr %#x.\n", hr); + IDirect3DTexture9_Release(depth_texture); + } + + IDirect3DVolumeTexture9_Release(texture3D); + IDirect3DTexture9_Release(texture); + for (i = 0; i < ARRAY_SIZE(shaders); ++i) + if (shaders[i].ps) + 
IDirect3DPixelShader9_Release(shaders[i].ps); + IDirect3DPixelShader9_Release(ps_3d); + IDirect3DVertexShader9_Release(vs); + IDirect3DSurface9_Release(rt); + IDirect3DSurface9_Release(original_ds); + IDirect3DSurface9_Release(original_rt); + refcount = IDirect3DDevice9_Release(device); + ok(!refcount, "Device has %u references left.\n", refcount); +done: + IDirect3D9_Release(d3d); + DestroyWindow(window); +} + static void shadow_test(void) { static const DWORD ps_code[] = @@ -24660,6 +25303,7 @@ START_TEST(visual) depth_buffer2_test(); depth_blit_test(); intz_test(); + fetch4_test(); shadow_test(); fp_special_test(); depth_bounds_test();
- Add a format flag to indicate which texture formats support FETCH4 - The implementation follows the AMD behaviour: projection is still applied, the result is swizzled to match, and a 0.5 texel offset is added
Signed-off-by: Daniel Ansorregui [email protected] --- dlls/wined3d/glsl_shader.c | 43 ++++++++++++++++++++++++++++------ dlls/wined3d/utils.c | 11 +++++++++ dlls/wined3d/wined3d_private.h | 4 +++- 3 files changed, 50 insertions(+), 8 deletions(-)
diff --git a/dlls/wined3d/glsl_shader.c b/dlls/wined3d/glsl_shader.c index 3298a604fd..1950db06a9 100644 --- a/dlls/wined3d/glsl_shader.c +++ b/dlls/wined3d/glsl_shader.c @@ -9711,6 +9711,8 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * shader_addline(buffer, "#extension GL_ARB_shading_language_420pack : enable\n"); if (gl_info->supported[ARB_TEXTURE_RECTANGLE]) shader_addline(buffer, "#extension GL_ARB_texture_rectangle : enable\n"); + if (gl_info->supported[ARB_TEXTURE_GATHER]) + shader_addline(buffer, "#extension GL_ARB_texture_gather : enable\n");
if (!needs_legacy_glsl_syntax(gl_info)) { @@ -9851,6 +9853,9 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * for (stage = 0; stage < MAX_TEXTURES && settings->op[stage].cop != WINED3D_TOP_DISABLE; ++stage) { const char *texture_function, *coord_mask; + struct wined3d_string_buffer offset; + BOOL fetch4 = settings->op[stage].fetch4; + BOOL fetch4_proj = FALSE; BOOL proj;
if (!(tex_map & (1u << stage))) @@ -9870,7 +9875,6 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * FIXME("Unexpected projection mode %d\n", settings->op[stage].projected); proj = TRUE; } - if (settings->op[stage].tex_type == WINED3D_GL_RES_TYPE_TEX_CUBE) proj = FALSE;
@@ -9879,6 +9883,7 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * case WINED3D_GL_RES_TYPE_TEX_1D: texture_function = "texture1D"; coord_mask = "x"; + fetch4 = FALSE; break; case WINED3D_GL_RES_TYPE_TEX_2D: texture_function = "texture2D"; @@ -9887,6 +9892,9 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * case WINED3D_GL_RES_TYPE_TEX_3D: texture_function = "texture3D"; coord_mask = "xyz"; + if (fetch4) + FIXME("Unsupported Fetch4 and texture3D sampling"); + fetch4 = FALSE; break; case WINED3D_GL_RES_TYPE_TEX_CUBE: texture_function = "textureCube"; @@ -9901,11 +9909,24 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * texture_function = ""; coord_mask = "xyzw"; proj = FALSE; + fetch4 = FALSE; break; } if (!legacy_syntax) texture_function = "texture";
+ string_buffer_init(&offset); + if (fetch4) + { + texture_function = "textureGather"; + /* Apply a 0.5 texel offset as in AMD implementation */ + shader_addline(&offset, " + (vec2(0.5) / textureSize(ps_sampler%u, 0).xy)", stage); + + /* When projection is needed on fetch4 we have to apply it manually by dividing .w */ + fetch4_proj = proj; + proj = FALSE; + } + if (stage > 0 && (settings->op[stage - 1].cop == WINED3D_TOP_BUMPENVMAP || settings->op[stage - 1].cop == WINED3D_TOP_BUMPENVMAP_LUMINANCE)) @@ -9936,8 +9957,8 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * shader_addline(buffer, "ret = ffp_texcoord[%u] + ret.xyxy;\n", stage); }
- shader_addline(buffer, "tex%u = %s%s(ps_sampler%u, ret.%s%s);\n", - stage, texture_function, proj ? "Proj" : "", stage, coord_mask, proj ? "w" : ""); + shader_addline(buffer, "tex%u = %s%s(ps_sampler%u, ret.%s%s%s%s);\n", stage, texture_function, + proj ? "Proj" : "", stage, coord_mask, proj ? "w" : "", fetch4_proj ? " / ret.w" : "", offset.buffer);
if (settings->op[stage - 1].cop == WINED3D_TOP_BUMPENVMAP_LUMINANCE) shader_addline(buffer, "tex%u *= clamp(tex%u.z * bumpenv_lum_scale%u + bumpenv_lum_offset%u, 0.0, 1.0);\n", @@ -9945,14 +9966,22 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * } else if (settings->op[stage].projected == WINED3D_PROJECTION_COUNT3) { - shader_addline(buffer, "tex%u = %s%s(ps_sampler%u, ffp_texcoord[%u].xyz);\n", - stage, texture_function, proj ? "Proj" : "", stage, stage); + shader_addline(buffer, "tex%u = %s%s(ps_sampler%u, ffp_texcoord[%u].xyz%s);\n", stage, + texture_function, proj ? "Proj" : "", stage, stage, offset.buffer); } else { - shader_addline(buffer, "tex%u = %s%s(ps_sampler%u, ffp_texcoord[%u].%s%s);\n", - stage, texture_function, proj ? "Proj" : "", stage, stage, coord_mask, proj ? "w" : ""); + shader_addline(buffer, "tex%u = %s%s(ps_sampler%u, ffp_texcoord[%u].%s%s", stage, + texture_function, proj ? "Proj" : "", stage, stage, coord_mask, proj ? "w" : ""); + if (fetch4_proj) + shader_addline(buffer, " / ffp_texcoord[%u].w", stage); + shader_addline(buffer, "%s);\n", offset.buffer); } + string_buffer_clear(&offset); + + /* Match FETCH4 swizzle with textureGather swizzle */ + if (fetch4) + shader_addline(buffer, "tex%u = tex%u.zxyw;\n", stage, stage);
string_buffer_sprintf(tex_reg_name, "tex%u", stage); shader_glsl_color_correction_ext(buffer, tex_reg_name->buffer, WINED3DSP_WRITEMASK_ALL, diff --git a/dlls/wined3d/utils.c b/dlls/wined3d/utils.c index 7b42202213..82111c8bb2 100644 --- a/dlls/wined3d/utils.c +++ b/dlls/wined3d/utils.c @@ -340,6 +340,12 @@ static const struct wined3d_format_base_flags format_base_flags[] = {WINED3DFMT_NULL, WINED3DFMT_FLAG_EXTENSION}, {WINED3DFMT_NVDB, WINED3DFMT_FLAG_EXTENSION}, {WINED3DFMT_RESZ, WINED3DFMT_FLAG_EXTENSION}, + {WINED3DFMT_L8_UNORM, WINED3DFMT_FLAG_ALLOW_FETCH4}, + {WINED3DFMT_L16_UNORM, WINED3DFMT_FLAG_ALLOW_FETCH4}, + {WINED3DFMT_R16_FLOAT, WINED3DFMT_FLAG_ALLOW_FETCH4}, + {WINED3DFMT_R16, WINED3DFMT_FLAG_ALLOW_FETCH4}, + {WINED3DFMT_R32_FLOAT, WINED3DFMT_FLAG_ALLOW_FETCH4}, + {WINED3DFMT_A8_UNORM, WINED3DFMT_FLAG_ALLOW_FETCH4}, };
static void rgb888_from_rgb565(WORD rgb565, BYTE *r, BYTE *g, BYTE *b) @@ -5780,6 +5786,7 @@ void gen_ffp_frag_op(const struct wined3d_context *context, const struct wined3d settings->op[i].tmp_dst = 0; settings->op[i].tex_type = WINED3D_GL_RES_TYPE_TEX_1D; settings->op[i].projected = WINED3D_PROJECTION_NONE; + settings->op[i].fetch4 = FALSE; i++; break; } @@ -5923,6 +5930,10 @@ void gen_ffp_frag_op(const struct wined3d_context *context, const struct wined3d settings->op[i].aarg1 = aarg1; settings->op[i].aarg2 = aarg2; settings->op[i].tmp_dst = state->texture_states[i][WINED3D_TSS_RESULT_ARG] == WINED3DTA_TEMP; + settings->op[i].fetch4 = (state->textures[i] && gl_info->supported[ARB_TEXTURE_GATHER] + && state->sampler_states[i][WINED3D_SAMP_MIPMAP_LOD_BIAS] == MAKEFOURCC('G','E','T','4') + && state->textures[i]->resource.format_flags & WINED3DFMT_FLAG_ALLOW_FETCH4 + && settings->op[i].tex_type & (WINED3D_GL_RES_TYPE_TEX_2D | WINED3D_GL_RES_TYPE_TEX_RECT)); }
/* Clear unsupported stages */ diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h index 1e3ec28d6b..4224461142 100644 --- a/dlls/wined3d/wined3d_private.h +++ b/dlls/wined3d/wined3d_private.h @@ -2747,7 +2747,8 @@ struct texture_stage_op unsigned tex_type : 3; unsigned tmp_dst : 1; unsigned projected : 2; - unsigned padding : 10; + unsigned fetch4 : 1; + unsigned padding : 9; };
struct ffp_frag_settings @@ -4434,6 +4435,7 @@ extern enum wined3d_format_id pixelformat_for_depth(DWORD depth) DECLSPEC_HIDDEN #define WINED3DFMT_FLAG_VERTEX_ATTRIBUTE 0x01000000 #define WINED3DFMT_FLAG_BLIT 0x02000000 #define WINED3DFMT_FLAG_MAPPABLE 0x04000000 +#define WINED3DFMT_FLAG_ALLOW_FETCH4 0x08000000
struct wined3d_rational {
- Implement shader generation for Fetch4 with texld/texldp/texldd/texldb/texldl - FIXME: Fetch4 with texldl in vertex shaders is not implemented yet, since ps_compile_args cannot be accessed there. Maybe it should be moved to another place. It probably does not work on Windows anyway - Trigger PS regeneration on FETCH4 state changes by storing a context flag - Add a ps_compile_args flag for fetch4
Signed-off-by: Daniel Ansorregui [email protected] --- dlls/wined3d/cs.c | 5 +- dlls/wined3d/device.c | 4 + dlls/wined3d/glsl_shader.c | 214 ++++++++++++++++++++++----------- dlls/wined3d/shader.c | 13 ++ dlls/wined3d/state.c | 13 ++ dlls/wined3d/wined3d_private.h | 7 +- 6 files changed, 186 insertions(+), 70 deletions(-)
diff --git a/dlls/wined3d/cs.c b/dlls/wined3d/cs.c index 7471d24e7e..890540c78d 100644 --- a/dlls/wined3d/cs.c +++ b/dlls/wined3d/cs.c @@ -1372,7 +1372,10 @@ static void wined3d_cs_exec_set_texture(struct wined3d_cs *cs, const void *data) if (!prev || wined3d_texture_gl(op->texture)->target != wined3d_texture_gl(prev)->target || (!is_same_fixup(new_format->color_fixup, old_format->color_fixup) && !(can_use_texture_swizzle(gl_info, new_format) && can_use_texture_swizzle(gl_info, old_format))) - || (new_fmt_flags & WINED3DFMT_FLAG_SHADOW) != (old_fmt_flags & WINED3DFMT_FLAG_SHADOW)) + || (new_fmt_flags & WINED3DFMT_FLAG_SHADOW) != (old_fmt_flags & WINED3DFMT_FLAG_SHADOW) + || ((new_fmt_flags & WINED3DFMT_FLAG_ALLOW_FETCH4) != (old_fmt_flags & WINED3DFMT_FLAG_ALLOW_FETCH4) + && cs->state.sampler_states[op->texture->sampler][WINED3D_SAMP_MIPMAP_LOD_BIAS] + == MAKEFOURCC('G','E','T','4'))) device_invalidate_state(cs->device, STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL));
if (!prev && op->stage < d3d_info->limits.ffp_blend_stages) diff --git a/dlls/wined3d/device.c b/dlls/wined3d/device.c index dee99dcde2..99248ffb1b 100644 --- a/dlls/wined3d/device.c +++ b/dlls/wined3d/device.c @@ -2082,7 +2082,11 @@ void CDECL wined3d_device_set_sampler_state(struct wined3d_device *device, device, sampler_idx, debug_d3dsamplerstate(state), value);
if (sampler_idx >= WINED3DVERTEXTEXTURESAMPLER0 && sampler_idx <= WINED3DVERTEXTEXTURESAMPLER3) + { sampler_idx -= (WINED3DVERTEXTEXTURESAMPLER0 - MAX_FRAGMENT_SAMPLERS); + if (state == WINED3D_SAMP_MIPMAP_LOD_BIAS && value == MAKEFOURCC('G','E','T','4')) + FIXME("Unsupported FETCH4 and Vertex Texture Sampler"); + }
if (sampler_idx >= ARRAY_SIZE(device->state.sampler_states)) { diff --git a/dlls/wined3d/glsl_shader.c b/dlls/wined3d/glsl_shader.c index 1950db06a9..e53306a6c2 100644 --- a/dlls/wined3d/glsl_shader.c +++ b/dlls/wined3d/glsl_shader.c @@ -49,6 +49,7 @@ WINE_DECLARE_DEBUG_CHANNEL(winediag); #define WINED3D_GLSL_SAMPLE_GRAD 0x04 #define WINED3D_GLSL_SAMPLE_LOAD 0x08 #define WINED3D_GLSL_SAMPLE_OFFSET 0x10 +#define WINED3D_GLSL_SAMPLE_GATHER 0x20
static const struct { @@ -3613,6 +3614,7 @@ static void shader_glsl_get_sample_function(const struct wined3d_shader_context BOOL lod = flags & WINED3D_GLSL_SAMPLE_LOD; BOOL grad = flags & WINED3D_GLSL_SAMPLE_GRAD; BOOL offset = flags & WINED3D_GLSL_SAMPLE_OFFSET; + BOOL gather = !shadow && flags & WINED3D_GLSL_SAMPLE_GATHER; const char *base = "texture", *type_part = "", *suffix = ""; unsigned int coord_size, deriv_size;
@@ -3658,6 +3660,19 @@ static void shader_glsl_get_sample_function(const struct wined3d_shader_context type_part = ""; }
+ if (gather) + { + if (resource_type == WINED3D_SHADER_RESOURCE_TEXTURE_3D) + FIXME("Unsupported Fetch4 and texture3D sampling"); + else + { + base = "texture"; + type_part = "Gather"; + suffix = ""; + projected = lod = grad = offset = FALSE; + } + } + sample_function->name = string_buffer_get(priv->string_buffers); string_buffer_sprintf(sample_function->name, "%s%s%s%s%s%s", base, type_part, projected ? "Proj" : "", lod ? "Lod" : grad ? "Grad" : "", offset ? "Offset" : "", suffix); @@ -3786,12 +3801,13 @@ static void shader_glsl_color_correction(const struct wined3d_shader_instruction string_buffer_release(priv->string_buffers, reg_name); }
-static void PRINTF_ATTR(9, 10) shader_glsl_gen_sample_code(const struct wined3d_shader_instruction *ins, - unsigned int sampler_bind_idx, const struct glsl_sample_function *sample_function, DWORD swizzle, - const char *dx, const char *dy, const char *bias, const struct wined3d_shader_texel_offset *offset, - const char *coord_reg_fmt, ...) +static void PRINTF_ATTR(10, 11) shader_glsl_gen_sample_code(const struct wined3d_shader_instruction *ins, + unsigned int sampler_bind_idx, const struct glsl_sample_function *sample_function, + const DWORD swizzle, const DWORD sample_flags, const char *dx, const char *dy, const char *bias, + const struct wined3d_shader_texel_offset *offset, const char *coord_reg_fmt, ...) { const struct wined3d_shader_version *version = &ins->ctx->reg_maps->shader_version; + const struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data; char dst_swizzle[6]; struct color_fixup_desc fixup; BOOL np2_fixup = FALSE; @@ -3833,10 +3849,8 @@ static void PRINTF_ATTR(9, 10) shader_glsl_gen_sample_code(const struct wined3d_ if (!string_buffer_resize(ins->ctx->buffer, ret)) break; } - if (np2_fixup) { - const struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data; const unsigned char idx = priv->cur_np2fixup_info->idx[sampler_bind_idx];
switch (shader_glsl_get_write_mask_size(sample_function->coord_mask)) @@ -3859,6 +3873,20 @@ static void PRINTF_ATTR(9, 10) shader_glsl_gen_sample_code(const struct wined3d_ break; } } + if(sample_flags & WINED3D_GLSL_SAMPLE_GATHER) + { + if (sample_flags & WINED3D_GLSL_SAMPLE_PROJECTED) + { + struct wined3d_string_buffer *reg_name = string_buffer_get(priv->string_buffers); + shader_glsl_get_register_name(&ins->src[0].reg, ins->src[0].reg.data_type, reg_name, NULL, ins->ctx); + shader_addline(ins->ctx->buffer, " / %s.w", reg_name->buffer); + string_buffer_release(priv->string_buffers, reg_name); + } + + /* Correct the fetch4 0.5 texel offset */ + shader_addline(ins->ctx->buffer, " + (vec2(0.5) / textureSize(%s_sampler%u, 0).xy)", + shader_glsl_get_prefix(version->type), sampler_bind_idx); + } if (dx && dy) shader_addline(ins->ctx->buffer, ", %s, %s", dx, dy); else if (bias) @@ -5397,11 +5425,19 @@ static void shader_glsl_tex(const struct wined3d_shader_instruction *ins) } }
+ /* Fetch4 overwrites the other texture flags */ + if (priv->cur_ps_args->fetch4 & (1u << resource_idx)) + { + sample_flags |= WINED3D_GLSL_SAMPLE_GATHER; + mask = 0; + } + shader_glsl_get_sample_function(ins->ctx, resource_idx, resource_idx, sample_flags, &sample_function); mask |= sample_function.coord_mask; sample_function.coord_mask = mask;
if (shader_version < WINED3D_SHADER_VERSION(2,0)) swizzle = WINED3DSP_NOSWIZZLE; + else if (sample_flags & WINED3D_GLSL_SAMPLE_GATHER) swizzle = WINED3DSP_FETCH4_SWIZZLE; else swizzle = ins->src[1].swizzle;
/* 1.0-1.3: Use destination register as coordinate source. @@ -5410,22 +5446,22 @@ static void shader_glsl_tex(const struct wined3d_shader_instruction *ins) { char coord_mask[6]; shader_glsl_write_mask_to_str(mask, coord_mask); - shader_glsl_gen_sample_code(ins, resource_idx, &sample_function, swizzle, NULL, NULL, NULL, NULL, - "T%u%s", resource_idx, coord_mask); + shader_glsl_gen_sample_code(ins, resource_idx, &sample_function, swizzle, sample_flags, NULL, NULL, + NULL, NULL, "T%u%s", resource_idx, coord_mask); } else { struct glsl_src_param coord_param; shader_glsl_add_src_param(ins, &ins->src[0], mask, &coord_param); - if (ins->flags & WINED3DSI_TEXLD_BIAS) + if (ins->flags & WINED3DSI_TEXLD_BIAS && sample_flags != WINED3D_GLSL_SAMPLE_GATHER) { struct glsl_src_param bias; shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_3, &bias); - shader_glsl_gen_sample_code(ins, resource_idx, &sample_function, swizzle, NULL, NULL, bias.param_str, - NULL, "%s", coord_param.param_str); + shader_glsl_gen_sample_code(ins, resource_idx, &sample_function, swizzle, sample_flags, + NULL, NULL, bias.param_str, NULL, "%s", coord_param.param_str); } else { - shader_glsl_gen_sample_code(ins, resource_idx, &sample_function, swizzle, NULL, NULL, NULL, NULL, - "%s", coord_param.param_str); + shader_glsl_gen_sample_code(ins, resource_idx, &sample_function, swizzle, sample_flags, + NULL, NULL, NULL, NULL, "%s", coord_param.param_str); } } shader_glsl_release_sample_function(ins->ctx, &sample_function); @@ -5433,10 +5469,11 @@ static void shader_glsl_tex(const struct wined3d_shader_instruction *ins)
static void shader_glsl_texldd(const struct wined3d_shader_instruction *ins) { + const struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data; const struct wined3d_gl_info *gl_info = ins->ctx->gl_info; struct glsl_src_param coord_param, dx_param, dy_param; struct glsl_sample_function sample_function; - DWORD sampler_idx; + DWORD sampler_idx = ins->src[1].reg.idx[0].offset; DWORD swizzle = ins->src[1].swizzle;
if (!shader_glsl_has_core_grad(gl_info) && !gl_info->supported[ARB_SHADER_TEXTURE_LOD]) @@ -5446,34 +5483,48 @@ static void shader_glsl_texldd(const struct wined3d_shader_instruction *ins) return; }
- sampler_idx = ins->src[1].reg.idx[0].offset; + /* Fetch4 overwrites the other texture flags */ + if (priv->cur_ps_args->fetch4 & (1u << sampler_idx)) + { + shader_glsl_get_sample_function(ins->ctx, sampler_idx, sampler_idx, WINED3D_GLSL_SAMPLE_GATHER, &sample_function); + swizzle = WINED3DSP_FETCH4_SWIZZLE; + shader_glsl_add_src_param(ins, &ins->src[0], sample_function.coord_mask, &coord_param); + shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, swizzle, WINED3D_GLSL_SAMPLE_GATHER, + NULL, NULL, NULL, NULL, "%s", coord_param.param_str); + shader_glsl_release_sample_function(ins->ctx, &sample_function); + return; + }
shader_glsl_get_sample_function(ins->ctx, sampler_idx, sampler_idx, WINED3D_GLSL_SAMPLE_GRAD, &sample_function); shader_glsl_add_src_param(ins, &ins->src[0], sample_function.coord_mask, &coord_param); shader_glsl_add_src_param(ins, &ins->src[2], sample_function.deriv_mask, &dx_param); shader_glsl_add_src_param(ins, &ins->src[3], sample_function.deriv_mask, &dy_param);
- shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, swizzle, dx_param.param_str, dy_param.param_str, - NULL, NULL, "%s", coord_param.param_str); + shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, swizzle, 0, + dx_param.param_str, dy_param.param_str, NULL, NULL, "%s", coord_param.param_str); shader_glsl_release_sample_function(ins->ctx, &sample_function); }
static void shader_glsl_texldl(const struct wined3d_shader_instruction *ins) { const struct wined3d_shader_version *shader_version = &ins->ctx->reg_maps->shader_version; + const struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data; const struct wined3d_gl_info *gl_info = ins->ctx->gl_info; struct glsl_src_param coord_param, lod_param; struct glsl_sample_function sample_function; + DWORD sampler_idx = ins->src[1].reg.idx[0].offset; DWORD swizzle = ins->src[1].swizzle; - DWORD sampler_idx; + DWORD flags = WINED3D_GLSL_SAMPLE_LOD;
- sampler_idx = ins->src[1].reg.idx[0].offset; + /* This call can be used in vertex shader, without cur_ps_args */ + if(priv->cur_ps_args && priv->cur_ps_args->fetch4 & (1u << sampler_idx)) + { + flags = WINED3D_GLSL_SAMPLE_GATHER; + }
- shader_glsl_get_sample_function(ins->ctx, sampler_idx, sampler_idx, WINED3D_GLSL_SAMPLE_LOD, &sample_function); + shader_glsl_get_sample_function(ins->ctx, sampler_idx, sampler_idx, flags, &sample_function); shader_glsl_add_src_param(ins, &ins->src[0], sample_function.coord_mask, &coord_param);
- shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_3, &lod_param); - if (shader_version->type == WINED3D_SHADER_TYPE_PIXEL && !shader_glsl_has_core_grad(gl_info) && !gl_info->supported[ARB_SHADER_TEXTURE_LOD]) { @@ -5482,8 +5533,20 @@ static void shader_glsl_texldl(const struct wined3d_shader_instruction *ins) * even without the appropriate extension. */ WARN("Using %s in fragment shader.\n", sample_function.name->buffer); } - shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, swizzle, NULL, NULL, lod_param.param_str, NULL, - "%s", coord_param.param_str); + + if (flags == WINED3D_GLSL_SAMPLE_GATHER){ + swizzle = WINED3DSP_FETCH4_SWIZZLE; + shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, swizzle, flags, + NULL, NULL, NULL, NULL, "%s", coord_param.param_str); + shader_glsl_release_sample_function(ins->ctx, &sample_function); + return; + } + + shader_glsl_get_sample_function(ins->ctx, sampler_idx, sampler_idx, WINED3D_GLSL_SAMPLE_LOD, &sample_function); + shader_glsl_add_src_param(ins, &ins->src[0], sample_function.coord_mask, &coord_param); + shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_3, &lod_param); + shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, swizzle, flags, NULL, NULL, + lod_param.param_str, NULL, "%s", coord_param.param_str); shader_glsl_release_sample_function(ins->ctx, &sample_function); }
@@ -6175,6 +6238,7 @@ static void shader_glsl_ld(const struct wined3d_shader_instruction *ins) struct glsl_sample_function sample_function; DWORD flags = WINED3D_GLSL_SAMPLE_LOAD; BOOL has_lod_param; + const struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data;
if (wined3d_shader_instruction_has_texel_offset(ins)) flags |= WINED3D_GLSL_SAMPLE_OFFSET; @@ -6189,6 +6253,10 @@ static void shader_glsl_ld(const struct wined3d_shader_instruction *ins) } has_lod_param = is_mipmapped(reg_maps->resource_info[resource_idx].type);
+ if (priv->cur_ps_args && priv->cur_ps_args->fetch4 & (1u << resource_idx)){ + FIXME("Unsupported FETCH4 and LD Sampling SM 5.0"); + } + shader_glsl_get_sample_function(ins->ctx, resource_idx, sampler_idx, flags, &sample_function); shader_glsl_add_src_param(ins, &ins->src[0], sample_function.coord_mask, &coord_param); shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_3, &lod_param); @@ -6196,12 +6264,12 @@ static void shader_glsl_ld(const struct wined3d_shader_instruction *ins) if (is_multisampled(reg_maps->resource_info[resource_idx].type)) { shader_glsl_add_src_param(ins, &ins->src[2], WINED3DSP_WRITEMASK_0, &sample_param); - shader_glsl_gen_sample_code(ins, sampler_bind_idx, &sample_function, ins->src[1].swizzle, + shader_glsl_gen_sample_code(ins, sampler_bind_idx, &sample_function, ins->src[1].swizzle, 0, NULL, NULL, NULL, &ins->texel_offset, "%s, %s", coord_param.param_str, sample_param.param_str); } else { - shader_glsl_gen_sample_code(ins, sampler_bind_idx, &sample_function, ins->src[1].swizzle, + shader_glsl_gen_sample_code(ins, sampler_bind_idx, &sample_function, ins->src[1].swizzle, 0, NULL, NULL, has_lod_param ? lod_param.param_str : NULL, &ins->texel_offset, "%s", coord_param.param_str); } @@ -6214,46 +6282,57 @@ static void shader_glsl_sample(const struct wined3d_shader_instruction *ins) struct glsl_src_param coord_param, lod_param, dx_param, dy_param; unsigned int resource_idx, sampler_idx, sampler_bind_idx; struct glsl_sample_function sample_function; + const struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data; + DWORD swizzle = ins->src[1].swizzle; DWORD flags = 0;
+ resource_idx = ins->src[1].reg.idx[0].offset; + sampler_idx = ins->src[2].reg.idx[0].offset; + if (ins->handler_idx == WINED3DSIH_SAMPLE_GRAD) flags |= WINED3D_GLSL_SAMPLE_GRAD; if (ins->handler_idx == WINED3DSIH_SAMPLE_LOD) flags |= WINED3D_GLSL_SAMPLE_LOD; if (wined3d_shader_instruction_has_texel_offset(ins)) flags |= WINED3D_GLSL_SAMPLE_OFFSET; - - resource_idx = ins->src[1].reg.idx[0].offset; - sampler_idx = ins->src[2].reg.idx[0].offset; + if (priv->cur_ps_args->fetch4 & (1u << sampler_idx)) + { + flags = WINED3D_GLSL_SAMPLE_GATHER; + swizzle = WINED3DSP_FETCH4_SWIZZLE; + }
shader_glsl_get_sample_function(ins->ctx, resource_idx, sampler_idx, flags, &sample_function); shader_glsl_add_src_param(ins, &ins->src[0], sample_function.coord_mask, &coord_param);
- switch (ins->handler_idx) + /* Fetch4 overwrites the other texture flags */ + if (flags != WINED3D_GLSL_SAMPLE_GATHER) { - case WINED3DSIH_SAMPLE: - break; - case WINED3DSIH_SAMPLE_B: - shader_glsl_add_src_param(ins, &ins->src[3], WINED3DSP_WRITEMASK_0, &lod_param); - lod_param_str = lod_param.param_str; - break; - case WINED3DSIH_SAMPLE_GRAD: - shader_glsl_add_src_param(ins, &ins->src[3], sample_function.deriv_mask, &dx_param); - shader_glsl_add_src_param(ins, &ins->src[4], sample_function.deriv_mask, &dy_param); - dx_param_str = dx_param.param_str; - dy_param_str = dy_param.param_str; - break; - case WINED3DSIH_SAMPLE_LOD: - shader_glsl_add_src_param(ins, &ins->src[3], WINED3DSP_WRITEMASK_0, &lod_param); - lod_param_str = lod_param.param_str; - break; - default: - ERR("Unhandled opcode %s.\n", debug_d3dshaderinstructionhandler(ins->handler_idx)); - break; + switch (ins->handler_idx) + { + case WINED3DSIH_SAMPLE: + break; + case WINED3DSIH_SAMPLE_B: + shader_glsl_add_src_param(ins, &ins->src[3], WINED3DSP_WRITEMASK_0, &lod_param); + lod_param_str = lod_param.param_str; + break; + case WINED3DSIH_SAMPLE_GRAD: + shader_glsl_add_src_param(ins, &ins->src[3], sample_function.deriv_mask, &dx_param); + shader_glsl_add_src_param(ins, &ins->src[4], sample_function.deriv_mask, &dy_param); + dx_param_str = dx_param.param_str; + dy_param_str = dy_param.param_str; + break; + case WINED3DSIH_SAMPLE_LOD: + shader_glsl_add_src_param(ins, &ins->src[3], WINED3DSP_WRITEMASK_0, &lod_param); + lod_param_str = lod_param.param_str; + break; + default: + ERR("Unhandled opcode %s.\n", debug_d3dshaderinstructionhandler(ins->handler_idx)); + break; + } }
sampler_bind_idx = shader_glsl_find_sampler(&ins->ctx->reg_maps->sampler_map, resource_idx, sampler_idx); - shader_glsl_gen_sample_code(ins, sampler_bind_idx, &sample_function, ins->src[1].swizzle, + shader_glsl_gen_sample_code(ins, sampler_bind_idx, &sample_function, swizzle, flags, dx_param_str, dy_param_str, lod_param_str, &ins->texel_offset, "%s", coord_param.param_str); shader_glsl_release_sample_function(ins->ctx, &sample_function); } @@ -6299,6 +6378,9 @@ static void shader_glsl_sample_c(const struct wined3d_shader_instruction *ins) unsigned int coord_size; DWORD flags = 0;
+ resource_idx = ins->src[1].reg.idx[0].offset; + sampler_idx = ins->src[2].reg.idx[0].offset; + if (ins->handler_idx == WINED3DSIH_SAMPLE_C_LZ) { lod_param = "0"; @@ -6310,8 +6392,6 @@ static void shader_glsl_sample_c(const struct wined3d_shader_instruction *ins)
if (!(resource_info = shader_glsl_get_resource_info(ins, &ins->src[1].reg))) return; - resource_idx = ins->src[1].reg.idx[0].offset; - sampler_idx = ins->src[2].reg.idx[0].offset;
shader_glsl_get_sample_function(ins->ctx, resource_idx, sampler_idx, flags, &sample_function); coord_size = shader_glsl_get_write_mask_size(sample_function.coord_mask); @@ -6327,7 +6407,7 @@ static void shader_glsl_sample_c(const struct wined3d_shader_instruction *ins) } else { - shader_glsl_gen_sample_code(ins, sampler_bind_idx, &sample_function, WINED3DSP_NOSWIZZLE, + shader_glsl_gen_sample_code(ins, sampler_bind_idx, &sample_function, WINED3DSP_NOSWIZZLE, 0, NULL, NULL, lod_param, &ins->texel_offset, "vec%u(%s, %s)", coord_size, coord_param.param_str, compare_param.param_str); } @@ -6469,18 +6549,18 @@ static void shader_glsl_texdp3tex(const struct wined3d_shader_instruction *ins) switch(mask_size) { case 1: - shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL, - NULL, "dot(ffp_texcoord[%u].xyz, %s)", sampler_idx, src0_param.param_str); + shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, WINED3DSP_NOSWIZZLE, 0, NULL, NULL, + NULL, NULL, "dot(ffp_texcoord[%u].xyz, %s)", sampler_idx, src0_param.param_str); break;
case 2: - shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL, - NULL, "vec2(dot(ffp_texcoord[%u].xyz, %s), 0.0)", sampler_idx, src0_param.param_str); + shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, WINED3DSP_NOSWIZZLE, 0, NULL, NULL, + NULL, NULL, "vec2(dot(ffp_texcoord[%u].xyz, %s), 0.0)", sampler_idx, src0_param.param_str); break;
case 3: - shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL, - NULL, "vec3(dot(ffp_texcoord[%u].xyz, %s), 0.0, 0.0)", sampler_idx, src0_param.param_str); + shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, WINED3DSP_NOSWIZZLE, 0, NULL, NULL, + NULL, NULL, "vec3(dot(ffp_texcoord[%u].xyz, %s), 0.0, 0.0)", sampler_idx, src0_param.param_str); break;
default: @@ -6588,7 +6668,7 @@ static void shader_glsl_texm3x2tex(const struct wined3d_shader_instruction *ins) shader_glsl_get_sample_function(ins->ctx, reg, reg, 0, &sample_function);
/* Sample the texture using the calculated coordinates */ - shader_glsl_gen_sample_code(ins, reg, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL, NULL, "tmp0.xy"); + shader_glsl_gen_sample_code(ins, reg, &sample_function, WINED3DSP_NOSWIZZLE, 0, NULL, NULL, NULL, NULL, "tmp0.xy"); shader_glsl_release_sample_function(ins->ctx, &sample_function); }
@@ -6609,7 +6689,7 @@ static void shader_glsl_texm3x3tex(const struct wined3d_shader_instruction *ins) shader_glsl_get_sample_function(ins->ctx, reg, reg, 0, &sample_function);
/* Sample the texture using the calculated coordinates */ - shader_glsl_gen_sample_code(ins, reg, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL, NULL, "tmp0.xyz"); + shader_glsl_gen_sample_code(ins, reg, &sample_function, WINED3DSP_NOSWIZZLE, 0, NULL, NULL, NULL, NULL, "tmp0.xyz"); shader_glsl_release_sample_function(ins->ctx, &sample_function);
tex_mx->current_row = 0; @@ -6660,7 +6740,7 @@ static void shader_glsl_texm3x3spec(const struct wined3d_shader_instruction *ins shader_glsl_write_mask_to_str(sample_function.coord_mask, coord_mask);
/* Sample the texture */ - shader_glsl_gen_sample_code(ins, reg, &sample_function, WINED3DSP_NOSWIZZLE, + shader_glsl_gen_sample_code(ins, reg, &sample_function, WINED3DSP_NOSWIZZLE, 0, NULL, NULL, NULL, NULL, "tmp0%s", coord_mask); shader_glsl_release_sample_function(ins->ctx, &sample_function);
@@ -6694,7 +6774,7 @@ static void shader_glsl_texm3x3vspec(const struct wined3d_shader_instruction *in shader_glsl_write_mask_to_str(sample_function.coord_mask, coord_mask);
/* Sample the texture using the calculated coordinates */ - shader_glsl_gen_sample_code(ins, reg, &sample_function, WINED3DSP_NOSWIZZLE, + shader_glsl_gen_sample_code(ins, reg, &sample_function, WINED3DSP_NOSWIZZLE, 0, NULL, NULL, NULL, NULL, "tmp0%s", coord_mask); shader_glsl_release_sample_function(ins->ctx, &sample_function);
@@ -6753,7 +6833,7 @@ static void shader_glsl_texbem(const struct wined3d_shader_instruction *ins)
shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1, &coord_param);
- shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL, NULL, + shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, WINED3DSP_NOSWIZZLE, 0, NULL, NULL, NULL, NULL, "T%u%s + vec4(bumpenv_mat%u * %s, 0.0, 0.0)%s", sampler_idx, coord_mask, sampler_idx, coord_param.param_str, coord_mask);
@@ -6798,7 +6878,7 @@ static void shader_glsl_texreg2ar(const struct wined3d_shader_instruction *ins) shader_glsl_get_register_name(&ins->src[0].reg, ins->src[0].reg.data_type, reg_name, NULL, ins->ctx);
shader_glsl_get_sample_function(ins->ctx, sampler_idx, sampler_idx, 0, &sample_function); - shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL, NULL, + shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, WINED3DSP_NOSWIZZLE, 0, NULL, NULL, NULL, NULL, "%s.wx", reg_name->buffer); shader_glsl_release_sample_function(ins->ctx, &sample_function);
@@ -6818,7 +6898,7 @@ static void shader_glsl_texreg2gb(const struct wined3d_shader_instruction *ins) shader_glsl_get_register_name(&ins->src[0].reg, ins->src[0].reg.data_type, reg_name, NULL, ins->ctx);
shader_glsl_get_sample_function(ins->ctx, sampler_idx, sampler_idx, 0, &sample_function); - shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL, NULL, + shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, WINED3DSP_NOSWIZZLE, 0, NULL, NULL, NULL, NULL, "%s.yz", reg_name->buffer); shader_glsl_release_sample_function(ins->ctx, &sample_function);
@@ -6837,7 +6917,7 @@ static void shader_glsl_texreg2rgb(const struct wined3d_shader_instruction *ins) shader_glsl_get_sample_function(ins->ctx, sampler_idx, sampler_idx, 0, &sample_function); shader_glsl_add_src_param(ins, &ins->src[0], sample_function.coord_mask, &src0_param);
- shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL, NULL, + shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, WINED3DSP_NOSWIZZLE, 0, NULL, NULL, NULL, NULL, "%s", src0_param.param_str); shader_glsl_release_sample_function(ins->ctx, &sample_function); } diff --git a/dlls/wined3d/shader.c b/dlls/wined3d/shader.c index e11a37cf07..6e21444f8c 100644 --- a/dlls/wined3d/shader.c +++ b/dlls/wined3d/shader.c @@ -4027,6 +4027,19 @@ void find_ps_compile_args(const struct wined3d_state *state, const struct wined3 } }
+ if (gl_info->supported[ARB_TEXTURE_GATHER]) + { + for (i = 0; i < MAX_FRAGMENT_SAMPLERS; ++i) + { + if (state->sampler_states[i][WINED3D_SAMP_MIPMAP_LOD_BIAS] == MAKEFOURCC('G','E','T','4') + && state->textures[i]->resource.format_flags & WINED3DFMT_FLAG_ALLOW_FETCH4 + && shader->reg_maps.resource_info[i].type == WINED3D_SHADER_RESOURCE_TEXTURE_2D) + args->fetch4 |= 1 << i; + else + args->fetch4 &= ~(1 << i); + } + } + if (context->d3d_info->limits.varying_count < wined3d_max_compat_varyings(context->gl_info)) { const struct wined3d_shader *vs = state->shader[WINED3D_SHADER_TYPE_VERTEX]; diff --git a/dlls/wined3d/state.c b/dlls/wined3d/state.c index 8708aa09b3..dc69e935c5 100644 --- a/dlls/wined3d/state.c +++ b/dlls/wined3d/state.c @@ -3616,6 +3616,8 @@ static void sampler(struct wined3d_context *context, const struct wined3d_state { struct wined3d_texture_gl *texture_gl = wined3d_texture_gl(state->textures[sampler_idx]); BOOL srgb = state->sampler_states[sampler_idx][WINED3D_SAMP_SRGB_TEXTURE]; + BOOL fetch4 = state->sampler_states[sampler_idx][WINED3D_SAMP_MIPMAP_LOD_BIAS] + == MAKEFOURCC('G','E','T','4'); const DWORD *sampler_states = state->sampler_states[sampler_idx]; struct wined3d_device *device = context->device; struct wined3d_sampler_desc desc; @@ -3650,6 +3652,17 @@ static void sampler(struct wined3d_context *context, const struct wined3d_state /* Trigger shader constant reloading (for NP2 texcoord fixup) */ if (!(texture_gl->t.flags & WINED3D_TEXTURE_POW2_MAT_IDENT)) context->constant_update_mask |= WINED3D_SHADER_CONST_PS_NP2_FIXUP; + + /* Trigger pixel shader recompilation for FETCH4 changes */ + if(gl_info->supported[ARB_TEXTURE_GATHER] && + ((context->last_was_fetch4 >> sampler_idx) & 0x1) ^ fetch4) + { + if (fetch4) + context->last_was_fetch4 |= 1 << sampler_idx; + else + context->last_was_fetch4 &= ~(1 << sampler_idx); + context->shader_update_mask = (1u << WINED3D_SHADER_TYPE_PIXEL); + } } else { diff --git a/dlls/wined3d/wined3d_private.h 
b/dlls/wined3d/wined3d_private.h index 4224461142..654e4ff610 100644 --- a/dlls/wined3d/wined3d_private.h +++ b/dlls/wined3d/wined3d_private.h @@ -521,7 +521,8 @@ enum wined3d_immconst_type WINED3D_IMMCONST_VEC4, };
-#define WINED3DSP_NOSWIZZLE (0u | (1u << 2) | (2u << 4) | (3u << 6)) +#define WINED3DSP_NOSWIZZLE (0u | (1u << 2) | (2u << 4) | (3u << 6)) +#define WINED3DSP_FETCH4_SWIZZLE (2u | (0u << 2) | (1u << 4) | (3u << 6)) /* zxyw */
enum wined3d_shader_src_modifier { @@ -1358,7 +1359,8 @@ struct ps_compile_args DWORD alpha_test_func : 3; DWORD render_offscreen : 1; DWORD rt_alpha_swizzle : 8; /* MAX_RENDER_TARGET_VIEWS, 8 */ - DWORD padding : 18; + WORD fetch4 : 16; /* MAX_FRAGMENT_SAMPLERS, 16 */ + DWORD padding : 2; };
enum fog_src_type @@ -1893,6 +1895,7 @@ struct wined3d_context DWORD last_was_ffp_blit : 1; DWORD last_was_blit : 1; DWORD last_was_ckey : 1; + DWORD last_was_fetch4 : 16; /* MAX_FRAGMENT_SAMPLERS */ DWORD fog_coord : 1; DWORD fog_enabled : 1; DWORD current : 1;
- This assumes FETCH4 is already supported by Wine, and checks for ARB_texture_gather support before exposing DF24
Signed-off-by: Daniel Ansorregui [email protected] --- dlls/wined3d/directx.c | 6 ++++++ dlls/wined3d/utils.c | 30 ++++++++++++++++++++++++++++++ include/wine/wined3d.h | 2 ++ 3 files changed, 38 insertions(+)
diff --git a/dlls/wined3d/directx.c b/dlls/wined3d/directx.c index 75622be80c..be0c514a78 100644 --- a/dlls/wined3d/directx.c +++ b/dlls/wined3d/directx.c @@ -1723,6 +1723,12 @@ HRESULT CDECL wined3d_check_device_format(const struct wined3d *wined3d, UINT ad return WINED3DOK_NOMIPGEN; }
+ if ((check_format_id == WINED3DFMT_DF24) && !adapter->gl_info.supported[ARB_TEXTURE_GATHER]) + { + TRACE("No Support for Fetch4 disabling DF24 support.\n"); + return WINED3DERR_NOTAVAILABLE; + } + return WINED3D_OK; }
diff --git a/dlls/wined3d/utils.c b/dlls/wined3d/utils.c index 82111c8bb2..24fce97594 100644 --- a/dlls/wined3d/utils.c +++ b/dlls/wined3d/utils.c @@ -65,6 +65,8 @@ format_index_remap[] = {WINED3DFMT_R16, WINED3D_FORMAT_FOURCC_BASE + 20}, {WINED3DFMT_AL16, WINED3D_FORMAT_FOURCC_BASE + 21}, {WINED3DFMT_NV12, WINED3D_FORMAT_FOURCC_BASE + 22}, + {WINED3DFMT_DF16, WINED3D_FORMAT_FOURCC_BASE + 23}, + {WINED3DFMT_DF24, WINED3D_FORMAT_FOURCC_BASE + 24}, };
#define WINED3D_FORMAT_COUNT (WINED3D_FORMAT_FOURCC_BASE + ARRAY_SIZE(format_index_remap)) @@ -141,6 +143,8 @@ static const struct wined3d_format_channels formats[] = {WINED3DFMT_NVHU, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0}, {WINED3DFMT_NVHS, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0}, {WINED3DFMT_NULL, 8, 8, 8, 8, 0, 8, 16, 24, 4, 0, 0}, + {WINED3DFMT_DF16, 0, 0, 0, 0, 0, 0, 0, 0, 2, 16, 0}, + {WINED3DFMT_DF24, 0, 0, 0, 0, 0, 0, 0, 0, 4, 24, 0}, /* Unsure about them, could not find a Windows driver that supports them */ {WINED3DFMT_R16, 16, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0}, {WINED3DFMT_AL16, 0, 0, 0, 16, 0, 0, 0, 16, 4, 0, 0}, @@ -346,6 +350,8 @@ static const struct wined3d_format_base_flags format_base_flags[] = {WINED3DFMT_R16, WINED3DFMT_FLAG_ALLOW_FETCH4}, {WINED3DFMT_R32_FLOAT, WINED3DFMT_FLAG_ALLOW_FETCH4}, {WINED3DFMT_A8_UNORM, WINED3DFMT_FLAG_ALLOW_FETCH4}, + {WINED3DFMT_DF16, WINED3DFMT_FLAG_ALLOW_FETCH4}, + {WINED3DFMT_DF24, WINED3DFMT_FLAG_ALLOW_FETCH4}, };
static void rgb888_from_rgb565(WORD rgb565, BYTE *r, BYTE *g, BYTE *b) @@ -1894,6 +1900,25 @@ static const struct wined3d_format_texture_info format_texture_info[] = WINED3DFMT_FLAG_TEXTURE | WINED3DFMT_FLAG_POSTPIXELSHADER_BLENDING | WINED3DFMT_FLAG_FILTERING | WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL, ARB_FRAMEBUFFER_OBJECT, NULL}, + {WINED3DFMT_DF16, GL_DEPTH_COMPONENT, GL_DEPTH_COMPONENT, 0, + GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, 0, + WINED3DFMT_FLAG_DEPTH, + WINED3D_GL_EXT_NONE, NULL}, + {WINED3DFMT_DF16, GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT16, 0, + GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, 0, + WINED3DFMT_FLAG_TEXTURE | WINED3DFMT_FLAG_POSTPIXELSHADER_BLENDING | WINED3DFMT_FLAG_FILTERING + | WINED3DFMT_FLAG_DEPTH, + ARB_DEPTH_TEXTURE, NULL}, + {WINED3DFMT_DF24, GL_DEPTH24_STENCIL8_EXT, GL_DEPTH24_STENCIL8_EXT, 0, + GL_DEPTH_STENCIL_EXT, GL_UNSIGNED_INT_24_8_EXT, 0, + WINED3DFMT_FLAG_TEXTURE | WINED3DFMT_FLAG_POSTPIXELSHADER_BLENDING | WINED3DFMT_FLAG_FILTERING + | WINED3DFMT_FLAG_DEPTH, + EXT_PACKED_DEPTH_STENCIL, NULL}, + {WINED3DFMT_DF24, GL_DEPTH24_STENCIL8, GL_DEPTH24_STENCIL8, 0, + GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, 0, + WINED3DFMT_FLAG_TEXTURE | WINED3DFMT_FLAG_POSTPIXELSHADER_BLENDING | WINED3DFMT_FLAG_FILTERING + | WINED3DFMT_FLAG_DEPTH, + ARB_FRAMEBUFFER_OBJECT, NULL}, {WINED3DFMT_NULL, 0, 0, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, 0, WINED3DFMT_FLAG_TEXTURE | WINED3DFMT_FLAG_RENDERTARGET | WINED3DFMT_FLAG_FBO_ATTACHABLE, @@ -3533,6 +3558,9 @@ static void apply_format_fixups(struct wined3d_adapter *adapter, struct wined3d_ {WINED3DFMT_INTZ, "XXXX", FALSE, WINED3D_GL_EXT_NONE}, {WINED3DFMT_INTZ, "XYZW", FALSE, WINED3D_GL_LEGACY_CONTEXT},
+ {WINED3DFMT_DF16, "X001", FALSE, WINED3D_GL_EXT_NONE}, + {WINED3DFMT_DF24, "X001", FALSE, WINED3D_GL_EXT_NONE}, + {WINED3DFMT_L8_UNORM, "XXX1", FALSE, ARB_TEXTURE_RG}, };
@@ -4387,6 +4415,8 @@ const char *debug_d3dformat(enum wined3d_format_id format_id) FMT_TO_STR(WINED3DFMT_R16); FMT_TO_STR(WINED3DFMT_AL16); FMT_TO_STR(WINED3DFMT_NV12); + FMT_TO_STR(WINED3DFMT_DF16); + FMT_TO_STR(WINED3DFMT_DF24); #undef FMT_TO_STR default: { diff --git a/include/wine/wined3d.h b/include/wine/wined3d.h index e09e4e1fce..e6c3c1b802 100644 --- a/include/wine/wined3d.h +++ b/include/wine/wined3d.h @@ -271,6 +271,8 @@ enum wined3d_format_id WINED3DFMT_R16 = WINEMAKEFOURCC(' ','R','1','6'), WINED3DFMT_AL16 = WINEMAKEFOURCC('A','L','1','6'), WINED3DFMT_NV12 = WINEMAKEFOURCC('N','V','1','2'), + WINED3DFMT_DF16 = WINEMAKEFOURCC('D','F','1','6'), + WINED3DFMT_DF24 = WINEMAKEFOURCC('D','F','2','4'),
WINED3DFMT_FORCE_DWORD = 0xffffffff };
To my knowledge, DF24 doesn't have stencil (unlike INTZ). Using a buffer with stencil is suboptimal; I would advise using a format without one.
Axel
On 01/02/2019 01:56, Daniel Ansorregui wrote:
- This assumes FETCH4 is already supported by Wine, and checks for ARB_texture_gather support before exposing DF24.
Signed-off-by: Daniel Ansorregui [email protected]
dlls/wined3d/directx.c | 6 ++++++ dlls/wined3d/utils.c | 30 ++++++++++++++++++++++++++++++ include/wine/wined3d.h | 2 ++ 3 files changed, 38 insertions(+)
diff --git a/dlls/wined3d/directx.c b/dlls/wined3d/directx.c index 75622be80c..be0c514a78 100644 --- a/dlls/wined3d/directx.c +++ b/dlls/wined3d/directx.c @@ -1723,6 +1723,12 @@ HRESULT CDECL wined3d_check_device_format(const struct wined3d *wined3d, UINT ad return WINED3DOK_NOMIPGEN; }
- if ((check_format_id == WINED3DFMT_DF24) && !adapter->gl_info.supported[ARB_TEXTURE_GATHER])
- {
TRACE("No Support for Fetch4 disabling DF24 support.\n");
return WINED3DERR_NOTAVAILABLE;
- }
}return WINED3D_OK;
diff --git a/dlls/wined3d/utils.c b/dlls/wined3d/utils.c index 82111c8bb2..24fce97594 100644 --- a/dlls/wined3d/utils.c +++ b/dlls/wined3d/utils.c @@ -65,6 +65,8 @@ format_index_remap[] = {WINED3DFMT_R16, WINED3D_FORMAT_FOURCC_BASE + 20}, {WINED3DFMT_AL16, WINED3D_FORMAT_FOURCC_BASE + 21}, {WINED3DFMT_NV12, WINED3D_FORMAT_FOURCC_BASE + 22},
{WINED3DFMT_DF16, WINED3D_FORMAT_FOURCC_BASE + 23},
{WINED3DFMT_DF24, WINED3D_FORMAT_FOURCC_BASE + 24}, };
#define WINED3D_FORMAT_COUNT (WINED3D_FORMAT_FOURCC_BASE + ARRAY_SIZE(format_index_remap))
@@ -141,6 +143,8 @@ static const struct wined3d_format_channels formats[] = {WINED3DFMT_NVHU, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0}, {WINED3DFMT_NVHS, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0}, {WINED3DFMT_NULL, 8, 8, 8, 8, 0, 8, 16, 24, 4, 0, 0},
- {WINED3DFMT_DF16, 0, 0, 0, 0, 0, 0, 0, 0, 2, 16, 0},
- {WINED3DFMT_DF24, 0, 0, 0, 0, 0, 0, 0, 0, 4, 24, 0}, /* Unsure about them, could not find a Windows driver that supports them */ {WINED3DFMT_R16, 16, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0}, {WINED3DFMT_AL16, 0, 0, 0, 16, 0, 0, 0, 16, 4, 0, 0},
@@ -346,6 +350,8 @@ static const struct wined3d_format_base_flags format_base_flags[] = {WINED3DFMT_R16, WINED3DFMT_FLAG_ALLOW_FETCH4}, {WINED3DFMT_R32_FLOAT, WINED3DFMT_FLAG_ALLOW_FETCH4}, {WINED3DFMT_A8_UNORM, WINED3DFMT_FLAG_ALLOW_FETCH4},
{WINED3DFMT_DF16, WINED3DFMT_FLAG_ALLOW_FETCH4},
{WINED3DFMT_DF24, WINED3DFMT_FLAG_ALLOW_FETCH4}, };
static void rgb888_from_rgb565(WORD rgb565, BYTE *r, BYTE *g, BYTE *b)
@@ -1894,6 +1900,25 @@ static const struct wined3d_format_texture_info format_texture_info[] = WINED3DFMT_FLAG_TEXTURE | WINED3DFMT_FLAG_POSTPIXELSHADER_BLENDING | WINED3DFMT_FLAG_FILTERING | WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL, ARB_FRAMEBUFFER_OBJECT, NULL},
- {WINED3DFMT_DF16, GL_DEPTH_COMPONENT, GL_DEPTH_COMPONENT, 0,
GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, 0,
WINED3DFMT_FLAG_DEPTH,
WINED3D_GL_EXT_NONE, NULL},
- {WINED3DFMT_DF16, GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT16, 0,
GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, 0,
WINED3DFMT_FLAG_TEXTURE | WINED3DFMT_FLAG_POSTPIXELSHADER_BLENDING | WINED3DFMT_FLAG_FILTERING
| WINED3DFMT_FLAG_DEPTH,
ARB_DEPTH_TEXTURE, NULL},
- {WINED3DFMT_DF24, GL_DEPTH24_STENCIL8_EXT, GL_DEPTH24_STENCIL8_EXT, 0,
GL_DEPTH_STENCIL_EXT, GL_UNSIGNED_INT_24_8_EXT, 0,
WINED3DFMT_FLAG_TEXTURE | WINED3DFMT_FLAG_POSTPIXELSHADER_BLENDING | WINED3DFMT_FLAG_FILTERING
| WINED3DFMT_FLAG_DEPTH,
EXT_PACKED_DEPTH_STENCIL, NULL},
- {WINED3DFMT_DF24, GL_DEPTH24_STENCIL8, GL_DEPTH24_STENCIL8, 0,
GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, 0,
WINED3DFMT_FLAG_TEXTURE | WINED3DFMT_FLAG_POSTPIXELSHADER_BLENDING | WINED3DFMT_FLAG_FILTERING
| WINED3DFMT_FLAG_DEPTH,
ARB_FRAMEBUFFER_OBJECT, NULL}, {WINED3DFMT_NULL, 0, 0, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, 0, WINED3DFMT_FLAG_TEXTURE | WINED3DFMT_FLAG_RENDERTARGET | WINED3DFMT_FLAG_FBO_ATTACHABLE,
@@ -3533,6 +3558,9 @@ static void apply_format_fixups(struct wined3d_adapter *adapter, struct wined3d_ {WINED3DFMT_INTZ, "XXXX", FALSE, WINED3D_GL_EXT_NONE}, {WINED3DFMT_INTZ, "XYZW", FALSE, WINED3D_GL_LEGACY_CONTEXT},
{WINED3DFMT_DF16, "X001", FALSE, WINED3D_GL_EXT_NONE},
{WINED3DFMT_DF24, "X001", FALSE, WINED3D_GL_EXT_NONE},
{WINED3DFMT_L8_UNORM, "XXX1", FALSE, ARB_TEXTURE_RG}, };
@@ -4387,6 +4415,8 @@ const char *debug_d3dformat(enum wined3d_format_id format_id) FMT_TO_STR(WINED3DFMT_R16); FMT_TO_STR(WINED3DFMT_AL16); FMT_TO_STR(WINED3DFMT_NV12);
FMT_TO_STR(WINED3DFMT_DF16);
#undef FMT_TO_STR default: {FMT_TO_STR(WINED3DFMT_DF24);
diff --git a/include/wine/wined3d.h b/include/wine/wined3d.h index e09e4e1fce..e6c3c1b802 100644 --- a/include/wine/wined3d.h +++ b/include/wine/wined3d.h @@ -271,6 +271,8 @@ enum wined3d_format_id WINED3DFMT_R16 = WINEMAKEFOURCC(' ','R','1','6'), WINED3DFMT_AL16 = WINEMAKEFOURCC('A','L','1','6'), WINED3DFMT_NV12 = WINEMAKEFOURCC('N','V','1','2'),
WINED3DFMT_DF16 = WINEMAKEFOURCC('D','F','1','6'),
WINED3DFMT_DF24 = WINEMAKEFOURCC('D','F','2','4'),
WINED3DFMT_FORCE_DWORD = 0xffffffff };
I think INTZ+FETCH4 is an unlikely situation because INTZ is an Nvidia extension and FETCH4 an AMD one. I would be very surprised to find a game needing it. I guess some later GPUs might expose both.
Am 2. Februar 2019 21:46:27 MEZ schrieb Axel Davy [email protected]:
To my knowledge, DF24 doesn't have stencil (unlike INTZ). Using a buffer with stencil is suboptimal; I would advise using a format without one.
Axel
On 01/02/2019 01:56, Daniel Ansorregui wrote:
- This assumes FETCH4 is already supported by Wine, and checks for ARB_texture_gather support before exposing DF24.
Signed-off-by: Daniel Ansorregui [email protected]
dlls/wined3d/directx.c | 6 ++++++ dlls/wined3d/utils.c | 30 ++++++++++++++++++++++++++++++ include/wine/wined3d.h | 2 ++ 3 files changed, 38 insertions(+)
diff --git a/dlls/wined3d/directx.c b/dlls/wined3d/directx.c index 75622be80c..be0c514a78 100644 --- a/dlls/wined3d/directx.c +++ b/dlls/wined3d/directx.c @@ -1723,6 +1723,12 @@ HRESULT CDECL
wined3d_check_device_format(const struct wined3d *wined3d, UINT ad
return WINED3DOK_NOMIPGEN; }
- if ((check_format_id == WINED3DFMT_DF24) &&
!adapter->gl_info.supported[ARB_TEXTURE_GATHER])
- {
TRACE("No Support for Fetch4 disabling DF24 support.\n");
return WINED3DERR_NOTAVAILABLE;
- }
}return WINED3D_OK;
diff --git a/dlls/wined3d/utils.c b/dlls/wined3d/utils.c index 82111c8bb2..24fce97594 100644 --- a/dlls/wined3d/utils.c +++ b/dlls/wined3d/utils.c @@ -65,6 +65,8 @@ format_index_remap[] = {WINED3DFMT_R16, WINED3D_FORMAT_FOURCC_BASE + 20}, {WINED3DFMT_AL16, WINED3D_FORMAT_FOURCC_BASE + 21}, {WINED3DFMT_NV12, WINED3D_FORMAT_FOURCC_BASE + 22},
{WINED3DFMT_DF16, WINED3D_FORMAT_FOURCC_BASE + 23},
{WINED3DFMT_DF24, WINED3D_FORMAT_FOURCC_BASE + 24}, };
#define WINED3D_FORMAT_COUNT (WINED3D_FORMAT_FOURCC_BASE +
ARRAY_SIZE(format_index_remap))
@@ -141,6 +143,8 @@ static const struct wined3d_format_channels
formats[] =
{WINED3DFMT_NVHU, 0, 0, 0, 0, 0, 0,
0, 0, 2, 0, 0},
{WINED3DFMT_NVHS, 0, 0, 0, 0, 0, 0,
0, 0, 2, 0, 0},
{WINED3DFMT_NULL, 8, 8, 8, 8, 0, 8,
16, 24, 4, 0, 0},
- {WINED3DFMT_DF16, 0, 0, 0, 0, 0, 0,
0, 0, 2, 16, 0},
- {WINED3DFMT_DF24, 0, 0, 0, 0, 0, 0,
0, 0, 4, 24, 0},
/* Unsure about them, could not find a Windows driver that
supports them */
{WINED3DFMT_R16, 16, 0, 0, 0, 0, 0,
0, 0, 2, 0, 0},
{WINED3DFMT_AL16, 0, 0, 0, 16, 0, 0,
0, 16, 4, 0, 0},
@@ -346,6 +350,8 @@ static const struct wined3d_format_base_flags
format_base_flags[] =
{WINED3DFMT_R16,
WINED3DFMT_FLAG_ALLOW_FETCH4},
{WINED3DFMT_R32_FLOAT,
WINED3DFMT_FLAG_ALLOW_FETCH4},
{WINED3DFMT_A8_UNORM,
WINED3DFMT_FLAG_ALLOW_FETCH4},
{WINED3DFMT_DF16, WINED3DFMT_FLAG_ALLOW_FETCH4},
{WINED3DFMT_DF24, WINED3DFMT_FLAG_ALLOW_FETCH4}, };
static void rgb888_from_rgb565(WORD rgb565, BYTE *r, BYTE *g, BYTE
*b)
@@ -1894,6 +1900,25 @@ static const struct
wined3d_format_texture_info format_texture_info[] =
WINED3DFMT_FLAG_TEXTURE |
WINED3DFMT_FLAG_POSTPIXELSHADER_BLENDING | WINED3DFMT_FLAG_FILTERING
| WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL, ARB_FRAMEBUFFER_OBJECT, NULL},
- {WINED3DFMT_DF16, GL_DEPTH_COMPONENT,
GL_DEPTH_COMPONENT, 0,
GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT,
0,
WINED3DFMT_FLAG_DEPTH,
WINED3D_GL_EXT_NONE, NULL},
- {WINED3DFMT_DF16, GL_DEPTH_COMPONENT16,
GL_DEPTH_COMPONENT16, 0,
GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT,
0,
WINED3DFMT_FLAG_TEXTURE |
WINED3DFMT_FLAG_POSTPIXELSHADER_BLENDING | WINED3DFMT_FLAG_FILTERING
| WINED3DFMT_FLAG_DEPTH,
ARB_DEPTH_TEXTURE, NULL},
- {WINED3DFMT_DF24, GL_DEPTH24_STENCIL8_EXT,
GL_DEPTH24_STENCIL8_EXT, 0,
GL_DEPTH_STENCIL_EXT, GL_UNSIGNED_INT_24_8_EXT,
0,
WINED3DFMT_FLAG_TEXTURE |
WINED3DFMT_FLAG_POSTPIXELSHADER_BLENDING | WINED3DFMT_FLAG_FILTERING
| WINED3DFMT_FLAG_DEPTH,
EXT_PACKED_DEPTH_STENCIL, NULL},
- {WINED3DFMT_DF24, GL_DEPTH24_STENCIL8,
GL_DEPTH24_STENCIL8, 0,
GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8,
0,
WINED3DFMT_FLAG_TEXTURE |
WINED3DFMT_FLAG_POSTPIXELSHADER_BLENDING | WINED3DFMT_FLAG_FILTERING
| WINED3DFMT_FLAG_DEPTH,
ARB_FRAMEBUFFER_OBJECT, NULL}, {WINED3DFMT_NULL, 0,
0, 0,
GL_RGBA,
GL_UNSIGNED_INT_8_8_8_8_REV, 0,
WINED3DFMT_FLAG_TEXTURE | WINED3DFMT_FLAG_RENDERTARGET
| WINED3DFMT_FLAG_FBO_ATTACHABLE,
@@ -3533,6 +3558,9 @@ static void apply_format_fixups(struct
wined3d_adapter *adapter, struct wined3d_
{WINED3DFMT_INTZ, "XXXX", FALSE,
WINED3D_GL_EXT_NONE},
{WINED3DFMT_INTZ, "XYZW", FALSE,
WINED3D_GL_LEGACY_CONTEXT},
{WINED3DFMT_DF16, "X001", FALSE,
WINED3D_GL_EXT_NONE},
{WINED3DFMT_DF24, "X001", FALSE,
WINED3D_GL_EXT_NONE},
{WINED3DFMT_L8_UNORM, "XXX1", FALSE,
ARB_TEXTURE_RG},
};
@@ -4387,6 +4415,8 @@ const char *debug_d3dformat(enum
wined3d_format_id format_id)
FMT_TO_STR(WINED3DFMT_R16); FMT_TO_STR(WINED3DFMT_AL16); FMT_TO_STR(WINED3DFMT_NV12);
FMT_TO_STR(WINED3DFMT_DF16);
#undef FMT_TO_STR default: {FMT_TO_STR(WINED3DFMT_DF24);
diff --git a/include/wine/wined3d.h b/include/wine/wined3d.h index e09e4e1fce..e6c3c1b802 100644 --- a/include/wine/wined3d.h +++ b/include/wine/wined3d.h @@ -271,6 +271,8 @@ enum wined3d_format_id WINED3DFMT_R16 = WINEMAKEFOURCC('
','R','1','6'),
WINED3DFMT_AL16 =
WINEMAKEFOURCC('A','L','1','6'),
WINED3DFMT_NV12 =
WINEMAKEFOURCC('N','V','1','2'),
- WINED3DFMT_DF16 =
WINEMAKEFOURCC('D','F','1','6'),
- WINED3DFMT_DF24 =
WINEMAKEFOURCC('D','F','2','4'),
WINED3DFMT_FORCE_DWORD = 0xffffffff
};
Hi,
While running your changed tests on Windows, I think I found new failures. Being a bot and all I'm not very good at pattern recognition, so I might be wrong, but could you please double-check?
Full results can be found at: https://testbot.winehq.org/JobDetails.pl?Key=47055
Your paranoid android.
=== w1064 (32 bit report) ===
d3d9: visual.c:8750: Test failed: Got unexpected color 0x00007580 for quad 2 (different colors).
Hi,
Apparently 3DMark06 somehow believes that LINEAR filtering disables FETCH4. It sets MIPFILTER, MINFILTER and MAGFILTER to LINEAR, resets ADDRESSV, ADDRESSU and MIPMAPLODBIAS (to 1, 1 and 0 respectively), and then behaves as if FETCH4 were disabled (it later disables it definitively with the MIPMAPLODBIAS setting).
I don't see any obvious visual glitch, though. The texture sampled with FETCH4 which isn't meant to be sampled with it (looking at how the shader values are used) is a D3DFMT_L8 of size 1x1...
I think all this points out that one has to be very careful about FETCH4 corner cases.
Thus I would suggest adding more formats and checks to your tests, for example INTZ, ATI1 and ATI2.
For the part "Currently unimplemented on wine due to lack of GL functionality to cast 3D->2DArray", shouldn't the test use todo_wine rather than an isWin check?
Axel
On 01/02/2019 01:56, Daniel Ansorregui wrote:
- Test texld/texldp/texldd/texldb/texldl in PS and FFP
- Test supported/unsupported texture formats on FFP/texld/texldp
- Test 3dtextures (Disabled, each platform has different results)
- Test depth textures DF16/DF24 with fetch4 on PS (FFP is broken on windows)
Signed-off-by: Daniel Ansorregui [email protected]
dlls/d3d9/tests/visual.c | 644 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 644 insertions(+)
diff --git a/dlls/d3d9/tests/visual.c b/dlls/d3d9/tests/visual.c index c06acb77d4..028fc23078 100644 --- a/dlls/d3d9/tests/visual.c +++ b/dlls/d3d9/tests/visual.c @@ -15234,6 +15234,649 @@ done: DestroyWindow(window); }
+static void fetch4_test(void) +{
- static const DWORD vs_code[] =
- {
0xfffe0300, /* vs_3_0 */
0x0200001f, 0x80000000, 0x900f0000, /* dcl_position v0 */
0x0200001f, 0x80000005, 0x900f0001, /* dcl_texcoord v1 */
0x0200001f, 0x80000000, 0xe00f0000, /* dcl_position o0 */
0x0200001f, 0x80000005, 0xe00f0001, /* dcl_texcoord o1 */
0x02000001, 0xe00f0000, 0x90e40000, /* mov o0, v0 */
0x02000001, 0xe00f0001, 0x90e40001, /* mov o1, v1 */
0x0000ffff
- };
- static const DWORD ps_code_texld[] =
- {
/* Test texld */
0xffff0300, /* ps_3_0 */
0x0200001f, 0x80000005, 0x900f0000, /* dcl_texcoord v0 */
0x0200001f, 0x90000000, 0xa00f0800, /* dcl_2d s0 */
0x03000042, 0x800f0000, 0x90e40000, 0xa0e40800, /* texld r0, v0, s0 */
0x02000001, 0x800f0800, 0x80e40000, /* mov oC0, r0 */
0x0000ffff /* end */
- };
- static const DWORD ps_code_texldp[] =
- {
/* Test texldp : AMD and Wine uses the projection on Fetch4, Intel UHD 620 does not apply it */
0xffff0300, /* ps_3_0 */
0x0200001f, 0x80000005, 0x900f0000, /* dcl_texcoord v0 */
0x0200001f, 0x90000000, 0xa00f0800, /* dcl_2d s0 */
0x05000051, 0xa00f0000, 0x00000000, 0x00000000, 0x00000000, 0x40000000, /* def c0, 0.0, 0.0, 0.0, 2.0 */
0x02000001, 0x80030000, 0x90540000, /* mov r0.xy, v0.xyyy */
0x02000001, 0x800c0000, 0xa0fe0000, /* mov r0.zw, c0.zwww */
0x03010042, 0x800f0000, 0x80e40000, 0xa0e40800, /* texldp r0, r0, s0 */
0x02000001, 0x800f0800, 0x80e40000, /* mov oC0, r0 */
0x0000ffff, /* end */
- };
- static const DWORD ps_code_texldd[] =
- {
/* Test texldd : Fetch4 uses the same D3D state as LOD bias, therefore disables LOD.
* Sampling LOD gradient should be ignored. Same result as texld */
/* NOTE: Radeon HD 5700 driver 8.17.10.1404 disables Fetch4 on texldb */
0xffff0300, /* ps_3_0 */
0x0200001f, 0x80000005, 0x900f0000, /* dcl_texcoord v0 */
0x0200001f, 0x90000000, 0xa00f0800, /* dcl_2d s0 */
0x05000051, 0xa00f0000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, /* def c0, 0.5, 0.5, 0.5, 0.5 */
0x05000051, 0xa00f0001, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, /* def c0, 1.0, 1.0, 1.0, 1.0 */
0x02000001, 0x800f0002, 0xa0e40000, /* mov r2, c0 */
0x0500005d, 0x800f0000, 0x90e40000, 0xa0e40800, 0xa0e40000, 0x80e40002, /* texldd r0, v0, s0, c0, r2 */
0x02000001, 0x800f0800, 0x80e40000, /* mov oC0, r0 */
0x0000ffff, /* end */
- };
- static const DWORD ps_code_texldb[] =
- {
/* Test texldb : Fetch4 uses the same D3D state as LOD bias, therefore disables LOD.
* Same result as texld */
/* NOTE: Radeon HD 5700 driver 8.17.10.1404 disables Fetch4 on texldb */
0xffff0300, /* ps_3_0 */
0x0200001f, 0x80000005, 0x900f0000, /* dcl_texcoord v0 */
0x0200001f, 0x90000000, 0xa00f0800, /* dcl_2d s0 */
0x05000051, 0xa00f0000, 0x00000000, 0x00000000, 0x40a00000, 0x40a00000, /* def c0, 0.0, 0.0, 5.0, 5.0 */
0x03000002, 0x800f0000, 0x90e40000, 0xa0e40000, /* add r0, v0, c0 */
0x03020042, 0x800f0000, 0x80e40000, 0xa0e40800, /* texldb r0, r0, s0 */
0x02000001, 0x800f0800, 0x80e40000, /* mov oC0, r0 */
0x0000ffff, /* end */
- };
- static const DWORD ps_code_texldl[] =
- {
/* Test texldl : Fetch4 uses the same D3D state as LOD bias, therefore disables LOD.
* The explicit LOD level is then ignored. Same result as texld */
/* NOTE: Radeon HD 5700 driver 8.17.10.1404 disables Fetch4 on texldl */
0xffff0300, /* ps_3_0 */
0x0200001f, 0x80000005, 0x900f0000, /* dcl_texcoord v0 */
0x0200001f, 0x90000000, 0xa00f0800, /* dcl_2d s0 */
0x05000051, 0xa00f0000, 0x00000000, 0x00000000, 0x3f000000, 0x3f000000, /* def c0, 0.0, 0.0, 0.5, 0.5 */
0x03000002, 0x800f0000, 0x90e40000, 0xa0e40000, /* add r0, v0, c0 */
0x0300005f, 0x800f0000, 0x80e40000, 0xa0e40800, /* texldl r0, r0, s0 */
0x02000001, 0x800f0800, 0x80e40000, /* mov oC0, r0 */
0x0000ffff, /* end */
- };
- static const DWORD ps_code_3d[] =
- {
0xffff0300, /* ps_3_0 */
0x0200001f, 0x80000005, 0x900f0000, /* dcl_texcoord v0 */
0x0200001f, 0xa0000000, 0xa00f0800, /* dcl_volume s0 */
0x03000042, 0x800f0000, 0x90e40000, 0xa0e40800, /* texld r0, v0, s0 */
0x02000001, 0x800f0800, 0x80e40000, /* mov oC0, r0 */
0x0000ffff /* end */
- };
- static const struct
- {
struct vec3 position;
struct vec3 texcoord;
- }
- quad[] =
- {
/* Tilted on Z axis to get a depth gradient in the depth test */
/* NOTE: Using 0.55f-0.6f to avoid rounding errors on depth tests */
{{-1.0f, 1.0f, 1.0f}, {0.0f,0.0f,0.6f} },
{{ 1.0f, 1.0f, 0.0f}, {1.0f,0.0f,0.6f} },
{{-1.0f,-1.0f, 0.0f}, {0.0f,1.0f,0.6f} },
{{ 1.0f,-1.0f, 0.0f}, {1.0f,1.0f,0.6f} }
- };
- static const struct
- {
UINT x[4], y[4]; /* Matrix Sampling positions */
D3DCOLOR color_amd[16]; /* AMD original implementation swizzle with -0.5 texel coord */
D3DCOLOR color_intel[16]; /* Intel UHD 620 implementation swizzle with no texel coord correction */
        /* Wine follows the AMD implementation and treats the Intel results as an error.
         * However, the test accepts the Intel results as valid only when running on Windows. */
D3DCOLOR color_3d_fetch4_off[16];
D3DCOLOR color_fetch4_off[16];
- }
- expected_colors =
- {
{ 40, 200, 360, 520},
{ 30, 150, 270, 390},
/* AMD implementation - Wine implementation */
{0x131202f2, 0x1211f2f1, 0x1110f101, 0x10130102,
0x02f204f4, 0xf2f1f4f3, 0xf101f303, 0x01020304,
0x04f42322, 0xf4f32221, 0xf3032120, 0x03042023,
0x23221312, 0x22211211, 0x21201110, 0x20231013},
/* Intel UHD 620 implementation */
{0x23102013, 0x22132312, 0x21122211, 0x20112110,
0x13011002, 0x120213f2, 0x11f212f1, 0x10f11101,
0x02030104, 0xf20402f4, 0xf1f4f2f3, 0x01f3f103,
0x04200323, 0xf4230422, 0xf322f421, 0x0321f320},
/* Fetch4 off on 3D textures */
{0xff020202, 0xfff2f2f2, 0xfff1f1f1, 0xff010101,
0xff050505, 0xfff4f4f4, 0xfff3f3f3, 0xff030303,
0xff232323, 0xff222222, 0xff212121, 0xff202020,
0xff131313, 0xff121212, 0xff111111, 0xff101010},
/* Fetch4 off on 2D texture */
{0x13131313, 0x12121212, 0x11111111, 0x10101010,
0x02020202, 0xf2f2f2f2, 0xf1f1f1f1, 0x01010101,
0x04040404, 0xf4f4f4f4, 0xf3f3f3f3, 0x03030303,
0x23232323, 0x22222222, 0x21212121, 0x20202020}
- };
- static const DWORD fetch4_data[] = {0x10111213,
0x01f1f202,
0x03f3f404,
0x20212223};
- static struct
- {
IDirect3DVertexShader9 *vs;
IDirect3DPixelShader9 *ps;
const DWORD *ps_code;
const char *name;
BOOL projection; /* The results should be projected (zoomed by 2) */
BOOL allow_off; /* Do not enforce Fetch4 enabled on this one on Windows */
- }
- shaders[] =
- {
{NULL, NULL, NULL, "FFP", FALSE, FALSE},
{NULL, NULL, ps_code_texld, "texld", FALSE, FALSE},
{NULL, NULL, ps_code_texldp, "texldp", TRUE, FALSE},
{NULL, NULL, ps_code_texldd, "texldd", FALSE, TRUE},
{NULL, NULL, ps_code_texldb, "texldb", FALSE, TRUE},
{NULL, NULL, ps_code_texldl, "texldl", FALSE, TRUE},
- };
- static const struct
- {
D3DFORMAT format; /* The format of the texture */
DWORD data; /* The data we will write to the first line */
UINT x, y; /* Where we expect the color to be */
BOOL broken_wine; /* Do not check it on wine because is known ot be broken */
D3DCOLOR color_amd[3]; /* Wine results. Results on AMD swizzle + texture offset */
D3DCOLOR color_intel[3]; /* Results with intel UHD 620, intel swizzle + no texel offset */
- }
- format_tests[] =
- {
/* Enabled formats */
{D3DFMT_L8, 0xff804010, 360, 270, FALSE,
{0x00004010, 0x00004010, 0x10400000},
{0x40001000, 0x40001000, 0x40001000}
},
{D3DFMT_L16, 0xff804010, 360, 270, FALSE,
{0x0000ff40, 0x0000ff40, 0x40ff0000},
{0xff004000, 0xff004000, 0xff004000}
},
{D3DFMT_R16F, 0x38003c00, 360, 270, FALSE,
{0x000080ff, 0x000080ff, 0xff800000},
{0x8000ff00, 0x8000ff00, 0x8000ff00}
},
{D3DFMT_R32F, 0x3f000000, 360, 270, FALSE,
{0x00000080, 0x00000080, 0x80000000},
{0x00008000, 0x00008000, 0x00008000}
},
/* Disabled format on Intel, enabled on AMD, broken on wine
* since it is implemented with GL_ALPHA, and fetch4 will fetch RED value */
{D3DFMT_A8, 0xff804010, 360, 270, TRUE,
{0x00004010, 0x00004010, 0x10400000},
{0x00000000, 0x00000000, 0x00000000}
},
/* Disabled format */
{D3DFMT_A8R8G8B8, 0xff804010, 360, 270, FALSE,
{0x00000000, 0x00000000, 0xff804010},
{0x00000000, 0x00000000, 0xff804010}
},
- };
- static const struct
- {
D3DCOLOR color_off, color_amd, color_intel;
UINT x, y;
- }
- expected_depth[][4] =
- {
{
/* This is the expected result for shadow samplers */
{0xffffffff,0xffffffff,0xffffffff, 20, 15},
{0xffffffff,0xffffffff,0xffffffff,260, 15},
{0x00000000,0x00000000,0x00000000, 20,255},
{0x00000000,0x00000000,0x00000000,260,135},
},
{
/* This is the expected result with DF16 */
{0xfffe0000,0xfedfdfbf,0x202000ff, 20, 15},
{0xff9f0000,0x9f7f7f5f,0x00bf009f,260, 15},
{0xff800000,0x7f5f5f3f,0x9f000080, 20,255},
{0xff600000,0x5f3f3f1f,0x80809f60,260,135},
},
{
/* This is the expected result with DF24 */
{0xffff0000,0xffdfdfbf,0x202000ff, 20, 15},
{0xff9f0000,0x9f7f7f5f,0x00bf009f,260, 15},
{0xff800000,0x7f5f5f3f,0x9f000080, 20,255},
{0xff600000,0x5f3f3f1f,0x80809f60,260,135},
}
- };
- static const struct
- {
D3DFORMAT format;
const char *name;
UINT index;
- }
- depth_tests[] =
- {
{D3DFMT_D16_LOCKABLE, "D16_LOCKABLE", 0},
{D3DFMT_D32, "D32", 0},
{D3DFMT_D15S1, "D15S1", 0},
{D3DFMT_D24S8, "D24S8", 0},
{D3DFMT_D24X8, "D24X8", 0},
{D3DFMT_D24X4S4, "D24X4S4", 0},
{D3DFMT_D16, "D16", 0},
{D3DFMT_D32F_LOCKABLE, "D32F_LOCKABLE", 0},
{D3DFMT_D24FS8, "D24FS8", 0},
{MAKEFOURCC('D','F','1','6'), "DF16", 1},
{MAKEFOURCC('D','F','2','4'), "DF24", 2},
- };
- const BOOL isWin = strcmp(winetest_platform, "wine");
- IDirect3DSurface9 *original_ds, *original_rt, *rt;
- IDirect3DVolumeTexture9 *texture3D;
- IDirect3DPixelShader9 *ps_3d;
- struct surface_readback rb;
- IDirect3DVertexShader9 *vs;
- IDirect3DTexture9 *texture;
- IDirect3DDevice9 *device;
- D3DLOCKED_RECT lr;
- D3DLOCKED_BOX lb;
- IDirect3D9 *d3d;
- ULONG refcount;
- D3DCAPS9 caps;
- UINT i, j, k;
- HWND window;
- HRESULT hr;
- window = create_window();
- d3d = Direct3DCreate9(D3D_SDK_VERSION);
- ok(!!d3d, "Failed to create a D3D object.\n");
- if (FAILED(IDirect3D9_CheckDeviceFormat(d3d, D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL,
D3DFMT_X8R8G8B8, D3DUSAGE_DEPTHSTENCIL, D3DRTYPE_TEXTURE, MAKEFOURCC('D','F','2','4'))))
- {
skip("No DF24 support, skipping FETCH4 test.\n");
goto done;
- }
- if (!(device = create_device(d3d, window, window, TRUE)))
- {
skip("Failed to create a D3D device, skipping tests.\n");
goto done;
- }
- hr = IDirect3DDevice9_GetDeviceCaps(device, &caps);
- ok(SUCCEEDED(hr), "GetDeviceCaps failed, hr %#x.\n", hr);
- if (caps.PixelShaderVersion < D3DPS_VERSION(3, 0))
- {
skip("No pixel shader 3.0 support, skipping FETCH4 test.\n");
IDirect3DDevice9_Release(device);
goto done;
- }
- hr = IDirect3DDevice9_GetRenderTarget(device, 0, &original_rt);
- ok(SUCCEEDED(hr), "GetRenderTarget failed, hr %#x.\n", hr);
- hr = IDirect3DDevice9_GetDepthStencilSurface(device, &original_ds);
- ok(SUCCEEDED(hr), "GetDepthStencilSurface failed, hr %#x.\n", hr);
- hr = IDirect3DDevice9_CreateRenderTarget(device, 8, 8, D3DFMT_A8R8G8B8,
D3DMULTISAMPLE_NONE, 0, FALSE, &rt, NULL);
- ok(SUCCEEDED(hr), "CreateRenderTarget failed, hr %#x.\n", hr);
- /* Create our texture for FETCH4 shader testing */
- hr = IDirect3DDevice9_CreateTexture(device, 4, 4, 1, 0, D3DFMT_L8, D3DPOOL_MANAGED, &texture, NULL);
- ok(hr == D3D_OK, "Failed to create texture, hr %#x.\n", hr);
- hr = IDirect3DTexture9_LockRect(texture, 0, &lr, NULL, 0);
- ok(hr == D3D_OK, "Failed to lock texture, hr %#x.\n", hr);
- for (i = 0; i < ARRAY_SIZE(fetch4_data); ++i)
memcpy((BYTE *)lr.pBits + i*lr.Pitch, &fetch4_data[i], sizeof(fetch4_data[i]));
- hr = IDirect3DTexture9_UnlockRect(texture, 0);
- ok(hr == D3D_OK, "Failed to unlock texture, hr %#x.\n", hr);
- /* Create vertex shader */
- hr = IDirect3DDevice9_CreateVertexShader(device, vs_code, &vs);
- ok(hr == D3D_OK, "IDirect3DDevice9_CreateVertexShader returned %08x\n", hr);
- /* Prepare the pixel shaders */
- for (i = 0; i < ARRAY_SIZE(shaders); ++i)
- {
if (shaders[i].ps_code)
{
hr = IDirect3DDevice9_CreatePixelShader(device, shaders[i].ps_code, &shaders[i].ps);
ok(SUCCEEDED(hr), "CreatePixelShader failed, hr %#x.\n", hr);
/* Copy vertex shader pointer if a PS is present */
shaders[i].vs = vs;
}
- }
- hr = IDirect3DDevice9_CreatePixelShader(device, ps_code_3d, &ps_3d);
- ok(SUCCEEDED(hr), "CreatePixelShader failed, hr %#x.\n", hr);
- hr = IDirect3DDevice9_SetFVF(device, D3DFVF_XYZ | D3DFVF_TEX1 | D3DFVF_TEXCOORDSIZE3(0));
- ok(SUCCEEDED(hr), "SetFVF failed, hr %#x.\n", hr);
- hr = IDirect3DDevice9_SetRenderState(device, D3DRS_ZENABLE, D3DZB_TRUE);
- ok(SUCCEEDED(hr), "SetRenderState failed, hr %#x.\n", hr);
- hr = IDirect3DDevice9_SetRenderState(device, D3DRS_ZFUNC, D3DCMP_ALWAYS);
- ok(SUCCEEDED(hr), "SetRenderState failed, hr %#x.\n", hr);
- hr = IDirect3DDevice9_SetRenderState(device, D3DRS_ZWRITEENABLE, TRUE);
- ok(SUCCEEDED(hr), "SetRenderState failed, hr %#x.\n", hr);
- hr = IDirect3DDevice9_SetRenderState(device, D3DRS_LIGHTING, FALSE);
- ok(SUCCEEDED(hr), "SetRenderState failed, hr %#x.\n", hr);
- hr = IDirect3DDevice9_SetTexture(device, 0, (IDirect3DBaseTexture9 *)texture);
- ok(hr == D3D_OK, "Failed to set texture, hr %#x.\n", hr);
- /* According to the spec, FETCH4 is enabled when D3DSAMP_MIPMAPLODBIAS == GET4 and also
* D3DSAMP_MAGFILTER == D3DTEXF_POINT. But apparently only GET4 is needed for it to get active.
* However, AMD HW r500 samples always as if POINT (nearest filtering) is selected with FETCH4
* the driver later on corrected this by adding -0.5 texel coord. */
- hr = IDirect3DDevice9_SetSamplerState(device, 0, D3DSAMP_MIPMAPLODBIAS, MAKEFOURCC('G','E','T','4'));
- ok(SUCCEEDED(hr), "SetSamplerState failed, hr %#x.\n", hr);
- hr = IDirect3DDevice9_SetSamplerState(device, 0, D3DSAMP_MAGFILTER, D3DTEXF_POINT);
- ok(SUCCEEDED(hr), "SetSamplerState failed, hr %#x.\n", hr);
- hr = IDirect3DDevice9_SetSamplerState(device, 0, D3DSAMP_MINFILTER, D3DTEXF_POINT);
- ok(SUCCEEDED(hr), "SetSamplerState failed, hr %#x.\n", hr);
- hr = IDirect3DDevice9_SetSamplerState(device, 0, D3DSAMP_MIPFILTER, D3DTEXF_POINT);
- ok(SUCCEEDED(hr), "SetSamplerState failed, hr %#x.\n", hr);
- /***********************************************************************
* Tests for FFP/PS correctness when using L8 texture with fetch4. *
***********************************************************************/
- /* Render with fetch4 and test if we obtain proper results for all sampler FFP/PS instructions */
- for (i = 0; i < ARRAY_SIZE(shaders); ++i)
- {
hr = IDirect3DDevice9_SetVertexShader(device, shaders[i].vs);
ok(SUCCEEDED(hr), "Failed to set vertex shader, hr %#x.\n", hr);
hr = IDirect3DDevice9_SetPixelShader(device, shaders[i].ps);
ok(SUCCEEDED(hr), "SetPixelShader failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_Clear(device, 0, NULL, D3DCLEAR_TARGET | D3DCLEAR_ZBUFFER, 0, 0.0f, 0);
ok(SUCCEEDED(hr), "Clear failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_BeginScene(device);
ok(SUCCEEDED(hr), "BeginScene failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_DrawPrimitiveUP(device, D3DPT_TRIANGLESTRIP, 2, quad, sizeof(*quad));
ok(SUCCEEDED(hr), "DrawPrimitiveUP failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_EndScene(device);
ok(SUCCEEDED(hr), "EndScene failed, hr %#x.\n", hr);
get_rt_readback(original_rt, &rb);
for (j = 0; j < ARRAY_SIZE(expected_colors.color_amd); ++j)
{
UINT x = expected_colors.x[j % 4];
UINT y = expected_colors.y[j / 4];
D3DCOLOR color = get_readback_color(&rb, x, y);
D3DCOLOR color_amd = expected_colors.color_amd[shaders[i].projection ? (j/4/2*4 + (j%4)/2) : j];
D3DCOLOR color_intel = expected_colors.color_intel[j];
ok(color_match(color, color_amd, 1)
|| (isWin && (color_match(color, color_intel, 1) || shaders[i].allow_off)),
"Test %s Expected color 0x%08x at (%u, %u), got 0x%08x.\n", shaders[i].name,
color_amd, x, y, color);
}
release_surface_readback(&rb);
hr = IDirect3DDevice9_Present(device, NULL, NULL, NULL, NULL);
ok(SUCCEEDED(hr), "Present failed, hr %#x.\n", hr);
- }
- /***************************************************************************
* Tests for fetch4 enable/disable with different texture formats in FFP/PS. *
***************************************************************************/
- /* Create the textures to test FETCH4 does work/not work there as expected */
- for (i = 0; i < ARRAY_SIZE(format_tests); ++i)
- {
IDirect3DTexture9 *tex;
hr = IDirect3DDevice9_CreateTexture(device, 2, 2, 1, 0, format_tests[i].format,
D3DPOOL_MANAGED, &tex, NULL);
ok(hr == D3D_OK, "Failed to create texture, hr %#x.\n", hr);
hr = IDirect3DTexture9_LockRect(tex, 0, &lr, NULL, 0);
ok(hr == D3D_OK, "Failed to lock texture, hr %#x.\n", hr);
memcpy(lr.pBits, &format_tests[i].data, 4);
hr = IDirect3DTexture9_UnlockRect(tex, 0);
ok(hr == D3D_OK, "Failed to unlock texture, hr %#x.\n", hr);
hr = IDirect3DDevice9_SetTexture(device, 0, (IDirect3DBaseTexture9 *)tex);
ok(hr == D3D_OK, "Failed to set texture, hr %#x.\n", hr);
/* Test if FETCH4 is enabled/disabled when different textures are used with FFP/texld/texldp */
for (j = 0; j < ARRAY_SIZE(format_tests[i].color_amd); ++j)
{
hr = IDirect3DDevice9_SetVertexShader(device, shaders[j].vs);
ok(SUCCEEDED(hr), "Failed to set vertex shader, hr %#x.\n", hr);
hr = IDirect3DDevice9_SetPixelShader(device, shaders[j].ps);
ok(SUCCEEDED(hr), "SetPixelShader failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_Clear(device, 0, NULL, D3DCLEAR_TARGET | D3DCLEAR_ZBUFFER, 0, 0.0f, 0);
ok(SUCCEEDED(hr), "Clear failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_BeginScene(device);
ok(SUCCEEDED(hr), "BeginScene failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_DrawPrimitiveUP(device, D3DPT_TRIANGLESTRIP, 2, quad, sizeof(*quad));
ok(SUCCEEDED(hr), "DrawPrimitiveUP failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_EndScene(device);
ok(SUCCEEDED(hr), "EndScene failed, hr %#x.\n", hr);
get_rt_readback(original_rt, &rb);
D3DCOLOR color = get_readback_color(&rb, format_tests[i].x, format_tests[i].y);
D3DCOLOR color_amd = format_tests[i].color_amd[j];
D3DCOLOR color_intel = format_tests[i].color_intel[j];
todo_wine_if(format_tests[i].broken_wine) ok(color_match(color, color_amd, 1)
|| (isWin && color_match(color, color_intel, 1)),
"Test %d,%s expected color 0x%08x at (%u, %u), got 0x%08x.\n", i, shaders[j].name,
color_amd, format_tests[i].x, format_tests[i].y, color);
release_surface_readback(&rb);
hr = IDirect3DDevice9_Present(device, NULL, NULL, NULL, NULL);
ok(SUCCEEDED(hr), "Present failed, hr %#x.\n", hr);
}
IDirect3DTexture9_Release(tex);
- }
- /**************************************************
* Tests that fetch4 works with 3D textures. *
**************************************************/
- /* Create volume (3D) texture */
- IDirect3DDevice9_CreateVolumeTexture(device, 4, 4, 2, 1, 0, D3DFMT_L8, D3DPOOL_MANAGED, &texture3D, NULL );
- ok(hr == D3D_OK, "Failed to create volume texture, hr %#x.\n", hr);
- hr = IDirect3DVolumeTexture9_LockBox(texture3D, 0, &lb, NULL, 0);
- ok(hr == D3D_OK, "Failed to lock texture3D, hr %#x.\n", hr);
- for (i = 0; i < ARRAY_SIZE(fetch4_data); ++i)
- {
memcpy((BYTE *)lb.pBits + i*lb.RowPitch, &fetch4_data[i], sizeof(fetch4_data[i]));
/* Shift the lower level, to keep it different */
memcpy((BYTE *)lb.pBits + i*lb.RowPitch + lb.SlicePitch, &fetch4_data[(i+1)%4], sizeof(fetch4_data[i]));
- }
- hr = IDirect3DVolumeTexture9_UnlockBox(texture3D, 0);
- ok(hr == D3D_OK, "Failed to unlock texture3D, hr %#x.\n", hr);
- hr = IDirect3DDevice9_SetTexture(device, 0, (IDirect3DBaseTexture9 *)texture3D);
- ok(hr == D3D_OK, "Failed to set texture3D, hr %#x.\n", hr);
- /* Test FFP and texld with dcl_volume (ps_3d) */
- for (i = 0; i < 2; ++i)
- {
hr = IDirect3DDevice9_SetVertexShader(device, i ? vs : NULL);
ok(SUCCEEDED(hr), "Failed to set vertex shader, hr %#x.\n", hr);
hr = IDirect3DDevice9_SetPixelShader(device, i ? ps_3d : NULL);
ok(SUCCEEDED(hr), "SetPixelShader failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_Clear(device, 0, NULL, D3DCLEAR_TARGET | D3DCLEAR_ZBUFFER, 0, 0.0f, 0);
ok(SUCCEEDED(hr), "Clear failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_BeginScene(device);
ok(SUCCEEDED(hr), "BeginScene failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_DrawPrimitiveUP(device, D3DPT_TRIANGLESTRIP, 2, quad, sizeof(*quad));
ok(SUCCEEDED(hr), "DrawPrimitiveUP failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_EndScene(device);
ok(SUCCEEDED(hr), "EndScene failed, hr %#x.\n", hr);
get_rt_readback(original_rt, &rb);
for (j = 0; j < ARRAY_SIZE(expected_colors.color_amd); ++j)
{
UINT x = expected_colors.x[j % 4];
UINT y = expected_colors.y[j / 4];
D3DCOLOR color = get_readback_color(&rb, x, y);
D3DCOLOR color_amd = expected_colors.color_amd[j];
D3DCOLOR color_intel = expected_colors.color_intel[j];
D3DCOLOR color_off = expected_colors.color_3d_fetch4_off[j];
D3DCOLOR color_zround = expected_colors.color_amd[(j+4) % ARRAY_SIZE(expected_colors.color_amd)];
/* FIXME: Fetch4 on 3D textures have different results based on the vendor/driver
* - AMD "HD 5700" rounds to nearest "z" texel, and does fetch4 normally on .xy
* - AMD "R500" has fetch4 disabled
* - AMD "R580" has fetch4 enabled sampling at .xy0
* - Intel UHD 620 sample with fetch4 at .xy0
* Currently unimplemented on wine due to lack of GL functionality to cast 3D->2DArray
* Wine produces same results as if fetch4 is not enabled (which probably is better)
* Test will pass on windows if either one of the allowed results is returned */
if(isWin)
ok(color_match(color, color_zround, 2) || color_match(color, color_off, 2)
|| color_match(color, color_intel, 2) || color_match(color, color_amd, 2),
"Test 3D %s Expected colors 0x%08x || 0x%08x || 0x%08x || 0x%08x at (%u, %u), got 0x%08x.\n",
shaders[i].name, color_amd, color_zround, color_off, color_intel, x, y, color);
else
ok(color_match(color, color_off, 2),
"Test 3D %s Expected color 0x%08x at (%u, %u), got 0x%08x.\n", shaders[i].name,
color_off, x, y, color);
}
release_surface_readback(&rb);
hr = IDirect3DDevice9_Present(device, NULL, NULL, NULL, NULL);
ok(SUCCEEDED(hr), "Present failed, hr %#x.\n", hr);
- }
- /********************************************************
* Tests for fetch4 enable/disable with depth textures. *
********************************************************/
- for (i = 0; i < ARRAY_SIZE(depth_tests); ++i)
- {
D3DFORMAT format = depth_tests[i].format;
IDirect3DTexture9 *depth_texture;
IDirect3DSurface9 *ds;
if (FAILED(IDirect3D9_CheckDeviceFormat(d3d, D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL,
D3DFMT_X8R8G8B8, D3DUSAGE_DEPTHSTENCIL, D3DRTYPE_TEXTURE, format)))
continue;
hr = IDirect3DDevice9_CreateTexture(device, 8, 8, 1,
D3DUSAGE_DEPTHSTENCIL, format, D3DPOOL_DEFAULT, &depth_texture, NULL);
ok(SUCCEEDED(hr), "CreateTexture failed, hr %#x.\n", hr);
hr = IDirect3DTexture9_GetSurfaceLevel(depth_texture, 0, &ds);
ok(SUCCEEDED(hr), "GetSurfaceLevel failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_SetDepthStencilSurface(device, ds);
ok(SUCCEEDED(hr), "SetDepthStencilSurface failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_SetRenderTarget(device, 0, rt);
ok(SUCCEEDED(hr), "SetRenderTarget failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_SetVertexShader(device, NULL);
ok(SUCCEEDED(hr), "Failed to set vertex shader, hr %#x.\n", hr);
hr = IDirect3DDevice9_SetPixelShader(device, NULL);
ok(SUCCEEDED(hr), "SetPixelShader failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_SetTexture(device, 0, (IDirect3DBaseTexture9 *)texture);
ok(hr == D3D_OK, "Failed to set texture3D, hr %#x.\n", hr);
hr = IDirect3DDevice9_SetSamplerState(device, 0,
D3DSAMP_MIPMAPLODBIAS, MAKEFOURCC('G','E','T','1'));
ok(SUCCEEDED(hr), "SetSamplerState failed, hr %#x.\n", hr);
/* Setup the depth/stencil surface. */
hr = IDirect3DDevice9_Clear(device, 0, NULL, D3DCLEAR_ZBUFFER, 0, 0.0f, 0);
ok(SUCCEEDED(hr), "Clear failed, hr %#x.\n", hr);
/* Render to the depth surface */
hr = IDirect3DDevice9_BeginScene(device);
ok(SUCCEEDED(hr), "BeginScene failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_DrawPrimitiveUP(device, D3DPT_TRIANGLESTRIP, 2, quad, sizeof(*quad));
ok(SUCCEEDED(hr), "DrawPrimitiveUP failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_EndScene(device);
ok(SUCCEEDED(hr), "EndScene failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_SetDepthStencilSurface(device, NULL);
ok(SUCCEEDED(hr), "SetDepthStencilSurface failed, hr %#x.\n", hr);
IDirect3DSurface9_Release(ds);
hr = IDirect3DDevice9_SetRenderTarget(device, 0, original_rt);
ok(SUCCEEDED(hr), "SetRenderTarget failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_SetTexture(device, 0, (IDirect3DBaseTexture9 *)depth_texture);
ok(SUCCEEDED(hr), "SetTexture failed, hr %#x.\n", hr);
/* Set a shader for depth sampling, otherwise windows does not show anything */
hr = IDirect3DDevice9_SetVertexShader(device, vs);
ok(SUCCEEDED(hr), "Failed to set vertex shader, hr %#x.\n", hr);
hr = IDirect3DDevice9_SetPixelShader(device, shaders[1].ps); /* same as texld */
ok(SUCCEEDED(hr), "SetPixelShader failed, hr %#x.\n", hr);
for (j = 0; j < 2; ++j){
hr = IDirect3DDevice9_SetSamplerState(device, 0,
D3DSAMP_MIPMAPLODBIAS, MAKEFOURCC('G','E','T', j ? '4' : '1' ));
ok(SUCCEEDED(hr), "SetSamplerState failed, hr %#x.\n", hr);
/* Do the actual shadow mapping. */
hr = IDirect3DDevice9_BeginScene(device);
ok(SUCCEEDED(hr), "BeginScene failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_DrawPrimitiveUP(device, D3DPT_TRIANGLESTRIP, 2, quad, sizeof(*quad));
ok(SUCCEEDED(hr), "DrawPrimitiveUP failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_EndScene(device);
ok(SUCCEEDED(hr), "EndScene failed, hr %#x.\n", hr);
get_rt_readback(original_rt, &rb);
for (k = 0; k < ARRAY_SIZE(expected_depth[depth_tests[i].index]); ++k)
{
UINT x = expected_depth[depth_tests[i].index][k].x;
UINT y = expected_depth[depth_tests[i].index][k].y;
D3DCOLOR color_off = expected_depth[depth_tests[i].index][k].color_off;
D3DCOLOR color_amd = expected_depth[depth_tests[i].index][k].color_amd;
D3DCOLOR color_intel = expected_depth[depth_tests[i].index][k].color_intel;
D3DCOLOR color = get_readback_color(&rb, x, y);
/* When Fetch4 is OFF, ignore G and B channels on windows.
* Some implementations will copy R=G=B, some will set them to 0 */
if(j == 0)
ok((isWin && color_match(color & 0xffff0000, color_off & 0xffff0000, 2))
|| color_match(color, color_off, 2),
"Test OFF Expected color 0x%08x at (%u, %u) for format %s, got 0x%08x.\n",
color_off, x, y, depth_tests[i].name, color);
else
ok(color_match(color, color_amd, 2)
|| (isWin && color_match(color, color_intel, 2)),
"Test ON Expected colors 0x%08x || 0x%08x at (%u, %u) for format %s, got 0x%08x.\n",
color_amd, color_intel, x, y, depth_tests[i].name, color);
}
release_surface_readback(&rb);
hr = IDirect3DDevice9_Present(device, NULL, NULL, NULL, NULL);
ok(SUCCEEDED(hr), "Present failed, hr %#x.\n", hr);
}
hr = IDirect3DDevice9_SetTexture(device, 0, NULL);
ok(SUCCEEDED(hr), "SetTexture failed, hr %#x.\n", hr);
IDirect3DTexture9_Release(depth_texture);
- }
- IDirect3DVolumeTexture9_Release(texture3D);
- IDirect3DTexture9_Release(texture);
- for (i = 0; i < ARRAY_SIZE(shaders); ++i)
if (shaders[i].ps)
IDirect3DPixelShader9_Release(shaders[i].ps);
- IDirect3DPixelShader9_Release(ps_3d);
- IDirect3DVertexShader9_Release(vs);
- IDirect3DSurface9_Release(rt);
- IDirect3DSurface9_Release(original_ds);
- IDirect3DSurface9_Release(original_rt);
- refcount = IDirect3DDevice9_Release(device);
- ok(!refcount, "Device has %u references left.\n", refcount);
+done:
- IDirect3D9_Release(d3d);
- DestroyWindow(window);
+}
- static void shadow_test(void) { static const DWORD ps_code[] =
@@ -24660,6 +25303,7 @@ START_TEST(visual) depth_buffer2_test(); depth_blit_test(); intz_test();
- fetch4_test(); shadow_test(); fp_special_test(); depth_bounds_test();
Hi Axel,
Thanks for testing with 3DMark06. I tried to run it yesterday without luck; apparently there is some winetrick that I am missing...
Regarding the linear sampler state: I haven't seen any case where setting the filters to LINEAR disables fetch4. From the tests, it only slightly changes the results on AMD R500 by changing the 0.5 texel offset.
My guess is that all apps that use fetch4 take that into account and disable it explicitly with LODBIAS. Sampling a 1x1 L8 texture with fetch4 will return the same value on RGBA, which is similar to a normal sample.
Regarding 3D textures, all implementations behave differently. Since linear sampling of a 3D texture uses 8 texels, returning only 4 of them is quite ambiguous. Intel decides to sample at .xy0 and ignore the Z axis. Some AMD devices disable fetch4, while others sample at the nearest z coordinate.
In my opinion, we are hitting implementation bugs/corner cases. It is simpler to just keep fetch4 off there, especially since implementing it is quite difficult.
The 3D texture tests on Wine just check that fetch4 is off (no todo; it should always be off). On Windows, however, they check that one of the 4 possible cases seen on AMD and Intel is obtained. I am not sure how useful those tests are; I just left them there for reference.
Maybe we should remove those tests on Windows, or even add a todo for some of the 3D cases. But in that case, which implementation do we want to follow?
Best regards, Daniel
On Sat, 2 Feb 2019, 19:14 Axel Davy <[email protected]> wrote:
Hi,
Apparently 3DMark06 believes somehow that LINEAR disables FETCH4. It sets MIPFILTER, MINFILTER and MAGFILTER to LINEAR, and resets ADDRESSV, ADDRESSU and MIPMAPLODBIAS (1, 1, 0 respectively) then behaves as if FETCH4 was disabled (it later disables it definitively with the MIPMAPLODBIAS setting).
I don't see any obvious visual glitch, though. The texture sampled with FETCH4 which isn't meant to be sampled with it (looking at how the shader values are used) is a D3DFMT_L8 of size 1x1...
I think all this points out that one has to be very careful about FETCH4 corner cases.
Thus I would suggest adding more formats and checks to your code, such as INTZ, ATI1 and ATI2.
For the part "Currently unimplemented on wine due to lack of GL functionality to cast 3D->2DArray", shouldn't the test use todo_wine rather than an isWin check?
Axel
On 01/02/2019 01:56, Daniel Ansorregui wrote:
- Test texld/texldp/texldd/texldb/texldl in PS and FFP
- Test supported/unsupported texture formats on FFP/texld/texldp
- Test 3dtextures (Disabled, each platform has different results)
- Test depth textures DF16/DF24 with fetch4 on PS (FFP is broken on
windows)
Signed-off-by: Daniel Ansorregui [email protected]
dlls/d3d9/tests/visual.c | 644 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 644 insertions(+)
diff --git a/dlls/d3d9/tests/visual.c b/dlls/d3d9/tests/visual.c index c06acb77d4..028fc23078 100644 --- a/dlls/d3d9/tests/visual.c +++ b/dlls/d3d9/tests/visual.c @@ -15234,6 +15234,649 @@ done: DestroyWindow(window); }
+static void fetch4_test(void) +{
- static const DWORD vs_code[] =
- {
0xfffe0300, /*
vs_3_0 */
0x0200001f, 0x80000000, 0x900f0000, /*
dcl_position v0 */
0x0200001f, 0x80000005, 0x900f0001, /*
dcl_texcoord v1 */
0x0200001f, 0x80000000, 0xe00f0000, /*
dcl_position o0 */
0x0200001f, 0x80000005, 0xe00f0001, /*
dcl_texcoord o1 */
0x02000001, 0xe00f0000, 0x90e40000, /*
mov o0, v0 */
0x02000001, 0xe00f0001, 0x90e40001, /*
mov o1, v1 */
0x0000ffff
- };
- static const DWORD ps_code_texld[] =
- {
/* Test texld */
0xffff0300,
/* ps_3_0 */
0x0200001f, 0x80000005, 0x900f0000,
/* dcl_texcoord v0 */
0x0200001f, 0x90000000, 0xa00f0800,
/* dcl_2d s0 */
0x03000042, 0x800f0000, 0x90e40000, 0xa0e40800,
/* texld r0, v0, s0 */
0x02000001, 0x800f0800, 0x80e40000,
/* mov oC0, r0 */
0x0000ffff
/* end */
- };
- static const DWORD ps_code_texldp[] =
- {
/* Test texldp : AMD and Wine uses the projection on Fetch4,
Intel UHD 620 does not apply it */
0xffff0300,
/* ps_3_0 */
0x0200001f, 0x80000005, 0x900f0000,
/* dcl_texcoord v0 */
0x0200001f, 0x90000000, 0xa00f0800,
/* dcl_2d s0 */
0x05000051, 0xa00f0000, 0x00000000, 0x00000000, 0x00000000,
0x40000000, /* def c0, 0.0, 0.0, 0.0, 2.0 */
0x02000001, 0x80030000, 0x90540000,
/* mov r0.xy, v0.xyyy */
0x02000001, 0x800c0000, 0xa0fe0000,
/* mov r0.zw, c0.zwww */
0x03010042, 0x800f0000, 0x80e40000, 0xa0e40800,
/* texldp r0, r0, s0 */
0x02000001, 0x800f0800, 0x80e40000,
/* mov oC0, r0 */
0x0000ffff,
/* end */
- };
- static const DWORD ps_code_texldd[] =
- {
/* Test texldd : Fetch4 uses the same D3D state as LOD bias,
therefore disables LOD.
* Sampling LOD gradient should be ignored. Same
result as texld */
/* NOTE: Radeon HD 5700 driver 8.17.10.1404 disables Fetch4 on
texldb */
0xffff0300,
/* ps_3_0 */
0x0200001f, 0x80000005, 0x900f0000,
/* dcl_texcoord v0 */
0x0200001f, 0x90000000, 0xa00f0800,
/* dcl_2d s0 */
0x05000051, 0xa00f0000, 0x3f000000, 0x3f000000, 0x3f000000,
0x3f000000, /* def c0, 0.5, 0.5, 0.5, 0.5 */
0x05000051, 0xa00f0001, 0x3f800000, 0x3f800000, 0x3f800000,
0x3f800000, /* def c0, 1.0, 1.0, 1.0, 1.0 */
0x02000001, 0x800f0002, 0xa0e40000,
/* mov r2, c0 */
0x0500005d, 0x800f0000, 0x90e40000, 0xa0e40800, 0xa0e40000,
0x80e40002, /* texldd r0, v0, s0, c0, r2 */
0x02000001, 0x800f0800, 0x80e40000,
/* mov oC0, r0 */
0x0000ffff,
/* end */
- };
- static const DWORD ps_code_texldb[] =
- {
/* Test texldb : Fetch4 uses the same D3D state as LOD bias,
therefore disables LOD.
* Same result as texld */
/* NOTE: Radeon HD 5700 driver 8.17.10.1404 disables Fetch4 on
texldb */
0xffff0300,
/* ps_3_0 */
0x0200001f, 0x80000005, 0x900f0000,
/* dcl_texcoord v0 */
0x0200001f, 0x90000000, 0xa00f0800,
/* dcl_2d s0 */
0x05000051, 0xa00f0000, 0x00000000, 0x00000000, 0x40a00000,
0x40a00000, /* def c0, 0.0, 0.0, 5.0, 5.0 */
0x03000002, 0x800f0000, 0x90e40000, 0xa0e40000,
/* add r0, v0, c0 */
0x03020042, 0x800f0000, 0x80e40000, 0xa0e40800,
/* texldb r0, r0, s0 */
0x02000001, 0x800f0800, 0x80e40000,
/* mov oC0, r0 */
0x0000ffff,
/* end */
- };
- static const DWORD ps_code_texldl[] =
- {
/* Test texldl : Fetch4 uses the same D3D state as LOD bias,
therefore disables LOD.
* The explicit LOD level is then ignored. Same
result as texld */
/* NOTE: Radeon HD 5700 driver 8.17.10.1404 disables Fetch4 on
texldl */
0xffff0300,
/* ps_3_0 */
0x0200001f, 0x80000005, 0x900f0000,
/* dcl_texcoord v0 */
0x0200001f, 0x90000000, 0xa00f0800,
/* dcl_2d s0 */
0x05000051, 0xa00f0000, 0x00000000, 0x00000000, 0x3f000000,
0x3f000000, /* def c0, 0.0, 0.0, 0.5, 0.5 */
0x03000002, 0x800f0000, 0x90e40000, 0xa0e40000,
/* add r0, v0, c0 */
0x0300005f, 0x800f0000, 0x80e40000, 0xa0e40800,
/* texldl r0, r0, s0 */
0x02000001, 0x800f0800, 0x80e40000,
/* mov oC0, r0 */
0x0000ffff,
/* end */
- };
- static const DWORD ps_code_3d[] =
- {
0xffff0300,
/* ps_3_0 */
0x0200001f, 0x80000005, 0x900f0000,
/* dcl_texcoord v0 */
0x0200001f, 0xa0000000, 0xa00f0800,
/* dcl_volume s0 */
0x03000042, 0x800f0000, 0x90e40000, 0xa0e40800,
/* texld r0, v0, s0 */
0x02000001, 0x800f0800, 0x80e40000,
/* mov oC0, r0 */
0x0000ffff
/* end */
- };
- static const struct
- {
struct vec3 position;
struct vec3 texcoord;
- }
- quad[] =
- {
/* Tilted on Z axis to get a depth gradient in the depth test */
/* NOTE: Using 0.55f-0.6f to avoid rounding errors on depth
tests */
{{-1.0f, 1.0f, 1.0f}, {0.0f,0.0f,0.6f} },
{{ 1.0f, 1.0f, 0.0f}, {1.0f,0.0f,0.6f} },
{{-1.0f,-1.0f, 0.0f}, {0.0f,1.0f,0.6f} },
{{ 1.0f,-1.0f, 0.0f}, {1.0f,1.0f,0.6f} }
- };
- static const struct
- {
UINT x[4], y[4]; /* Matrix Sampling positions */
D3DCOLOR color_amd[16]; /* AMD original implementation
swizzle with -0.5 texel coord */
D3DCOLOR color_intel[16]; /* Intel UHD 620 implementation
swizzle with no texel coord correction */
/* Wine follows the AMD immplementation, and consider an error
the Intel one results
* However, the test will accept as valid the intel only if
running on windows */
D3DCOLOR color_3d_fetch4_off[16];
D3DCOLOR color_fetch4_off[16];
- }
- expected_colors =
- {
{ 40, 200, 360, 520},
{ 30, 150, 270, 390},
/* AMD implementation - Wine implementation */
{0x131202f2, 0x1211f2f1, 0x1110f101, 0x10130102,
0x02f204f4, 0xf2f1f4f3, 0xf101f303, 0x01020304,
0x04f42322, 0xf4f32221, 0xf3032120, 0x03042023,
0x23221312, 0x22211211, 0x21201110, 0x20231013},
/* Intel UHD 620 implementation */
{0x23102013, 0x22132312, 0x21122211, 0x20112110,
0x13011002, 0x120213f2, 0x11f212f1, 0x10f11101,
0x02030104, 0xf20402f4, 0xf1f4f2f3, 0x01f3f103,
0x04200323, 0xf4230422, 0xf322f421, 0x0321f320},
/* Fetch4 off on 3D textures */
{0xff020202, 0xfff2f2f2, 0xfff1f1f1, 0xff010101,
0xff050505, 0xfff4f4f4, 0xfff3f3f3, 0xff030303,
0xff232323, 0xff222222, 0xff212121, 0xff202020,
0xff131313, 0xff121212, 0xff111111, 0xff101010},
/* Fetch4 off on 2D texture */
{0x13131313, 0x12121212, 0x11111111, 0x10101010,
0x02020202, 0xf2f2f2f2, 0xf1f1f1f1, 0x01010101,
0x04040404, 0xf4f4f4f4, 0xf3f3f3f3, 0x03030303,
0x23232323, 0x22222222, 0x21212121, 0x20202020}
- };
- static const DWORD fetch4_data[] = {0x10111213,
0x01f1f202,
0x03f3f404,
0x20212223};
- static struct
- {
IDirect3DVertexShader9 *vs;
IDirect3DPixelShader9 *ps;
const DWORD *ps_code;
const char *name;
BOOL projection; /* The results should be projected
(zoomed by 2) */
BOOL allow_off; /* Do not enforce Fetch4 enabled on
this one on Windows */
- }
- shaders[] =
- {
{NULL, NULL, NULL, "FFP", FALSE, FALSE},
{NULL, NULL, ps_code_texld, "texld", FALSE, FALSE},
{NULL, NULL, ps_code_texldp, "texldp", TRUE, FALSE},
{NULL, NULL, ps_code_texldd, "texldd", FALSE, TRUE},
{NULL, NULL, ps_code_texldb, "texldb", FALSE, TRUE},
{NULL, NULL, ps_code_texldl, "texldl", FALSE, TRUE},
- };
- static const struct
- {
D3DFORMAT format; /* The format of the texture */
DWORD data; /* The data we will write to the
first line */
UINT x, y; /* Where we expect the color to be
*/
BOOL broken_wine; /* Do not check it on wine because
is known ot be broken */
D3DCOLOR color_amd[3]; /* Wine results. Results on AMD
swizzle + texture offset */
D3DCOLOR color_intel[3]; /* Results with intel UHD 620,
intel swizzle + no texel offset */
- }
- format_tests[] =
- {
/* Enabled formats */
{D3DFMT_L8, 0xff804010, 360, 270, FALSE,
{0x00004010, 0x00004010, 0x10400000},
{0x40001000, 0x40001000, 0x40001000}
},
{D3DFMT_L16, 0xff804010, 360, 270, FALSE,
{0x0000ff40, 0x0000ff40, 0x40ff0000},
{0xff004000, 0xff004000, 0xff004000}
},
{D3DFMT_R16F, 0x38003c00, 360, 270, FALSE,
{0x000080ff, 0x000080ff, 0xff800000},
{0x8000ff00, 0x8000ff00, 0x8000ff00}
},
{D3DFMT_R32F, 0x3f000000, 360, 270, FALSE,
{0x00000080, 0x00000080, 0x80000000},
{0x00008000, 0x00008000, 0x00008000}
},
/* Disabled format on Intel, enabled on AMD, broken on wine
* since it is implemented with GL_ALPHA, and fetch4 will fetch
RED value */
{D3DFMT_A8, 0xff804010, 360, 270, TRUE,
{0x00004010, 0x00004010, 0x10400000},
{0x00000000, 0x00000000, 0x00000000}
},
/* Disabled format */
{D3DFMT_A8R8G8B8, 0xff804010, 360, 270, FALSE,
{0x00000000, 0x00000000, 0xff804010},
{0x00000000, 0x00000000, 0xff804010}
},
- };
- static const struct
- {
D3DCOLOR color_off, color_amd, color_intel;
UINT x, y;
- }
- expected_depth[][4] =
- {
{
/* This is the expected result for shadow samplers */
{0xffffffff,0xffffffff,0xffffffff, 20, 15},
{0xffffffff,0xffffffff,0xffffffff,260, 15},
{0x00000000,0x00000000,0x00000000, 20,255},
{0x00000000,0x00000000,0x00000000,260,135},
},
{
/* This is the expected result with DF16 */
{0xfffe0000,0xfedfdfbf,0x202000ff, 20, 15},
{0xff9f0000,0x9f7f7f5f,0x00bf009f,260, 15},
{0xff800000,0x7f5f5f3f,0x9f000080, 20,255},
{0xff600000,0x5f3f3f1f,0x80809f60,260,135},
},
{
/* This is the expected result with DF24 */
{0xffff0000,0xffdfdfbf,0x202000ff, 20, 15},
{0xff9f0000,0x9f7f7f5f,0x00bf009f,260, 15},
{0xff800000,0x7f5f5f3f,0x9f000080, 20,255},
{0xff600000,0x5f3f3f1f,0x80809f60,260,135},
}
- };
- static const struct
- {
D3DFORMAT format;
const char *name;
UINT index;
- }
- depth_tests[] =
- {
{D3DFMT_D16_LOCKABLE, "D16_LOCKABLE", 0},
{D3DFMT_D32, "D32", 0},
{D3DFMT_D15S1, "D15S1", 0},
{D3DFMT_D24S8, "D24S8", 0},
{D3DFMT_D24X8, "D24X8", 0},
{D3DFMT_D24X4S4, "D24X4S4", 0},
{D3DFMT_D16, "D16", 0},
{D3DFMT_D32F_LOCKABLE, "D32F_LOCKABLE", 0},
{D3DFMT_D24FS8, "D24FS8", 0},
{MAKEFOURCC('D','F','1','6'), "DF16", 1},
{MAKEFOURCC('D','F','2','4'), "DF24", 2},
- };
- const BOOL isWin = strcmp(winetest_platform, "wine");
- IDirect3DSurface9 *original_ds, *original_rt, *rt;
- IDirect3DVolumeTexture9 *texture3D;
- IDirect3DPixelShader9 *ps_3d;
- struct surface_readback rb;
- IDirect3DVertexShader9 *vs;
- IDirect3DTexture9 *texture;
- IDirect3DDevice9 *device;
- D3DLOCKED_RECT lr;
- D3DLOCKED_BOX lb;
- IDirect3D9 *d3d;
- ULONG refcount;
- D3DCAPS9 caps;
- UINT i, j, k;
- HWND window;
- HRESULT hr;
- window = create_window();
- d3d = Direct3DCreate9(D3D_SDK_VERSION);
- ok(!!d3d, "Failed to create a D3D object.\n");
- if (FAILED(IDirect3D9_CheckDeviceFormat(d3d, D3DADAPTER_DEFAULT,
D3DDEVTYPE_HAL,
D3DFMT_X8R8G8B8, D3DUSAGE_DEPTHSTENCIL, D3DRTYPE_TEXTURE,
MAKEFOURCC('D','F','2','4'))))
- {
skip("No DF24 support, skipping FETCH4 test.\n");
goto done;
- }
- if (!(device = create_device(d3d, window, window, TRUE)))
- {
skip("Failed to create a D3D device, skipping tests.\n");
goto done;
- }
- hr = IDirect3DDevice9_GetDeviceCaps(device, &caps);
- ok(SUCCEEDED(hr), "GetDeviceCaps failed, hr %#x.\n", hr);
- if (caps.PixelShaderVersion < D3DPS_VERSION(3, 0))
- {
skip("No pixel shader 3.0 support, skipping FETCH4 test.\n");
IDirect3DDevice9_Release(device);
goto done;
- }
- hr = IDirect3DDevice9_GetRenderTarget(device, 0, &original_rt);
- ok(SUCCEEDED(hr), "GetRenderTarget failed, hr %#x.\n", hr);
- hr = IDirect3DDevice9_GetDepthStencilSurface(device, &original_ds);
- ok(SUCCEEDED(hr), "GetDepthStencilSurface failed, hr %#x.\n", hr);
- hr = IDirect3DDevice9_CreateRenderTarget(device, 8, 8,
D3DFMT_A8R8G8B8,
D3DMULTISAMPLE_NONE, 0, FALSE, &rt, NULL);
- ok(SUCCEEDED(hr), "CreateRenderTarget failed, hr %#x.\n", hr);
- /* Create our texture for FETCH4 shader testing */
- hr = IDirect3DDevice9_CreateTexture(device, 4, 4, 1, 0, D3DFMT_L8,
D3DPOOL_MANAGED, &texture, NULL);
- ok(hr == D3D_OK, "Failed to create texture, hr %#x.\n", hr);
- hr = IDirect3DTexture9_LockRect(texture, 0, &lr, NULL, 0);
- ok(hr == D3D_OK, "Failed to lock texture, hr %#x.\n", hr);
- for (i = 0; i < ARRAY_SIZE(fetch4_data); ++i)
memcpy((BYTE *)lr.pBits + i*lr.Pitch, &fetch4_data[i],
sizeof(fetch4_data[i]));
- hr = IDirect3DTexture9_UnlockRect(texture, 0);
- ok(hr == D3D_OK, "Failed to unlock texture, hr %#x.\n", hr);
- /* Create vertex shader */
- hr = IDirect3DDevice9_CreateVertexShader(device, vs_code, &vs);
- ok(hr == D3D_OK, "IDirect3DDevice9_CreateVertexShader returned
%08x\n", hr);
- /* Prepare the pixel shaders */
- for (i = 0; i < ARRAY_SIZE(shaders); ++i)
- {
if (shaders[i].ps_code)
{
hr = IDirect3DDevice9_CreatePixelShader(device,
shaders[i].ps_code, &shaders[i].ps);
ok(SUCCEEDED(hr), "CreatePixelShader failed, hr %#x.\n",
hr);
/* Copy vertex shader pointer if a PS is present */
shaders[i].vs = vs;
}
- }
- hr = IDirect3DDevice9_CreatePixelShader(device, ps_code_3d, &ps_3d);
- ok(SUCCEEDED(hr), "CreatePixelShader failed, hr %#x.\n", hr);
- hr = IDirect3DDevice9_SetFVF(device, D3DFVF_XYZ | D3DFVF_TEX1 |
D3DFVF_TEXCOORDSIZE3(0));
- ok(SUCCEEDED(hr), "SetFVF failed, hr %#x.\n", hr);
- hr = IDirect3DDevice9_SetRenderState(device, D3DRS_ZENABLE,
D3DZB_TRUE);
- ok(SUCCEEDED(hr), "SetRenderState failed, hr %#x.\n", hr);
- hr = IDirect3DDevice9_SetRenderState(device, D3DRS_ZFUNC,
D3DCMP_ALWAYS);
- ok(SUCCEEDED(hr), "SetRenderState failed, hr %#x.\n", hr);
- hr = IDirect3DDevice9_SetRenderState(device, D3DRS_ZWRITEENABLE,
TRUE);
- ok(SUCCEEDED(hr), "SetRenderState failed, hr %#x.\n", hr);
- hr = IDirect3DDevice9_SetRenderState(device, D3DRS_LIGHTING, FALSE);
- ok(SUCCEEDED(hr), "SetRenderState failed, hr %#x.\n", hr);
- hr = IDirect3DDevice9_SetTexture(device, 0, (IDirect3DBaseTexture9
*)texture);
- ok(hr == D3D_OK, "Failed to set texture, hr %#x.\n", hr);
- /* According to the spec, FETCH4 is enabled when
D3DSAMP_MIPMAPLODBIAS == GET4 and also
* D3DSAMP_MAGFILTER == D3DTEXF_POINT. But apparently only GET4 is
needed for it to get active.
* However, AMD HW r500 samples always as if POINT (nearest
filtering) is selected with FETCH4
* the driver later on corrected this by adding -0.5 texel coord. */
- hr = IDirect3DDevice9_SetSamplerState(device, 0,
D3DSAMP_MIPMAPLODBIAS, MAKEFOURCC('G','E','T','4'));
- ok(SUCCEEDED(hr), "SetSamplerState failed, hr %#x.\n", hr);
- hr = IDirect3DDevice9_SetSamplerState(device, 0, D3DSAMP_MAGFILTER,
D3DTEXF_POINT);
- ok(SUCCEEDED(hr), "SetSamplerState failed, hr %#x.\n", hr);
- hr = IDirect3DDevice9_SetSamplerState(device, 0, D3DSAMP_MINFILTER,
D3DTEXF_POINT);
- ok(SUCCEEDED(hr), "SetSamplerState failed, hr %#x.\n", hr);
- hr = IDirect3DDevice9_SetSamplerState(device, 0, D3DSAMP_MIPFILTER,
D3DTEXF_POINT);
- ok(SUCCEEDED(hr), "SetSamplerState failed, hr %#x.\n", hr);
/***********************************************************************
* Tests for FFP/PS correctness when using L8 texture with fetch4. *
***********************************************************************/
- /* Render with fetch4 and test if we obtain proper results for all
sampler FFP/PS instructions */
- for (i = 0; i < ARRAY_SIZE(shaders); ++i)
- {
hr = IDirect3DDevice9_SetVertexShader(device, shaders[i].vs);
ok(SUCCEEDED(hr), "Failed to set vertex shader, hr %#x.\n", hr);
hr = IDirect3DDevice9_SetPixelShader(device, shaders[i].ps);
ok(SUCCEEDED(hr), "SetPixelShader failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_Clear(device, 0, NULL, D3DCLEAR_TARGET |
D3DCLEAR_ZBUFFER, 0, 0.0f, 0);
ok(SUCCEEDED(hr), "Clear failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_BeginScene(device);
ok(SUCCEEDED(hr), "BeginScene failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_DrawPrimitiveUP(device,
D3DPT_TRIANGLESTRIP, 2, quad, sizeof(*quad));
ok(SUCCEEDED(hr), "DrawPrimitiveUP failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_EndScene(device);
ok(SUCCEEDED(hr), "EndScene failed, hr %#x.\n", hr);
get_rt_readback(original_rt, &rb);
for (j = 0; j < ARRAY_SIZE(expected_colors.color_amd); ++j)
{
UINT x = expected_colors.x[j % 4];
UINT y = expected_colors.y[j / 4];
D3DCOLOR color = get_readback_color(&rb, x, y);
D3DCOLOR color_amd =
expected_colors.color_amd[shaders[i].projection ? (j/4/2*4 + (j%4)/2) : j];
D3DCOLOR color_intel = expected_colors.color_intel[j];
ok(color_match(color, color_amd, 1)
|| (isWin && (color_match(color, color_intel, 1) ||
shaders[i].allow_off)),
"Test %s Expected color 0x%08x at (%u, %u), got
0x%08x.\n", shaders[i].name,
color_amd, x, y, color);
}
release_surface_readback(&rb);
hr = IDirect3DDevice9_Present(device, NULL, NULL, NULL, NULL);
ok(SUCCEEDED(hr), "Present failed, hr %#x.\n", hr);
- }
/***************************************************************************
* Tests for fetch4 enable/disable with different texture formats
in FFP/PS. *
***************************************************************************/
- /* Create the textures to test FETCH4 does work/not work there as
expected */
- for (i = 0; i < ARRAY_SIZE(format_tests); ++i)
- {
IDirect3DTexture9 *tex;
hr = IDirect3DDevice9_CreateTexture(device, 2, 2, 1, 0,
format_tests[i].format,
D3DPOOL_MANAGED, &tex, NULL);
ok(hr == D3D_OK, "Failed to create texture, hr %#x.\n", hr);
hr = IDirect3DTexture9_LockRect(tex, 0, &lr, NULL, 0);
ok(hr == D3D_OK, "Failed to lock texture, hr %#x.\n", hr);
memcpy(lr.pBits, &format_tests[i].data, 4);
hr = IDirect3DTexture9_UnlockRect(tex, 0);
ok(hr == D3D_OK, "Failed to unlock texture, hr %#x.\n", hr);
hr = IDirect3DDevice9_SetTexture(device, 0,
(IDirect3DBaseTexture9 *)tex);
ok(hr == D3D_OK, "Failed to set texture, hr %#x.\n", hr);
/* Test if FETCH4 is enabled/disabled when different textures
are used with FFP/texld/texldp */
for (j = 0; j < ARRAY_SIZE(format_tests[i].color_amd); ++j)
{
hr = IDirect3DDevice9_SetVertexShader(device,
shaders[j].vs);
ok(SUCCEEDED(hr), "Failed to set vertex shader, hr %#x.\n",
hr);
hr = IDirect3DDevice9_SetPixelShader(device,
shaders[j].ps);
ok(SUCCEEDED(hr), "SetPixelShader failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_Clear(device, 0, NULL,
D3DCLEAR_TARGET | D3DCLEAR_ZBUFFER, 0, 0.0f, 0);
ok(SUCCEEDED(hr), "Clear failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_BeginScene(device);
ok(SUCCEEDED(hr), "BeginScene failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_DrawPrimitiveUP(device,
D3DPT_TRIANGLESTRIP, 2, quad, sizeof(*quad));
ok(SUCCEEDED(hr), "DrawPrimitiveUP failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_EndScene(device);
ok(SUCCEEDED(hr), "EndScene failed, hr %#x.\n", hr);
get_rt_readback(original_rt, &rb);
D3DCOLOR color = get_readback_color(&rb, format_tests[i].x,
format_tests[i].y);
D3DCOLOR color_amd = format_tests[i].color_amd[j];
D3DCOLOR color_intel = format_tests[i].color_intel[j];
todo_wine_if(format_tests[i].broken_wine)
ok(color_match(color, color_amd, 1)
|| (isWin && color_match(color, color_intel, 1)),
"Test %d,%s expected color 0x%08x at (%u, %u), got
0x%08x.\n", i, shaders[j].name,
color_amd, format_tests[i].x, format_tests[i].y,
color);
release_surface_readback(&rb);
hr = IDirect3DDevice9_Present(device, NULL, NULL, NULL,
NULL);
ok(SUCCEEDED(hr), "Present failed, hr %#x.\n", hr);
}
IDirect3DTexture9_Release(tex);
- }
- /**************************************************
* Tests that fetch4 works with 3D textures. *
**************************************************/
- /* Create volume (3D) texture */
- IDirect3DDevice9_CreateVolumeTexture(device, 4, 4, 2, 1, 0,
D3DFMT_L8, D3DPOOL_MANAGED, &texture3D, NULL );
- ok(hr == D3D_OK, "Failed to create volume texture, hr %#x.\n", hr);
- hr = IDirect3DVolumeTexture9_LockBox(texture3D, 0, &lb, NULL, 0);
- ok(hr == D3D_OK, "Failed to lock texture3D, hr %#x.\n", hr);
- for (i = 0; i < ARRAY_SIZE(fetch4_data); ++i)
- {
memcpy((BYTE *)lb.pBits + i*lb.RowPitch, &fetch4_data[i],
sizeof(fetch4_data[i]));
/* Shift the lower level, to keep it different */
memcpy((BYTE *)lb.pBits + i*lb.RowPitch + lb.SlicePitch,
&fetch4_data[(i+1)%4], sizeof(fetch4_data[i]));
- }
- hr = IDirect3DVolumeTexture9_UnlockBox(texture3D, 0);
- ok(hr == D3D_OK, "Failed to unlock texture3D, hr %#x.\n", hr);
- hr = IDirect3DDevice9_SetTexture(device, 0, (IDirect3DBaseTexture9
*)texture3D);
- ok(hr == D3D_OK, "Failed to set texture3D, hr %#x.\n", hr);
- /* Test FFP and texld with dcl_volume (ps_3d) */
- for (i = 0; i < 2; ++i)
- {
hr = IDirect3DDevice9_SetVertexShader(device, i ? vs : NULL);
ok(SUCCEEDED(hr), "Failed to set vertex shader, hr %#x.\n", hr);
hr = IDirect3DDevice9_SetPixelShader(device, i ? ps_3d : NULL);
ok(SUCCEEDED(hr), "SetPixelShader failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_Clear(device, 0, NULL, D3DCLEAR_TARGET |
D3DCLEAR_ZBUFFER, 0, 0.0f, 0);
ok(SUCCEEDED(hr), "Clear failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_BeginScene(device);
ok(SUCCEEDED(hr), "BeginScene failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_DrawPrimitiveUP(device,
D3DPT_TRIANGLESTRIP, 2, quad, sizeof(*quad));
ok(SUCCEEDED(hr), "DrawPrimitiveUP failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_EndScene(device);
ok(SUCCEEDED(hr), "EndScene failed, hr %#x.\n", hr);
get_rt_readback(original_rt, &rb);
for (j = 0; j < ARRAY_SIZE(expected_colors.color_amd); ++j)
{
UINT x = expected_colors.x[j % 4];
UINT y = expected_colors.y[j / 4];
D3DCOLOR color = get_readback_color(&rb, x, y);
D3DCOLOR color_amd = expected_colors.color_amd[j];
D3DCOLOR color_intel = expected_colors.color_intel[j];
D3DCOLOR color_off = expected_colors.color_3d_fetch4_off[j];
D3DCOLOR color_zround = expected_colors.color_amd[(j+4) %
ARRAY_SIZE(expected_colors.color_amd)];
/* FIXME: Fetch4 on 3D textures have different results
based on the vendor/driver
* - AMD "HD 5700" rounds to nearest "z" texel, and does
fetch4 normally on .xy
* - AMD "R500" has fetch4 disabled
* - AMD "R580" has fetch4 enabled sampling at .xy0
* - Intel UHD 620 sample with fetch4 at .xy0
* Currently unimplemented on wine due to lack of GL
functionality to cast 3D->2DArray
* Wine produces same results as if fetch4 is not enabled
(which probably is better)
* Test will pass on windows if either one of the allowed
results is returned */
if(isWin)
ok(color_match(color, color_zround, 2) ||
color_match(color, color_off, 2)
|| color_match(color, color_intel, 2) ||
color_match(color, color_amd, 2),
"Test 3D %s Expected colors 0x%08x || 0x%08x ||
0x%08x || 0x%08x at (%u, %u), got 0x%08x.\n",
shaders[i].name, color_amd, color_zround,
color_off, color_intel, x, y, color);
else
ok(color_match(color, color_off, 2),
"Test 3D %s Expected color 0x%08x at (%u, %u),
got 0x%08x.\n", shaders[i].name,
color_off, x, y, color);
}
release_surface_readback(&rb);
hr = IDirect3DDevice9_Present(device, NULL, NULL, NULL, NULL);
ok(SUCCEEDED(hr), "Present failed, hr %#x.\n", hr);
- }
- /********************************************************
* Tests for fetch4 enable/disable with depth textures. *
********************************************************/
- for (i = 0; i < ARRAY_SIZE(depth_tests); ++i)
- {
D3DFORMAT format = depth_tests[i].format;
IDirect3DTexture9 *depth_texture;
IDirect3DSurface9 *ds;
if (FAILED(IDirect3D9_CheckDeviceFormat(d3d,
D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL,
D3DFMT_X8R8G8B8, D3DUSAGE_DEPTHSTENCIL,
D3DRTYPE_TEXTURE, format)))
continue;
hr = IDirect3DDevice9_CreateTexture(device, 8, 8, 1,
D3DUSAGE_DEPTHSTENCIL, format, D3DPOOL_DEFAULT,
&depth_texture, NULL);
ok(SUCCEEDED(hr), "CreateTexture failed, hr %#x.\n", hr);
hr = IDirect3DTexture9_GetSurfaceLevel(depth_texture, 0, &ds);
ok(SUCCEEDED(hr), "GetSurfaceLevel failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_SetDepthStencilSurface(device, ds);
ok(SUCCEEDED(hr), "SetDepthStencilSurface failed, hr %#x.\n",
hr);
hr = IDirect3DDevice9_SetRenderTarget(device, 0, rt);
ok(SUCCEEDED(hr), "SetRenderTarget failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_SetVertexShader(device, NULL);
ok(SUCCEEDED(hr), "Failed to set vertex shader, hr %#x.\n", hr);
hr = IDirect3DDevice9_SetPixelShader(device, NULL);
ok(SUCCEEDED(hr), "SetPixelShader failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_SetTexture(device, 0,
(IDirect3DBaseTexture9 *)texture);
ok(hr == D3D_OK, "Failed to set texture3D, hr %#x.\n", hr);
hr = IDirect3DDevice9_SetSamplerState(device, 0,
D3DSAMP_MIPMAPLODBIAS, MAKEFOURCC('G','E','T','1'));
ok(SUCCEEDED(hr), "SetSamplerState failed, hr %#x.\n", hr);
/* Setup the depth/stencil surface. */
hr = IDirect3DDevice9_Clear(device, 0, NULL, D3DCLEAR_ZBUFFER,
0, 0.0f, 0);
ok(SUCCEEDED(hr), "Clear failed, hr %#x.\n", hr);
/* Render to the depth surface */
hr = IDirect3DDevice9_BeginScene(device);
ok(SUCCEEDED(hr), "BeginScene failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_DrawPrimitiveUP(device,
D3DPT_TRIANGLESTRIP, 2, quad, sizeof(*quad));
ok(SUCCEEDED(hr), "DrawPrimitiveUP failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_EndScene(device);
ok(SUCCEEDED(hr), "EndScene failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_SetDepthStencilSurface(device, NULL);
ok(SUCCEEDED(hr), "SetDepthStencilSurface failed, hr %#x.\n",
hr);
IDirect3DSurface9_Release(ds);
hr = IDirect3DDevice9_SetRenderTarget(device, 0, original_rt);
ok(SUCCEEDED(hr), "SetRenderTarget failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_SetTexture(device, 0,
(IDirect3DBaseTexture9 *)depth_texture);
ok(SUCCEEDED(hr), "SetTexture failed, hr %#x.\n", hr);
/* Set a shader for depth sampling, otherwise windows does not
show anything */
hr = IDirect3DDevice9_SetVertexShader(device, vs);
ok(SUCCEEDED(hr), "Failed to set vertex shader, hr %#x.\n", hr);
hr = IDirect3DDevice9_SetPixelShader(device, shaders[1].ps); /*
same as texld */
ok(SUCCEEDED(hr), "SetPixelShader failed, hr %#x.\n", hr);
for (j = 0; j < 2; ++j){
hr = IDirect3DDevice9_SetSamplerState(device, 0,
D3DSAMP_MIPMAPLODBIAS, MAKEFOURCC('G','E','T', j ?
'4' : '1' ));
ok(SUCCEEDED(hr), "SetSamplerState failed, hr %#x.\n", hr);
/* Do the actual shadow mapping. */
hr = IDirect3DDevice9_BeginScene(device);
ok(SUCCEEDED(hr), "BeginScene failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_DrawPrimitiveUP(device,
D3DPT_TRIANGLESTRIP, 2, quad, sizeof(*quad));
ok(SUCCEEDED(hr), "DrawPrimitiveUP failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_EndScene(device);
ok(SUCCEEDED(hr), "EndScene failed, hr %#x.\n", hr);
get_rt_readback(original_rt, &rb);
for (k = 0; k <
ARRAY_SIZE(expected_depth[depth_tests[i].index]); ++k)
{
UINT x = expected_depth[depth_tests[i].index][k].x;
UINT y = expected_depth[depth_tests[i].index][k].y;
D3DCOLOR color_off =
expected_depth[depth_tests[i].index][k].color_off;
D3DCOLOR color_amd =
expected_depth[depth_tests[i].index][k].color_amd;
D3DCOLOR color_intel =
expected_depth[depth_tests[i].index][k].color_intel;
D3DCOLOR color = get_readback_color(&rb, x, y);
/* When Fetch4 is OFF, ignore G and B channels on
windows.
* Some implementations will copy R=G=B, some will set
them to 0 */
if(j == 0)
ok((isWin && color_match(color & 0xffff0000,
color_off & 0xffff0000, 2))
|| color_match(color, color_off, 2),
"Test OFF Expected color 0x%08x at (%u, %u)
for format %s, got 0x%08x.\n",
color_off, x, y, depth_tests[i].name,
color);
else
ok(color_match(color, color_amd, 2)
|| (isWin && color_match(color,
color_intel, 2)),
"Test ON Expected colors 0x%08x || 0x%08x
at (%u, %u) for format %s, got 0x%08x.\n",
color_amd, color_intel, x, y,
depth_tests[i].name, color);
}
release_surface_readback(&rb);
hr = IDirect3DDevice9_Present(device, NULL, NULL, NULL,
NULL);
ok(SUCCEEDED(hr), "Present failed, hr %#x.\n", hr);
}
hr = IDirect3DDevice9_SetTexture(device, 0, NULL);
ok(SUCCEEDED(hr), "SetTexture failed, hr %#x.\n", hr);
IDirect3DTexture9_Release(depth_texture);
- }
- IDirect3DVolumeTexture9_Release(texture3D);
- IDirect3DTexture9_Release(texture);
- for (i = 0; i < ARRAY_SIZE(shaders); ++i)
if (shaders[i].ps)
IDirect3DPixelShader9_Release(shaders[i].ps);
- IDirect3DPixelShader9_Release(ps_3d);
- IDirect3DVertexShader9_Release(vs);
- IDirect3DSurface9_Release(rt);
- IDirect3DSurface9_Release(original_ds);
- IDirect3DSurface9_Release(original_rt);
- refcount = IDirect3DDevice9_Release(device);
- ok(!refcount, "Device has %u references left.\n", refcount);
+done:
- IDirect3D9_Release(d3d);
- DestroyWindow(window);
+}
- static void shadow_test(void) { static const DWORD ps_code[] =
@@ -24660,6 +25303,7 @@ START_TEST(visual) depth_buffer2_test(); depth_blit_test(); intz_test();
- fetch4_test(); shadow_test(); fp_special_test(); depth_bounds_test();
Hi,
L8 is xxx1, thus sampling it with FETCH4 is different for the A coordinate.
I understand your point about disabling it in wine for complex cases. But from what I'm used to in visual tests, nine doesn't always do the same things as wine, and that isn't a problem as the wine behavior is not specifically checked when on wine. And we pass some wine_todo. With your approach we would fail even if we implemented a correct behavior. But, of course, it is up to the wine developers whether they want wine tests to fail on nine because it doesn't behave exactly like wine.
You didn't comment on ATI1 and INTZ. Both have only one channel, so I would assume it's an interesting thing to test for FETCH4.
Axel
On 02/02/2019 20:50, DarkZeros wrote:
Hi Axel,
Thanks for testing with 3DMark06. I tried to run it yesterday without luck; apparently there is some winetrick that I am missing...
Regarding the linear sampler state: I haven't seen any case where setting the filters to LINEAR disables fetch4. From the tests, it only slightly changes the results on R500 AMD by changing the 0.5 texel offset.
My guess is that all apps that use fetch4 take that into account and disable it explicitly with LODBIAS. Sampling a 1x1 L8 texture with fetch4 will return the same value on RGBA, which is similar to a normal sample.
Regarding 3D textures, all implementations behave differently. Since 3D textures have 8 texels, returning the 4 texels used for linear sampling is quite ambiguous. Intel decides to sample at .xy0 and ignore the Z axis. Some AMD devices disable fetch4, some others sample at the nearest z coordinate.
In my opinion, we are hitting implementation bugs/corner cases. It is simpler to just keep fetch4 off there. Especially since implementing it is quite difficult.
The tests of 3D textures in wine are just testing that fetch4 is off (no todo, it should always be off). But on Windows, they test that one of the 4 possible cases seen on AMD and Intel is obtained. How useful those tests are, I am not sure. I just left them there for reference.
Maybe we should remove those tests on windows, or even add todo in some of the cases for 3d. But in that case, what implementation do we want to follow?
Best regards, Daniel
On Sat, 2 Feb 2019, 19:14 Axel Davy <[email protected]> wrote:
Hi, Apparently 3DMark06 believes somehow that LINEAR disables FETCH4. It sets MIPFILTER, MINFILTER and MAGFILTER to LINEAR, and resets ADDRESSV, ADDRESSU and MIPMAPLODBIAS (1, 1, 0 respectively) then behaves as if FETCH4 was disabled (it later disables it definitely with the MIPMAPLODBIAS setting). I don't see any obvious visual glitch, though. The texture sampled with FETCH4 which isn't meant to be sampled with it (looking at how the shader values are used) is a D3DFMT_L8 of size 1x1... I think all this points out that one has to be very careful about FETCH4 corner cases. Thus I would suggest adding more formats and checks to your code, like for example INTZ, ATI1, ATI2 ? For the part "Currently unimplemented on wine due to lack of GL functionality to cast 3D->2DArray", shouldn't the test be with a wine_todo rather than a isWin check ? Axel On 01/02/2019 01:56, Daniel Ansorregui wrote: > - Test texld/texldp/texldd/texldb/texldl in PS and FFP > - Test supported/unsupported texture formats on FFP/texld/texldp > - Test 3dtextures (Disabled, each platform has different results) > - Test depth textures DF16/DF24 with fetch4 on PS (FFP is broken on windows) > > Signed-off-by: Daniel Ansorregui <[email protected] <mailto:[email protected]>> > --- > dlls/d3d9/tests/visual.c | 644 +++++++++++++++++++++++++++++++++++++++ > 1 file changed, 644 insertions(+) > > diff --git a/dlls/d3d9/tests/visual.c b/dlls/d3d9/tests/visual.c > index c06acb77d4..028fc23078 100644 > --- a/dlls/d3d9/tests/visual.c > +++ b/dlls/d3d9/tests/visual.c > @@ -15234,6 +15234,649 @@ done: > DestroyWindow(window); > } > > +static void fetch4_test(void) > +{ > + static const DWORD vs_code[] = > + { > + 0xfffe0300, /* vs_3_0 */ > + 0x0200001f, 0x80000000, 0x900f0000, /* dcl_position v0 */ > + 0x0200001f, 0x80000005, 0x900f0001, /* dcl_texcoord v1 */ > + 0x0200001f, 0x80000000, 0xe00f0000, /* dcl_position o0 */ > + 0x0200001f, 0x80000005, 0xe00f0001, /* dcl_texcoord o1 */ > + 0x02000001, 0xe00f0000, 
0x90e40000, /* mov o0, v0 */ > + 0x02000001, 0xe00f0001, 0x90e40001, /* mov o1, v1 */ > + 0x0000ffff > + }; > + static const DWORD ps_code_texld[] = > + { > + /* Test texld */ > + 0xffff0300, /* ps_3_0 */ > + 0x0200001f, 0x80000005, 0x900f0000, /* dcl_texcoord v0 */ > + 0x0200001f, 0x90000000, 0xa00f0800, /* dcl_2d s0 */ > + 0x03000042, 0x800f0000, 0x90e40000, 0xa0e40800, /* texld r0, v0, s0 */ > + 0x02000001, 0x800f0800, 0x80e40000, /* mov oC0, r0 */ > + 0x0000ffff /* end */ > + }; > + static const DWORD ps_code_texldp[] = > + { > + /* Test texldp : AMD and Wine uses the projection on Fetch4, Intel UHD 620 does not apply it */ > + 0xffff0300, /* ps_3_0 */ > + 0x0200001f, 0x80000005, 0x900f0000, /* dcl_texcoord v0 */ > + 0x0200001f, 0x90000000, 0xa00f0800, /* dcl_2d s0 */ > + 0x05000051, 0xa00f0000, 0x00000000, 0x00000000, 0x00000000, 0x40000000, /* def c0, 0.0, 0.0, 0.0, 2.0 */ > + 0x02000001, 0x80030000, 0x90540000, /* mov r0.xy, v0.xyyy */ > + 0x02000001, 0x800c0000, 0xa0fe0000, /* mov r0.zw <http://r0.zw>, c0.zwww */ > + 0x03010042, 0x800f0000, 0x80e40000, 0xa0e40800, /* texldp r0, r0, s0 */ > + 0x02000001, 0x800f0800, 0x80e40000, /* mov oC0, r0 */ > + 0x0000ffff, /* end */ > + }; > + static const DWORD ps_code_texldd[] = > + { > + /* Test texldd : Fetch4 uses the same D3D state as LOD bias, therefore disables LOD. > + * Sampling LOD gradient should be ignored. 
Same result as texld */ > + /* NOTE: Radeon HD 5700 driver 8.17.10.1404 disables Fetch4 on texldb */ > + 0xffff0300, /* ps_3_0 */ > + 0x0200001f, 0x80000005, 0x900f0000, /* dcl_texcoord v0 */ > + 0x0200001f, 0x90000000, 0xa00f0800, /* dcl_2d s0 */ > + 0x05000051, 0xa00f0000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, /* def c0, 0.5, 0.5, 0.5, 0.5 */ > + 0x05000051, 0xa00f0001, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, /* def c0, 1.0, 1.0, 1.0, 1.0 */ > + 0x02000001, 0x800f0002, 0xa0e40000, /* mov r2, c0 */ > + 0x0500005d, 0x800f0000, 0x90e40000, 0xa0e40800, 0xa0e40000, 0x80e40002, /* texldd r0, v0, s0, c0, r2 */ > + 0x02000001, 0x800f0800, 0x80e40000, /* mov oC0, r0 */ > + 0x0000ffff, /* end */ > + }; > + static const DWORD ps_code_texldb[] = > + { > + /* Test texldb : Fetch4 uses the same D3D state as LOD bias, therefore disables LOD. > + * Same result as texld */ > + /* NOTE: Radeon HD 5700 driver 8.17.10.1404 disables Fetch4 on texldb */ > + 0xffff0300, /* ps_3_0 */ > + 0x0200001f, 0x80000005, 0x900f0000, /* dcl_texcoord v0 */ > + 0x0200001f, 0x90000000, 0xa00f0800, /* dcl_2d s0 */ > + 0x05000051, 0xa00f0000, 0x00000000, 0x00000000, 0x40a00000, 0x40a00000, /* def c0, 0.0, 0.0, 5.0, 5.0 */ > + 0x03000002, 0x800f0000, 0x90e40000, 0xa0e40000, /* add r0, v0, c0 */ > + 0x03020042, 0x800f0000, 0x80e40000, 0xa0e40800, /* texldb r0, r0, s0 */ > + 0x02000001, 0x800f0800, 0x80e40000, /* mov oC0, r0 */ > + 0x0000ffff, /* end */ > + }; > + static const DWORD ps_code_texldl[] = > + { > + /* Test texldl : Fetch4 uses the same D3D state as LOD bias, therefore disables LOD. > + * The explicit LOD level is then ignored. 
Same result as texld */ > + /* NOTE: Radeon HD 5700 driver 8.17.10.1404 disables Fetch4 on texldl */ > + 0xffff0300, /* ps_3_0 */ > + 0x0200001f, 0x80000005, 0x900f0000, /* dcl_texcoord v0 */ > + 0x0200001f, 0x90000000, 0xa00f0800, /* dcl_2d s0 */ > + 0x05000051, 0xa00f0000, 0x00000000, 0x00000000, 0x3f000000, 0x3f000000, /* def c0, 0.0, 0.0, 0.5, 0.5 */ > + 0x03000002, 0x800f0000, 0x90e40000, 0xa0e40000, /* add r0, v0, c0 */ > + 0x0300005f, 0x800f0000, 0x80e40000, 0xa0e40800, /* texldl r0, r0, s0 */ > + 0x02000001, 0x800f0800, 0x80e40000, /* mov oC0, r0 */ > + 0x0000ffff, /* end */ > + }; > + static const DWORD ps_code_3d[] = > + { > + 0xffff0300, /* ps_3_0 */ > + 0x0200001f, 0x80000005, 0x900f0000, /* dcl_texcoord v0 */ > + 0x0200001f, 0xa0000000, 0xa00f0800, /* dcl_volume s0 */ > + 0x03000042, 0x800f0000, 0x90e40000, 0xa0e40800, /* texld r0, v0, s0 */ > + 0x02000001, 0x800f0800, 0x80e40000, /* mov oC0, r0 */ > + 0x0000ffff /* end */ > + }; > + > + static const struct > + { > + struct vec3 position; > + struct vec3 texcoord; > + } > + quad[] = > + { > + /* Tilted on Z axis to get a depth gradient in the depth test */ > + /* NOTE: Using 0.55f-0.6f to avoid rounding errors on depth tests */ > + {{-1.0f, 1.0f, 1.0f}, {0.0f,0.0f,0.6f} }, > + {{ 1.0f, 1.0f, 0.0f}, {1.0f,0.0f,0.6f} }, > + {{-1.0f,-1.0f, 0.0f}, {0.0f,1.0f,0.6f} }, > + {{ 1.0f,-1.0f, 0.0f}, {1.0f,1.0f,0.6f} } > + }; > + > + static const struct > + { > + UINT x[4], y[4]; /* Matrix Sampling positions */ > + D3DCOLOR color_amd[16]; /* AMD original implementation swizzle with -0.5 texel coord */ > + D3DCOLOR color_intel[16]; /* Intel UHD 620 implementation swizzle with no texel coord correction */ > + /* Wine follows the AMD immplementation, and consider an error the Intel one results > + * However, the test will accept as valid the intel only if running on windows */ > + D3DCOLOR color_3d_fetch4_off[16]; > + D3DCOLOR color_fetch4_off[16]; > + } > + expected_colors = > + { > + { 40, 200, 360, 520}, > + { 30, 
150, 270, 390}, > + /* AMD implementation - Wine implementation */ > + {0x131202f2, 0x1211f2f1, 0x1110f101, 0x10130102, > + 0x02f204f4, 0xf2f1f4f3, 0xf101f303, 0x01020304, > + 0x04f42322, 0xf4f32221, 0xf3032120, 0x03042023, > + 0x23221312, 0x22211211, 0x21201110, 0x20231013}, > + /* Intel UHD 620 implementation */ > + {0x23102013, 0x22132312, 0x21122211, 0x20112110, > + 0x13011002, 0x120213f2, 0x11f212f1, 0x10f11101, > + 0x02030104, 0xf20402f4, 0xf1f4f2f3, 0x01f3f103, > + 0x04200323, 0xf4230422, 0xf322f421, 0x0321f320}, > + /* Fetch4 off on 3D textures */ > + {0xff020202, 0xfff2f2f2, 0xfff1f1f1, 0xff010101, > + 0xff050505, 0xfff4f4f4, 0xfff3f3f3, 0xff030303, > + 0xff232323, 0xff222222, 0xff212121, 0xff202020, > + 0xff131313, 0xff121212, 0xff111111, 0xff101010}, > + /* Fetch4 off on 2D texture */ > + {0x13131313, 0x12121212, 0x11111111, 0x10101010, > + 0x02020202, 0xf2f2f2f2, 0xf1f1f1f1, 0x01010101, > + 0x04040404, 0xf4f4f4f4, 0xf3f3f3f3, 0x03030303, > + 0x23232323, 0x22222222, 0x21212121, 0x20202020} > + }; > + > + static const DWORD fetch4_data[] = {0x10111213, > + 0x01f1f202, > + 0x03f3f404, > + 0x20212223}; > + > + static struct > + { > + IDirect3DVertexShader9 *vs; > + IDirect3DPixelShader9 *ps; > + const DWORD *ps_code; > + const char *name; > + BOOL projection; /* The results should be projected (zoomed by 2) */ > + BOOL allow_off; /* Do not enforce Fetch4 enabled on this one on Windows */ > + } > + shaders[] = > + { > + {NULL, NULL, NULL, "FFP", FALSE, FALSE}, > + {NULL, NULL, ps_code_texld, "texld", FALSE, FALSE}, > + {NULL, NULL, ps_code_texldp, "texldp", TRUE, FALSE}, > + {NULL, NULL, ps_code_texldd, "texldd", FALSE, TRUE}, > + {NULL, NULL, ps_code_texldb, "texldb", FALSE, TRUE}, > + {NULL, NULL, ps_code_texldl, "texldl", FALSE, TRUE}, > + }; > + > + static const struct > + { > + D3DFORMAT format; /* The format of the texture */ > + DWORD data; /* The data we will write to the first line */ > + UINT x, y; /* Where we expect the color to be */ > + BOOL 
broken_wine; /* Do not check it on wine because is known ot be broken */ > + D3DCOLOR color_amd[3]; /* Wine results. Results on AMD swizzle + texture offset */ > + D3DCOLOR color_intel[3]; /* Results with intel UHD 620, intel swizzle + no texel offset */ > + } > + format_tests[] = > + { > + /* Enabled formats */ > + {D3DFMT_L8, 0xff804010, 360, 270, FALSE, > + {0x00004010, 0x00004010, 0x10400000}, > + {0x40001000, 0x40001000, 0x40001000} > + }, > + {D3DFMT_L16, 0xff804010, 360, 270, FALSE, > + {0x0000ff40, 0x0000ff40, 0x40ff0000}, > + {0xff004000, 0xff004000, 0xff004000} > + }, > + {D3DFMT_R16F, 0x38003c00, 360, 270, FALSE, > + {0x000080ff, 0x000080ff, 0xff800000}, > + {0x8000ff00, 0x8000ff00, 0x8000ff00} > + }, > + {D3DFMT_R32F, 0x3f000000, 360, 270, FALSE, > + {0x00000080, 0x00000080, 0x80000000}, > + {0x00008000, 0x00008000, 0x00008000} > + }, > + > + /* Disabled format on Intel, enabled on AMD, broken on wine > + * since it is implemented with GL_ALPHA, and fetch4 will fetch RED value */ > + {D3DFMT_A8, 0xff804010, 360, 270, TRUE, > + {0x00004010, 0x00004010, 0x10400000}, > + {0x00000000, 0x00000000, 0x00000000} > + }, > + > + /* Disabled format */ > + {D3DFMT_A8R8G8B8, 0xff804010, 360, 270, FALSE, > + {0x00000000, 0x00000000, 0xff804010}, > + {0x00000000, 0x00000000, 0xff804010} > + }, > + }; > + > + static const struct > + { > + D3DCOLOR color_off, color_amd, color_intel; > + UINT x, y; > + } > + expected_depth[][4] = > + { > + { > + /* This is the expected result for shadow samplers */ > + {0xffffffff,0xffffffff,0xffffffff, 20, 15}, > + {0xffffffff,0xffffffff,0xffffffff,260, 15}, > + {0x00000000,0x00000000,0x00000000, 20,255}, > + {0x00000000,0x00000000,0x00000000,260,135}, > + }, > + { > + /* This is the expected result with DF16 */ > + {0xfffe0000,0xfedfdfbf,0x202000ff, 20, 15}, > + {0xff9f0000,0x9f7f7f5f,0x00bf009f,260, 15}, > + {0xff800000,0x7f5f5f3f,0x9f000080, 20,255}, > + {0xff600000,0x5f3f3f1f,0x80809f60,260,135}, > + }, > + { > + /* This is the 
expected result with DF24 */ > + {0xffff0000,0xffdfdfbf,0x202000ff, 20, 15}, > + {0xff9f0000,0x9f7f7f5f,0x00bf009f,260, 15}, > + {0xff800000,0x7f5f5f3f,0x9f000080, 20,255}, > + {0xff600000,0x5f3f3f1f,0x80809f60,260,135}, > + } > + }; > + > + static const struct > + { > + D3DFORMAT format; > + const char *name; > + UINT index; > + } > + depth_tests[] = > + { > + {D3DFMT_D16_LOCKABLE, "D16_LOCKABLE", 0}, > + {D3DFMT_D32, "D32", 0}, > + {D3DFMT_D15S1, "D15S1", 0}, > + {D3DFMT_D24S8, "D24S8", 0}, > + {D3DFMT_D24X8, "D24X8", 0}, > + {D3DFMT_D24X4S4, "D24X4S4", 0}, > + {D3DFMT_D16, "D16", 0}, > + {D3DFMT_D32F_LOCKABLE, "D32F_LOCKABLE", 0}, > + {D3DFMT_D24FS8, "D24FS8", 0}, > + {MAKEFOURCC('D','F','1','6'), "DF16", 1}, > + {MAKEFOURCC('D','F','2','4'), "DF24", 2}, > + }; > + > + const BOOL isWin = strcmp(winetest_platform, "wine"); > + > + IDirect3DSurface9 *original_ds, *original_rt, *rt; > + IDirect3DVolumeTexture9 *texture3D; > + IDirect3DPixelShader9 *ps_3d; > + struct surface_readback rb; > + IDirect3DVertexShader9 *vs; > + IDirect3DTexture9 *texture; > + IDirect3DDevice9 *device; > + D3DLOCKED_RECT lr; > + D3DLOCKED_BOX lb; > + IDirect3D9 *d3d; > + ULONG refcount; > + D3DCAPS9 caps; > + UINT i, j, k; > + HWND window; > + HRESULT hr; > + > + window = create_window(); > + d3d = Direct3DCreate9(D3D_SDK_VERSION); > + ok(!!d3d, "Failed to create a D3D object.\n"); > + if (FAILED(IDirect3D9_CheckDeviceFormat(d3d, D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL, > + D3DFMT_X8R8G8B8, D3DUSAGE_DEPTHSTENCIL, D3DRTYPE_TEXTURE, MAKEFOURCC('D','F','2','4')))) > + { > + skip("No DF24 support, skipping FETCH4 test.\n"); > + goto done; > + } > + if (!(device = create_device(d3d, window, window, TRUE))) > + { > + skip("Failed to create a D3D device, skipping tests.\n"); > + goto done; > + } > + > + hr = IDirect3DDevice9_GetDeviceCaps(device, &caps); > + ok(SUCCEEDED(hr), "GetDeviceCaps failed, hr %#x.\n", hr); > + if (caps.PixelShaderVersion < D3DPS_VERSION(3, 0)) > + { > + skip("No pixel shader 
3.0 support, skipping FETCH4 test.\n"); > + IDirect3DDevice9_Release(device); > + goto done; > + } > + hr = IDirect3DDevice9_GetRenderTarget(device, 0, &original_rt); > + ok(SUCCEEDED(hr), "GetRenderTarget failed, hr %#x.\n", hr); > + hr = IDirect3DDevice9_GetDepthStencilSurface(device, &original_ds); > + ok(SUCCEEDED(hr), "GetDepthStencilSurface failed, hr %#x.\n", hr); > + hr = IDirect3DDevice9_CreateRenderTarget(device, 8, 8, D3DFMT_A8R8G8B8, > + D3DMULTISAMPLE_NONE, 0, FALSE, &rt, NULL); > + ok(SUCCEEDED(hr), "CreateRenderTarget failed, hr %#x.\n", hr); > + > + /* Create our texture for FETCH4 shader testing */ > + hr = IDirect3DDevice9_CreateTexture(device, 4, 4, 1, 0, D3DFMT_L8, D3DPOOL_MANAGED, &texture, NULL); > + ok(hr == D3D_OK, "Failed to create texture, hr %#x.\n", hr); > + hr = IDirect3DTexture9_LockRect(texture, 0, &lr, NULL, 0); > + ok(hr == D3D_OK, "Failed to lock texture, hr %#x.\n", hr); > + for (i = 0; i < ARRAY_SIZE(fetch4_data); ++i) > + memcpy((BYTE *)lr.pBits + i*lr.Pitch, &fetch4_data[i], sizeof(fetch4_data[i])); > + hr = IDirect3DTexture9_UnlockRect(texture, 0); > + ok(hr == D3D_OK, "Failed to unlock texture, hr %#x.\n", hr); > + > + /* Create vertex shader */ > + hr = IDirect3DDevice9_CreateVertexShader(device, vs_code, &vs); > + ok(hr == D3D_OK, "IDirect3DDevice9_CreateVertexShader returned %08x\n", hr); > + /* Prepare the pixel shaders */ > + for (i = 0; i < ARRAY_SIZE(shaders); ++i) > + { > + if (shaders[i].ps_code) > + { > + hr = IDirect3DDevice9_CreatePixelShader(device, shaders[i].ps_code, &shaders[i].ps); > + ok(SUCCEEDED(hr), "CreatePixelShader failed, hr %#x.\n", hr); > + /* Copy vertex shader pointer if a PS is present */ > + shaders[i].vs = vs; > + } > + } > + hr = IDirect3DDevice9_CreatePixelShader(device, ps_code_3d, &ps_3d); > + ok(SUCCEEDED(hr), "CreatePixelShader failed, hr %#x.\n", hr); > + > + hr = IDirect3DDevice9_SetFVF(device, D3DFVF_XYZ | D3DFVF_TEX1 | D3DFVF_TEXCOORDSIZE3(0)); > + ok(SUCCEEDED(hr), "SetFVF failed, hr 
%#x.\n", hr); > + hr = IDirect3DDevice9_SetRenderState(device, D3DRS_ZENABLE, D3DZB_TRUE); > + ok(SUCCEEDED(hr), "SetRenderState failed, hr %#x.\n", hr); > + hr = IDirect3DDevice9_SetRenderState(device, D3DRS_ZFUNC, D3DCMP_ALWAYS); > + ok(SUCCEEDED(hr), "SetRenderState failed, hr %#x.\n", hr); > + hr = IDirect3DDevice9_SetRenderState(device, D3DRS_ZWRITEENABLE, TRUE); > + ok(SUCCEEDED(hr), "SetRenderState failed, hr %#x.\n", hr); > + hr = IDirect3DDevice9_SetRenderState(device, D3DRS_LIGHTING, FALSE); > + ok(SUCCEEDED(hr), "SetRenderState failed, hr %#x.\n", hr); > + hr = IDirect3DDevice9_SetTexture(device, 0, (IDirect3DBaseTexture9 *)texture); > + ok(hr == D3D_OK, "Failed to set texture, hr %#x.\n", hr); > + > + /* According to the spec, FETCH4 is enabled when D3DSAMP_MIPMAPLODBIAS == GET4 and also > + * D3DSAMP_MAGFILTER == D3DTEXF_POINT. But apparently only GET4 is needed for it to get active. > + * However, AMD HW r500 samples always as if POINT (nearest filtering) is selected with FETCH4 > + * the driver later on corrected this by adding -0.5 texel coord. */ > + hr = IDirect3DDevice9_SetSamplerState(device, 0, D3DSAMP_MIPMAPLODBIAS, MAKEFOURCC('G','E','T','4')); > + ok(SUCCEEDED(hr), "SetSamplerState failed, hr %#x.\n", hr); > + hr = IDirect3DDevice9_SetSamplerState(device, 0, D3DSAMP_MAGFILTER, D3DTEXF_POINT); > + ok(SUCCEEDED(hr), "SetSamplerState failed, hr %#x.\n", hr); > + hr = IDirect3DDevice9_SetSamplerState(device, 0, D3DSAMP_MINFILTER, D3DTEXF_POINT); > + ok(SUCCEEDED(hr), "SetSamplerState failed, hr %#x.\n", hr); > + hr = IDirect3DDevice9_SetSamplerState(device, 0, D3DSAMP_MIPFILTER, D3DTEXF_POINT); > + ok(SUCCEEDED(hr), "SetSamplerState failed, hr %#x.\n", hr); > + > + /*********************************************************************** > + * Tests for FFP/PS correctness when using L8 texture with fetch4. 
* > + ***********************************************************************/ > + > + /* Render with fetch4 and test if we obtain proper results for all sampler FFP/PS instructions */ > + for (i = 0; i < ARRAY_SIZE(shaders); ++i) > + { > + hr = IDirect3DDevice9_SetVertexShader(device, shaders[i].vs); > + ok(SUCCEEDED(hr), "Failed to set vertex shader, hr %#x.\n", hr); > + hr = IDirect3DDevice9_SetPixelShader(device, shaders[i].ps); > + ok(SUCCEEDED(hr), "SetPixelShader failed, hr %#x.\n", hr); > + > + hr = IDirect3DDevice9_Clear(device, 0, NULL, D3DCLEAR_TARGET | D3DCLEAR_ZBUFFER, 0, 0.0f, 0); > + ok(SUCCEEDED(hr), "Clear failed, hr %#x.\n", hr); > + hr = IDirect3DDevice9_BeginScene(device); > + ok(SUCCEEDED(hr), "BeginScene failed, hr %#x.\n", hr); > + hr = IDirect3DDevice9_DrawPrimitiveUP(device, D3DPT_TRIANGLESTRIP, 2, quad, sizeof(*quad)); > + ok(SUCCEEDED(hr), "DrawPrimitiveUP failed, hr %#x.\n", hr); > + hr = IDirect3DDevice9_EndScene(device); > + ok(SUCCEEDED(hr), "EndScene failed, hr %#x.\n", hr); > + > + get_rt_readback(original_rt, &rb); > + for (j = 0; j < ARRAY_SIZE(expected_colors.color_amd); ++j) > + { > + UINT x = expected_colors.x[j % 4]; > + UINT y = expected_colors.y[j / 4]; > + D3DCOLOR color = get_readback_color(&rb, x, y); > + D3DCOLOR color_amd = expected_colors.color_amd[shaders[i].projection ? (j/4/2*4 + (j%4)/2) : j]; > + D3DCOLOR color_intel = expected_colors.color_intel[j]; > + ok(color_match(color, color_amd, 1) > + || (isWin && (color_match(color, color_intel, 1) || shaders[i].allow_off)), > + "Test %s Expected color 0x%08x at (%u, %u), got 0x%08x.\n", shaders[i].name, > + color_amd, x, y, color); > + } > + release_surface_readback(&rb); > + > + hr = IDirect3DDevice9_Present(device, NULL, NULL, NULL, NULL); > + ok(SUCCEEDED(hr), "Present failed, hr %#x.\n", hr); > + } > + > + /*************************************************************************** > + * Tests for fetch4 enable/disable with different texture formats in FFP/PS. 
* > + ***************************************************************************/ > + > + /* Create the textures to test FETCH4 does work/not work there as expected */ > + for (i = 0; i < ARRAY_SIZE(format_tests); ++i) > + { > + IDirect3DTexture9 *tex; > + hr = IDirect3DDevice9_CreateTexture(device, 2, 2, 1, 0, format_tests[i].format, > + D3DPOOL_MANAGED, &tex, NULL); > + ok(hr == D3D_OK, "Failed to create texture, hr %#x.\n", hr); > + hr = IDirect3DTexture9_LockRect(tex, 0, &lr, NULL, 0); > + ok(hr == D3D_OK, "Failed to lock texture, hr %#x.\n", hr); > + memcpy(lr.pBits, &format_tests[i].data, 4); > + hr = IDirect3DTexture9_UnlockRect(tex, 0); > + ok(hr == D3D_OK, "Failed to unlock texture, hr %#x.\n", hr); > + hr = IDirect3DDevice9_SetTexture(device, 0, (IDirect3DBaseTexture9 *)tex); > + ok(hr == D3D_OK, "Failed to set texture, hr %#x.\n", hr); > + > + /* Test if FETCH4 is enabled/disabled when different textures are used with FFP/texld/texldp */ > + for (j = 0; j < ARRAY_SIZE(format_tests[i].color_amd); ++j) > + { > + hr = IDirect3DDevice9_SetVertexShader(device, shaders[j].vs); > + ok(SUCCEEDED(hr), "Failed to set vertex shader, hr %#x.\n", hr); > + hr = IDirect3DDevice9_SetPixelShader(device, shaders[j].ps); > + ok(SUCCEEDED(hr), "SetPixelShader failed, hr %#x.\n", hr); > + > + hr = IDirect3DDevice9_Clear(device, 0, NULL, D3DCLEAR_TARGET | D3DCLEAR_ZBUFFER, 0, 0.0f, 0); > + ok(SUCCEEDED(hr), "Clear failed, hr %#x.\n", hr); > + hr = IDirect3DDevice9_BeginScene(device); > + ok(SUCCEEDED(hr), "BeginScene failed, hr %#x.\n", hr); > + hr = IDirect3DDevice9_DrawPrimitiveUP(device, D3DPT_TRIANGLESTRIP, 2, quad, sizeof(*quad)); > + ok(SUCCEEDED(hr), "DrawPrimitiveUP failed, hr %#x.\n", hr); > + hr = IDirect3DDevice9_EndScene(device); > + ok(SUCCEEDED(hr), "EndScene failed, hr %#x.\n", hr); > + > + get_rt_readback(original_rt, &rb); > + D3DCOLOR color = get_readback_color(&rb, format_tests[i].x, format_tests[i].y); > + D3DCOLOR color_amd = 
format_tests[i].color_amd[j]; > + D3DCOLOR color_intel = format_tests[i].color_intel[j]; > + todo_wine_if(format_tests[i].broken_wine) ok(color_match(color, color_amd, 1) > + || (isWin && color_match(color, color_intel, 1)), > + "Test %d,%s expected color 0x%08x at (%u, %u), got 0x%08x.\n", i, shaders[j].name, > + color_amd, format_tests[i].x, format_tests[i].y, color); > + release_surface_readback(&rb); > + > + hr = IDirect3DDevice9_Present(device, NULL, NULL, NULL, NULL); > + ok(SUCCEEDED(hr), "Present failed, hr %#x.\n", hr); > + } > + IDirect3DTexture9_Release(tex); > + } > + > + /************************************************** > + * Tests that fetch4 works with 3D textures. * > + **************************************************/ > + > + /* Create volume (3D) texture */ > + IDirect3DDevice9_CreateVolumeTexture(device, 4, 4, 2, 1, 0, D3DFMT_L8, D3DPOOL_MANAGED, &texture3D, NULL ); > + ok(hr == D3D_OK, "Failed to create volume texture, hr %#x.\n", hr); > + hr = IDirect3DVolumeTexture9_LockBox(texture3D, 0, &lb, NULL, 0); > + ok(hr == D3D_OK, "Failed to lock texture3D, hr %#x.\n", hr); > + for (i = 0; i < ARRAY_SIZE(fetch4_data); ++i) > + { > + memcpy((BYTE *)lb.pBits + i*lb.RowPitch, &fetch4_data[i], sizeof(fetch4_data[i])); > + /* Shift the lower level, to keep it different */ > + memcpy((BYTE *)lb.pBits + i*lb.RowPitch + lb.SlicePitch, &fetch4_data[(i+1)%4], sizeof(fetch4_data[i])); > + } > + hr = IDirect3DVolumeTexture9_UnlockBox(texture3D, 0); > + ok(hr == D3D_OK, "Failed to unlock texture3D, hr %#x.\n", hr); > + hr = IDirect3DDevice9_SetTexture(device, 0, (IDirect3DBaseTexture9 *)texture3D); > + ok(hr == D3D_OK, "Failed to set texture3D, hr %#x.\n", hr); > + > + /* Test FFP and texld with dcl_volume (ps_3d) */ > + for (i = 0; i < 2; ++i) > + { > + hr = IDirect3DDevice9_SetVertexShader(device, i ? vs : NULL); > + ok(SUCCEEDED(hr), "Failed to set vertex shader, hr %#x.\n", hr); > + hr = IDirect3DDevice9_SetPixelShader(device, i ? 
ps_3d : NULL); > + ok(SUCCEEDED(hr), "SetPixelShader failed, hr %#x.\n", hr); > + > + hr = IDirect3DDevice9_Clear(device, 0, NULL, D3DCLEAR_TARGET | D3DCLEAR_ZBUFFER, 0, 0.0f, 0); > + ok(SUCCEEDED(hr), "Clear failed, hr %#x.\n", hr); > + hr = IDirect3DDevice9_BeginScene(device); > + ok(SUCCEEDED(hr), "BeginScene failed, hr %#x.\n", hr); > + hr = IDirect3DDevice9_DrawPrimitiveUP(device, D3DPT_TRIANGLESTRIP, 2, quad, sizeof(*quad)); > + ok(SUCCEEDED(hr), "DrawPrimitiveUP failed, hr %#x.\n", hr); > + hr = IDirect3DDevice9_EndScene(device); > + ok(SUCCEEDED(hr), "EndScene failed, hr %#x.\n", hr); > + get_rt_readback(original_rt, &rb); > + for (j = 0; j < ARRAY_SIZE(expected_colors.color_amd); ++j) > + { > + UINT x = expected_colors.x[j % 4]; > + UINT y = expected_colors.y[j / 4]; > + D3DCOLOR color = get_readback_color(&rb, x, y); > + D3DCOLOR color_amd = expected_colors.color_amd[j]; > + D3DCOLOR color_intel = expected_colors.color_intel[j]; > + D3DCOLOR color_off = expected_colors.color_3d_fetch4_off[j]; > + D3DCOLOR color_zround = expected_colors.color_amd[(j+4) % ARRAY_SIZE(expected_colors.color_amd)]; > + /* FIXME: Fetch4 on 3D textures have different results based on the vendor/driver > + * - AMD "HD 5700" rounds to nearest "z" texel, and does fetch4 normally on .xy > + * - AMD "R500" has fetch4 disabled > + * - AMD "R580" has fetch4 enabled sampling at .xy0 > + * - Intel UHD 620 sample with fetch4 at .xy0 > + * Currently unimplemented on wine due to lack of GL functionality to cast 3D->2DArray > + * Wine produces same results as if fetch4 is not enabled (which probably is better) > + * Test will pass on windows if either one of the allowed results is returned */ > + if(isWin) > + ok(color_match(color, color_zround, 2) || color_match(color, color_off, 2) > + || color_match(color, color_intel, 2) || color_match(color, color_amd, 2), > + "Test 3D %s Expected colors 0x%08x || 0x%08x || 0x%08x || 0x%08x at (%u, %u), got 0x%08x.\n", > + shaders[i].name, color_amd, 
color_zround, color_off, color_intel, x, y, color); > + else > + ok(color_match(color, color_off, 2), > + "Test 3D %s Expected color 0x%08x at (%u, %u), got 0x%08x.\n", shaders[i].name, > + color_off, x, y, color); > + } > + release_surface_readback(&rb); > + hr = IDirect3DDevice9_Present(device, NULL, NULL, NULL, NULL); > + ok(SUCCEEDED(hr), "Present failed, hr %#x.\n", hr); > + } > + > + /******************************************************** > + * Tests for fetch4 enable/disable with depth textures. * > + ********************************************************/ > + > + for (i = 0; i < ARRAY_SIZE(depth_tests); ++i) > + { > + D3DFORMAT format = depth_tests[i].format; > + IDirect3DTexture9 *depth_texture; > + IDirect3DSurface9 *ds; > + > + if (FAILED(IDirect3D9_CheckDeviceFormat(d3d, D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL, > + D3DFMT_X8R8G8B8, D3DUSAGE_DEPTHSTENCIL, D3DRTYPE_TEXTURE, format))) > + continue; > + > + hr = IDirect3DDevice9_CreateTexture(device, 8, 8, 1, > + D3DUSAGE_DEPTHSTENCIL, format, D3DPOOL_DEFAULT, &depth_texture, NULL); > + ok(SUCCEEDED(hr), "CreateTexture failed, hr %#x.\n", hr); > + hr = IDirect3DTexture9_GetSurfaceLevel(depth_texture, 0, &ds); > + ok(SUCCEEDED(hr), "GetSurfaceLevel failed, hr %#x.\n", hr); > + hr = IDirect3DDevice9_SetDepthStencilSurface(device, ds); > + ok(SUCCEEDED(hr), "SetDepthStencilSurface failed, hr %#x.\n", hr); > + hr = IDirect3DDevice9_SetRenderTarget(device, 0, rt); > + ok(SUCCEEDED(hr), "SetRenderTarget failed, hr %#x.\n", hr); > + hr = IDirect3DDevice9_SetVertexShader(device, NULL); > + ok(SUCCEEDED(hr), "Failed to set vertex shader, hr %#x.\n", hr); > + hr = IDirect3DDevice9_SetPixelShader(device, NULL); > + ok(SUCCEEDED(hr), "SetPixelShader failed, hr %#x.\n", hr); > + hr = IDirect3DDevice9_SetTexture(device, 0, (IDirect3DBaseTexture9 *)texture); > + ok(hr == D3D_OK, "Failed to set texture3D, hr %#x.\n", hr); > + hr = IDirect3DDevice9_SetSamplerState(device, 0, > + D3DSAMP_MIPMAPLODBIAS, 
MAKEFOURCC('G','E','T','1')); > + ok(SUCCEEDED(hr), "SetSamplerState failed, hr %#x.\n", hr); > + > + /* Setup the depth/stencil surface. */ > + hr = IDirect3DDevice9_Clear(device, 0, NULL, D3DCLEAR_ZBUFFER, 0, 0.0f, 0); > + ok(SUCCEEDED(hr), "Clear failed, hr %#x.\n", hr); > + > + /* Render to the depth surface */ > + hr = IDirect3DDevice9_BeginScene(device); > + ok(SUCCEEDED(hr), "BeginScene failed, hr %#x.\n", hr); > + hr = IDirect3DDevice9_DrawPrimitiveUP(device, D3DPT_TRIANGLESTRIP, 2, quad, sizeof(*quad)); > + ok(SUCCEEDED(hr), "DrawPrimitiveUP failed, hr %#x.\n", hr); > + hr = IDirect3DDevice9_EndScene(device); > + ok(SUCCEEDED(hr), "EndScene failed, hr %#x.\n", hr); > + > + hr = IDirect3DDevice9_SetDepthStencilSurface(device, NULL); > + ok(SUCCEEDED(hr), "SetDepthStencilSurface failed, hr %#x.\n", hr); > + IDirect3DSurface9_Release(ds); > + hr = IDirect3DDevice9_SetRenderTarget(device, 0, original_rt); > + ok(SUCCEEDED(hr), "SetRenderTarget failed, hr %#x.\n", hr); > + hr = IDirect3DDevice9_SetTexture(device, 0, (IDirect3DBaseTexture9 *)depth_texture); > + ok(SUCCEEDED(hr), "SetTexture failed, hr %#x.\n", hr); > + > + /* Set a shader for depth sampling, otherwise windows does not show anything */ > + hr = IDirect3DDevice9_SetVertexShader(device, vs); > + ok(SUCCEEDED(hr), "Failed to set vertex shader, hr %#x.\n", hr); > + hr = IDirect3DDevice9_SetPixelShader(device, shaders[1].ps); /* same as texld */ > + ok(SUCCEEDED(hr), "SetPixelShader failed, hr %#x.\n", hr); > + > + for (j = 0; j < 2; ++j){ > + hr = IDirect3DDevice9_SetSamplerState(device, 0, > + D3DSAMP_MIPMAPLODBIAS, MAKEFOURCC('G','E','T', j ? '4' : '1' )); > + ok(SUCCEEDED(hr), "SetSamplerState failed, hr %#x.\n", hr); > + > + /* Do the actual shadow mapping. 
*/ > + hr = IDirect3DDevice9_BeginScene(device); > + ok(SUCCEEDED(hr), "BeginScene failed, hr %#x.\n", hr); > + hr = IDirect3DDevice9_DrawPrimitiveUP(device, D3DPT_TRIANGLESTRIP, 2, quad, sizeof(*quad)); > + ok(SUCCEEDED(hr), "DrawPrimitiveUP failed, hr %#x.\n", hr); > + hr = IDirect3DDevice9_EndScene(device); > + ok(SUCCEEDED(hr), "EndScene failed, hr %#x.\n", hr); > + > + get_rt_readback(original_rt, &rb); > + for (k = 0; k < ARRAY_SIZE(expected_depth[depth_tests[i].index]); ++k) > + { > + UINT x = expected_depth[depth_tests[i].index][k].x; > + UINT y = expected_depth[depth_tests[i].index][k].y; > + D3DCOLOR color_off = expected_depth[depth_tests[i].index][k].color_off; > + D3DCOLOR color_amd = expected_depth[depth_tests[i].index][k].color_amd; > + D3DCOLOR color_intel = expected_depth[depth_tests[i].index][k].color_intel; > + D3DCOLOR color = get_readback_color(&rb, x, y); > + /* When Fetch4 is OFF, ignore G and B channels on windows. > + * Some implementations will copy R=G=B, some will set them to 0 */ > + if(j == 0) > + ok((isWin && color_match(color & 0xffff0000, color_off & 0xffff0000, 2)) > + || color_match(color, color_off, 2), > + "Test OFF Expected color 0x%08x at (%u, %u) for format %s, got 0x%08x.\n", > + color_off, x, y, depth_tests[i].name, color); > + else > + ok(color_match(color, color_amd, 2) > + || (isWin && color_match(color, color_intel, 2)), > + "Test ON Expected colors 0x%08x || 0x%08x at (%u, %u) for format %s, got 0x%08x.\n", > + color_amd, color_intel, x, y, depth_tests[i].name, color); > + } > + release_surface_readback(&rb); > + > + hr = IDirect3DDevice9_Present(device, NULL, NULL, NULL, NULL); > + ok(SUCCEEDED(hr), "Present failed, hr %#x.\n", hr); > + } > + > + hr = IDirect3DDevice9_SetTexture(device, 0, NULL); > + ok(SUCCEEDED(hr), "SetTexture failed, hr %#x.\n", hr); > + IDirect3DTexture9_Release(depth_texture); > + } > + > + IDirect3DVolumeTexture9_Release(texture3D); > + IDirect3DTexture9_Release(texture); > + for (i = 0; i < 
ARRAY_SIZE(shaders); ++i) > + if (shaders[i].ps) > + IDirect3DPixelShader9_Release(shaders[i].ps); > + IDirect3DPixelShader9_Release(ps_3d); > + IDirect3DVertexShader9_Release(vs); > + IDirect3DSurface9_Release(rt); > + IDirect3DSurface9_Release(original_ds); > + IDirect3DSurface9_Release(original_rt); > + refcount = IDirect3DDevice9_Release(device); > + ok(!refcount, "Device has %u references left.\n", refcount); > +done: > + IDirect3D9_Release(d3d); > + DestroyWindow(window); > +} > + > static void shadow_test(void) > { > static const DWORD ps_code[] = > @@ -24660,6 +25303,7 @@ START_TEST(visual) > depth_buffer2_test(); > depth_blit_test(); > intz_test(); > + fetch4_test(); > shadow_test(); > fp_special_test(); > depth_bounds_test();
Good point about the other single-channel textures. I will add them to the list of tests. I just was not sure how many texture formats have fetch4 enabled, so I took only the ones that exist in D3D9, plus DF16/DF24.
I think that if the tests in their current state do not pass on nine, we can change them, or mark them as todo() or broken() if they fail on a platform that is not wine. After all, it is not a failure we can fix if it is not running in wine. That would also cover possible cases that fail in the future on Windows implementations of fetch4.
BR, Daniel
On Sat, 2 Feb 2019, 20:07 Axel Davy <[email protected] wrote:
Hi,
L8 samples as (x, x, x, 1), so sampling it with FETCH4 gives a different result in the A component.
I understand your point about disabling it in wine for complex cases. But from what I'm used to in the visual tests, nine doesn't always do the same things as wine, and that isn't a problem because the wine behavior is not specifically checked when on wine. And we pass some todo_wine tests. With your approach we would fail even if we implemented a correct behavior. But, of course, it is up to the wine developers whether they want wine tests to fail on nine because it doesn't behave exactly like wine.
You didn't comment on ATI1 and INTZ. Both have only one channel, so I would assume it's an interesting thing to test for FETCH4.
Axel
On 02/02/2019 20:50, DarkZeros wrote:
Hi Axel,
Thanks for testing with 3DMark06. I tried to run it yesterday without luck; apparently there is some winetrick that I am missing...
Regarding the linear sampler state: I haven't seen any case where setting the filters to LINEAR disables fetch4. From the tests, it only slightly changes the results on AMD R500 by changing the 0.5 texel offset.
My guess is that all apps that use fetch4 take that into account, and disable it explicitly with LODBIAS. Sampling a 1x1 L8 texture with fetch4 will return the same value on RGBA, which is similar to a normal sample.
Regarding 3D textures, all implementations behave differently. Since 3D textures have 8 texels, returning the 4 texels used for linear sampling is quite ambiguous. Intel decides to sample at .xy0 and ignore the Z axis. Some AMD devices disable fetch4, some others sample at the nearest z coordinate.
In my opinion, we are hitting implementation bugs/corner cases. It is simpler to just keep fetch4 off there. Especially since implementing it is quite difficult.
The 3D texture tests on wine just check that fetch4 is off (no todo, it should always be off). But on Windows, they check that one of the 4 possible cases seen on AMD and Intel is obtained. How useful those tests are, I am not sure. I just left them there for reference.
Maybe we should remove those tests on windows, or even add todo in some of the cases for 3d. But in that case, what implementation do we want to follow?
Best regards, Daniel
On Sat, 2 Feb 2019, 19:14 Axel Davy <[email protected] wrote:
Hi,
Apparently 3DMark06 somehow believes that LINEAR disables FETCH4. It sets MIPFILTER, MINFILTER and MAGFILTER to LINEAR, and resets ADDRESSV, ADDRESSU and MIPMAPLODBIAS (1, 1 and 0 respectively), then behaves as if FETCH4 were disabled (it later disables it definitively with the MIPMAPLODBIAS setting).
I don't see any obvious visual glitch, though. The texture sampled with FETCH4 which isn't meant to be sampled with it (looking at how the shader values are used) is a D3DFMT_L8 of size 1x1...
I think all this points out that one has to be very careful about FETCH4 corner cases.
Thus I would suggest adding more formats and checks to your code, like for example INTZ, ATI1, ATI2 ?
For the part "Currently unimplemented on wine due to lack of GL functionality to cast 3D->2DArray", shouldn't the test use todo_wine rather than an isWin check?
Axel
On 01/02/2019 01:56, Daniel Ansorregui wrote:
- Test texld/texldp/texldd/texldb/texldl in PS and FFP
- Test supported/unsupported texture formats on FFP/texld/texldp
- Test 3dtextures (Disabled, each platform has different results)
- Test depth textures DF16/DF24 with fetch4 on PS (FFP is broken on
windows)
Signed-off-by: Daniel Ansorregui [email protected]
dlls/d3d9/tests/visual.c | 644 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 644 insertions(+)
diff --git a/dlls/d3d9/tests/visual.c b/dlls/d3d9/tests/visual.c index c06acb77d4..028fc23078 100644 --- a/dlls/d3d9/tests/visual.c +++ b/dlls/d3d9/tests/visual.c @@ -15234,6 +15234,649 @@ done: DestroyWindow(window); }
+static void fetch4_test(void) +{
- static const DWORD vs_code[] =
- {
0xfffe0300, /*
vs_3_0 */
0x0200001f, 0x80000000, 0x900f0000, /*
dcl_position v0 */
0x0200001f, 0x80000005, 0x900f0001, /*
dcl_texcoord v1 */
0x0200001f, 0x80000000, 0xe00f0000, /*
dcl_position o0 */
0x0200001f, 0x80000005, 0xe00f0001, /*
dcl_texcoord o1 */
0x02000001, 0xe00f0000, 0x90e40000, /*
mov o0, v0 */
0x02000001, 0xe00f0001, 0x90e40001, /*
mov o1, v1 */
0x0000ffff
- };
- static const DWORD ps_code_texld[] =
- {
/* Test texld */
0xffff0300,
/* ps_3_0 */
0x0200001f, 0x80000005, 0x900f0000,
/* dcl_texcoord v0 */
0x0200001f, 0x90000000, 0xa00f0800,
/* dcl_2d s0 */
0x03000042, 0x800f0000, 0x90e40000, 0xa0e40800,
/* texld r0, v0, s0 */
0x02000001, 0x800f0800, 0x80e40000,
/* mov oC0, r0 */
0x0000ffff
/* end */
- };
- static const DWORD ps_code_texldp[] =
- {
/* Test texldp : AMD and Wine uses the projection on Fetch4,
Intel UHD 620 does not apply it */
0xffff0300,
/* ps_3_0 */
0x0200001f, 0x80000005, 0x900f0000,
/* dcl_texcoord v0 */
0x0200001f, 0x90000000, 0xa00f0800,
/* dcl_2d s0 */
0x05000051, 0xa00f0000, 0x00000000, 0x00000000, 0x00000000,
0x40000000, /* def c0, 0.0, 0.0, 0.0, 2.0 */
0x02000001, 0x80030000, 0x90540000,
/* mov r0.xy, v0.xyyy */
0x02000001, 0x800c0000, 0xa0fe0000,
/* mov r0.zw, c0.zwww */
0x03010042, 0x800f0000, 0x80e40000, 0xa0e40800,
/* texldp r0, r0, s0 */
0x02000001, 0x800f0800, 0x80e40000,
/* mov oC0, r0 */
0x0000ffff,
/* end */
- };
- static const DWORD ps_code_texldd[] =
- {
/* Test texldd : Fetch4 uses the same D3D state as LOD bias,
therefore disables LOD.
* Sampling LOD gradient should be ignored. Same
result as texld */
/* NOTE: Radeon HD 5700 driver 8.17.10.1404 disables Fetch4 on
texldb */
0xffff0300,
/* ps_3_0 */
0x0200001f, 0x80000005, 0x900f0000,
/* dcl_texcoord v0 */
0x0200001f, 0x90000000, 0xa00f0800,
/* dcl_2d s0 */
0x05000051, 0xa00f0000, 0x3f000000, 0x3f000000, 0x3f000000,
0x3f000000, /* def c0, 0.5, 0.5, 0.5, 0.5 */
0x05000051, 0xa00f0001, 0x3f800000, 0x3f800000, 0x3f800000,
0x3f800000, /* def c0, 1.0, 1.0, 1.0, 1.0 */
0x02000001, 0x800f0002, 0xa0e40000,
/* mov r2, c0 */
0x0500005d, 0x800f0000, 0x90e40000, 0xa0e40800, 0xa0e40000,
0x80e40002, /* texldd r0, v0, s0, c0, r2 */
0x02000001, 0x800f0800, 0x80e40000,
/* mov oC0, r0 */
0x0000ffff,
/* end */
- };
- static const DWORD ps_code_texldb[] =
- {
/* Test texldb : Fetch4 uses the same D3D state as LOD bias,
therefore disables LOD.
* Same result as texld */
/* NOTE: Radeon HD 5700 driver 8.17.10.1404 disables Fetch4 on
texldb */
0xffff0300,
/* ps_3_0 */
0x0200001f, 0x80000005, 0x900f0000,
/* dcl_texcoord v0 */
0x0200001f, 0x90000000, 0xa00f0800,
/* dcl_2d s0 */
0x05000051, 0xa00f0000, 0x00000000, 0x00000000, 0x40a00000,
0x40a00000, /* def c0, 0.0, 0.0, 5.0, 5.0 */
0x03000002, 0x800f0000, 0x90e40000, 0xa0e40000,
/* add r0, v0, c0 */
0x03020042, 0x800f0000, 0x80e40000, 0xa0e40800,
/* texldb r0, r0, s0 */
0x02000001, 0x800f0800, 0x80e40000,
/* mov oC0, r0 */
0x0000ffff,
/* end */
- };
- static const DWORD ps_code_texldl[] =
- {
/* Test texldl : Fetch4 uses the same D3D state as LOD bias,
therefore disables LOD.
* The explicit LOD level is then ignored. Same
result as texld */
/* NOTE: Radeon HD 5700 driver 8.17.10.1404 disables Fetch4 on
texldl */
0xffff0300,
/* ps_3_0 */
0x0200001f, 0x80000005, 0x900f0000,
/* dcl_texcoord v0 */
0x0200001f, 0x90000000, 0xa00f0800,
/* dcl_2d s0 */
0x05000051, 0xa00f0000, 0x00000000, 0x00000000, 0x3f000000,
0x3f000000, /* def c0, 0.0, 0.0, 0.5, 0.5 */
0x03000002, 0x800f0000, 0x90e40000, 0xa0e40000,
/* add r0, v0, c0 */
0x0300005f, 0x800f0000, 0x80e40000, 0xa0e40800,
/* texldl r0, r0, s0 */
0x02000001, 0x800f0800, 0x80e40000,
/* mov oC0, r0 */
0x0000ffff,
/* end */
- };
- static const DWORD ps_code_3d[] =
- {
0xffff0300,
/* ps_3_0 */
0x0200001f, 0x80000005, 0x900f0000,
/* dcl_texcoord v0 */
0x0200001f, 0xa0000000, 0xa00f0800,
/* dcl_volume s0 */
0x03000042, 0x800f0000, 0x90e40000, 0xa0e40800,
/* texld r0, v0, s0 */
0x02000001, 0x800f0800, 0x80e40000,
/* mov oC0, r0 */
0x0000ffff
/* end */
- };
- static const struct
- {
struct vec3 position;
struct vec3 texcoord;
- }
- quad[] =
- {
/* Tilted on Z axis to get a depth gradient in the depth test
*/
/* NOTE: Using 0.55f-0.6f to avoid rounding errors on depth
tests */
{{-1.0f, 1.0f, 1.0f}, {0.0f,0.0f,0.6f} },
{{ 1.0f, 1.0f, 0.0f}, {1.0f,0.0f,0.6f} },
{{-1.0f,-1.0f, 0.0f}, {0.0f,1.0f,0.6f} },
{{ 1.0f,-1.0f, 0.0f}, {1.0f,1.0f,0.6f} }
- };
- static const struct
- {
UINT x[4], y[4]; /* Matrix Sampling positions */
D3DCOLOR color_amd[16]; /* AMD original implementation
swizzle with -0.5 texel coord */
D3DCOLOR color_intel[16]; /* Intel UHD 620 implementation
swizzle with no texel coord correction */
/* Wine follows the AMD immplementation, and consider an error
the Intel one results
* However, the test will accept as valid the intel only if
running on windows */
D3DCOLOR color_3d_fetch4_off[16];
D3DCOLOR color_fetch4_off[16];
- }
- expected_colors =
- {
{ 40, 200, 360, 520},
{ 30, 150, 270, 390},
/* AMD implementation - Wine implementation */
{0x131202f2, 0x1211f2f1, 0x1110f101, 0x10130102,
0x02f204f4, 0xf2f1f4f3, 0xf101f303, 0x01020304,
0x04f42322, 0xf4f32221, 0xf3032120, 0x03042023,
0x23221312, 0x22211211, 0x21201110, 0x20231013},
/* Intel UHD 620 implementation */
{0x23102013, 0x22132312, 0x21122211, 0x20112110,
0x13011002, 0x120213f2, 0x11f212f1, 0x10f11101,
0x02030104, 0xf20402f4, 0xf1f4f2f3, 0x01f3f103,
0x04200323, 0xf4230422, 0xf322f421, 0x0321f320},
/* Fetch4 off on 3D textures */
{0xff020202, 0xfff2f2f2, 0xfff1f1f1, 0xff010101,
0xff050505, 0xfff4f4f4, 0xfff3f3f3, 0xff030303,
0xff232323, 0xff222222, 0xff212121, 0xff202020,
0xff131313, 0xff121212, 0xff111111, 0xff101010},
/* Fetch4 off on 2D texture */
{0x13131313, 0x12121212, 0x11111111, 0x10101010,
0x02020202, 0xf2f2f2f2, 0xf1f1f1f1, 0x01010101,
0x04040404, 0xf4f4f4f4, 0xf3f3f3f3, 0x03030303,
0x23232323, 0x22222222, 0x21212121, 0x20202020}
- };
- static const DWORD fetch4_data[] = {0x10111213,
0x01f1f202,
0x03f3f404,
0x20212223};
- static struct
- {
IDirect3DVertexShader9 *vs;
IDirect3DPixelShader9 *ps;
const DWORD *ps_code;
const char *name;
BOOL projection; /* The results should be projected
(zoomed by 2) */
BOOL allow_off; /* Do not enforce Fetch4 enabled
on this one on Windows */
- }
- shaders[] =
- {
{NULL, NULL, NULL, "FFP", FALSE, FALSE},
{NULL, NULL, ps_code_texld, "texld", FALSE, FALSE},
{NULL, NULL, ps_code_texldp, "texldp", TRUE, FALSE},
{NULL, NULL, ps_code_texldd, "texldd", FALSE, TRUE},
{NULL, NULL, ps_code_texldb, "texldb", FALSE, TRUE},
{NULL, NULL, ps_code_texldl, "texldl", FALSE, TRUE},
- };
- static const struct
- {
D3DFORMAT format; /* The format of the texture */
DWORD data; /* The data we will write to the
first line */
UINT x, y; /* Where we expect the color to be
*/
BOOL broken_wine; /* Do not check it on wine because
is known ot be broken */
D3DCOLOR color_amd[3]; /* Wine results. Results on AMD
swizzle + texture offset */
D3DCOLOR color_intel[3]; /* Results with intel UHD 620,
intel swizzle + no texel offset */
- }
- format_tests[] =
- {
/* Enabled formats */
{D3DFMT_L8, 0xff804010, 360, 270, FALSE,
{0x00004010, 0x00004010, 0x10400000},
{0x40001000, 0x40001000, 0x40001000}
},
{D3DFMT_L16, 0xff804010, 360, 270, FALSE,
{0x0000ff40, 0x0000ff40, 0x40ff0000},
{0xff004000, 0xff004000, 0xff004000}
},
{D3DFMT_R16F, 0x38003c00, 360, 270, FALSE,
{0x000080ff, 0x000080ff, 0xff800000},
{0x8000ff00, 0x8000ff00, 0x8000ff00}
},
{D3DFMT_R32F, 0x3f000000, 360, 270, FALSE,
{0x00000080, 0x00000080, 0x80000000},
{0x00008000, 0x00008000, 0x00008000}
},
/* Disabled format on Intel, enabled on AMD, broken on wine
* since it is implemented with GL_ALPHA, and fetch4 will
fetch RED value */
{D3DFMT_A8, 0xff804010, 360, 270, TRUE,
{0x00004010, 0x00004010, 0x10400000},
{0x00000000, 0x00000000, 0x00000000}
},
/* Disabled format */
{D3DFMT_A8R8G8B8, 0xff804010, 360, 270, FALSE,
{0x00000000, 0x00000000, 0xff804010},
{0x00000000, 0x00000000, 0xff804010}
},
- };
- static const struct
- {
D3DCOLOR color_off, color_amd, color_intel;
UINT x, y;
- }
- expected_depth[][4] =
- {
{
/* This is the expected result for shadow samplers */
{0xffffffff,0xffffffff,0xffffffff, 20, 15},
{0xffffffff,0xffffffff,0xffffffff,260, 15},
{0x00000000,0x00000000,0x00000000, 20,255},
{0x00000000,0x00000000,0x00000000,260,135},
},
{
/* This is the expected result with DF16 */
{0xfffe0000,0xfedfdfbf,0x202000ff, 20, 15},
{0xff9f0000,0x9f7f7f5f,0x00bf009f,260, 15},
{0xff800000,0x7f5f5f3f,0x9f000080, 20,255},
{0xff600000,0x5f3f3f1f,0x80809f60,260,135},
},
{
/* This is the expected result with DF24 */
{0xffff0000,0xffdfdfbf,0x202000ff, 20, 15},
{0xff9f0000,0x9f7f7f5f,0x00bf009f,260, 15},
{0xff800000,0x7f5f5f3f,0x9f000080, 20,255},
{0xff600000,0x5f3f3f1f,0x80809f60,260,135},
}
- };
- static const struct
- {
D3DFORMAT format;
const char *name;
UINT index;
- }
- depth_tests[] =
- {
{D3DFMT_D16_LOCKABLE, "D16_LOCKABLE", 0},
{D3DFMT_D32, "D32", 0},
{D3DFMT_D15S1, "D15S1", 0},
{D3DFMT_D24S8, "D24S8", 0},
{D3DFMT_D24X8, "D24X8", 0},
{D3DFMT_D24X4S4, "D24X4S4", 0},
{D3DFMT_D16, "D16", 0},
{D3DFMT_D32F_LOCKABLE, "D32F_LOCKABLE", 0},
{D3DFMT_D24FS8, "D24FS8", 0},
{MAKEFOURCC('D','F','1','6'), "DF16", 1},
{MAKEFOURCC('D','F','2','4'), "DF24", 2},
- };
- const BOOL isWin = strcmp(winetest_platform, "wine");
- IDirect3DSurface9 *original_ds, *original_rt, *rt;
- IDirect3DVolumeTexture9 *texture3D;
- IDirect3DPixelShader9 *ps_3d;
- struct surface_readback rb;
- IDirect3DVertexShader9 *vs;
- IDirect3DTexture9 *texture;
- IDirect3DDevice9 *device;
- D3DLOCKED_RECT lr;
- D3DLOCKED_BOX lb;
- IDirect3D9 *d3d;
- ULONG refcount;
- D3DCAPS9 caps;
- UINT i, j, k;
- HWND window;
- HRESULT hr;
- window = create_window();
- d3d = Direct3DCreate9(D3D_SDK_VERSION);
- ok(!!d3d, "Failed to create a D3D object.\n");
- if (FAILED(IDirect3D9_CheckDeviceFormat(d3d, D3DADAPTER_DEFAULT,
D3DDEVTYPE_HAL,
D3DFMT_X8R8G8B8, D3DUSAGE_DEPTHSTENCIL, D3DRTYPE_TEXTURE,
MAKEFOURCC('D','F','2','4'))))
- {
skip("No DF24 support, skipping FETCH4 test.\n");
goto done;
- }
- if (!(device = create_device(d3d, window, window, TRUE)))
- {
skip("Failed to create a D3D device, skipping tests.\n");
goto done;
- }
- hr = IDirect3DDevice9_GetDeviceCaps(device, &caps);
- ok(SUCCEEDED(hr), "GetDeviceCaps failed, hr %#x.\n", hr);
- if (caps.PixelShaderVersion < D3DPS_VERSION(3, 0))
- {
skip("No pixel shader 3.0 support, skipping FETCH4 test.\n");
IDirect3DDevice9_Release(device);
goto done;
- }
- hr = IDirect3DDevice9_GetRenderTarget(device, 0, &original_rt);
- ok(SUCCEEDED(hr), "GetRenderTarget failed, hr %#x.\n", hr);
- hr = IDirect3DDevice9_GetDepthStencilSurface(device, &original_ds);
- ok(SUCCEEDED(hr), "GetDepthStencilSurface failed, hr %#x.\n", hr);
- hr = IDirect3DDevice9_CreateRenderTarget(device, 8, 8,
D3DFMT_A8R8G8B8,
D3DMULTISAMPLE_NONE, 0, FALSE, &rt, NULL);
- ok(SUCCEEDED(hr), "CreateRenderTarget failed, hr %#x.\n", hr);
- /* Create our texture for FETCH4 shader testing */
- hr = IDirect3DDevice9_CreateTexture(device, 4, 4, 1, 0, D3DFMT_L8,
D3DPOOL_MANAGED, &texture, NULL);
- ok(hr == D3D_OK, "Failed to create texture, hr %#x.\n", hr);
- hr = IDirect3DTexture9_LockRect(texture, 0, &lr, NULL, 0);
- ok(hr == D3D_OK, "Failed to lock texture, hr %#x.\n", hr);
- for (i = 0; i < ARRAY_SIZE(fetch4_data); ++i)
memcpy((BYTE *)lr.pBits + i*lr.Pitch, &fetch4_data[i],
sizeof(fetch4_data[i]));
- hr = IDirect3DTexture9_UnlockRect(texture, 0);
- ok(hr == D3D_OK, "Failed to unlock texture, hr %#x.\n", hr);
- /* Create vertex shader */
- hr = IDirect3DDevice9_CreateVertexShader(device, vs_code, &vs);
- ok(hr == D3D_OK, "IDirect3DDevice9_CreateVertexShader returned
%08x\n", hr);
- /* Prepare the pixel shaders */
- for (i = 0; i < ARRAY_SIZE(shaders); ++i)
- {
if (shaders[i].ps_code)
{
hr = IDirect3DDevice9_CreatePixelShader(device,
shaders[i].ps_code, &shaders[i].ps);
ok(SUCCEEDED(hr), "CreatePixelShader failed, hr %#x.\n",
hr);
/* Copy vertex shader pointer if a PS is present */
shaders[i].vs = vs;
}
- }
- hr = IDirect3DDevice9_CreatePixelShader(device, ps_code_3d,
&ps_3d);
- ok(SUCCEEDED(hr), "CreatePixelShader failed, hr %#x.\n", hr);
- hr = IDirect3DDevice9_SetFVF(device, D3DFVF_XYZ | D3DFVF_TEX1 |
D3DFVF_TEXCOORDSIZE3(0));
- ok(SUCCEEDED(hr), "SetFVF failed, hr %#x.\n", hr);
- hr = IDirect3DDevice9_SetRenderState(device, D3DRS_ZENABLE,
D3DZB_TRUE);
- ok(SUCCEEDED(hr), "SetRenderState failed, hr %#x.\n", hr);
- hr = IDirect3DDevice9_SetRenderState(device, D3DRS_ZFUNC,
D3DCMP_ALWAYS);
- ok(SUCCEEDED(hr), "SetRenderState failed, hr %#x.\n", hr);
- hr = IDirect3DDevice9_SetRenderState(device, D3DRS_ZWRITEENABLE,
TRUE);
- ok(SUCCEEDED(hr), "SetRenderState failed, hr %#x.\n", hr);
- hr = IDirect3DDevice9_SetRenderState(device, D3DRS_LIGHTING,
FALSE);
- ok(SUCCEEDED(hr), "SetRenderState failed, hr %#x.\n", hr);
- hr = IDirect3DDevice9_SetTexture(device, 0, (IDirect3DBaseTexture9
*)texture);
- ok(hr == D3D_OK, "Failed to set texture, hr %#x.\n", hr);
- /* According to the spec, FETCH4 is enabled when
D3DSAMP_MIPMAPLODBIAS == GET4 and also
* D3DSAMP_MAGFILTER == D3DTEXF_POINT. But apparently only GET4 is
needed for it to get active.
* However, AMD HW r500 samples always as if POINT (nearest
filtering) is selected with FETCH4
* the driver later on corrected this by adding -0.5 texel coord.
*/
- hr = IDirect3DDevice9_SetSamplerState(device, 0,
D3DSAMP_MIPMAPLODBIAS, MAKEFOURCC('G','E','T','4'));
- ok(SUCCEEDED(hr), "SetSamplerState failed, hr %#x.\n", hr);
- hr = IDirect3DDevice9_SetSamplerState(device, 0,
D3DSAMP_MAGFILTER, D3DTEXF_POINT);
- ok(SUCCEEDED(hr), "SetSamplerState failed, hr %#x.\n", hr);
- hr = IDirect3DDevice9_SetSamplerState(device, 0,
D3DSAMP_MINFILTER, D3DTEXF_POINT);
- ok(SUCCEEDED(hr), "SetSamplerState failed, hr %#x.\n", hr);
- hr = IDirect3DDevice9_SetSamplerState(device, 0,
D3DSAMP_MIPFILTER, D3DTEXF_POINT);
- ok(SUCCEEDED(hr), "SetSamplerState failed, hr %#x.\n", hr);
/***********************************************************************
* Tests for FFP/PS correctness when using L8 texture with fetch4.
***********************************************************************/
- /* Render with fetch4 and test if we obtain proper results for all
sampler FFP/PS instructions */
- for (i = 0; i < ARRAY_SIZE(shaders); ++i)
- {
hr = IDirect3DDevice9_SetVertexShader(device, shaders[i].vs);
ok(SUCCEEDED(hr), "Failed to set vertex shader, hr %#x.\n",
hr);
hr = IDirect3DDevice9_SetPixelShader(device, shaders[i].ps);
ok(SUCCEEDED(hr), "SetPixelShader failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_Clear(device, 0, NULL, D3DCLEAR_TARGET |
D3DCLEAR_ZBUFFER, 0, 0.0f, 0);
ok(SUCCEEDED(hr), "Clear failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_BeginScene(device);
ok(SUCCEEDED(hr), "BeginScene failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_DrawPrimitiveUP(device,
D3DPT_TRIANGLESTRIP, 2, quad, sizeof(*quad));
ok(SUCCEEDED(hr), "DrawPrimitiveUP failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_EndScene(device);
ok(SUCCEEDED(hr), "EndScene failed, hr %#x.\n", hr);
get_rt_readback(original_rt, &rb);
for (j = 0; j < ARRAY_SIZE(expected_colors.color_amd); ++j)
{
UINT x = expected_colors.x[j % 4];
UINT y = expected_colors.y[j / 4];
D3DCOLOR color = get_readback_color(&rb, x, y);
D3DCOLOR color_amd =
expected_colors.color_amd[shaders[i].projection ? (j/4/2*4 + (j%4)/2) : j];
D3DCOLOR color_intel = expected_colors.color_intel[j];
ok(color_match(color, color_amd, 1)
|| (isWin && (color_match(color, color_intel, 1)
|| shaders[i].allow_off)),
"Test %s Expected color 0x%08x at (%u, %u), got
0x%08x.\n", shaders[i].name,
color_amd, x, y, color);
}
release_surface_readback(&rb);
hr = IDirect3DDevice9_Present(device, NULL, NULL, NULL, NULL);
ok(SUCCEEDED(hr), "Present failed, hr %#x.\n", hr);
- }
/***************************************************************************
* Tests for fetch4 enable/disable with different texture formats
in FFP/PS. *
***************************************************************************/
- /* Create the textures to test FETCH4 does work/not work there as
expected */
- for (i = 0; i < ARRAY_SIZE(format_tests); ++i)
- {
IDirect3DTexture9 *tex;
hr = IDirect3DDevice9_CreateTexture(device, 2, 2, 1, 0,
format_tests[i].format,
D3DPOOL_MANAGED, &tex, NULL);
ok(hr == D3D_OK, "Failed to create texture, hr %#x.\n", hr);
hr = IDirect3DTexture9_LockRect(tex, 0, &lr, NULL, 0);
ok(hr == D3D_OK, "Failed to lock texture, hr %#x.\n", hr);
memcpy(lr.pBits, &format_tests[i].data, 4);
hr = IDirect3DTexture9_UnlockRect(tex, 0);
ok(hr == D3D_OK, "Failed to unlock texture, hr %#x.\n", hr);
hr = IDirect3DDevice9_SetTexture(device, 0,
(IDirect3DBaseTexture9 *)tex);
ok(hr == D3D_OK, "Failed to set texture, hr %#x.\n", hr);
/* Test if FETCH4 is enabled/disabled when different textures
are used with FFP/texld/texldp */
for (j = 0; j < ARRAY_SIZE(format_tests[i].color_amd); ++j)
{
hr = IDirect3DDevice9_SetVertexShader(device,
shaders[j].vs);
ok(SUCCEEDED(hr), "Failed to set vertex shader, hr
%#x.\n", hr);
hr = IDirect3DDevice9_SetPixelShader(device,
shaders[j].ps);
ok(SUCCEEDED(hr), "SetPixelShader failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_Clear(device, 0, NULL,
D3DCLEAR_TARGET | D3DCLEAR_ZBUFFER, 0, 0.0f, 0);
ok(SUCCEEDED(hr), "Clear failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_BeginScene(device);
ok(SUCCEEDED(hr), "BeginScene failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_DrawPrimitiveUP(device,
D3DPT_TRIANGLESTRIP, 2, quad, sizeof(*quad));
ok(SUCCEEDED(hr), "DrawPrimitiveUP failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_EndScene(device);
ok(SUCCEEDED(hr), "EndScene failed, hr %#x.\n", hr);
get_rt_readback(original_rt, &rb);
D3DCOLOR color = get_readback_color(&rb,
format_tests[i].x, format_tests[i].y);
D3DCOLOR color_amd = format_tests[i].color_amd[j];
D3DCOLOR color_intel = format_tests[i].color_intel[j];
todo_wine_if(format_tests[i].broken_wine)
ok(color_match(color, color_amd, 1)
|| (isWin && color_match(color, color_intel, 1)),
"Test %d,%s expected color 0x%08x at (%u, %u), got
0x%08x.\n", i, shaders[j].name,
color_amd, format_tests[i].x, format_tests[i].y,
color);
release_surface_readback(&rb);
hr = IDirect3DDevice9_Present(device, NULL, NULL, NULL,
NULL);
ok(SUCCEEDED(hr), "Present failed, hr %#x.\n", hr);
}
IDirect3DTexture9_Release(tex);
- }
- /**************************************************
* Tests that fetch4 works with 3D textures. *
**************************************************/
- /* Create volume (3D) texture */
- IDirect3DDevice9_CreateVolumeTexture(device, 4, 4, 2, 1, 0,
D3DFMT_L8, D3DPOOL_MANAGED, &texture3D, NULL );
- ok(hr == D3D_OK, "Failed to create volume texture, hr %#x.\n", hr);
- hr = IDirect3DVolumeTexture9_LockBox(texture3D, 0, &lb, NULL, 0);
- ok(hr == D3D_OK, "Failed to lock texture3D, hr %#x.\n", hr);
- for (i = 0; i < ARRAY_SIZE(fetch4_data); ++i)
- {
memcpy((BYTE *)lb.pBits + i*lb.RowPitch, &fetch4_data[i],
sizeof(fetch4_data[i]));
/* Shift the lower level, to keep it different */
memcpy((BYTE *)lb.pBits + i*lb.RowPitch + lb.SlicePitch,
&fetch4_data[(i+1)%4], sizeof(fetch4_data[i]));
- }
- hr = IDirect3DVolumeTexture9_UnlockBox(texture3D, 0);
- ok(hr == D3D_OK, "Failed to unlock texture3D, hr %#x.\n", hr);
- hr = IDirect3DDevice9_SetTexture(device, 0, (IDirect3DBaseTexture9
*)texture3D);
- ok(hr == D3D_OK, "Failed to set texture3D, hr %#x.\n", hr);
- /* Test FFP and texld with dcl_volume (ps_3d) */
- for (i = 0; i < 2; ++i)
- {
hr = IDirect3DDevice9_SetVertexShader(device, i ? vs : NULL);
ok(SUCCEEDED(hr), "Failed to set vertex shader, hr %#x.\n",
hr);
hr = IDirect3DDevice9_SetPixelShader(device, i ? ps_3d : NULL);
ok(SUCCEEDED(hr), "SetPixelShader failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_Clear(device, 0, NULL, D3DCLEAR_TARGET |
D3DCLEAR_ZBUFFER, 0, 0.0f, 0);
ok(SUCCEEDED(hr), "Clear failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_BeginScene(device);
ok(SUCCEEDED(hr), "BeginScene failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_DrawPrimitiveUP(device,
D3DPT_TRIANGLESTRIP, 2, quad, sizeof(*quad));
ok(SUCCEEDED(hr), "DrawPrimitiveUP failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_EndScene(device);
ok(SUCCEEDED(hr), "EndScene failed, hr %#x.\n", hr);
get_rt_readback(original_rt, &rb);
for (j = 0; j < ARRAY_SIZE(expected_colors.color_amd); ++j)
{
UINT x = expected_colors.x[j % 4];
UINT y = expected_colors.y[j / 4];
D3DCOLOR color = get_readback_color(&rb, x, y);
D3DCOLOR color_amd = expected_colors.color_amd[j];
D3DCOLOR color_intel = expected_colors.color_intel[j];
D3DCOLOR color_off =
expected_colors.color_3d_fetch4_off[j];
D3DCOLOR color_zround = expected_colors.color_amd[(j+4) %
ARRAY_SIZE(expected_colors.color_amd)];
/* FIXME: Fetch4 on 3D textures have different results
based on the vendor/driver
* - AMD "HD 5700" rounds to nearest "z" texel, and does
fetch4 normally on .xy
* - AMD "R500" has fetch4 disabled
* - AMD "R580" has fetch4 enabled sampling at .xy0
* - Intel UHD 620 sample with fetch4 at .xy0
* Currently unimplemented on wine due to lack of GL
functionality to cast 3D->2DArray
* Wine produces same results as if fetch4 is not enabled
(which probably is better)
* Test will pass on windows if either one of the allowed
results is returned */
if(isWin)
ok(color_match(color, color_zround, 2) ||
color_match(color, color_off, 2)
|| color_match(color, color_intel, 2) ||
color_match(color, color_amd, 2),
"Test 3D %s Expected colors 0x%08x || 0x%08x
|| 0x%08x || 0x%08x at (%u, %u), got 0x%08x.\n",
shaders[i].name, color_amd, color_zround,
color_off, color_intel, x, y, color);
else
ok(color_match(color, color_off, 2),
"Test 3D %s Expected color 0x%08x at (%u, %u),
got 0x%08x.\n", shaders[i].name,
color_off, x, y, color);
}
release_surface_readback(&rb);
hr = IDirect3DDevice9_Present(device, NULL, NULL, NULL, NULL);
ok(SUCCEEDED(hr), "Present failed, hr %#x.\n", hr);
- }
- /********************************************************
* Tests for fetch4 enable/disable with depth textures. *
********************************************************/
- for (i = 0; i < ARRAY_SIZE(depth_tests); ++i)
- {
D3DFORMAT format = depth_tests[i].format;
IDirect3DTexture9 *depth_texture;
IDirect3DSurface9 *ds;
if (FAILED(IDirect3D9_CheckDeviceFormat(d3d,
D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL,
D3DFMT_X8R8G8B8, D3DUSAGE_DEPTHSTENCIL,
D3DRTYPE_TEXTURE, format)))
continue;
hr = IDirect3DDevice9_CreateTexture(device, 8, 8, 1,
D3DUSAGE_DEPTHSTENCIL, format, D3DPOOL_DEFAULT,
&depth_texture, NULL);
ok(SUCCEEDED(hr), "CreateTexture failed, hr %#x.\n", hr);
hr = IDirect3DTexture9_GetSurfaceLevel(depth_texture, 0, &ds);
ok(SUCCEEDED(hr), "GetSurfaceLevel failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_SetDepthStencilSurface(device, ds);
ok(SUCCEEDED(hr), "SetDepthStencilSurface failed, hr %#x.\n",
hr);
hr = IDirect3DDevice9_SetRenderTarget(device, 0, rt);
ok(SUCCEEDED(hr), "SetRenderTarget failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_SetVertexShader(device, NULL);
ok(SUCCEEDED(hr), "Failed to set vertex shader, hr %#x.\n",
hr);
hr = IDirect3DDevice9_SetPixelShader(device, NULL);
ok(SUCCEEDED(hr), "SetPixelShader failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_SetTexture(device, 0,
(IDirect3DBaseTexture9 *)texture);
ok(hr == D3D_OK, "Failed to set texture3D, hr %#x.\n", hr);
hr = IDirect3DDevice9_SetSamplerState(device, 0,
D3DSAMP_MIPMAPLODBIAS, MAKEFOURCC('G','E','T','1'));
ok(SUCCEEDED(hr), "SetSamplerState failed, hr %#x.\n", hr);
/* Setup the depth/stencil surface. */
hr = IDirect3DDevice9_Clear(device, 0, NULL, D3DCLEAR_ZBUFFER,
0, 0.0f, 0);
ok(SUCCEEDED(hr), "Clear failed, hr %#x.\n", hr);
/* Render to the depth surface */
hr = IDirect3DDevice9_BeginScene(device);
ok(SUCCEEDED(hr), "BeginScene failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_DrawPrimitiveUP(device,
D3DPT_TRIANGLESTRIP, 2, quad, sizeof(*quad));
ok(SUCCEEDED(hr), "DrawPrimitiveUP failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_EndScene(device);
ok(SUCCEEDED(hr), "EndScene failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_SetDepthStencilSurface(device, NULL);
ok(SUCCEEDED(hr), "SetDepthStencilSurface failed, hr %#x.\n",
hr);
IDirect3DSurface9_Release(ds);
hr = IDirect3DDevice9_SetRenderTarget(device, 0, original_rt);
ok(SUCCEEDED(hr), "SetRenderTarget failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_SetTexture(device, 0,
(IDirect3DBaseTexture9 *)depth_texture);
ok(SUCCEEDED(hr), "SetTexture failed, hr %#x.\n", hr);
/* Set a shader for depth sampling, otherwise windows does not
show anything */
hr = IDirect3DDevice9_SetVertexShader(device, vs);
ok(SUCCEEDED(hr), "Failed to set vertex shader, hr %#x.\n",
hr);
hr = IDirect3DDevice9_SetPixelShader(device, shaders[1].ps);
/* same as texld */
ok(SUCCEEDED(hr), "SetPixelShader failed, hr %#x.\n", hr);
for (j = 0; j < 2; ++j){
hr = IDirect3DDevice9_SetSamplerState(device, 0,
D3DSAMP_MIPMAPLODBIAS, MAKEFOURCC('G','E','T', j ?
'4' : '1' ));
ok(SUCCEEDED(hr), "SetSamplerState failed, hr %#x.\n", hr);
/* Do the actual shadow mapping. */
hr = IDirect3DDevice9_BeginScene(device);
ok(SUCCEEDED(hr), "BeginScene failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_DrawPrimitiveUP(device,
D3DPT_TRIANGLESTRIP, 2, quad, sizeof(*quad));
ok(SUCCEEDED(hr), "DrawPrimitiveUP failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_EndScene(device);
ok(SUCCEEDED(hr), "EndScene failed, hr %#x.\n", hr);
get_rt_readback(original_rt, &rb);
for (k = 0; k <
ARRAY_SIZE(expected_depth[depth_tests[i].index]); ++k)
{
UINT x = expected_depth[depth_tests[i].index][k].x;
UINT y = expected_depth[depth_tests[i].index][k].y;
D3DCOLOR color_off =
expected_depth[depth_tests[i].index][k].color_off;
D3DCOLOR color_amd =
expected_depth[depth_tests[i].index][k].color_amd;
D3DCOLOR color_intel =
expected_depth[depth_tests[i].index][k].color_intel;
D3DCOLOR color = get_readback_color(&rb, x, y);
/* When Fetch4 is OFF, ignore G and B channels on
windows.
* Some implementations will copy R=G=B, some will set
them to 0 */
if(j == 0)
ok((isWin && color_match(color & 0xffff0000,
color_off & 0xffff0000, 2))
|| color_match(color, color_off, 2),
"Test OFF Expected color 0x%08x at (%u,
%u) for format %s, got 0x%08x.\n",
color_off, x, y, depth_tests[i].name,
color);
else
ok(color_match(color, color_amd, 2)
|| (isWin && color_match(color,
color_intel, 2)),
"Test ON Expected colors 0x%08x || 0x%08x
at (%u, %u) for format %s, got 0x%08x.\n",
color_amd, color_intel, x, y,
depth_tests[i].name, color);
}
release_surface_readback(&rb);
hr = IDirect3DDevice9_Present(device, NULL, NULL, NULL,
NULL);
ok(SUCCEEDED(hr), "Present failed, hr %#x.\n", hr);
}
hr = IDirect3DDevice9_SetTexture(device, 0, NULL);
ok(SUCCEEDED(hr), "SetTexture failed, hr %#x.\n", hr);
IDirect3DTexture9_Release(depth_texture);
- }
- IDirect3DVolumeTexture9_Release(texture3D);
- IDirect3DTexture9_Release(texture);
- for (i = 0; i < ARRAY_SIZE(shaders); ++i)
if (shaders[i].ps)
IDirect3DPixelShader9_Release(shaders[i].ps);
- IDirect3DPixelShader9_Release(ps_3d);
- IDirect3DVertexShader9_Release(vs);
- IDirect3DSurface9_Release(rt);
- IDirect3DSurface9_Release(original_ds);
- IDirect3DSurface9_Release(original_rt);
- refcount = IDirect3DDevice9_Release(device);
- ok(!refcount, "Device has %u references left.\n", refcount);
+done:
- IDirect3D9_Release(d3d);
- DestroyWindow(window);
+}
- static void shadow_test(void) { static const DWORD ps_code[] =
@@ -24660,6 +25303,7 @@ START_TEST(visual) depth_buffer2_test(); depth_blit_test(); intz_test();
- fetch4_test(); shadow_test(); fp_special_test(); depth_bounds_test();
Hi,
I managed to get FETCH4 working on 3DMark06 as well. I got minor vertex explosions on the firefly test; I am unsure whether this is FETCH4-related or caused by the hack I did to make FETCH4 be used (disabling a big chunk of formats). However, all lights looked OK in the tests.
I tested with stencil and without stencil and got the same performance results, but I agree that DF24 has no stencil, so I am changing that in the next patch version. Also, I tested with ATI1/ATI2/INTZ, but had no luck getting FETCH4 working on ATI1/2. I may have set the texture values wrong; I will give it a more thorough test tomorrow.
Regarding INTZ: interestingly, Intel does support FETCH4 on it! I also need to double-check this, since maybe I tested it wrong. If it is true that FETCH4 is on there, it is quite an interesting edge case: a vendor-specific format using another vendor's specific sampling. We should test it on AMD as well.
BR, Daniel
El sáb., 2 feb. 2019 a las 20:16, DarkZeros ([email protected]) escribió:
Good point about the other one-channel textures. I will add them to the list of tests. I was just not sure how many texture formats have FETCH4 enabled, so I took just the ones that existed in D3D9 plus DF16/DF24.
I think if the current state of the tests will not pass on nine, we can change them, or mark them as todo() or broken() if they fail on a platform that is not wine, because it is not a failure we can fix anyway if it is not running in wine. That will also cover possible cases that fail in the future on Windows implementations of FETCH4.
BR, Daniel
On Sat, 2 Feb 2019, 20:07 Axel Davy <[email protected]> wrote:
Hi,
L8 is xxx1, thus sampling it with FETCH4 is different for the A coordinate.
I understand your point for disabling it in wine for complex cases. But from what I'm used to in visual tests, nine doesn't do always the same things as wine but it isn't a problem as the wine behavior is not specifically checked when on wine. And we pass some wine_todo. With your approach we would fail even if we implement a correct behavior. But, of course, it is up to wine developers, if they want wine tests to fail on nine because it doesn't do exactly like wine.
You didn't comment on ATI1 and INTZ. Both have only one channel, so I would assume it's an interesting thing to test for FETCH4.
Axel
On 02/02/2019 20:50, DarkZeros wrote:
Hi Axel,
Thanks for testing with 3dmark06. I tried to run it without luck yesterday, apparently there is some winetrick that i am missing...
Regarding the linear sampler state: I haven't seen any case where setting the filters to LINEAR disables FETCH4. From the tests, it only slightly changes the results on R500 AMD, by changing the 0.5 texel offset.
My guess is that all apps that use FETCH4 take that into account and disable it explicitly with LODBIAS. Sampling a 1x1 L8 texture with FETCH4 will return the same value in all of RGBA, which is similar to a normal sample.
Regarding 3D textures, all implementations behave differently. Since linear sampling of a 3D texture uses 8 texels, returning only 4 of them is quite ambiguous. Intel decides to sample at .xy0 and ignore the Z axis. Some AMD devices disable FETCH4, some others sample at the nearest z coordinate.
In my opinion, we are hitting implementation bugs/corner cases. It is simpler to just keep fetch4 off there. Especially since implementing it is quite difficult.
The 3D texture tests in wine just check that FETCH4 is off (no todo; it should always be off). On Windows, however, they check that one of the 4 possible cases seen on AMD and Intel is obtained. How useful those tests are, I am not sure. I just left them there for reference.
Maybe we should remove those tests on windows, or even add todo in some of the cases for 3d. But in that case, what implementation do we want to follow?
Best regards, Daniel
On Sat, 2 Feb 2019, 19:14 Axel Davy <[email protected]> wrote:
Hi,
Apparently 3DMark06 believes somehow that LINEAR disables FETCH4. It sets MIPFILTER, MINFILTER and MAGFILTER to LINEAR, and resets ADDRESSV, ADDRESSU and MIPMAPLODBIAS (1, 1, 0 respectively) then behaves as if FETCH4 was disabled (it later disables it definitely with the MIPMAPLODBIAS setting).
I don't see any obvious visual glitch, though. The texture sampled with FETCH4 which isn't meant to be sampled with it (looking at how the shader values are used) is a D3DFMT_L8 of size 1x1...
I think all this points out that one has to be very careful about FETCH4 corner cases.
Thus I would suggest adding more formats and checks to your code, like for example INTZ, ATI1, ATI2 ?
For the part "Currently unimplemented on wine due to lack of GL functionality to cast 3D->2DArray", shouldn't the test use a wine_todo rather than an isWin check?
Axel
On 01/02/2019 01:56, Daniel Ansorregui wrote:
- Test texld/texldp/texldd/texldb/texldl in PS and FFP
- Test supported/unsupported texture formats on FFP/texld/texldp
- Test 3dtextures (Disabled, each platform has different results)
- Test depth textures DF16/DF24 with fetch4 on PS (FFP is broken on
windows)
Signed-off-by: Daniel Ansorregui [email protected]
dlls/d3d9/tests/visual.c | 644
+++++++++++++++++++++++++++++++++++++++
1 file changed, 644 insertions(+)
diff --git a/dlls/d3d9/tests/visual.c b/dlls/d3d9/tests/visual.c index c06acb77d4..028fc23078 100644 --- a/dlls/d3d9/tests/visual.c +++ b/dlls/d3d9/tests/visual.c @@ -15234,6 +15234,649 @@ done: DestroyWindow(window); }
+static void fetch4_test(void) +{
- static const DWORD vs_code[] =
- {
0xfffe0300,
/* vs_3_0 */
0x0200001f, 0x80000000, 0x900f0000,
/* dcl_position v0 */
0x0200001f, 0x80000005, 0x900f0001,
/* dcl_texcoord v1 */
0x0200001f, 0x80000000, 0xe00f0000,
/* dcl_position o0 */
0x0200001f, 0x80000005, 0xe00f0001,
/* dcl_texcoord o1 */
0x02000001, 0xe00f0000, 0x90e40000,
/* mov o0, v0 */
0x02000001, 0xe00f0001, 0x90e40001,
/* mov o1, v1 */
0x0000ffff
- };
- static const DWORD ps_code_texld[] =
- {
/* Test texld */
0xffff0300,
/* ps_3_0 */
0x0200001f, 0x80000005, 0x900f0000,
/* dcl_texcoord v0 */
0x0200001f, 0x90000000, 0xa00f0800,
/* dcl_2d s0 */
0x03000042, 0x800f0000, 0x90e40000, 0xa0e40800,
/* texld r0, v0, s0 */
0x02000001, 0x800f0800, 0x80e40000,
/* mov oC0, r0 */
0x0000ffff
/* end */
- };
- static const DWORD ps_code_texldp[] =
- {
/* Test texldp : AMD and Wine uses the projection on Fetch4,
Intel UHD 620 does not apply it */
0xffff0300,
/* ps_3_0 */
0x0200001f, 0x80000005, 0x900f0000,
/* dcl_texcoord v0 */
0x0200001f, 0x90000000, 0xa00f0800,
/* dcl_2d s0 */
0x05000051, 0xa00f0000, 0x00000000, 0x00000000, 0x00000000,
0x40000000, /* def c0, 0.0, 0.0, 0.0, 2.0 */
0x02000001, 0x80030000, 0x90540000,
/* mov r0.xy, v0.xyyy */
0x02000001, 0x800c0000, 0xa0fe0000,
/* mov r0.zw, c0.zwww */
0x03010042, 0x800f0000, 0x80e40000, 0xa0e40800,
/* texldp r0, r0, s0 */
0x02000001, 0x800f0800, 0x80e40000,
/* mov oC0, r0 */
0x0000ffff,
/* end */
- };
- static const DWORD ps_code_texldd[] =
- {
/* Test texldd : Fetch4 uses the same D3D state as LOD bias,
therefore disables LOD.
* Sampling LOD gradient should be ignored.
Same result as texld */
/* NOTE: Radeon HD 5700 driver 8.17.10.1404 disables Fetch4
on texldb */
0xffff0300,
/* ps_3_0 */
0x0200001f, 0x80000005, 0x900f0000,
/* dcl_texcoord v0 */
0x0200001f, 0x90000000, 0xa00f0800,
/* dcl_2d s0 */
0x05000051, 0xa00f0000, 0x3f000000, 0x3f000000, 0x3f000000,
0x3f000000, /* def c0, 0.5, 0.5, 0.5, 0.5 */
0x05000051, 0xa00f0001, 0x3f800000, 0x3f800000, 0x3f800000,
0x3f800000, /* def c0, 1.0, 1.0, 1.0, 1.0 */
0x02000001, 0x800f0002, 0xa0e40000,
/* mov r2, c0 */
0x0500005d, 0x800f0000, 0x90e40000, 0xa0e40800, 0xa0e40000,
0x80e40002, /* texldd r0, v0, s0, c0, r2 */
0x02000001, 0x800f0800, 0x80e40000,
/* mov oC0, r0 */
0x0000ffff,
/* end */
- };
- static const DWORD ps_code_texldb[] =
- {
/* Test texldb : Fetch4 uses the same D3D state as LOD bias,
therefore disables LOD.
* Same result as texld */
/* NOTE: Radeon HD 5700 driver 8.17.10.1404 disables Fetch4
on texldb */
0xffff0300,
/* ps_3_0 */
0x0200001f, 0x80000005, 0x900f0000,
/* dcl_texcoord v0 */
0x0200001f, 0x90000000, 0xa00f0800,
/* dcl_2d s0 */
0x05000051, 0xa00f0000, 0x00000000, 0x00000000, 0x40a00000,
0x40a00000, /* def c0, 0.0, 0.0, 5.0, 5.0 */
0x03000002, 0x800f0000, 0x90e40000, 0xa0e40000,
/* add r0, v0, c0 */
0x03020042, 0x800f0000, 0x80e40000, 0xa0e40800,
/* texldb r0, r0, s0 */
0x02000001, 0x800f0800, 0x80e40000,
/* mov oC0, r0 */
0x0000ffff,
/* end */
- };
- static const DWORD ps_code_texldl[] =
- {
/* Test texldl : Fetch4 uses the same D3D state as LOD bias,
therefore disables LOD.
* The explicit LOD level is then ignored. Same
result as texld */
/* NOTE: Radeon HD 5700 driver 8.17.10.1404 disables Fetch4
on texldl */
0xffff0300,
/* ps_3_0 */
0x0200001f, 0x80000005, 0x900f0000,
/* dcl_texcoord v0 */
0x0200001f, 0x90000000, 0xa00f0800,
/* dcl_2d s0 */
0x05000051, 0xa00f0000, 0x00000000, 0x00000000, 0x3f000000,
0x3f000000, /* def c0, 0.0, 0.0, 0.5, 0.5 */
0x03000002, 0x800f0000, 0x90e40000, 0xa0e40000,
/* add r0, v0, c0 */
0x0300005f, 0x800f0000, 0x80e40000, 0xa0e40800,
/* texldl r0, r0, s0 */
0x02000001, 0x800f0800, 0x80e40000,
/* mov oC0, r0 */
0x0000ffff,
/* end */
- };
- static const DWORD ps_code_3d[] =
- {
0xffff0300,
/* ps_3_0 */
0x0200001f, 0x80000005, 0x900f0000,
/* dcl_texcoord v0 */
0x0200001f, 0xa0000000, 0xa00f0800,
/* dcl_volume s0 */
0x03000042, 0x800f0000, 0x90e40000, 0xa0e40800,
/* texld r0, v0, s0 */
0x02000001, 0x800f0800, 0x80e40000,
/* mov oC0, r0 */
0x0000ffff
/* end */
- };
- static const struct
- {
struct vec3 position;
struct vec3 texcoord;
- }
- quad[] =
- {
/* Tilted on Z axis to get a depth gradient in the depth test
*/
/* NOTE: Using 0.55f-0.6f to avoid rounding errors on depth
tests */
{{-1.0f, 1.0f, 1.0f}, {0.0f,0.0f,0.6f} },
{{ 1.0f, 1.0f, 0.0f}, {1.0f,0.0f,0.6f} },
{{-1.0f,-1.0f, 0.0f}, {0.0f,1.0f,0.6f} },
{{ 1.0f,-1.0f, 0.0f}, {1.0f,1.0f,0.6f} }
- };
- static const struct
- {
UINT x[4], y[4]; /* Matrix Sampling positions */
D3DCOLOR color_amd[16]; /* AMD original implementation
swizzle with -0.5 texel coord */
D3DCOLOR color_intel[16]; /* Intel UHD 620 implementation
swizzle with no texel coord correction */
/* Wine follows the AMD immplementation, and consider an
error the Intel one results
* However, the test will accept as valid the intel only if
running on windows */
D3DCOLOR color_3d_fetch4_off[16];
D3DCOLOR color_fetch4_off[16];
- }
- expected_colors =
- {
{ 40, 200, 360, 520},
{ 30, 150, 270, 390},
/* AMD implementation - Wine implementation */
{0x131202f2, 0x1211f2f1, 0x1110f101, 0x10130102,
0x02f204f4, 0xf2f1f4f3, 0xf101f303, 0x01020304,
0x04f42322, 0xf4f32221, 0xf3032120, 0x03042023,
0x23221312, 0x22211211, 0x21201110, 0x20231013},
/* Intel UHD 620 implementation */
{0x23102013, 0x22132312, 0x21122211, 0x20112110,
0x13011002, 0x120213f2, 0x11f212f1, 0x10f11101,
0x02030104, 0xf20402f4, 0xf1f4f2f3, 0x01f3f103,
0x04200323, 0xf4230422, 0xf322f421, 0x0321f320},
/* Fetch4 off on 3D textures */
{0xff020202, 0xfff2f2f2, 0xfff1f1f1, 0xff010101,
0xff050505, 0xfff4f4f4, 0xfff3f3f3, 0xff030303,
0xff232323, 0xff222222, 0xff212121, 0xff202020,
0xff131313, 0xff121212, 0xff111111, 0xff101010},
/* Fetch4 off on 2D texture */
{0x13131313, 0x12121212, 0x11111111, 0x10101010,
0x02020202, 0xf2f2f2f2, 0xf1f1f1f1, 0x01010101,
0x04040404, 0xf4f4f4f4, 0xf3f3f3f3, 0x03030303,
0x23232323, 0x22222222, 0x21212121, 0x20202020}
- };
- static const DWORD fetch4_data[] = {0x10111213,
0x01f1f202,
0x03f3f404,
0x20212223};
- static struct
- {
IDirect3DVertexShader9 *vs;
IDirect3DPixelShader9 *ps;
const DWORD *ps_code;
const char *name;
BOOL projection; /* The results should be
projected (zoomed by 2) */
BOOL allow_off; /* Do not enforce Fetch4 enabled
on this one on Windows */
- }
- shaders[] =
- {
{NULL, NULL, NULL, "FFP", FALSE, FALSE},
{NULL, NULL, ps_code_texld, "texld", FALSE, FALSE},
{NULL, NULL, ps_code_texldp, "texldp", TRUE, FALSE},
{NULL, NULL, ps_code_texldd, "texldd", FALSE, TRUE},
{NULL, NULL, ps_code_texldb, "texldb", FALSE, TRUE},
{NULL, NULL, ps_code_texldl, "texldl", FALSE, TRUE},
- };
- static const struct
- {
D3DFORMAT format; /* The format of the texture */
DWORD data; /* The data we will write to the
first line */
UINT x, y; /* Where we expect the color to
be */
BOOL broken_wine; /* Do not check it on wine
because is known ot be broken */
D3DCOLOR color_amd[3]; /* Wine results. Results on AMD
swizzle + texture offset */
D3DCOLOR color_intel[3]; /* Results with intel UHD 620,
intel swizzle + no texel offset */
- }
- format_tests[] =
- {
/* Enabled formats */
{D3DFMT_L8, 0xff804010, 360, 270, FALSE,
{0x00004010, 0x00004010, 0x10400000},
{0x40001000, 0x40001000, 0x40001000}
},
{D3DFMT_L16, 0xff804010, 360, 270, FALSE,
{0x0000ff40, 0x0000ff40, 0x40ff0000},
{0xff004000, 0xff004000, 0xff004000}
},
{D3DFMT_R16F, 0x38003c00, 360, 270, FALSE,
{0x000080ff, 0x000080ff, 0xff800000},
{0x8000ff00, 0x8000ff00, 0x8000ff00}
},
{D3DFMT_R32F, 0x3f000000, 360, 270, FALSE,
{0x00000080, 0x00000080, 0x80000000},
{0x00008000, 0x00008000, 0x00008000}
},
/* Disabled format on Intel, enabled on AMD, broken on wine
* since it is implemented with GL_ALPHA, and fetch4 will
fetch RED value */
{D3DFMT_A8, 0xff804010, 360, 270, TRUE,
{0x00004010, 0x00004010, 0x10400000},
{0x00000000, 0x00000000, 0x00000000}
},
/* Disabled format */
{D3DFMT_A8R8G8B8, 0xff804010, 360, 270, FALSE,
{0x00000000, 0x00000000, 0xff804010},
{0x00000000, 0x00000000, 0xff804010}
},
- };
- static const struct
- {
D3DCOLOR color_off, color_amd, color_intel;
UINT x, y;
- }
- expected_depth[][4] =
- {
{
/* This is the expected result for shadow samplers */
{0xffffffff,0xffffffff,0xffffffff, 20, 15},
{0xffffffff,0xffffffff,0xffffffff,260, 15},
{0x00000000,0x00000000,0x00000000, 20,255},
{0x00000000,0x00000000,0x00000000,260,135},
},
{
/* This is the expected result with DF16 */
{0xfffe0000,0xfedfdfbf,0x202000ff, 20, 15},
{0xff9f0000,0x9f7f7f5f,0x00bf009f,260, 15},
{0xff800000,0x7f5f5f3f,0x9f000080, 20,255},
{0xff600000,0x5f3f3f1f,0x80809f60,260,135},
},
{
/* This is the expected result with DF24 */
{0xffff0000,0xffdfdfbf,0x202000ff, 20, 15},
{0xff9f0000,0x9f7f7f5f,0x00bf009f,260, 15},
{0xff800000,0x7f5f5f3f,0x9f000080, 20,255},
{0xff600000,0x5f3f3f1f,0x80809f60,260,135},
}
- };
- static const struct
- {
D3DFORMAT format;
const char *name;
UINT index;
- }
- depth_tests[] =
- {
{D3DFMT_D16_LOCKABLE, "D16_LOCKABLE", 0},
{D3DFMT_D32, "D32", 0},
{D3DFMT_D15S1, "D15S1", 0},
{D3DFMT_D24S8, "D24S8", 0},
{D3DFMT_D24X8, "D24X8", 0},
{D3DFMT_D24X4S4, "D24X4S4", 0},
{D3DFMT_D16, "D16", 0},
{D3DFMT_D32F_LOCKABLE, "D32F_LOCKABLE", 0},
{D3DFMT_D24FS8, "D24FS8", 0},
{MAKEFOURCC('D','F','1','6'), "DF16", 1},
{MAKEFOURCC('D','F','2','4'), "DF24", 2},
- };
- const BOOL isWin = strcmp(winetest_platform, "wine");
- IDirect3DSurface9 *original_ds, *original_rt, *rt;
- IDirect3DVolumeTexture9 *texture3D;
- IDirect3DPixelShader9 *ps_3d;
- struct surface_readback rb;
- IDirect3DVertexShader9 *vs;
- IDirect3DTexture9 *texture;
- IDirect3DDevice9 *device;
- D3DLOCKED_RECT lr;
- D3DLOCKED_BOX lb;
- IDirect3D9 *d3d;
- ULONG refcount;
- D3DCAPS9 caps;
- UINT i, j, k;
- HWND window;
- HRESULT hr;
- window = create_window();
- d3d = Direct3DCreate9(D3D_SDK_VERSION);
- ok(!!d3d, "Failed to create a D3D object.\n");
- if (FAILED(IDirect3D9_CheckDeviceFormat(d3d, D3DADAPTER_DEFAULT,
D3DDEVTYPE_HAL,
D3DFMT_X8R8G8B8, D3DUSAGE_DEPTHSTENCIL, D3DRTYPE_TEXTURE,
MAKEFOURCC('D','F','2','4'))))
- {
skip("No DF24 support, skipping FETCH4 test.\n");
goto done;
- }
- if (!(device = create_device(d3d, window, window, TRUE)))
- {
skip("Failed to create a D3D device, skipping tests.\n");
goto done;
- }
- hr = IDirect3DDevice9_GetDeviceCaps(device, &caps);
- ok(SUCCEEDED(hr), "GetDeviceCaps failed, hr %#x.\n", hr);
- if (caps.PixelShaderVersion < D3DPS_VERSION(3, 0))
- {
skip("No pixel shader 3.0 support, skipping FETCH4 test.\n");
IDirect3DDevice9_Release(device);
goto done;
- }
- hr = IDirect3DDevice9_GetRenderTarget(device, 0, &original_rt);
- ok(SUCCEEDED(hr), "GetRenderTarget failed, hr %#x.\n", hr);
- hr = IDirect3DDevice9_GetDepthStencilSurface(device,
&original_ds);
- ok(SUCCEEDED(hr), "GetDepthStencilSurface failed, hr %#x.\n", hr);
- hr = IDirect3DDevice9_CreateRenderTarget(device, 8, 8,
D3DFMT_A8R8G8B8,
D3DMULTISAMPLE_NONE, 0, FALSE, &rt, NULL);
- ok(SUCCEEDED(hr), "CreateRenderTarget failed, hr %#x.\n", hr);
- /* Create our texture for FETCH4 shader testing */
- hr = IDirect3DDevice9_CreateTexture(device, 4, 4, 1, 0,
D3DFMT_L8, D3DPOOL_MANAGED, &texture, NULL);
- ok(hr == D3D_OK, "Failed to create texture, hr %#x.\n", hr);
- hr = IDirect3DTexture9_LockRect(texture, 0, &lr, NULL, 0);
- ok(hr == D3D_OK, "Failed to lock texture, hr %#x.\n", hr);
- for (i = 0; i < ARRAY_SIZE(fetch4_data); ++i)
memcpy((BYTE *)lr.pBits + i*lr.Pitch, &fetch4_data[i],
sizeof(fetch4_data[i]));
- hr = IDirect3DTexture9_UnlockRect(texture, 0);
- ok(hr == D3D_OK, "Failed to unlock texture, hr %#x.\n", hr);
- /* Create vertex shader */
- hr = IDirect3DDevice9_CreateVertexShader(device, vs_code, &vs);
- ok(hr == D3D_OK, "IDirect3DDevice9_CreateVertexShader returned
%08x\n", hr);
- /* Prepare the pixel shaders */
- for (i = 0; i < ARRAY_SIZE(shaders); ++i)
- {
if (shaders[i].ps_code)
{
hr = IDirect3DDevice9_CreatePixelShader(device,
shaders[i].ps_code, &shaders[i].ps);
ok(SUCCEEDED(hr), "CreatePixelShader failed, hr %#x.\n",
hr);
/* Copy vertex shader pointer if a PS is present */
shaders[i].vs = vs;
}
- }
- hr = IDirect3DDevice9_CreatePixelShader(device, ps_code_3d,
&ps_3d);
- ok(SUCCEEDED(hr), "CreatePixelShader failed, hr %#x.\n", hr);
- hr = IDirect3DDevice9_SetFVF(device, D3DFVF_XYZ | D3DFVF_TEX1 |
D3DFVF_TEXCOORDSIZE3(0));
- ok(SUCCEEDED(hr), "SetFVF failed, hr %#x.\n", hr);
- hr = IDirect3DDevice9_SetRenderState(device, D3DRS_ZENABLE,
D3DZB_TRUE);
- ok(SUCCEEDED(hr), "SetRenderState failed, hr %#x.\n", hr);
- hr = IDirect3DDevice9_SetRenderState(device, D3DRS_ZFUNC,
D3DCMP_ALWAYS);
- ok(SUCCEEDED(hr), "SetRenderState failed, hr %#x.\n", hr);
- hr = IDirect3DDevice9_SetRenderState(device, D3DRS_ZWRITEENABLE,
TRUE);
- ok(SUCCEEDED(hr), "SetRenderState failed, hr %#x.\n", hr);
- hr = IDirect3DDevice9_SetRenderState(device, D3DRS_LIGHTING,
FALSE);
- ok(SUCCEEDED(hr), "SetRenderState failed, hr %#x.\n", hr);
- hr = IDirect3DDevice9_SetTexture(device, 0,
(IDirect3DBaseTexture9 *)texture);
- ok(hr == D3D_OK, "Failed to set texture, hr %#x.\n", hr);
- /* According to the spec, FETCH4 is enabled when
D3DSAMP_MIPMAPLODBIAS == GET4 and also
* D3DSAMP_MAGFILTER == D3DTEXF_POINT. But apparently only GET4
is needed for it to get active.
* However, AMD HW r500 samples always as if POINT (nearest
filtering) is selected with FETCH4
* the driver later on corrected this by adding -0.5 texel coord.
*/
- hr = IDirect3DDevice9_SetSamplerState(device, 0,
D3DSAMP_MIPMAPLODBIAS, MAKEFOURCC('G','E','T','4'));
- ok(SUCCEEDED(hr), "SetSamplerState failed, hr %#x.\n", hr);
- hr = IDirect3DDevice9_SetSamplerState(device, 0,
D3DSAMP_MAGFILTER, D3DTEXF_POINT);
- ok(SUCCEEDED(hr), "SetSamplerState failed, hr %#x.\n", hr);
- hr = IDirect3DDevice9_SetSamplerState(device, 0,
D3DSAMP_MINFILTER, D3DTEXF_POINT);
- ok(SUCCEEDED(hr), "SetSamplerState failed, hr %#x.\n", hr);
- hr = IDirect3DDevice9_SetSamplerState(device, 0,
D3DSAMP_MIPFILTER, D3DTEXF_POINT);
- ok(SUCCEEDED(hr), "SetSamplerState failed, hr %#x.\n", hr);
/***********************************************************************
* Tests for FFP/PS correctness when using L8 texture with
fetch4. *
***********************************************************************/
- /* Render with fetch4 and test if we obtain proper results for
all sampler FFP/PS instructions */
- for (i = 0; i < ARRAY_SIZE(shaders); ++i)
- {
hr = IDirect3DDevice9_SetVertexShader(device, shaders[i].vs);
ok(SUCCEEDED(hr), "Failed to set vertex shader, hr %#x.\n",
hr);
hr = IDirect3DDevice9_SetPixelShader(device, shaders[i].ps);
ok(SUCCEEDED(hr), "SetPixelShader failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_Clear(device, 0, NULL, D3DCLEAR_TARGET
| D3DCLEAR_ZBUFFER, 0, 0.0f, 0);
ok(SUCCEEDED(hr), "Clear failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_BeginScene(device);
ok(SUCCEEDED(hr), "BeginScene failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_DrawPrimitiveUP(device,
D3DPT_TRIANGLESTRIP, 2, quad, sizeof(*quad));
ok(SUCCEEDED(hr), "DrawPrimitiveUP failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_EndScene(device);
ok(SUCCEEDED(hr), "EndScene failed, hr %#x.\n", hr);
get_rt_readback(original_rt, &rb);
for (j = 0; j < ARRAY_SIZE(expected_colors.color_amd); ++j)
{
UINT x = expected_colors.x[j % 4];
UINT y = expected_colors.y[j / 4];
D3DCOLOR color = get_readback_color(&rb, x, y);
D3DCOLOR color_amd =
expected_colors.color_amd[shaders[i].projection ? (j/4/2*4 + (j%4)/2) : j];
D3DCOLOR color_intel = expected_colors.color_intel[j];
ok(color_match(color, color_amd, 1)
|| (isWin && (color_match(color, color_intel, 1)
|| shaders[i].allow_off)),
"Test %s Expected color 0x%08x at (%u, %u), got
0x%08x.\n", shaders[i].name,
color_amd, x, y, color);
}
release_surface_readback(&rb);
hr = IDirect3DDevice9_Present(device, NULL, NULL, NULL, NULL);
ok(SUCCEEDED(hr), "Present failed, hr %#x.\n", hr);
- }
/***************************************************************************
* Tests for fetch4 enable/disable with different texture formats
in FFP/PS. *
***************************************************************************/
- /* Create the textures to test FETCH4 does work/not work there as
expected */
- for (i = 0; i < ARRAY_SIZE(format_tests); ++i)
- {
IDirect3DTexture9 *tex;
hr = IDirect3DDevice9_CreateTexture(device, 2, 2, 1, 0,
format_tests[i].format,
D3DPOOL_MANAGED, &tex, NULL);
ok(hr == D3D_OK, "Failed to create texture, hr %#x.\n", hr);
hr = IDirect3DTexture9_LockRect(tex, 0, &lr, NULL, 0);
ok(hr == D3D_OK, "Failed to lock texture, hr %#x.\n", hr);
memcpy(lr.pBits, &format_tests[i].data, 4);
hr = IDirect3DTexture9_UnlockRect(tex, 0);
ok(hr == D3D_OK, "Failed to unlock texture, hr %#x.\n", hr);
hr = IDirect3DDevice9_SetTexture(device, 0,
(IDirect3DBaseTexture9 *)tex);
ok(hr == D3D_OK, "Failed to set texture, hr %#x.\n", hr);
/* Test if FETCH4 is enabled/disabled when different textures
are used with FFP/texld/texldp */
for (j = 0; j < ARRAY_SIZE(format_tests[i].color_amd); ++j)
{
hr = IDirect3DDevice9_SetVertexShader(device,
shaders[j].vs);
ok(SUCCEEDED(hr), "Failed to set vertex shader, hr
%#x.\n", hr);
hr = IDirect3DDevice9_SetPixelShader(device,
shaders[j].ps);
ok(SUCCEEDED(hr), "SetPixelShader failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_Clear(device, 0, NULL,
D3DCLEAR_TARGET | D3DCLEAR_ZBUFFER, 0, 0.0f, 0);
ok(SUCCEEDED(hr), "Clear failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_BeginScene(device);
ok(SUCCEEDED(hr), "BeginScene failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_DrawPrimitiveUP(device,
D3DPT_TRIANGLESTRIP, 2, quad, sizeof(*quad));
ok(SUCCEEDED(hr), "DrawPrimitiveUP failed, hr %#x.\n",
hr);
hr = IDirect3DDevice9_EndScene(device);
ok(SUCCEEDED(hr), "EndScene failed, hr %#x.\n", hr);
get_rt_readback(original_rt, &rb);
D3DCOLOR color = get_readback_color(&rb,
format_tests[i].x, format_tests[i].y);
D3DCOLOR color_amd = format_tests[i].color_amd[j];
D3DCOLOR color_intel = format_tests[i].color_intel[j];
todo_wine_if(format_tests[i].broken_wine)
ok(color_match(color, color_amd, 1)
|| (isWin && color_match(color, color_intel, 1)),
"Test %d,%s expected color 0x%08x at (%u, %u),
got 0x%08x.\n", i, shaders[j].name,
color_amd, format_tests[i].x, format_tests[i].y,
color);
release_surface_readback(&rb);
hr = IDirect3DDevice9_Present(device, NULL, NULL, NULL,
NULL);
ok(SUCCEEDED(hr), "Present failed, hr %#x.\n", hr);
}
IDirect3DTexture9_Release(tex);
- }
- /**************************************************
* Tests that fetch4 works with 3D textures. *
**************************************************/
- /* Create volume (3D) texture */
- IDirect3DDevice9_CreateVolumeTexture(device, 4, 4, 2, 1, 0,
D3DFMT_L8, D3DPOOL_MANAGED, &texture3D, NULL );
- ok(hr == D3D_OK, "Failed to create volume texture, hr %#x.\n",
hr);
- hr = IDirect3DVolumeTexture9_LockBox(texture3D, 0, &lb, NULL, 0);
- ok(hr == D3D_OK, "Failed to lock texture3D, hr %#x.\n", hr);
- for (i = 0; i < ARRAY_SIZE(fetch4_data); ++i)
- {
memcpy((BYTE *)lb.pBits + i*lb.RowPitch, &fetch4_data[i],
sizeof(fetch4_data[i]));
/* Shift the lower level, to keep it different */
memcpy((BYTE *)lb.pBits + i*lb.RowPitch + lb.SlicePitch,
&fetch4_data[(i+1)%4], sizeof(fetch4_data[i]));
- }
- hr = IDirect3DVolumeTexture9_UnlockBox(texture3D, 0);
- ok(hr == D3D_OK, "Failed to unlock texture3D, hr %#x.\n", hr);
- hr = IDirect3DDevice9_SetTexture(device, 0,
(IDirect3DBaseTexture9 *)texture3D);
- ok(hr == D3D_OK, "Failed to set texture3D, hr %#x.\n", hr);
- /* Test FFP and texld with dcl_volume (ps_3d) */
- for (i = 0; i < 2; ++i)
- {
hr = IDirect3DDevice9_SetVertexShader(device, i ? vs : NULL);
ok(SUCCEEDED(hr), "Failed to set vertex shader, hr %#x.\n",
hr);
hr = IDirect3DDevice9_SetPixelShader(device, i ? ps_3d :
NULL);
ok(SUCCEEDED(hr), "SetPixelShader failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_Clear(device, 0, NULL, D3DCLEAR_TARGET
| D3DCLEAR_ZBUFFER, 0, 0.0f, 0);
ok(SUCCEEDED(hr), "Clear failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_BeginScene(device);
ok(SUCCEEDED(hr), "BeginScene failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_DrawPrimitiveUP(device,
D3DPT_TRIANGLESTRIP, 2, quad, sizeof(*quad));
ok(SUCCEEDED(hr), "DrawPrimitiveUP failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_EndScene(device);
ok(SUCCEEDED(hr), "EndScene failed, hr %#x.\n", hr);
get_rt_readback(original_rt, &rb);
for (j = 0; j < ARRAY_SIZE(expected_colors.color_amd); ++j)
{
UINT x = expected_colors.x[j % 4];
UINT y = expected_colors.y[j / 4];
D3DCOLOR color = get_readback_color(&rb, x, y);
D3DCOLOR color_amd = expected_colors.color_amd[j];
D3DCOLOR color_intel = expected_colors.color_intel[j];
D3DCOLOR color_off =
expected_colors.color_3d_fetch4_off[j];
D3DCOLOR color_zround = expected_colors.color_amd[(j+4) %
ARRAY_SIZE(expected_colors.color_amd)];
/* FIXME: Fetch4 on 3D textures have different results
based on the vendor/driver
* - AMD "HD 5700" rounds to nearest "z" texel, and does
fetch4 normally on .xy
* - AMD "R500" has fetch4 disabled
* - AMD "R580" has fetch4 enabled sampling at .xy0
* - Intel UHD 620 sample with fetch4 at .xy0
* Currently unimplemented on wine due to lack of GL
functionality to cast 3D->2DArray
* Wine produces same results as if fetch4 is not enabled
(which probably is better)
* Test will pass on windows if either one of the allowed
results is returned */
if(isWin)
ok(color_match(color, color_zround, 2) ||
color_match(color, color_off, 2)
|| color_match(color, color_intel, 2) ||
color_match(color, color_amd, 2),
"Test 3D %s Expected colors 0x%08x || 0x%08x
|| 0x%08x || 0x%08x at (%u, %u), got 0x%08x.\n",
shaders[i].name, color_amd, color_zround,
color_off, color_intel, x, y, color);
else
ok(color_match(color, color_off, 2),
"Test 3D %s Expected color 0x%08x at (%u,
%u), got 0x%08x.\n", shaders[i].name,
color_off, x, y, color);
}
release_surface_readback(&rb);
hr = IDirect3DDevice9_Present(device, NULL, NULL, NULL, NULL);
ok(SUCCEEDED(hr), "Present failed, hr %#x.\n", hr);
- }
- /********************************************************
* Tests for fetch4 enable/disable with depth textures. *
********************************************************/
- for (i = 0; i < ARRAY_SIZE(depth_tests); ++i)
- {
D3DFORMAT format = depth_tests[i].format;
IDirect3DTexture9 *depth_texture;
IDirect3DSurface9 *ds;
if (FAILED(IDirect3D9_CheckDeviceFormat(d3d,
D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL,
D3DFMT_X8R8G8B8, D3DUSAGE_DEPTHSTENCIL,
D3DRTYPE_TEXTURE, format)))
continue;
hr = IDirect3DDevice9_CreateTexture(device, 8, 8, 1,
D3DUSAGE_DEPTHSTENCIL, format, D3DPOOL_DEFAULT,
&depth_texture, NULL);
ok(SUCCEEDED(hr), "CreateTexture failed, hr %#x.\n", hr);
hr = IDirect3DTexture9_GetSurfaceLevel(depth_texture, 0, &ds);
ok(SUCCEEDED(hr), "GetSurfaceLevel failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_SetDepthStencilSurface(device, ds);
ok(SUCCEEDED(hr), "SetDepthStencilSurface failed, hr %#x.\n",
hr);
hr = IDirect3DDevice9_SetRenderTarget(device, 0, rt);
ok(SUCCEEDED(hr), "SetRenderTarget failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_SetVertexShader(device, NULL);
ok(SUCCEEDED(hr), "Failed to set vertex shader, hr %#x.\n",
hr);
hr = IDirect3DDevice9_SetPixelShader(device, NULL);
ok(SUCCEEDED(hr), "SetPixelShader failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_SetTexture(device, 0,
(IDirect3DBaseTexture9 *)texture);
ok(hr == D3D_OK, "Failed to set texture3D, hr %#x.\n", hr);
hr = IDirect3DDevice9_SetSamplerState(device, 0,
D3DSAMP_MIPMAPLODBIAS, MAKEFOURCC('G','E','T','1'));
ok(SUCCEEDED(hr), "SetSamplerState failed, hr %#x.\n", hr);
/* Setup the depth/stencil surface. */
hr = IDirect3DDevice9_Clear(device, 0, NULL,
D3DCLEAR_ZBUFFER, 0, 0.0f, 0);
ok(SUCCEEDED(hr), "Clear failed, hr %#x.\n", hr);
/* Render to the depth surface */
hr = IDirect3DDevice9_BeginScene(device);
ok(SUCCEEDED(hr), "BeginScene failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_DrawPrimitiveUP(device,
D3DPT_TRIANGLESTRIP, 2, quad, sizeof(*quad));
ok(SUCCEEDED(hr), "DrawPrimitiveUP failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_EndScene(device);
ok(SUCCEEDED(hr), "EndScene failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_SetDepthStencilSurface(device, NULL);
ok(SUCCEEDED(hr), "SetDepthStencilSurface failed, hr %#x.\n",
hr);
IDirect3DSurface9_Release(ds);
hr = IDirect3DDevice9_SetRenderTarget(device, 0, original_rt);
ok(SUCCEEDED(hr), "SetRenderTarget failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_SetTexture(device, 0,
(IDirect3DBaseTexture9 *)depth_texture);
ok(SUCCEEDED(hr), "SetTexture failed, hr %#x.\n", hr);
/* Set a shader for depth sampling, otherwise windows does
not show anything */
hr = IDirect3DDevice9_SetVertexShader(device, vs);
ok(SUCCEEDED(hr), "Failed to set vertex shader, hr %#x.\n",
hr);
hr = IDirect3DDevice9_SetPixelShader(device, shaders[1].ps);
/* same as texld */
ok(SUCCEEDED(hr), "SetPixelShader failed, hr %#x.\n", hr);
for (j = 0; j < 2; ++j){
hr = IDirect3DDevice9_SetSamplerState(device, 0,
D3DSAMP_MIPMAPLODBIAS, MAKEFOURCC('G','E','T', j
? '4' : '1' ));
ok(SUCCEEDED(hr), "SetSamplerState failed, hr %#x.\n",
hr);
/* Do the actual shadow mapping. */
hr = IDirect3DDevice9_BeginScene(device);
ok(SUCCEEDED(hr), "BeginScene failed, hr %#x.\n", hr);
hr = IDirect3DDevice9_DrawPrimitiveUP(device,
D3DPT_TRIANGLESTRIP, 2, quad, sizeof(*quad));
ok(SUCCEEDED(hr), "DrawPrimitiveUP failed, hr %#x.\n",
hr);
hr = IDirect3DDevice9_EndScene(device);
ok(SUCCEEDED(hr), "EndScene failed, hr %#x.\n", hr);
get_rt_readback(original_rt, &rb);
for (k = 0; k <
ARRAY_SIZE(expected_depth[depth_tests[i].index]); ++k)
{
UINT x = expected_depth[depth_tests[i].index][k].x;
UINT y = expected_depth[depth_tests[i].index][k].y;
D3DCOLOR color_off =
expected_depth[depth_tests[i].index][k].color_off;
D3DCOLOR color_amd =
expected_depth[depth_tests[i].index][k].color_amd;
D3DCOLOR color_intel =
expected_depth[depth_tests[i].index][k].color_intel;
D3DCOLOR color = get_readback_color(&rb, x, y);
/* When Fetch4 is OFF, ignore G and B channels on
windows.
* Some implementations will copy R=G=B, some will
set them to 0 */
if(j == 0)
ok((isWin && color_match(color & 0xffff0000,
color_off & 0xffff0000, 2))
|| color_match(color, color_off, 2),
"Test OFF Expected color 0x%08x at (%u,
%u) for format %s, got 0x%08x.\n",
color_off, x, y, depth_tests[i].name,
color);
else
ok(color_match(color, color_amd, 2)
|| (isWin && color_match(color,
color_intel, 2)),
"Test ON Expected colors 0x%08x || 0x%08x
at (%u, %u) for format %s, got 0x%08x.\n",
color_amd, color_intel, x, y,
depth_tests[i].name, color);
}
release_surface_readback(&rb);
hr = IDirect3DDevice9_Present(device, NULL, NULL, NULL,
NULL);
ok(SUCCEEDED(hr), "Present failed, hr %#x.\n", hr);
}
hr = IDirect3DDevice9_SetTexture(device, 0, NULL);
ok(SUCCEEDED(hr), "SetTexture failed, hr %#x.\n", hr);
IDirect3DTexture9_Release(depth_texture);
- }
- IDirect3DVolumeTexture9_Release(texture3D);
- IDirect3DTexture9_Release(texture);
- for (i = 0; i < ARRAY_SIZE(shaders); ++i)
if (shaders[i].ps)
IDirect3DPixelShader9_Release(shaders[i].ps);
- IDirect3DPixelShader9_Release(ps_3d);
- IDirect3DVertexShader9_Release(vs);
- IDirect3DSurface9_Release(rt);
- IDirect3DSurface9_Release(original_ds);
- IDirect3DSurface9_Release(original_rt);
- refcount = IDirect3DDevice9_Release(device);
- ok(!refcount, "Device has %u references left.\n", refcount);
+done:
- IDirect3D9_Release(d3d);
- DestroyWindow(window);
+}
- static void shadow_test(void) { static const DWORD ps_code[] =
@@ -24660,6 +25303,7 @@ START_TEST(visual) depth_buffer2_test(); depth_blit_test(); intz_test();
- fetch4_test(); shadow_test(); fp_special_test(); depth_bounds_test();
- Test texld/texldp/texldd/texldb/texldl in PS and FFP - Test supported/unsupported texture formats on FFP/texld/texldp - Test 3D textures (Disabled, each platform has different results) - Test depth textures DF16/DF24 with fetch4 on PS (FFP is broken on Windows)
Signed-off-by: Daniel Ansorregui [email protected] --- dlls/d3d9/tests/visual.c | 662 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 662 insertions(+)
diff --git a/dlls/d3d9/tests/visual.c b/dlls/d3d9/tests/visual.c index c06acb77d4..756487deb4 100644 --- a/dlls/d3d9/tests/visual.c +++ b/dlls/d3d9/tests/visual.c @@ -15234,6 +15234,667 @@ done: DestroyWindow(window); }
+static void fetch4_test(void) +{ + static const DWORD vs_code[] = + { + 0xfffe0300, /* vs_3_0 */ + 0x0200001f, 0x80000000, 0x900f0000, /* dcl_position v0 */ + 0x0200001f, 0x80000005, 0x900f0001, /* dcl_texcoord v1 */ + 0x0200001f, 0x80000000, 0xe00f0000, /* dcl_position o0 */ + 0x0200001f, 0x80000005, 0xe00f0001, /* dcl_texcoord o1 */ + 0x02000001, 0xe00f0000, 0x90e40000, /* mov o0, v0 */ + 0x02000001, 0xe00f0001, 0x90e40001, /* mov o1, v1 */ + 0x0000ffff + }; + static const DWORD ps_code_texld[] = + { + /* Test texld */ + 0xffff0300, /* ps_3_0 */ + 0x0200001f, 0x80000005, 0x900f0000, /* dcl_texcoord v0 */ + 0x0200001f, 0x90000000, 0xa00f0800, /* dcl_2d s0 */ + 0x03000042, 0x800f0000, 0x90e40000, 0xa0e40800, /* texld r0, v0, s0 */ + 0x02000001, 0x800f0800, 0x80e40000, /* mov oC0, r0 */ + 0x0000ffff /* end */ + }; + static const DWORD ps_code_texldp[] = + { + /* Test texldp : AMD and Wine uses the projection on Fetch4, Intel UHD 620 does not apply it */ + 0xffff0300, /* ps_3_0 */ + 0x0200001f, 0x80000005, 0x900f0000, /* dcl_texcoord v0 */ + 0x0200001f, 0x90000000, 0xa00f0800, /* dcl_2d s0 */ + 0x05000051, 0xa00f0000, 0x00000000, 0x00000000, 0x00000000, 0x40000000, /* def c0, 0.0, 0.0, 0.0, 2.0 */ + 0x02000001, 0x80030000, 0x90540000, /* mov r0.xy, v0.xyyy */ + 0x02000001, 0x800c0000, 0xa0fe0000, /* mov r0.zw, c0.zwww */ + 0x03010042, 0x800f0000, 0x80e40000, 0xa0e40800, /* texldp r0, r0, s0 */ + 0x02000001, 0x800f0800, 0x80e40000, /* mov oC0, r0 */ + 0x0000ffff, /* end */ + }; + static const DWORD ps_code_texldd[] = + { + /* Test texldd : Fetch4 uses the same D3D state as LOD bias, therefore disables LOD. + * Sampling LOD gradient should be ignored. 
Same result as texld */ + /* NOTE: Radeon HD 5700 driver 8.17.10.1404 disables Fetch4 on texldb */ + 0xffff0300, /* ps_3_0 */ + 0x0200001f, 0x80000005, 0x900f0000, /* dcl_texcoord v0 */ + 0x0200001f, 0x90000000, 0xa00f0800, /* dcl_2d s0 */ + 0x05000051, 0xa00f0000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, /* def c0, 0.5, 0.5, 0.5, 0.5 */ + 0x05000051, 0xa00f0001, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, /* def c0, 1.0, 1.0, 1.0, 1.0 */ + 0x02000001, 0x800f0002, 0xa0e40000, /* mov r2, c0 */ + 0x0500005d, 0x800f0000, 0x90e40000, 0xa0e40800, 0xa0e40000, 0x80e40002, /* texldd r0, v0, s0, c0, r2 */ + 0x02000001, 0x800f0800, 0x80e40000, /* mov oC0, r0 */ + 0x0000ffff, /* end */ + }; + static const DWORD ps_code_texldb[] = + { + /* Test texldb : Fetch4 uses the same D3D state as LOD bias, therefore disables LOD. + * Same result as texld */ + /* NOTE: Radeon HD 5700 driver 8.17.10.1404 disables Fetch4 on texldb */ + 0xffff0300, /* ps_3_0 */ + 0x0200001f, 0x80000005, 0x900f0000, /* dcl_texcoord v0 */ + 0x0200001f, 0x90000000, 0xa00f0800, /* dcl_2d s0 */ + 0x05000051, 0xa00f0000, 0x00000000, 0x00000000, 0x40a00000, 0x40a00000, /* def c0, 0.0, 0.0, 5.0, 5.0 */ + 0x03000002, 0x800f0000, 0x90e40000, 0xa0e40000, /* add r0, v0, c0 */ + 0x03020042, 0x800f0000, 0x80e40000, 0xa0e40800, /* texldb r0, r0, s0 */ + 0x02000001, 0x800f0800, 0x80e40000, /* mov oC0, r0 */ + 0x0000ffff, /* end */ + }; + static const DWORD ps_code_texldl[] = + { + /* Test texldl : Fetch4 uses the same D3D state as LOD bias, therefore disables LOD. + * The explicit LOD level is then ignored. 
Same result as texld */ + /* NOTE: Radeon HD 5700 driver 8.17.10.1404 disables Fetch4 on texldl */ + 0xffff0300, /* ps_3_0 */ + 0x0200001f, 0x80000005, 0x900f0000, /* dcl_texcoord v0 */ + 0x0200001f, 0x90000000, 0xa00f0800, /* dcl_2d s0 */ + 0x05000051, 0xa00f0000, 0x00000000, 0x00000000, 0x3f000000, 0x3f000000, /* def c0, 0.0, 0.0, 0.5, 0.5 */ + 0x03000002, 0x800f0000, 0x90e40000, 0xa0e40000, /* add r0, v0, c0 */ + 0x0300005f, 0x800f0000, 0x80e40000, 0xa0e40800, /* texldl r0, r0, s0 */ + 0x02000001, 0x800f0800, 0x80e40000, /* mov oC0, r0 */ + 0x0000ffff, /* end */ + }; + static const DWORD ps_code_3d[] = + { + 0xffff0300, /* ps_3_0 */ + 0x0200001f, 0x80000005, 0x900f0000, /* dcl_texcoord v0 */ + 0x0200001f, 0xa0000000, 0xa00f0800, /* dcl_volume s0 */ + 0x03000042, 0x800f0000, 0x90e40000, 0xa0e40800, /* texld r0, v0, s0 */ + 0x02000001, 0x800f0800, 0x80e40000, /* mov oC0, r0 */ + 0x0000ffff /* end */ + }; + + static const struct + { + struct vec3 position; + struct vec3 texcoord; + } + quad[] = + { + /* Tilted on Z axis to get a depth gradient in the depth test */ + /* NOTE: Using 0.55f-0.6f to avoid rounding errors on depth tests */ + {{-1.0f, 1.0f, 1.0f}, {0.0f,0.0f,0.6f} }, + {{ 1.0f, 1.0f, 0.0f}, {1.0f,0.0f,0.6f} }, + {{-1.0f,-1.0f, 0.0f}, {0.0f,1.0f,0.6f} }, + {{ 1.0f,-1.0f, 0.0f}, {1.0f,1.0f,0.6f} } + }; + + static const struct + { + UINT x[4], y[4]; /* Matrix Sampling positions */ + D3DCOLOR color_amd[16]; /* AMD original implementation swizzle with -0.5 texel coord */ + D3DCOLOR color_intel[16]; /* Intel UHD 620 implementation swizzle with no texel coord correction */ + /* Wine follows the AMD immplementation, and consider an error the Intel one results + * However, the test will accept as valid the intel only if running on windows */ + D3DCOLOR color_3d_fetch4_off[16]; + D3DCOLOR color_fetch4_off[16]; + } + expected_colors = + { + { 40, 200, 360, 520}, + { 30, 150, 270, 390}, + /* AMD implementation - Wine implementation */ + {0x131202f2, 0x1211f2f1, 
0x1110f101, 0x10130102, + 0x02f204f4, 0xf2f1f4f3, 0xf101f303, 0x01020304, + 0x04f42322, 0xf4f32221, 0xf3032120, 0x03042023, + 0x23221312, 0x22211211, 0x21201110, 0x20231013}, + /* Intel UHD 620 implementation */ + {0x23102013, 0x22132312, 0x21122211, 0x20112110, + 0x13011002, 0x120213f2, 0x11f212f1, 0x10f11101, + 0x02030104, 0xf20402f4, 0xf1f4f2f3, 0x01f3f103, + 0x04200323, 0xf4230422, 0xf322f421, 0x0321f320}, + /* Fetch4 off on 3D textures */ + {0xff020202, 0xfff2f2f2, 0xfff1f1f1, 0xff010101, + 0xff050505, 0xfff4f4f4, 0xfff3f3f3, 0xff030303, + 0xff232323, 0xff222222, 0xff212121, 0xff202020, + 0xff131313, 0xff121212, 0xff111111, 0xff101010}, + /* Fetch4 off on 2D texture */ + {0x13131313, 0x12121212, 0x11111111, 0x10101010, + 0x02020202, 0xf2f2f2f2, 0xf1f1f1f1, 0x01010101, + 0x04040404, 0xf4f4f4f4, 0xf3f3f3f3, 0x03030303, + 0x23232323, 0x22222222, 0x21212121, 0x20202020} + }; + + static const DWORD fetch4_data[] = {0x10111213, + 0x01f1f202, + 0x03f3f404, + 0x20212223}; + + static struct + { + IDirect3DVertexShader9 *vs; + IDirect3DPixelShader9 *ps; + const DWORD *ps_code; + const char *name; + BOOL projection; /* The results should be projected (zoomed by 2) */ + } + shaders[] = + { + {NULL, NULL, NULL, "FFP", FALSE}, + {NULL, NULL, ps_code_texld, "texld", FALSE}, + {NULL, NULL, ps_code_texldp, "texldp", TRUE}, + {NULL, NULL, ps_code_texldd, "texldd", FALSE}, + {NULL, NULL, ps_code_texldb, "texldb", FALSE}, + {NULL, NULL, ps_code_texldl, "texldl", FALSE}, + }; + + static const struct + { + BOOL broken_wine; /* Do not check it on wine because is known to be broken */ + const char * name; /* The name of the texture format */ + D3DFORMAT format; /* The format of the texture */ + DWORD data; /* The data we will write to the first line */ + UINT x, y; /* Where we expect the color to be */ + UINT w, h; /* Texture size */ + D3DCOLOR color_amd[3]; /* Wine results. 
Results on AMD swizzle + texture offset */ + D3DCOLOR color_intel[3]; /* Results with intel UHD 620, intel swizzle + no texel offset */ + } + format_tests[] = + { + /* Enabled formats */ + {FALSE, "L8", D3DFMT_L8, + 0xff804010, 360, 270, 2, 2, + {0x00004010, 0x00004010, 0x10400000}, + {0x40001000, 0x40001000, 0x40001000} + }, + {FALSE, "L16", D3DFMT_L16, + 0xff804010, 360, 270, 2, 2, + {0x0000ff40, 0x0000ff40, 0x40ff0000}, + {0xff004000, 0xff004000, 0xff004000} + }, + {FALSE, "R16F", D3DFMT_R16F, + 0x38003c00, 360, 270, 2, 2, + {0x000080ff, 0x000080ff, 0xff800000}, + {0x8000ff00, 0x8000ff00, 0x8000ff00} + }, + {FALSE, "R32F", D3DFMT_R32F, + 0x3f000000, 360, 270, 2, 2, + {0x00000080, 0x00000080, 0x80000000}, + {0x00008000, 0x00008000, 0x00008000} + }, + {FALSE, "ATI1", MAKEFOURCC('A','T','I','1'), + 0xb97700ff, 360, 60, 4, 4, + {0x6d6d6d6d, 0x6d6d6d6d, 0x49494949}, + {0x00008000, 0x00008000, 0x00008000} + }, + + /* Disabled format on Intel, enabled on AMD, broken on wine + * since it is implemented with GL_ALPHA, and fetch4 will fetch RED value */ + {TRUE, "A8", D3DFMT_A8, + 0xff804010, 360, 270, 2, 2, + {0x00004010, 0x00004010, 0x10400000}, + {0x00000000, 0x00000000, 0x00000000} + }, + + /* Disabled format */ + {FALSE, "A8R8G8B8", D3DFMT_A8R8G8B8, + 0xff804010, 360, 270, 2, 2, + {0x00000000, 0x00000000, 0xff804010}, + {0x00000000, 0x00000000, 0xff804010} + }, + }; + + static const struct + { + D3DCOLOR color_off, color_amd, color_intel; + UINT x, y; + } + expected_depth[][4] = + { + { + /* This is the expected result for shadow samplers */ + {0xffffffff,0xffffffff,0xffffffff, 20, 15}, + {0xffffffff,0xffffffff,0xffffffff,260, 15}, + {0x00000000,0x00000000,0x00000000, 20,255}, + {0x00000000,0x00000000,0x00000000,260,135}, + }, + { + /* This is the expected result with DF16 */ + {0xfffe0000,0xfedfdfbf,0x202000ff, 20, 15}, + {0xff9f0000,0x9f7f7f5f,0x00bf009f,260, 15}, + {0xff800000,0x7f5f5f3f,0x9f000080, 20,255}, + {0xff600000,0x5f3f3f1f,0x80809f60,260,135}, + }, + { + 
/* This is the expected result with DF24 */ + {0xffff0000,0xffdfdfbf,0x202000ff, 20, 15}, + {0xff9f0000,0x9f7f7f5f,0x00bf009f,260, 15}, + {0xff800000,0x7f5f5f3f,0x9f000080, 20,255}, + {0xff600000,0x5f3f3f1f,0x80809f60,260,135}, + }, + { + /* This is the expected result with INTZ */ + {0xffffffff,0xffdfdfbf,0x202000ff, 20, 15}, + {0x9f9f9f9f,0x9f7f7f5f,0x00bf009f,260, 15}, + {0x7f7f7f7f,0x7f5f5f3f,0x9f000080, 20,255}, + {0x5f5f5f5f,0x5f3f3f1f,0x80809f60,260,135}, + } + }; + + static const struct + { + D3DFORMAT format; + const char *name; + UINT index; + } + depth_tests[] = + { + {D3DFMT_D16_LOCKABLE, "D16_LOCKABLE", 0}, + {D3DFMT_D32, "D32", 0}, + {D3DFMT_D15S1, "D15S1", 0}, + {D3DFMT_D24S8, "D24S8", 0}, + {D3DFMT_D24X8, "D24X8", 0}, + {D3DFMT_D24X4S4, "D24X4S4", 0}, + {D3DFMT_D16, "D16", 0}, + {D3DFMT_D32F_LOCKABLE, "D32F_LOCKABLE", 0}, + {D3DFMT_D24FS8, "D24FS8", 0}, + {MAKEFOURCC('D','F','1','6'), "DF16", 1}, + {MAKEFOURCC('D','F','2','4'), "DF24", 2}, + {MAKEFOURCC('I','N','T','Z'), "INTZ", 3}, + }; + + const BOOL isWin = strcmp(winetest_platform, "wine"); + + IDirect3DSurface9 *original_ds, *original_rt, *rt; + IDirect3DVolumeTexture9 *texture3D; + IDirect3DPixelShader9 *ps_3d; + struct surface_readback rb; + IDirect3DVertexShader9 *vs; + IDirect3DTexture9 *texture; + IDirect3DDevice9 *device; + D3DLOCKED_RECT lr; + D3DLOCKED_BOX lb; + IDirect3D9 *d3d; + ULONG refcount; + D3DCAPS9 caps; + UINT i, j, k; + HWND window; + HRESULT hr; + + window = create_window(); + d3d = Direct3DCreate9(D3D_SDK_VERSION); + ok(!!d3d, "Failed to create a D3D object.\n"); + if (FAILED(IDirect3D9_CheckDeviceFormat(d3d, D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL, + D3DFMT_X8R8G8B8, D3DUSAGE_DEPTHSTENCIL, D3DRTYPE_TEXTURE, MAKEFOURCC('D','F','2','4')))) + { + skip("No DF24 support, skipping FETCH4 test.\n"); + goto done; + } + if (!(device = create_device(d3d, window, window, TRUE))) + { + skip("Failed to create a D3D device, skipping tests.\n"); + goto done; + } + + hr = 
IDirect3DDevice9_GetDeviceCaps(device, &caps); + ok(SUCCEEDED(hr), "GetDeviceCaps failed, hr %#x.\n", hr); + if (caps.PixelShaderVersion < D3DPS_VERSION(3, 0)) + { + skip("No pixel shader 3.0 support, skipping FETCH4 test.\n"); + IDirect3DDevice9_Release(device); + goto done; + } + hr = IDirect3DDevice9_GetRenderTarget(device, 0, &original_rt); + ok(SUCCEEDED(hr), "GetRenderTarget failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_GetDepthStencilSurface(device, &original_ds); + ok(SUCCEEDED(hr), "GetDepthStencilSurface failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_CreateRenderTarget(device, 8, 8, D3DFMT_A8R8G8B8, + D3DMULTISAMPLE_NONE, 0, FALSE, &rt, NULL); + ok(SUCCEEDED(hr), "CreateRenderTarget failed, hr %#x.\n", hr); + + /* Create our texture for FETCH4 shader testing */ + hr = IDirect3DDevice9_CreateTexture(device, 4, 4, 1, 0, D3DFMT_L8, D3DPOOL_MANAGED, &texture, NULL); + ok(hr == D3D_OK, "Failed to create texture, hr %#x.\n", hr); + hr = IDirect3DTexture9_LockRect(texture, 0, &lr, NULL, 0); + ok(hr == D3D_OK, "Failed to lock texture, hr %#x.\n", hr); + for (i = 0; i < ARRAY_SIZE(fetch4_data); ++i) + memcpy((BYTE *)lr.pBits + i*lr.Pitch, &fetch4_data[i], sizeof(fetch4_data[i])); + hr = IDirect3DTexture9_UnlockRect(texture, 0); + ok(hr == D3D_OK, "Failed to unlock texture, hr %#x.\n", hr); + + /* Create vertex shader */ + hr = IDirect3DDevice9_CreateVertexShader(device, vs_code, &vs); + ok(hr == D3D_OK, "IDirect3DDevice9_CreateVertexShader returned %08x\n", hr); + /* Prepare the pixel shaders */ + for (i = 0; i < ARRAY_SIZE(shaders); ++i) + { + if (shaders[i].ps_code) + { + hr = IDirect3DDevice9_CreatePixelShader(device, shaders[i].ps_code, &shaders[i].ps); + ok(SUCCEEDED(hr), "CreatePixelShader failed, hr %#x.\n", hr); + /* Copy vertex shader pointer if a PS is present */ + shaders[i].vs = vs; + } + } + hr = IDirect3DDevice9_CreatePixelShader(device, ps_code_3d, &ps_3d); + ok(SUCCEEDED(hr), "CreatePixelShader failed, hr %#x.\n", hr); + + hr = 
IDirect3DDevice9_SetFVF(device, D3DFVF_XYZ | D3DFVF_TEX1 | D3DFVF_TEXCOORDSIZE3(0)); + ok(SUCCEEDED(hr), "SetFVF failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetRenderState(device, D3DRS_ZENABLE, D3DZB_TRUE); + ok(SUCCEEDED(hr), "SetRenderState failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetRenderState(device, D3DRS_ZFUNC, D3DCMP_ALWAYS); + ok(SUCCEEDED(hr), "SetRenderState failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetRenderState(device, D3DRS_ZWRITEENABLE, TRUE); + ok(SUCCEEDED(hr), "SetRenderState failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetRenderState(device, D3DRS_LIGHTING, FALSE); + ok(SUCCEEDED(hr), "SetRenderState failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetTexture(device, 0, (IDirect3DBaseTexture9 *)texture); + ok(hr == D3D_OK, "Failed to set texture, hr %#x.\n", hr); + + /* According to the spec, FETCH4 is enabled when D3DSAMP_MIPMAPLODBIAS == GET4 and also + * D3DSAMP_MAGFILTER == D3DTEXF_POINT. But apparently only GET4 is needed for it to get active. + * However, AMD HW r500 samples always as if POINT (nearest filtering) is selected with FETCH4 + * the driver later on corrected this by adding -0.5 texel coord. */ + hr = IDirect3DDevice9_SetSamplerState(device, 0, D3DSAMP_MIPMAPLODBIAS, MAKEFOURCC('G','E','T','4')); + ok(SUCCEEDED(hr), "SetSamplerState failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetSamplerState(device, 0, D3DSAMP_MAGFILTER, D3DTEXF_POINT); + ok(SUCCEEDED(hr), "SetSamplerState failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetSamplerState(device, 0, D3DSAMP_MINFILTER, D3DTEXF_POINT); + ok(SUCCEEDED(hr), "SetSamplerState failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetSamplerState(device, 0, D3DSAMP_MIPFILTER, D3DTEXF_POINT); + ok(SUCCEEDED(hr), "SetSamplerState failed, hr %#x.\n", hr); + + /*********************************************************************** + * Tests for FFP/PS correctness when using L8 texture with fetch4. 
* + ***********************************************************************/ + + /* Render with fetch4 and test if we obtain proper results for all sampler FFP/PS instructions */ + for (i = 0; i < ARRAY_SIZE(shaders); ++i) + { + hr = IDirect3DDevice9_SetVertexShader(device, shaders[i].vs); + ok(SUCCEEDED(hr), "Failed to set vertex shader, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetPixelShader(device, shaders[i].ps); + ok(SUCCEEDED(hr), "SetPixelShader failed, hr %#x.\n", hr); + + hr = IDirect3DDevice9_Clear(device, 0, NULL, D3DCLEAR_TARGET | D3DCLEAR_ZBUFFER, 0, 0.0f, 0); + ok(SUCCEEDED(hr), "Clear failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_BeginScene(device); + ok(SUCCEEDED(hr), "BeginScene failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_DrawPrimitiveUP(device, D3DPT_TRIANGLESTRIP, 2, quad, sizeof(*quad)); + ok(SUCCEEDED(hr), "DrawPrimitiveUP failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_EndScene(device); + ok(SUCCEEDED(hr), "EndScene failed, hr %#x.\n", hr); + + get_rt_readback(original_rt, &rb); + for (j = 0; j < ARRAY_SIZE(expected_colors.color_amd); ++j) + { + UINT x = expected_colors.x[j % 4]; + UINT y = expected_colors.y[j / 4]; + D3DCOLOR color = get_readback_color(&rb, x, y); + D3DCOLOR color_amd = expected_colors.color_amd[shaders[i].projection ? (j/4/2*4 + (j%4)/2) : j]; + D3DCOLOR color_intel = expected_colors.color_intel[j]; + ok(color_match(color, color_amd, 1) || broken(color_match(color, color_intel, 1)), + "Test %s Expected color 0x%08x at (%u, %u), got 0x%08x.\n", shaders[i].name, + color_amd, x, y, color); + } + release_surface_readback(&rb); + + hr = IDirect3DDevice9_Present(device, NULL, NULL, NULL, NULL); + ok(SUCCEEDED(hr), "Present failed, hr %#x.\n", hr); + } + + /*************************************************************************** + * Tests for fetch4 enable/disable with different texture formats in FFP/PS. 
* + ***************************************************************************/ + + /* Create the textures to test FETCH4 does work/not work there as expected */ + for (i = 0; i < ARRAY_SIZE(format_tests); ++i) + { + IDirect3DTexture9 *tex; + hr = IDirect3DDevice9_CreateTexture(device, format_tests[i].w, format_tests[i].h, + 1, 0, format_tests[i].format, D3DPOOL_MANAGED, &tex, NULL); + ok(hr == D3D_OK, "Failed to create texture, hr %#x.\n", hr); + hr = IDirect3DTexture9_LockRect(tex, 0, &lr, NULL, 0); + ok(hr == D3D_OK, "Failed to lock texture, hr %#x.\n", hr); + memcpy(lr.pBits, &format_tests[i].data, 4); + hr = IDirect3DTexture9_UnlockRect(tex, 0); + ok(hr == D3D_OK, "Failed to unlock texture, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetTexture(device, 0, (IDirect3DBaseTexture9 *)tex); + ok(hr == D3D_OK, "Failed to set texture, hr %#x.\n", hr); + + /* Test if FETCH4 is enabled/disabled when different textures are used with FFP/texld/texldp */ + for (j = 0; j < ARRAY_SIZE(format_tests[i].color_amd); ++j) + { + hr = IDirect3DDevice9_SetVertexShader(device, shaders[j].vs); + ok(SUCCEEDED(hr), "Failed to set vertex shader, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetPixelShader(device, shaders[j].ps); + ok(SUCCEEDED(hr), "SetPixelShader failed, hr %#x.\n", hr); + + hr = IDirect3DDevice9_Clear(device, 0, NULL, D3DCLEAR_TARGET | D3DCLEAR_ZBUFFER, 0, 0.0f, 0); + ok(SUCCEEDED(hr), "Clear failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_BeginScene(device); + ok(SUCCEEDED(hr), "BeginScene failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_DrawPrimitiveUP(device, D3DPT_TRIANGLESTRIP, 2, quad, sizeof(*quad)); + ok(SUCCEEDED(hr), "DrawPrimitiveUP failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_EndScene(device); + ok(SUCCEEDED(hr), "EndScene failed, hr %#x.\n", hr); + + get_rt_readback(original_rt, &rb); + D3DCOLOR color = get_readback_color(&rb, format_tests[i].x, format_tests[i].y); + D3DCOLOR color_amd = format_tests[i].color_amd[j]; + D3DCOLOR color_intel = 
format_tests[i].color_intel[j]; + todo_wine_if(format_tests[i].broken_wine) ok(color_match(color, color_amd, 1) + || broken(color_match(color, color_intel, 1)), + "Test %s on %s expected color 0x%08x at (%u, %u), got 0x%08x.\n", format_tests[i].name, + shaders[j].name, color_amd, format_tests[i].x, format_tests[i].y, color); + release_surface_readback(&rb); + + hr = IDirect3DDevice9_Present(device, NULL, NULL, NULL, NULL); + ok(SUCCEEDED(hr), "Present failed, hr %#x.\n", hr); + } + IDirect3DTexture9_Release(tex); + } + + /************************************************** + * Tests that fetch4 works with 3D textures. * + **************************************************/ + + /* Create volume (3D) texture */ + IDirect3DDevice9_CreateVolumeTexture(device, 4, 4, 2, 1, 0, D3DFMT_L8, D3DPOOL_MANAGED, &texture3D, NULL ); + ok(hr == D3D_OK, "Failed to create volume texture, hr %#x.\n", hr); + hr = IDirect3DVolumeTexture9_LockBox(texture3D, 0, &lb, NULL, 0); + ok(hr == D3D_OK, "Failed to lock texture3D, hr %#x.\n", hr); + for (i = 0; i < ARRAY_SIZE(fetch4_data); ++i) + { + memcpy((BYTE *)lb.pBits + i*lb.RowPitch, &fetch4_data[i], sizeof(fetch4_data[i])); + /* Shift the lower level, to keep it different */ + memcpy((BYTE *)lb.pBits + i*lb.RowPitch + lb.SlicePitch, &fetch4_data[(i+1)%4], sizeof(fetch4_data[i])); + } + hr = IDirect3DVolumeTexture9_UnlockBox(texture3D, 0); + ok(hr == D3D_OK, "Failed to unlock texture3D, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetTexture(device, 0, (IDirect3DBaseTexture9 *)texture3D); + ok(hr == D3D_OK, "Failed to set texture3D, hr %#x.\n", hr); + + /* Test FFP and texld with dcl_volume (ps_3d) */ + for (i = 0; i < 2; ++i) + { + hr = IDirect3DDevice9_SetVertexShader(device, i ? vs : NULL); + ok(SUCCEEDED(hr), "Failed to set vertex shader, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetPixelShader(device, i ? 
ps_3d : NULL); + ok(SUCCEEDED(hr), "SetPixelShader failed, hr %#x.\n", hr); + + hr = IDirect3DDevice9_Clear(device, 0, NULL, D3DCLEAR_TARGET | D3DCLEAR_ZBUFFER, 0, 0.0f, 0); + ok(SUCCEEDED(hr), "Clear failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_BeginScene(device); + ok(SUCCEEDED(hr), "BeginScene failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_DrawPrimitiveUP(device, D3DPT_TRIANGLESTRIP, 2, quad, sizeof(*quad)); + ok(SUCCEEDED(hr), "DrawPrimitiveUP failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_EndScene(device); + ok(SUCCEEDED(hr), "EndScene failed, hr %#x.\n", hr); + get_rt_readback(original_rt, &rb); + for (j = 0; j < ARRAY_SIZE(expected_colors.color_amd); ++j) + { + UINT x = expected_colors.x[j % 4]; + UINT y = expected_colors.y[j / 4]; + D3DCOLOR color = get_readback_color(&rb, x, y); + D3DCOLOR color_amd = expected_colors.color_amd[j]; + D3DCOLOR color_intel = expected_colors.color_intel[j]; + D3DCOLOR color_off = expected_colors.color_3d_fetch4_off[j]; + D3DCOLOR color_zround = expected_colors.color_amd[(j+4) % ARRAY_SIZE(expected_colors.color_amd)]; + /* Note: Fetch4 on 3D textures have different results based on the vendor/driver + * - AMD "HD 5700" rounds to nearest "z" texel, and does fetch4 normally on .xy + * - AMD "R500" has fetch4 disabled + * - AMD "R580" has fetch4 enabled sampling at .xy0 + * - Intel UHD 620 sample with fetch4 at .xy0 + * Currently unimplemented on wine due to lack of GL functionality to cast 3D->2DArray + * Wine produces same results as if fetch4 is not enabled (which probably is better) + * Test will pass on windows if either one of the allowed results is returned */ + ok(color_match(color, color_off, 2) + || broken(color_match(color, color_zround, 2) || color_match(color, color_off, 2) + || color_match(color, color_intel, 2) || color_match(color, color_amd, 2)), + "Test 3D %s Expected color 0x%08x at (%u, %u), got 0x%08x.\n", shaders[i].name, + color_off, x, y, color); + } + release_surface_readback(&rb); + hr = 
IDirect3DDevice9_Present(device, NULL, NULL, NULL, NULL); + ok(SUCCEEDED(hr), "Present failed, hr %#x.\n", hr); + } + + /******************************************************** + * Tests for fetch4 enable/disable with depth textures. * + ********************************************************/ + + for (i = 0; i < ARRAY_SIZE(depth_tests); ++i) + { + D3DFORMAT format = depth_tests[i].format; + IDirect3DTexture9 *depth_texture; + IDirect3DSurface9 *ds; + + if (FAILED(IDirect3D9_CheckDeviceFormat(d3d, D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL, + D3DFMT_X8R8G8B8, D3DUSAGE_DEPTHSTENCIL, D3DRTYPE_TEXTURE, format))) + { + skip("Skipping %s depth test, unsuported format.\n", depth_tests[i].name); + continue; + } + + hr = IDirect3DDevice9_CreateTexture(device, 8, 8, 1, + D3DUSAGE_DEPTHSTENCIL, format, D3DPOOL_DEFAULT, &depth_texture, NULL); + ok(SUCCEEDED(hr), "CreateTexture failed, hr %#x.\n", hr); + hr = IDirect3DTexture9_GetSurfaceLevel(depth_texture, 0, &ds); + ok(SUCCEEDED(hr), "GetSurfaceLevel failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetDepthStencilSurface(device, ds); + ok(SUCCEEDED(hr), "SetDepthStencilSurface failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetRenderTarget(device, 0, rt); + ok(SUCCEEDED(hr), "SetRenderTarget failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetVertexShader(device, NULL); + ok(SUCCEEDED(hr), "Failed to set vertex shader, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetPixelShader(device, NULL); + ok(SUCCEEDED(hr), "SetPixelShader failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetTexture(device, 0, (IDirect3DBaseTexture9 *)texture); + ok(hr == D3D_OK, "Failed to set texture3D, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetSamplerState(device, 0, + D3DSAMP_MIPMAPLODBIAS, MAKEFOURCC('G','E','T','1')); + ok(SUCCEEDED(hr), "SetSamplerState failed, hr %#x.\n", hr); + + /* Setup the depth/stencil surface. 
*/ + hr = IDirect3DDevice9_Clear(device, 0, NULL, D3DCLEAR_ZBUFFER, 0, 0.0f, 0); + ok(SUCCEEDED(hr), "Clear failed, hr %#x.\n", hr); + + /* Render to the depth surface */ + hr = IDirect3DDevice9_BeginScene(device); + ok(SUCCEEDED(hr), "BeginScene failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_DrawPrimitiveUP(device, D3DPT_TRIANGLESTRIP, 2, quad, sizeof(*quad)); + ok(SUCCEEDED(hr), "DrawPrimitiveUP failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_EndScene(device); + ok(SUCCEEDED(hr), "EndScene failed, hr %#x.\n", hr); + + hr = IDirect3DDevice9_SetDepthStencilSurface(device, NULL); + ok(SUCCEEDED(hr), "SetDepthStencilSurface failed, hr %#x.\n", hr); + IDirect3DSurface9_Release(ds); + hr = IDirect3DDevice9_SetRenderTarget(device, 0, original_rt); + ok(SUCCEEDED(hr), "SetRenderTarget failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetTexture(device, 0, (IDirect3DBaseTexture9 *)depth_texture); + ok(SUCCEEDED(hr), "SetTexture failed, hr %#x.\n", hr); + + /* Set a shader for depth sampling, otherwise windows does not show anything */ + hr = IDirect3DDevice9_SetVertexShader(device, vs); + ok(SUCCEEDED(hr), "Failed to set vertex shader, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetPixelShader(device, shaders[1].ps); /* same as texld */ + ok(SUCCEEDED(hr), "SetPixelShader failed, hr %#x.\n", hr); + + for (j = 0; j < 2; ++j){ + hr = IDirect3DDevice9_SetSamplerState(device, 0, + D3DSAMP_MIPMAPLODBIAS, MAKEFOURCC('G','E','T', j ? '4' : '1' )); + ok(SUCCEEDED(hr), "SetSamplerState failed, hr %#x.\n", hr); + + /* Do the actual shadow mapping. 
*/ + hr = IDirect3DDevice9_BeginScene(device); + ok(SUCCEEDED(hr), "BeginScene failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_DrawPrimitiveUP(device, D3DPT_TRIANGLESTRIP, 2, quad, sizeof(*quad)); + ok(SUCCEEDED(hr), "DrawPrimitiveUP failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_EndScene(device); + ok(SUCCEEDED(hr), "EndScene failed, hr %#x.\n", hr); + + get_rt_readback(original_rt, &rb); + for (k = 0; k < ARRAY_SIZE(expected_depth[depth_tests[i].index]); ++k) + { + UINT x = expected_depth[depth_tests[i].index][k].x; + UINT y = expected_depth[depth_tests[i].index][k].y; + D3DCOLOR color_off = expected_depth[depth_tests[i].index][k].color_off; + D3DCOLOR color_amd = expected_depth[depth_tests[i].index][k].color_amd; + D3DCOLOR color_intel = expected_depth[depth_tests[i].index][k].color_intel; + D3DCOLOR color = get_readback_color(&rb, x, y); + /* When Fetch4 is OFF, ignore G and B channels on windows. + * Some implementations will copy R=G=B, some will set them to 0 */ + if(j == 0) + ok(color_match(color, color_off, 2) + || broken(color_match(color & 0xffff0000, color_off & 0xffff0000, 2)), + "Test OFF Expected color 0x%08x at (%u, %u) for format %s, got 0x%08x.\n", + color_off, x, y, depth_tests[i].name, color); + else + ok(color_match(color, color_amd, 2) + || broken(color_match(color, color_intel, 2)), + "Test ON Expected colors 0x%08x at (%u, %u) for format %s, got 0x%08x.\n", + color_amd, x, y, depth_tests[i].name, color); + } + release_surface_readback(&rb); + + hr = IDirect3DDevice9_Present(device, NULL, NULL, NULL, NULL); + ok(SUCCEEDED(hr), "Present failed, hr %#x.\n", hr); + } + + hr = IDirect3DDevice9_SetTexture(device, 0, NULL); + ok(SUCCEEDED(hr), "SetTexture failed, hr %#x.\n", hr); + IDirect3DTexture9_Release(depth_texture); + } + + IDirect3DVolumeTexture9_Release(texture3D); + IDirect3DTexture9_Release(texture); + for (i = 0; i < ARRAY_SIZE(shaders); ++i) + if (shaders[i].ps) + IDirect3DPixelShader9_Release(shaders[i].ps); + 
IDirect3DPixelShader9_Release(ps_3d); + IDirect3DVertexShader9_Release(vs); + IDirect3DSurface9_Release(rt); + IDirect3DSurface9_Release(original_ds); + IDirect3DSurface9_Release(original_rt); + refcount = IDirect3DDevice9_Release(device); + ok(!refcount, "Device has %u references left.\n", refcount); +done: + IDirect3D9_Release(d3d); + DestroyWindow(window); +} + static void shadow_test(void) { static const DWORD ps_code[] = @@ -24660,6 +25321,7 @@ START_TEST(visual) depth_buffer2_test(); depth_blit_test(); intz_test(); + fetch4_test(); shadow_test(); fp_special_test(); depth_bounds_test();
- Add a flag to indicate FETCH4 support in textures - The implementation follows the AMD implementation: the AMD swizzle is matched, projection is allowed, and a 0.5 texel offset is added
Signed-off-by: Daniel Ansorregui [email protected] --- dlls/wined3d/glsl_shader.c | 43 ++++++++++++++++++++++++++++------ dlls/wined3d/utils.c | 13 +++++++++- dlls/wined3d/wined3d_private.h | 4 +++- 3 files changed, 51 insertions(+), 9 deletions(-)
diff --git a/dlls/wined3d/glsl_shader.c b/dlls/wined3d/glsl_shader.c index 3298a604fd..1950db06a9 100644 --- a/dlls/wined3d/glsl_shader.c +++ b/dlls/wined3d/glsl_shader.c @@ -9711,6 +9711,8 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * shader_addline(buffer, "#extension GL_ARB_shading_language_420pack : enable\n"); if (gl_info->supported[ARB_TEXTURE_RECTANGLE]) shader_addline(buffer, "#extension GL_ARB_texture_rectangle : enable\n"); + if (gl_info->supported[ARB_TEXTURE_GATHER]) + shader_addline(buffer, "#extension GL_ARB_texture_gather : enable\n");
if (!needs_legacy_glsl_syntax(gl_info)) { @@ -9851,6 +9853,9 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * for (stage = 0; stage < MAX_TEXTURES && settings->op[stage].cop != WINED3D_TOP_DISABLE; ++stage) { const char *texture_function, *coord_mask; + struct wined3d_string_buffer offset; + BOOL fetch4 = settings->op[stage].fetch4; + BOOL fetch4_proj = FALSE; BOOL proj;
if (!(tex_map & (1u << stage))) @@ -9870,7 +9875,6 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * FIXME("Unexpected projection mode %d\n", settings->op[stage].projected); proj = TRUE; } - if (settings->op[stage].tex_type == WINED3D_GL_RES_TYPE_TEX_CUBE) proj = FALSE;
@@ -9879,6 +9883,7 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * case WINED3D_GL_RES_TYPE_TEX_1D: texture_function = "texture1D"; coord_mask = "x"; + fetch4 = FALSE; break; case WINED3D_GL_RES_TYPE_TEX_2D: texture_function = "texture2D"; @@ -9887,6 +9892,9 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * case WINED3D_GL_RES_TYPE_TEX_3D: texture_function = "texture3D"; coord_mask = "xyz"; + if (fetch4) + FIXME("Unsupported Fetch4 and texture3D sampling"); + fetch4 = FALSE; break; case WINED3D_GL_RES_TYPE_TEX_CUBE: texture_function = "textureCube"; @@ -9901,11 +9909,24 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * texture_function = ""; coord_mask = "xyzw"; proj = FALSE; + fetch4 = FALSE; break; } if (!legacy_syntax) texture_function = "texture";
+ string_buffer_init(&offset); + if (fetch4) + { + texture_function = "textureGather"; + /* Apply a 0.5 texel offset as in AMD implementation */ + shader_addline(&offset, " + (vec2(0.5) / textureSize(ps_sampler%u, 0).xy)", stage); + + /* When projection is needed on fetch4 we have to apply it manually by dividing .w */ + fetch4_proj = proj; + proj = FALSE; + } + if (stage > 0 && (settings->op[stage - 1].cop == WINED3D_TOP_BUMPENVMAP || settings->op[stage - 1].cop == WINED3D_TOP_BUMPENVMAP_LUMINANCE)) @@ -9936,8 +9957,8 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * shader_addline(buffer, "ret = ffp_texcoord[%u] + ret.xyxy;\n", stage); }
- shader_addline(buffer, "tex%u = %s%s(ps_sampler%u, ret.%s%s);\n", - stage, texture_function, proj ? "Proj" : "", stage, coord_mask, proj ? "w" : ""); + shader_addline(buffer, "tex%u = %s%s(ps_sampler%u, ret.%s%s%s%s);\n", stage, texture_function, + proj ? "Proj" : "", stage, coord_mask, proj ? "w" : "", fetch4_proj ? " / ret.w" : "", offset.buffer);
if (settings->op[stage - 1].cop == WINED3D_TOP_BUMPENVMAP_LUMINANCE) shader_addline(buffer, "tex%u *= clamp(tex%u.z * bumpenv_lum_scale%u + bumpenv_lum_offset%u, 0.0, 1.0);\n", @@ -9945,14 +9966,22 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * } else if (settings->op[stage].projected == WINED3D_PROJECTION_COUNT3) { - shader_addline(buffer, "tex%u = %s%s(ps_sampler%u, ffp_texcoord[%u].xyz);\n", - stage, texture_function, proj ? "Proj" : "", stage, stage); + shader_addline(buffer, "tex%u = %s%s(ps_sampler%u, ffp_texcoord[%u].xyz%s);\n", stage, + texture_function, proj ? "Proj" : "", stage, stage, offset.buffer); } else { - shader_addline(buffer, "tex%u = %s%s(ps_sampler%u, ffp_texcoord[%u].%s%s);\n", - stage, texture_function, proj ? "Proj" : "", stage, stage, coord_mask, proj ? "w" : ""); + shader_addline(buffer, "tex%u = %s%s(ps_sampler%u, ffp_texcoord[%u].%s%s", stage, + texture_function, proj ? "Proj" : "", stage, stage, coord_mask, proj ? "w" : ""); + if (fetch4_proj) + shader_addline(buffer, " / ffp_texcoord[%u].w", stage); + shader_addline(buffer, "%s);\n", offset.buffer); } + string_buffer_clear(&offset); + + /* Match FETCH4 swizzle with textureGather swizzle */ + if (fetch4) + shader_addline(buffer, "tex%u = tex%u.zxyw;\n", stage, stage);
string_buffer_sprintf(tex_reg_name, "tex%u", stage); shader_glsl_color_correction_ext(buffer, tex_reg_name->buffer, WINED3DSP_WRITEMASK_ALL, diff --git a/dlls/wined3d/utils.c b/dlls/wined3d/utils.c index 7b42202213..70c6d18840 100644 --- a/dlls/wined3d/utils.c +++ b/dlls/wined3d/utils.c @@ -331,7 +331,7 @@ static const struct wined3d_format_base_flags format_base_flags[] = {WINED3DFMT_ATI1N, WINED3DFMT_FLAG_MAPPABLE | WINED3DFMT_FLAG_BROKEN_PITCH}, {WINED3DFMT_ATI2N, WINED3DFMT_FLAG_MAPPABLE | WINED3DFMT_FLAG_BROKEN_PITCH}, {WINED3DFMT_D16_LOCKABLE, WINED3DFMT_FLAG_MAPPABLE}, - {WINED3DFMT_INTZ, WINED3DFMT_FLAG_MAPPABLE}, + {WINED3DFMT_INTZ, WINED3DFMT_FLAG_MAPPABLE | WINED3DFMT_FLAG_ALLOW_FETCH4}, {WINED3DFMT_R11G11B10_FLOAT, WINED3DFMT_FLAG_FLOAT}, {WINED3DFMT_D32_FLOAT, WINED3DFMT_FLAG_FLOAT}, {WINED3DFMT_S8_UINT_D24_FLOAT, WINED3DFMT_FLAG_FLOAT}, @@ -340,6 +340,12 @@ static const struct wined3d_format_base_flags format_base_flags[] = {WINED3DFMT_NULL, WINED3DFMT_FLAG_EXTENSION}, {WINED3DFMT_NVDB, WINED3DFMT_FLAG_EXTENSION}, {WINED3DFMT_RESZ, WINED3DFMT_FLAG_EXTENSION}, + {WINED3DFMT_L8_UNORM, WINED3DFMT_FLAG_ALLOW_FETCH4}, + {WINED3DFMT_L16_UNORM, WINED3DFMT_FLAG_ALLOW_FETCH4}, + {WINED3DFMT_R16_FLOAT, WINED3DFMT_FLAG_ALLOW_FETCH4}, + {WINED3DFMT_R16, WINED3DFMT_FLAG_ALLOW_FETCH4}, + {WINED3DFMT_R32_FLOAT, WINED3DFMT_FLAG_ALLOW_FETCH4}, + {WINED3DFMT_A8_UNORM, WINED3DFMT_FLAG_ALLOW_FETCH4}, };
static void rgb888_from_rgb565(WORD rgb565, BYTE *r, BYTE *g, BYTE *b) @@ -5780,6 +5786,7 @@ void gen_ffp_frag_op(const struct wined3d_context *context, const struct wined3d settings->op[i].tmp_dst = 0; settings->op[i].tex_type = WINED3D_GL_RES_TYPE_TEX_1D; settings->op[i].projected = WINED3D_PROJECTION_NONE; + settings->op[i].fetch4 = FALSE; i++; break; } @@ -5923,6 +5930,10 @@ void gen_ffp_frag_op(const struct wined3d_context *context, const struct wined3d settings->op[i].aarg1 = aarg1; settings->op[i].aarg2 = aarg2; settings->op[i].tmp_dst = state->texture_states[i][WINED3D_TSS_RESULT_ARG] == WINED3DTA_TEMP; + settings->op[i].fetch4 = (state->textures[i] && gl_info->supported[ARB_TEXTURE_GATHER] + && state->sampler_states[i][WINED3D_SAMP_MIPMAP_LOD_BIAS] == MAKEFOURCC('G','E','T','4') + && state->textures[i]->resource.format_flags & WINED3DFMT_FLAG_ALLOW_FETCH4 + && settings->op[i].tex_type & (WINED3D_GL_RES_TYPE_TEX_2D | WINED3D_GL_RES_TYPE_TEX_RECT)); }
/* Clear unsupported stages */ diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h index 1e3ec28d6b..4224461142 100644 --- a/dlls/wined3d/wined3d_private.h +++ b/dlls/wined3d/wined3d_private.h @@ -2747,7 +2747,8 @@ struct texture_stage_op unsigned tex_type : 3; unsigned tmp_dst : 1; unsigned projected : 2; - unsigned padding : 10; + unsigned fetch4 : 1; + unsigned padding : 9; };
struct ffp_frag_settings @@ -4434,6 +4435,7 @@ extern enum wined3d_format_id pixelformat_for_depth(DWORD depth) DECLSPEC_HIDDEN #define WINED3DFMT_FLAG_VERTEX_ATTRIBUTE 0x01000000 #define WINED3DFMT_FLAG_BLIT 0x02000000 #define WINED3DFMT_FLAG_MAPPABLE 0x04000000 +#define WINED3DFMT_FLAG_ALLOW_FETCH4 0x08000000
struct wined3d_rational {
- Implement shader generation for texld/texldp/texldd/texldb/texldl - FIXME: Vertex shader texldl is not implemented yet, since it is not possible to access ps_compile_args there. Maybe move it to another place. It probably does not work on Windows anyway - Trigger PS re-generation on FETCH4 state change, by storing a context flag - Add a ps_compile_args flag for fetch4
Signed-off-by: Daniel Ansorregui [email protected] --- dlls/wined3d/cs.c | 5 +- dlls/wined3d/device.c | 4 + dlls/wined3d/glsl_shader.c | 214 ++++++++++++++++++++++----------- dlls/wined3d/shader.c | 13 ++ dlls/wined3d/state.c | 13 ++ dlls/wined3d/wined3d_private.h | 7 +- 6 files changed, 186 insertions(+), 70 deletions(-)
diff --git a/dlls/wined3d/cs.c b/dlls/wined3d/cs.c index 7471d24e7e..890540c78d 100644 --- a/dlls/wined3d/cs.c +++ b/dlls/wined3d/cs.c @@ -1372,7 +1372,10 @@ static void wined3d_cs_exec_set_texture(struct wined3d_cs *cs, const void *data) if (!prev || wined3d_texture_gl(op->texture)->target != wined3d_texture_gl(prev)->target || (!is_same_fixup(new_format->color_fixup, old_format->color_fixup) && !(can_use_texture_swizzle(gl_info, new_format) && can_use_texture_swizzle(gl_info, old_format))) - || (new_fmt_flags & WINED3DFMT_FLAG_SHADOW) != (old_fmt_flags & WINED3DFMT_FLAG_SHADOW)) + || (new_fmt_flags & WINED3DFMT_FLAG_SHADOW) != (old_fmt_flags & WINED3DFMT_FLAG_SHADOW) + || ((new_fmt_flags & WINED3DFMT_FLAG_ALLOW_FETCH4) != (old_fmt_flags & WINED3DFMT_FLAG_ALLOW_FETCH4) + && cs->state.sampler_states[op->texture->sampler][WINED3D_SAMP_MIPMAP_LOD_BIAS] + == MAKEFOURCC('G','E','T','4'))) device_invalidate_state(cs->device, STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL));
if (!prev && op->stage < d3d_info->limits.ffp_blend_stages) diff --git a/dlls/wined3d/device.c b/dlls/wined3d/device.c index dee99dcde2..99248ffb1b 100644 --- a/dlls/wined3d/device.c +++ b/dlls/wined3d/device.c @@ -2082,7 +2082,11 @@ void CDECL wined3d_device_set_sampler_state(struct wined3d_device *device, device, sampler_idx, debug_d3dsamplerstate(state), value);
if (sampler_idx >= WINED3DVERTEXTEXTURESAMPLER0 && sampler_idx <= WINED3DVERTEXTEXTURESAMPLER3) + { sampler_idx -= (WINED3DVERTEXTEXTURESAMPLER0 - MAX_FRAGMENT_SAMPLERS); + if (state == WINED3D_SAMP_MIPMAP_LOD_BIAS && value == MAKEFOURCC('G','E','T','4')) + FIXME("Unsupported FETCH4 and Vertex Texture Sampler"); + }
if (sampler_idx >= ARRAY_SIZE(device->state.sampler_states)) { diff --git a/dlls/wined3d/glsl_shader.c b/dlls/wined3d/glsl_shader.c index 1950db06a9..e53306a6c2 100644 --- a/dlls/wined3d/glsl_shader.c +++ b/dlls/wined3d/glsl_shader.c @@ -49,6 +49,7 @@ WINE_DECLARE_DEBUG_CHANNEL(winediag); #define WINED3D_GLSL_SAMPLE_GRAD 0x04 #define WINED3D_GLSL_SAMPLE_LOAD 0x08 #define WINED3D_GLSL_SAMPLE_OFFSET 0x10 +#define WINED3D_GLSL_SAMPLE_GATHER 0x20
static const struct { @@ -3613,6 +3614,7 @@ static void shader_glsl_get_sample_function(const struct wined3d_shader_context BOOL lod = flags & WINED3D_GLSL_SAMPLE_LOD; BOOL grad = flags & WINED3D_GLSL_SAMPLE_GRAD; BOOL offset = flags & WINED3D_GLSL_SAMPLE_OFFSET; + BOOL gather = !shadow && flags & WINED3D_GLSL_SAMPLE_GATHER; const char *base = "texture", *type_part = "", *suffix = ""; unsigned int coord_size, deriv_size;
@@ -3658,6 +3660,19 @@ static void shader_glsl_get_sample_function(const struct wined3d_shader_context type_part = ""; }
+ if (gather) + { + if (resource_type == WINED3D_SHADER_RESOURCE_TEXTURE_3D) + FIXME("Unsupported Fetch4 and texture3D sampling"); + else + { + base = "texture"; + type_part = "Gather"; + suffix = ""; + projected = lod = grad = offset = FALSE; + } + } + sample_function->name = string_buffer_get(priv->string_buffers); string_buffer_sprintf(sample_function->name, "%s%s%s%s%s%s", base, type_part, projected ? "Proj" : "", lod ? "Lod" : grad ? "Grad" : "", offset ? "Offset" : "", suffix); @@ -3786,12 +3801,13 @@ static void shader_glsl_color_correction(const struct wined3d_shader_instruction string_buffer_release(priv->string_buffers, reg_name); }
-static void PRINTF_ATTR(9, 10) shader_glsl_gen_sample_code(const struct wined3d_shader_instruction *ins, - unsigned int sampler_bind_idx, const struct glsl_sample_function *sample_function, DWORD swizzle, - const char *dx, const char *dy, const char *bias, const struct wined3d_shader_texel_offset *offset, - const char *coord_reg_fmt, ...) +static void PRINTF_ATTR(10, 11) shader_glsl_gen_sample_code(const struct wined3d_shader_instruction *ins, + unsigned int sampler_bind_idx, const struct glsl_sample_function *sample_function, + const DWORD swizzle, const DWORD sample_flags, const char *dx, const char *dy, const char *bias, + const struct wined3d_shader_texel_offset *offset, const char *coord_reg_fmt, ...) { const struct wined3d_shader_version *version = &ins->ctx->reg_maps->shader_version; + const struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data; char dst_swizzle[6]; struct color_fixup_desc fixup; BOOL np2_fixup = FALSE; @@ -3833,10 +3849,8 @@ static void PRINTF_ATTR(9, 10) shader_glsl_gen_sample_code(const struct wined3d_ if (!string_buffer_resize(ins->ctx->buffer, ret)) break; } - if (np2_fixup) { - const struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data; const unsigned char idx = priv->cur_np2fixup_info->idx[sampler_bind_idx];
switch (shader_glsl_get_write_mask_size(sample_function->coord_mask)) @@ -3859,6 +3873,20 @@ static void PRINTF_ATTR(9, 10) shader_glsl_gen_sample_code(const struct wined3d_ break; } } + if(sample_flags & WINED3D_GLSL_SAMPLE_GATHER) + { + if (sample_flags & WINED3D_GLSL_SAMPLE_PROJECTED) + { + struct wined3d_string_buffer *reg_name = string_buffer_get(priv->string_buffers); + shader_glsl_get_register_name(&ins->src[0].reg, ins->src[0].reg.data_type, reg_name, NULL, ins->ctx); + shader_addline(ins->ctx->buffer, " / %s.w", reg_name->buffer); + string_buffer_release(priv->string_buffers, reg_name); + } + + /* Correct the fetch4 0.5 texel offset */ + shader_addline(ins->ctx->buffer, " + (vec2(0.5) / textureSize(%s_sampler%u, 0).xy)", + shader_glsl_get_prefix(version->type), sampler_bind_idx); + } if (dx && dy) shader_addline(ins->ctx->buffer, ", %s, %s", dx, dy); else if (bias) @@ -5397,11 +5425,19 @@ static void shader_glsl_tex(const struct wined3d_shader_instruction *ins) } }
+ /* Fetch4 overwrites the other texture flags */ + if (priv->cur_ps_args->fetch4 & (1u << resource_idx)) + { + sample_flags |= WINED3D_GLSL_SAMPLE_GATHER; + mask = 0; + } + shader_glsl_get_sample_function(ins->ctx, resource_idx, resource_idx, sample_flags, &sample_function); mask |= sample_function.coord_mask; sample_function.coord_mask = mask;
if (shader_version < WINED3D_SHADER_VERSION(2,0)) swizzle = WINED3DSP_NOSWIZZLE; + else if (sample_flags & WINED3D_GLSL_SAMPLE_GATHER) swizzle = WINED3DSP_FETCH4_SWIZZLE; else swizzle = ins->src[1].swizzle;
/* 1.0-1.3: Use destination register as coordinate source. @@ -5410,22 +5446,22 @@ static void shader_glsl_tex(const struct wined3d_shader_instruction *ins) { char coord_mask[6]; shader_glsl_write_mask_to_str(mask, coord_mask); - shader_glsl_gen_sample_code(ins, resource_idx, &sample_function, swizzle, NULL, NULL, NULL, NULL, - "T%u%s", resource_idx, coord_mask); + shader_glsl_gen_sample_code(ins, resource_idx, &sample_function, swizzle, sample_flags, NULL, NULL, + NULL, NULL, "T%u%s", resource_idx, coord_mask); } else { struct glsl_src_param coord_param; shader_glsl_add_src_param(ins, &ins->src[0], mask, &coord_param); - if (ins->flags & WINED3DSI_TEXLD_BIAS) + if (ins->flags & WINED3DSI_TEXLD_BIAS && sample_flags != WINED3D_GLSL_SAMPLE_GATHER) { struct glsl_src_param bias; shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_3, &bias); - shader_glsl_gen_sample_code(ins, resource_idx, &sample_function, swizzle, NULL, NULL, bias.param_str, - NULL, "%s", coord_param.param_str); + shader_glsl_gen_sample_code(ins, resource_idx, &sample_function, swizzle, sample_flags, + NULL, NULL, bias.param_str, NULL, "%s", coord_param.param_str); } else { - shader_glsl_gen_sample_code(ins, resource_idx, &sample_function, swizzle, NULL, NULL, NULL, NULL, - "%s", coord_param.param_str); + shader_glsl_gen_sample_code(ins, resource_idx, &sample_function, swizzle, sample_flags, + NULL, NULL, NULL, NULL, "%s", coord_param.param_str); } } shader_glsl_release_sample_function(ins->ctx, &sample_function); @@ -5433,10 +5469,11 @@ static void shader_glsl_tex(const struct wined3d_shader_instruction *ins)
static void shader_glsl_texldd(const struct wined3d_shader_instruction *ins) { + const struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data; const struct wined3d_gl_info *gl_info = ins->ctx->gl_info; struct glsl_src_param coord_param, dx_param, dy_param; struct glsl_sample_function sample_function; - DWORD sampler_idx; + DWORD sampler_idx = ins->src[1].reg.idx[0].offset; DWORD swizzle = ins->src[1].swizzle;
if (!shader_glsl_has_core_grad(gl_info) && !gl_info->supported[ARB_SHADER_TEXTURE_LOD]) @@ -5446,34 +5483,48 @@ static void shader_glsl_texldd(const struct wined3d_shader_instruction *ins) return; }
- sampler_idx = ins->src[1].reg.idx[0].offset; + /* Fetch4 overwrites the other texture flags */ + if (priv->cur_ps_args->fetch4 & (1u << sampler_idx)) + { + shader_glsl_get_sample_function(ins->ctx, sampler_idx, sampler_idx, WINED3D_GLSL_SAMPLE_GATHER, &sample_function); + swizzle = WINED3DSP_FETCH4_SWIZZLE; + shader_glsl_add_src_param(ins, &ins->src[0], sample_function.coord_mask, &coord_param); + shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, swizzle, WINED3D_GLSL_SAMPLE_GATHER, + NULL, NULL, NULL, NULL, "%s", coord_param.param_str); + shader_glsl_release_sample_function(ins->ctx, &sample_function); + return; + }
shader_glsl_get_sample_function(ins->ctx, sampler_idx, sampler_idx, WINED3D_GLSL_SAMPLE_GRAD, &sample_function); shader_glsl_add_src_param(ins, &ins->src[0], sample_function.coord_mask, &coord_param); shader_glsl_add_src_param(ins, &ins->src[2], sample_function.deriv_mask, &dx_param); shader_glsl_add_src_param(ins, &ins->src[3], sample_function.deriv_mask, &dy_param);
- shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, swizzle, dx_param.param_str, dy_param.param_str, - NULL, NULL, "%s", coord_param.param_str); + shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, swizzle, 0, + dx_param.param_str, dy_param.param_str, NULL, NULL, "%s", coord_param.param_str); shader_glsl_release_sample_function(ins->ctx, &sample_function); }
static void shader_glsl_texldl(const struct wined3d_shader_instruction *ins) { const struct wined3d_shader_version *shader_version = &ins->ctx->reg_maps->shader_version; + const struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data; const struct wined3d_gl_info *gl_info = ins->ctx->gl_info; struct glsl_src_param coord_param, lod_param; struct glsl_sample_function sample_function; + DWORD sampler_idx = ins->src[1].reg.idx[0].offset; DWORD swizzle = ins->src[1].swizzle; - DWORD sampler_idx; + DWORD flags = WINED3D_GLSL_SAMPLE_LOD;
- sampler_idx = ins->src[1].reg.idx[0].offset; + /* This call can be used in vertex shader, without cur_ps_args */ + if(priv->cur_ps_args && priv->cur_ps_args->fetch4 & (1u << sampler_idx)) + { + flags = WINED3D_GLSL_SAMPLE_GATHER; + }
- shader_glsl_get_sample_function(ins->ctx, sampler_idx, sampler_idx, WINED3D_GLSL_SAMPLE_LOD, &sample_function); + shader_glsl_get_sample_function(ins->ctx, sampler_idx, sampler_idx, flags, &sample_function); shader_glsl_add_src_param(ins, &ins->src[0], sample_function.coord_mask, &coord_param);
- shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_3, &lod_param); - if (shader_version->type == WINED3D_SHADER_TYPE_PIXEL && !shader_glsl_has_core_grad(gl_info) && !gl_info->supported[ARB_SHADER_TEXTURE_LOD]) { @@ -5482,8 +5533,20 @@ static void shader_glsl_texldl(const struct wined3d_shader_instruction *ins) * even without the appropriate extension. */ WARN("Using %s in fragment shader.\n", sample_function.name->buffer); } - shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, swizzle, NULL, NULL, lod_param.param_str, NULL, - "%s", coord_param.param_str); + + if (flags == WINED3D_GLSL_SAMPLE_GATHER){ + swizzle = WINED3DSP_FETCH4_SWIZZLE; + shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, swizzle, flags, + NULL, NULL, NULL, NULL, "%s", coord_param.param_str); + shader_glsl_release_sample_function(ins->ctx, &sample_function); + return; + } + + shader_glsl_get_sample_function(ins->ctx, sampler_idx, sampler_idx, WINED3D_GLSL_SAMPLE_LOD, &sample_function); + shader_glsl_add_src_param(ins, &ins->src[0], sample_function.coord_mask, &coord_param); + shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_3, &lod_param); + shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, swizzle, flags, NULL, NULL, + lod_param.param_str, NULL, "%s", coord_param.param_str); shader_glsl_release_sample_function(ins->ctx, &sample_function); }
@@ -6175,6 +6238,7 @@ static void shader_glsl_ld(const struct wined3d_shader_instruction *ins) struct glsl_sample_function sample_function; DWORD flags = WINED3D_GLSL_SAMPLE_LOAD; BOOL has_lod_param; + const struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data;
if (wined3d_shader_instruction_has_texel_offset(ins)) flags |= WINED3D_GLSL_SAMPLE_OFFSET; @@ -6189,6 +6253,10 @@ static void shader_glsl_ld(const struct wined3d_shader_instruction *ins) } has_lod_param = is_mipmapped(reg_maps->resource_info[resource_idx].type);
+ if (priv->cur_ps_args && priv->cur_ps_args->fetch4 & (1u << resource_idx)){ + FIXME("Unsupported FETCH4 and LD Sampling SM 5.0"); + } + shader_glsl_get_sample_function(ins->ctx, resource_idx, sampler_idx, flags, &sample_function); shader_glsl_add_src_param(ins, &ins->src[0], sample_function.coord_mask, &coord_param); shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_3, &lod_param); @@ -6196,12 +6264,12 @@ static void shader_glsl_ld(const struct wined3d_shader_instruction *ins) if (is_multisampled(reg_maps->resource_info[resource_idx].type)) { shader_glsl_add_src_param(ins, &ins->src[2], WINED3DSP_WRITEMASK_0, &sample_param); - shader_glsl_gen_sample_code(ins, sampler_bind_idx, &sample_function, ins->src[1].swizzle, + shader_glsl_gen_sample_code(ins, sampler_bind_idx, &sample_function, ins->src[1].swizzle, 0, NULL, NULL, NULL, &ins->texel_offset, "%s, %s", coord_param.param_str, sample_param.param_str); } else { - shader_glsl_gen_sample_code(ins, sampler_bind_idx, &sample_function, ins->src[1].swizzle, + shader_glsl_gen_sample_code(ins, sampler_bind_idx, &sample_function, ins->src[1].swizzle, 0, NULL, NULL, has_lod_param ? lod_param.param_str : NULL, &ins->texel_offset, "%s", coord_param.param_str); } @@ -6214,46 +6282,57 @@ static void shader_glsl_sample(const struct wined3d_shader_instruction *ins) struct glsl_src_param coord_param, lod_param, dx_param, dy_param; unsigned int resource_idx, sampler_idx, sampler_bind_idx; struct glsl_sample_function sample_function; + const struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data; + DWORD swizzle = ins->src[1].swizzle; DWORD flags = 0;
+ resource_idx = ins->src[1].reg.idx[0].offset; + sampler_idx = ins->src[2].reg.idx[0].offset; + if (ins->handler_idx == WINED3DSIH_SAMPLE_GRAD) flags |= WINED3D_GLSL_SAMPLE_GRAD; if (ins->handler_idx == WINED3DSIH_SAMPLE_LOD) flags |= WINED3D_GLSL_SAMPLE_LOD; if (wined3d_shader_instruction_has_texel_offset(ins)) flags |= WINED3D_GLSL_SAMPLE_OFFSET; - - resource_idx = ins->src[1].reg.idx[0].offset; - sampler_idx = ins->src[2].reg.idx[0].offset; + if (priv->cur_ps_args->fetch4 & (1u << sampler_idx)) + { + flags = WINED3D_GLSL_SAMPLE_GATHER; + swizzle = WINED3DSP_FETCH4_SWIZZLE; + }
shader_glsl_get_sample_function(ins->ctx, resource_idx, sampler_idx, flags, &sample_function); shader_glsl_add_src_param(ins, &ins->src[0], sample_function.coord_mask, &coord_param);
- switch (ins->handler_idx) + /* Fetch4 overwrites the other texture flags */ + if (flags != WINED3D_GLSL_SAMPLE_GATHER) { - case WINED3DSIH_SAMPLE: - break; - case WINED3DSIH_SAMPLE_B: - shader_glsl_add_src_param(ins, &ins->src[3], WINED3DSP_WRITEMASK_0, &lod_param); - lod_param_str = lod_param.param_str; - break; - case WINED3DSIH_SAMPLE_GRAD: - shader_glsl_add_src_param(ins, &ins->src[3], sample_function.deriv_mask, &dx_param); - shader_glsl_add_src_param(ins, &ins->src[4], sample_function.deriv_mask, &dy_param); - dx_param_str = dx_param.param_str; - dy_param_str = dy_param.param_str; - break; - case WINED3DSIH_SAMPLE_LOD: - shader_glsl_add_src_param(ins, &ins->src[3], WINED3DSP_WRITEMASK_0, &lod_param); - lod_param_str = lod_param.param_str; - break; - default: - ERR("Unhandled opcode %s.\n", debug_d3dshaderinstructionhandler(ins->handler_idx)); - break; + switch (ins->handler_idx) + { + case WINED3DSIH_SAMPLE: + break; + case WINED3DSIH_SAMPLE_B: + shader_glsl_add_src_param(ins, &ins->src[3], WINED3DSP_WRITEMASK_0, &lod_param); + lod_param_str = lod_param.param_str; + break; + case WINED3DSIH_SAMPLE_GRAD: + shader_glsl_add_src_param(ins, &ins->src[3], sample_function.deriv_mask, &dx_param); + shader_glsl_add_src_param(ins, &ins->src[4], sample_function.deriv_mask, &dy_param); + dx_param_str = dx_param.param_str; + dy_param_str = dy_param.param_str; + break; + case WINED3DSIH_SAMPLE_LOD: + shader_glsl_add_src_param(ins, &ins->src[3], WINED3DSP_WRITEMASK_0, &lod_param); + lod_param_str = lod_param.param_str; + break; + default: + ERR("Unhandled opcode %s.\n", debug_d3dshaderinstructionhandler(ins->handler_idx)); + break; + } }
sampler_bind_idx = shader_glsl_find_sampler(&ins->ctx->reg_maps->sampler_map, resource_idx, sampler_idx); - shader_glsl_gen_sample_code(ins, sampler_bind_idx, &sample_function, ins->src[1].swizzle, + shader_glsl_gen_sample_code(ins, sampler_bind_idx, &sample_function, swizzle, flags, dx_param_str, dy_param_str, lod_param_str, &ins->texel_offset, "%s", coord_param.param_str); shader_glsl_release_sample_function(ins->ctx, &sample_function); } @@ -6299,6 +6378,9 @@ static void shader_glsl_sample_c(const struct wined3d_shader_instruction *ins) unsigned int coord_size; DWORD flags = 0;
+ resource_idx = ins->src[1].reg.idx[0].offset; + sampler_idx = ins->src[2].reg.idx[0].offset; + if (ins->handler_idx == WINED3DSIH_SAMPLE_C_LZ) { lod_param = "0"; @@ -6310,8 +6392,6 @@ static void shader_glsl_sample_c(const struct wined3d_shader_instruction *ins)
if (!(resource_info = shader_glsl_get_resource_info(ins, &ins->src[1].reg))) return; - resource_idx = ins->src[1].reg.idx[0].offset; - sampler_idx = ins->src[2].reg.idx[0].offset;
shader_glsl_get_sample_function(ins->ctx, resource_idx, sampler_idx, flags, &sample_function); coord_size = shader_glsl_get_write_mask_size(sample_function.coord_mask); @@ -6327,7 +6407,7 @@ static void shader_glsl_sample_c(const struct wined3d_shader_instruction *ins) } else { - shader_glsl_gen_sample_code(ins, sampler_bind_idx, &sample_function, WINED3DSP_NOSWIZZLE, + shader_glsl_gen_sample_code(ins, sampler_bind_idx, &sample_function, WINED3DSP_NOSWIZZLE, 0, NULL, NULL, lod_param, &ins->texel_offset, "vec%u(%s, %s)", coord_size, coord_param.param_str, compare_param.param_str); } @@ -6469,18 +6549,18 @@ static void shader_glsl_texdp3tex(const struct wined3d_shader_instruction *ins) switch(mask_size) { case 1: - shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL, - NULL, "dot(ffp_texcoord[%u].xyz, %s)", sampler_idx, src0_param.param_str); + shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, WINED3DSP_NOSWIZZLE, 0, NULL, NULL, + NULL, NULL, "dot(ffp_texcoord[%u].xyz, %s)", sampler_idx, src0_param.param_str); break;
case 2: - shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL, - NULL, "vec2(dot(ffp_texcoord[%u].xyz, %s), 0.0)", sampler_idx, src0_param.param_str); + shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, WINED3DSP_NOSWIZZLE, 0, NULL, NULL, + NULL, NULL, "vec2(dot(ffp_texcoord[%u].xyz, %s), 0.0)", sampler_idx, src0_param.param_str); break;
case 3: - shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL, - NULL, "vec3(dot(ffp_texcoord[%u].xyz, %s), 0.0, 0.0)", sampler_idx, src0_param.param_str); + shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, WINED3DSP_NOSWIZZLE, 0, NULL, NULL, + NULL, NULL, "vec3(dot(ffp_texcoord[%u].xyz, %s), 0.0, 0.0)", sampler_idx, src0_param.param_str); break;
default: @@ -6588,7 +6668,7 @@ static void shader_glsl_texm3x2tex(const struct wined3d_shader_instruction *ins) shader_glsl_get_sample_function(ins->ctx, reg, reg, 0, &sample_function);
/* Sample the texture using the calculated coordinates */ - shader_glsl_gen_sample_code(ins, reg, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL, NULL, "tmp0.xy"); + shader_glsl_gen_sample_code(ins, reg, &sample_function, WINED3DSP_NOSWIZZLE, 0, NULL, NULL, NULL, NULL, "tmp0.xy"); shader_glsl_release_sample_function(ins->ctx, &sample_function); }
@@ -6609,7 +6689,7 @@ static void shader_glsl_texm3x3tex(const struct wined3d_shader_instruction *ins) shader_glsl_get_sample_function(ins->ctx, reg, reg, 0, &sample_function);
/* Sample the texture using the calculated coordinates */ - shader_glsl_gen_sample_code(ins, reg, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL, NULL, "tmp0.xyz"); + shader_glsl_gen_sample_code(ins, reg, &sample_function, WINED3DSP_NOSWIZZLE, 0, NULL, NULL, NULL, NULL, "tmp0.xyz"); shader_glsl_release_sample_function(ins->ctx, &sample_function);
tex_mx->current_row = 0; @@ -6660,7 +6740,7 @@ static void shader_glsl_texm3x3spec(const struct wined3d_shader_instruction *ins shader_glsl_write_mask_to_str(sample_function.coord_mask, coord_mask);
/* Sample the texture */ - shader_glsl_gen_sample_code(ins, reg, &sample_function, WINED3DSP_NOSWIZZLE, + shader_glsl_gen_sample_code(ins, reg, &sample_function, WINED3DSP_NOSWIZZLE, 0, NULL, NULL, NULL, NULL, "tmp0%s", coord_mask); shader_glsl_release_sample_function(ins->ctx, &sample_function);
@@ -6694,7 +6774,7 @@ static void shader_glsl_texm3x3vspec(const struct wined3d_shader_instruction *in shader_glsl_write_mask_to_str(sample_function.coord_mask, coord_mask);
/* Sample the texture using the calculated coordinates */ - shader_glsl_gen_sample_code(ins, reg, &sample_function, WINED3DSP_NOSWIZZLE, + shader_glsl_gen_sample_code(ins, reg, &sample_function, WINED3DSP_NOSWIZZLE, 0, NULL, NULL, NULL, NULL, "tmp0%s", coord_mask); shader_glsl_release_sample_function(ins->ctx, &sample_function);
@@ -6753,7 +6833,7 @@ static void shader_glsl_texbem(const struct wined3d_shader_instruction *ins)
shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1, &coord_param);
- shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL, NULL, + shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, WINED3DSP_NOSWIZZLE, 0, NULL, NULL, NULL, NULL, "T%u%s + vec4(bumpenv_mat%u * %s, 0.0, 0.0)%s", sampler_idx, coord_mask, sampler_idx, coord_param.param_str, coord_mask);
@@ -6798,7 +6878,7 @@ static void shader_glsl_texreg2ar(const struct wined3d_shader_instruction *ins) shader_glsl_get_register_name(&ins->src[0].reg, ins->src[0].reg.data_type, reg_name, NULL, ins->ctx);
shader_glsl_get_sample_function(ins->ctx, sampler_idx, sampler_idx, 0, &sample_function); - shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL, NULL, + shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, WINED3DSP_NOSWIZZLE, 0, NULL, NULL, NULL, NULL, "%s.wx", reg_name->buffer); shader_glsl_release_sample_function(ins->ctx, &sample_function);
@@ -6818,7 +6898,7 @@ static void shader_glsl_texreg2gb(const struct wined3d_shader_instruction *ins) shader_glsl_get_register_name(&ins->src[0].reg, ins->src[0].reg.data_type, reg_name, NULL, ins->ctx);
shader_glsl_get_sample_function(ins->ctx, sampler_idx, sampler_idx, 0, &sample_function); - shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL, NULL, + shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, WINED3DSP_NOSWIZZLE, 0, NULL, NULL, NULL, NULL, "%s.yz", reg_name->buffer); shader_glsl_release_sample_function(ins->ctx, &sample_function);
@@ -6837,7 +6917,7 @@ static void shader_glsl_texreg2rgb(const struct wined3d_shader_instruction *ins) shader_glsl_get_sample_function(ins->ctx, sampler_idx, sampler_idx, 0, &sample_function); shader_glsl_add_src_param(ins, &ins->src[0], sample_function.coord_mask, &src0_param);
- shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL, NULL, + shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, WINED3DSP_NOSWIZZLE, 0, NULL, NULL, NULL, NULL, "%s", src0_param.param_str); shader_glsl_release_sample_function(ins->ctx, &sample_function); } diff --git a/dlls/wined3d/shader.c b/dlls/wined3d/shader.c index e11a37cf07..6e21444f8c 100644 --- a/dlls/wined3d/shader.c +++ b/dlls/wined3d/shader.c @@ -4027,6 +4027,19 @@ void find_ps_compile_args(const struct wined3d_state *state, const struct wined3 } }
+ if (gl_info->supported[ARB_TEXTURE_GATHER]) + { + for (i = 0; i < MAX_FRAGMENT_SAMPLERS; ++i) + { + if (state->sampler_states[i][WINED3D_SAMP_MIPMAP_LOD_BIAS] == MAKEFOURCC('G','E','T','4') + && state->textures[i]->resource.format_flags & WINED3DFMT_FLAG_ALLOW_FETCH4 + && shader->reg_maps.resource_info[i].type == WINED3D_SHADER_RESOURCE_TEXTURE_2D) + args->fetch4 |= 1 << i; + else + args->fetch4 &= ~(1 << i); + } + } + if (context->d3d_info->limits.varying_count < wined3d_max_compat_varyings(context->gl_info)) { const struct wined3d_shader *vs = state->shader[WINED3D_SHADER_TYPE_VERTEX]; diff --git a/dlls/wined3d/state.c b/dlls/wined3d/state.c index 8708aa09b3..dc69e935c5 100644 --- a/dlls/wined3d/state.c +++ b/dlls/wined3d/state.c @@ -3616,6 +3616,8 @@ static void sampler(struct wined3d_context *context, const struct wined3d_state { struct wined3d_texture_gl *texture_gl = wined3d_texture_gl(state->textures[sampler_idx]); BOOL srgb = state->sampler_states[sampler_idx][WINED3D_SAMP_SRGB_TEXTURE]; + BOOL fetch4 = state->sampler_states[sampler_idx][WINED3D_SAMP_MIPMAP_LOD_BIAS] + == MAKEFOURCC('G','E','T','4'); const DWORD *sampler_states = state->sampler_states[sampler_idx]; struct wined3d_device *device = context->device; struct wined3d_sampler_desc desc; @@ -3650,6 +3652,17 @@ static void sampler(struct wined3d_context *context, const struct wined3d_state /* Trigger shader constant reloading (for NP2 texcoord fixup) */ if (!(texture_gl->t.flags & WINED3D_TEXTURE_POW2_MAT_IDENT)) context->constant_update_mask |= WINED3D_SHADER_CONST_PS_NP2_FIXUP; + + /* Trigger pixel shader recompilation for FETCH4 changes */ + if(gl_info->supported[ARB_TEXTURE_GATHER] && + ((context->last_was_fetch4 >> sampler_idx) & 0x1) ^ fetch4) + { + if (fetch4) + context->last_was_fetch4 |= 1 << sampler_idx; + else + context->last_was_fetch4 &= ~(1 << sampler_idx); + context->shader_update_mask = (1u << WINED3D_SHADER_TYPE_PIXEL); + } } else { diff --git a/dlls/wined3d/wined3d_private.h 
b/dlls/wined3d/wined3d_private.h index 4224461142..654e4ff610 100644 --- a/dlls/wined3d/wined3d_private.h +++ b/dlls/wined3d/wined3d_private.h @@ -521,7 +521,8 @@ enum wined3d_immconst_type WINED3D_IMMCONST_VEC4, };
-#define WINED3DSP_NOSWIZZLE (0u | (1u << 2) | (2u << 4) | (3u << 6)) +#define WINED3DSP_NOSWIZZLE (0u | (1u << 2) | (2u << 4) | (3u << 6)) +#define WINED3DSP_FETCH4_SWIZZLE (2u | (0u << 2) | (1u << 4) | (3u << 6)) /* zxyw */
enum wined3d_shader_src_modifier { @@ -1358,7 +1359,8 @@ struct ps_compile_args DWORD alpha_test_func : 3; DWORD render_offscreen : 1; DWORD rt_alpha_swizzle : 8; /* MAX_RENDER_TARGET_VIEWS, 8 */ - DWORD padding : 18; + WORD fetch4 : 16; /* MAX_FRAGMENT_SAMPLERS, 16 */ + DWORD padding : 2; };
enum fog_src_type @@ -1893,6 +1895,7 @@ struct wined3d_context DWORD last_was_ffp_blit : 1; DWORD last_was_blit : 1; DWORD last_was_ckey : 1; + DWORD last_was_fetch4 : 16; /* MAX_FRAGMENT_SAMPLERS */ DWORD fog_coord : 1; DWORD fog_enabled : 1; DWORD current : 1;
Hi,
Am 05.02.19 um 01:19 schrieb Daniel Ansorregui:
- if(sample_flags & WINED3D_GLSL_SAMPLE_GATHER)
Formatting. "if(" -> "if ("
- /* Fetch4 overwrites the other texture flags */
- if (priv->cur_ps_args->fetch4 & (1u << resource_idx))
- {
sample_flags |= WINED3D_GLSL_SAMPLE_GATHER;
mask = 0;
- }
I am probably missing something, but the sample flags logical OR doesn't quite match up with the comment. Is the comment outdated from previous Intel-only behavior?
if (shader_version < WINED3D_SHADER_VERSION(2,0)) swizzle = WINED3DSP_NOSWIZZLE;
- else if (sample_flags & WINED3D_GLSL_SAMPLE_GATHER) swizzle = WINED3DSP_FETCH4_SWIZZLE; else swizzle = ins->src[1].swizzle;
I have some doubts here. This would mean 1.x pixel shaders have a fetch4 swizzle that is different from fixed function and shader model 2.0+. Shader model 2.0 also allows specifying a post-lookup swizzle on the sampler (texld r0, s0.zwxz, v3, for example, will put the texture's blue channel into .x and .w, the alpha into .y and red in .z). I suspect you need to merge WINED3DSP_FETCH4_SWIZZLE with ins->src[1].swizzle. I think we should have a function for that, but right now I can only find shader_glsl_swizzle_get_component.
It is quite possible that the sampler swizzle is ignored by Windows drivers when fetch4 is enabled, considering how many other texture fetch properties are ignored.
The same applies to the other functions you modify of course, like shader_glsl_texldd.
- if(priv->cur_ps_args && priv->cur_ps_args->fetch4 & (1u << sampler_idx))
- {
flags = WINED3D_GLSL_SAMPLE_GATHER;
- }
Formatting
- if (flags == WINED3D_GLSL_SAMPLE_GATHER){
Formatting
swizzle = WINED3DSP_FETCH4_SWIZZLE;
shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, swizzle, flags,
NULL, NULL, NULL, NULL, "%s", coord_param.param_str);
shader_glsl_release_sample_function(ins->ctx, &sample_function);
return;
- }
- shader_glsl_get_sample_function(ins->ctx, sampler_idx, sampler_idx, WINED3D_GLSL_SAMPLE_LOD, &sample_function);
- shader_glsl_add_src_param(ins, &ins->src[0], sample_function.coord_mask, &coord_param);
- shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_3, &lod_param);
- shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, swizzle, flags, NULL, NULL,
lod_param.param_str, NULL, "%s", coord_param.param_str);
shader_glsl_release_sample_function(ins->ctx, &sample_function);
}
@@ -6175,6 +6238,7 @@ static void shader_glsl_ld(const struct wined3d_shader_instruction *ins) struct glsl_sample_function sample_function; DWORD flags = WINED3D_GLSL_SAMPLE_LOAD; BOOL has_lod_param;
const struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data;
if (wined3d_shader_instruction_has_texel_offset(ins)) flags |= WINED3D_GLSL_SAMPLE_OFFSET;
@@ -6189,6 +6253,10 @@ static void shader_glsl_ld(const struct wined3d_shader_instruction *ins) } has_lod_param = is_mipmapped(reg_maps->resource_info[resource_idx].type);
- if (priv->cur_ps_args && priv->cur_ps_args->fetch4 & (1u << resource_idx)){
FIXME("Unsupported FETCH4 and LD Sampling SM 5.0");
- }
Formatting. This case is unlikely to be hit, but I guess not impossible if a game really tries to set this magic LOD value in d3d10/11.
@@ -3833,10 +3849,8 @@ static void PRINTF_ATTR(9, 10) shader_glsl_gen_sample_code(const struct wined3d_ if (!string_buffer_resize(ins->ctx->buffer, ret)) break; }
- if (np2_fixup) {
Is the removed newline intentional?
@@ -6214,46 +6282,57 @@ static void shader_glsl_sample(const struct wined3d_shader_instruction *ins) ...
- if (priv->cur_ps_args->fetch4 & (1u << sampler_idx))
- {
flags = WINED3D_GLSL_SAMPLE_GATHER;
swizzle = WINED3DSP_FETCH4_SWIZZLE;
- }
Afaics the WINED3DSIH_SAMPLE* opcodes are shader model 4+ only, so I don't think you have to handle fetch4 here. Either write a fixme like in shader_glsl_ld or ignore it entirely.
@@ -6588,7 +6668,7 @@ static void shader_glsl_texm3x2tex(const struct wined3d_shader_instruction *ins) shader_glsl_get_sample_function(ins->ctx, reg, reg, 0, &sample_function);
This function and the following ones could in theory be used with FETCH4 - they are pixel shader 1.x specific. I'm happy with a FIXME. Those functions are somewhat confusing and special because they need to be preceded by one or more texm3x3pad instructions.
diff --git a/dlls/wined3d/shader.c b/dlls/wined3d/shader.c index e11a37cf07..6e21444f8c 100644 --- a/dlls/wined3d/shader.c +++ b/dlls/wined3d/shader.c @@ -4027,6 +4027,19 @@ void find_ps_compile_args(const struct wined3d_state *state, const struct wined3 } }
- if (gl_info->supported[ARB_TEXTURE_GATHER])
- {
I'm not a fan of checking the (rather static) GL capabilities in this performance critical codepath. I think it would be better to remove WINED3DFMT_FLAG_ALLOW_FETCH4 from all formats during adapter creation if texture gather is not supported. That would allow you to remove not only this check but also the other gl_info->supported[ARB_TEXTURE_GATHER] checks in this patch and patch 2.
- This assumes FETCH4 is already supported by Wine, and checks for ARB_texture_gather support before exposing DF24.
Signed-off-by: Daniel Ansorregui [email protected] --- dlls/wined3d/directx.c | 6 ++++++ dlls/wined3d/utils.c | 29 +++++++++++++++++++++++++++++ include/wine/wined3d.h | 2 ++ 3 files changed, 37 insertions(+)
diff --git a/dlls/wined3d/directx.c b/dlls/wined3d/directx.c index 75622be80c..be0c514a78 100644 --- a/dlls/wined3d/directx.c +++ b/dlls/wined3d/directx.c @@ -1723,6 +1723,12 @@ HRESULT CDECL wined3d_check_device_format(const struct wined3d *wined3d, UINT ad return WINED3DOK_NOMIPGEN; }
+ if ((check_format_id == WINED3DFMT_DF24) && !adapter->gl_info.supported[ARB_TEXTURE_GATHER]) + { + TRACE("No Support for Fetch4 disabling DF24 support.\n"); + return WINED3DERR_NOTAVAILABLE; + } + return WINED3D_OK; }
diff --git a/dlls/wined3d/utils.c b/dlls/wined3d/utils.c index 70c6d18840..da8408ca27 100644 --- a/dlls/wined3d/utils.c +++ b/dlls/wined3d/utils.c @@ -65,6 +65,8 @@ format_index_remap[] = {WINED3DFMT_R16, WINED3D_FORMAT_FOURCC_BASE + 20}, {WINED3DFMT_AL16, WINED3D_FORMAT_FOURCC_BASE + 21}, {WINED3DFMT_NV12, WINED3D_FORMAT_FOURCC_BASE + 22}, + {WINED3DFMT_DF16, WINED3D_FORMAT_FOURCC_BASE + 23}, + {WINED3DFMT_DF24, WINED3D_FORMAT_FOURCC_BASE + 24}, };
#define WINED3D_FORMAT_COUNT (WINED3D_FORMAT_FOURCC_BASE + ARRAY_SIZE(format_index_remap)) @@ -141,6 +143,8 @@ static const struct wined3d_format_channels formats[] = {WINED3DFMT_NVHU, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0}, {WINED3DFMT_NVHS, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0}, {WINED3DFMT_NULL, 8, 8, 8, 8, 0, 8, 16, 24, 4, 0, 0}, + {WINED3DFMT_DF16, 0, 0, 0, 0, 0, 0, 0, 0, 2, 16, 0}, + {WINED3DFMT_DF24, 0, 0, 0, 0, 0, 0, 0, 0, 4, 24, 0}, /* Unsure about them, could not find a Windows driver that supports them */ {WINED3DFMT_R16, 16, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0}, {WINED3DFMT_AL16, 0, 0, 0, 16, 0, 0, 0, 16, 4, 0, 0}, @@ -346,6 +350,8 @@ static const struct wined3d_format_base_flags format_base_flags[] = {WINED3DFMT_R16, WINED3DFMT_FLAG_ALLOW_FETCH4}, {WINED3DFMT_R32_FLOAT, WINED3DFMT_FLAG_ALLOW_FETCH4}, {WINED3DFMT_A8_UNORM, WINED3DFMT_FLAG_ALLOW_FETCH4}, + {WINED3DFMT_DF16, WINED3DFMT_FLAG_ALLOW_FETCH4}, + {WINED3DFMT_DF24, WINED3DFMT_FLAG_ALLOW_FETCH4}, };
static void rgb888_from_rgb565(WORD rgb565, BYTE *r, BYTE *g, BYTE *b) @@ -1894,6 +1900,24 @@ static const struct wined3d_format_texture_info format_texture_info[] = WINED3DFMT_FLAG_TEXTURE | WINED3DFMT_FLAG_POSTPIXELSHADER_BLENDING | WINED3DFMT_FLAG_FILTERING | WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL, ARB_FRAMEBUFFER_OBJECT, NULL}, + {WINED3DFMT_DF16, GL_DEPTH_COMPONENT, GL_DEPTH_COMPONENT, 0, + GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, 0, + WINED3DFMT_FLAG_DEPTH, + WINED3D_GL_EXT_NONE, NULL}, + {WINED3DFMT_DF16, GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT16, 0, + GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, 0, + WINED3DFMT_FLAG_TEXTURE | WINED3DFMT_FLAG_POSTPIXELSHADER_BLENDING | WINED3DFMT_FLAG_FILTERING + | WINED3DFMT_FLAG_DEPTH, + ARB_DEPTH_TEXTURE, NULL}, + {WINED3DFMT_DF24, GL_DEPTH_COMPONENT, GL_DEPTH_COMPONENT, 0, + GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, 0, + WINED3DFMT_FLAG_DEPTH, + WINED3D_GL_EXT_NONE, NULL}, + {WINED3DFMT_DF24, GL_DEPTH_COMPONENT24, GL_DEPTH_COMPONENT24, 0, + GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, 0, + WINED3DFMT_FLAG_TEXTURE | WINED3DFMT_FLAG_POSTPIXELSHADER_BLENDING | WINED3DFMT_FLAG_FILTERING + | WINED3DFMT_FLAG_DEPTH, + ARB_DEPTH_TEXTURE, NULL}, {WINED3DFMT_NULL, 0, 0, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, 0, WINED3DFMT_FLAG_TEXTURE | WINED3DFMT_FLAG_RENDERTARGET | WINED3DFMT_FLAG_FBO_ATTACHABLE, @@ -3533,6 +3557,9 @@ static void apply_format_fixups(struct wined3d_adapter *adapter, struct wined3d_ {WINED3DFMT_INTZ, "XXXX", FALSE, WINED3D_GL_EXT_NONE}, {WINED3DFMT_INTZ, "XYZW", FALSE, WINED3D_GL_LEGACY_CONTEXT},
+ {WINED3DFMT_DF16, "X001", FALSE, WINED3D_GL_EXT_NONE}, + {WINED3DFMT_DF24, "X001", FALSE, WINED3D_GL_EXT_NONE}, + {WINED3DFMT_L8_UNORM, "XXX1", FALSE, ARB_TEXTURE_RG}, };
@@ -4387,6 +4414,8 @@ const char *debug_d3dformat(enum wined3d_format_id format_id) FMT_TO_STR(WINED3DFMT_R16); FMT_TO_STR(WINED3DFMT_AL16); FMT_TO_STR(WINED3DFMT_NV12); + FMT_TO_STR(WINED3DFMT_DF16); + FMT_TO_STR(WINED3DFMT_DF24); #undef FMT_TO_STR default: { diff --git a/include/wine/wined3d.h b/include/wine/wined3d.h index e09e4e1fce..e6c3c1b802 100644 --- a/include/wine/wined3d.h +++ b/include/wine/wined3d.h @@ -271,6 +271,8 @@ enum wined3d_format_id WINED3DFMT_R16 = WINEMAKEFOURCC(' ','R','1','6'), WINED3DFMT_AL16 = WINEMAKEFOURCC('A','L','1','6'), WINED3DFMT_NV12 = WINEMAKEFOURCC('N','V','1','2'), + WINED3DFMT_DF16 = WINEMAKEFOURCC('D','F','1','6'), + WINED3DFMT_DF24 = WINEMAKEFOURCC('D','F','2','4'),
WINED3DFMT_FORCE_DWORD = 0xffffffff };
Hi,
I got some failures on r500 on Win7, I've attached the test output. My guess is that r500 applies fetch4 for ATI1, and there's still the different green and blue components in DF24.
Evergreen has texldd and texldl failures. I think I did not see them last time. Weird...
A few comments are inline. The general structure looks good to me. Am 05.02.19 um 01:19 schrieb Daniel Ansorregui:
BOOL broken_wine; /* Do not check it on wine because is known to be broken */
I'd prefer the name "todo_wine". Wine isn't broken, it's just not finished yet :-P. Only windows is broken().
(Jokes aside, I think it makes sense to stick to the names of "todo_wine" and "broken()")
const char * name; /* The name of the texture format */
Minor nitpick: "const char *name;"
- const BOOL isWin = strcmp(winetest_platform, "wine");
This is not used
get_rt_readback(original_rt, &rb);
D3DCOLOR color = get_readback_color(&rb, format_tests[i].x, format_tests[i].y);
Trailing whitespace
February 7, 2019 10:42 AM, "Stefan Dösinger" [email protected] wrote:
I'd prefer the name "todo_wine". Wine isn't broken, it's just not finished yet :-P. Only windows is broken().
(Jokes aside, I think it makes sense to stick to the names of "todo_wine" and "broken()")
Isn't that the name of a macro?
Chip
Am 07.02.2019 um 18:08 schrieb Chip Davis [email protected]:
February 7, 2019 10:42 AM, "Stefan Dösinger" [email protected] wrote:
I'd prefer the name "todo_wine". Wine isn't broken, it's just not finished yet :-P. Only windows is broken().
(Jokes aside, I think it makes sense to stick to the names of "todo_wine" and "broken()")
Isn't that the name of a macro?
Er right, yes, I hit the name of the macro. I think other tests use just "todo". My point was that we should avoid confusing the terms "broken" and "todo".
- Test texld/texldp/texldd/texldb/texldl in PS and FFP - Test supported/unsupported texture formats on FFP/texld/texldp - Test 3dtextures (Disabled, each platform has different results) - Test depth textures DF16/DF24 with fetch4 on PS (FFP is broken on windows)
Signed-off-by: Daniel Ansorregui [email protected] --- dlls/d3d9/tests/visual.c | 679 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 679 insertions(+)
diff --git a/dlls/d3d9/tests/visual.c b/dlls/d3d9/tests/visual.c index c06acb77d4..5ccced40be 100644 --- a/dlls/d3d9/tests/visual.c +++ b/dlls/d3d9/tests/visual.c @@ -15234,6 +15234,684 @@ done: DestroyWindow(window); }
+static void fetch4_test(void) +{ + static const DWORD vs_code[] = + { + 0xfffe0300, /* vs_3_0 */ + 0x0200001f, 0x80000000, 0x900f0000, /* dcl_position v0 */ + 0x0200001f, 0x80000005, 0x900f0001, /* dcl_texcoord v1 */ + 0x0200001f, 0x80000000, 0xe00f0000, /* dcl_position o0 */ + 0x0200001f, 0x80000005, 0xe00f0001, /* dcl_texcoord o1 */ + 0x02000001, 0xe00f0000, 0x90e40000, /* mov o0, v0 */ + 0x02000001, 0xe00f0001, 0x90e40001, /* mov o1, v1 */ + 0x0000ffff + }; + static const DWORD ps_code_texld[] = + { + /* Test texld */ + 0xffff0300, /* ps_3_0 */ + 0x0200001f, 0x80000005, 0x900f0000, /* dcl_texcoord v0 */ + 0x0200001f, 0x90000000, 0xa00f0800, /* dcl_2d s0 */ + 0x03000042, 0x800f0000, 0x90e40000, 0xa0e40800, /* texld r0, v0, s0 */ + 0x02000001, 0x800f0800, 0x80e40000, /* mov oC0, r0 */ + 0x0000ffff /* end */ + }; + static const DWORD ps_code_texldp[] = + { + /* Test texldp : AMD and Wine uses the projection on Fetch4, Intel UHD 620 does not apply it */ + 0xffff0300, /* ps_3_0 */ + 0x0200001f, 0x80000005, 0x900f0000, /* dcl_texcoord v0 */ + 0x0200001f, 0x90000000, 0xa00f0800, /* dcl_2d s0 */ + 0x05000051, 0xa00f0000, 0x00000000, 0x00000000, 0x00000000, 0x40000000, /* def c0, 0.0, 0.0, 0.0, 2.0 */ + 0x02000001, 0x80030000, 0x90540000, /* mov r0.xy, v0.xyyy */ + 0x02000001, 0x800c0000, 0xa0fe0000, /* mov r0.zw, c0.zwww */ + 0x03010042, 0x800f0000, 0x80e40000, 0xa0e40800, /* texldp r0, r0, s0 */ + 0x02000001, 0x800f0800, 0x80e40000, /* mov oC0, r0 */ + 0x0000ffff, /* end */ + }; + static const DWORD ps_code_texldd[] = + { + /* Test texldd : Fetch4 uses the same D3D state as LOD bias, therefore disables LOD. + * Sampling LOD gradient should be ignored. 
Same result as texld */ + /* NOTE: Radeon HD 5700 driver 8.17.10.1404 disables Fetch4 on texldb */ + 0xffff0300, /* ps_3_0 */ + 0x0200001f, 0x80000005, 0x900f0000, /* dcl_texcoord v0 */ + 0x0200001f, 0x90000000, 0xa00f0800, /* dcl_2d s0 */ + 0x05000051, 0xa00f0000, 0x3f000000, 0x3f000000, 0x3f000000, 0x3f000000, /* def c0, 0.5, 0.5, 0.5, 0.5 */ + 0x05000051, 0xa00f0001, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, /* def c0, 1.0, 1.0, 1.0, 1.0 */ + 0x02000001, 0x800f0002, 0xa0e40000, /* mov r2, c0 */ + 0x0500005d, 0x800f0000, 0x90e40000, 0xa0e40800, 0xa0e40000, 0x80e40002, /* texldd r0, v0, s0, c0, r2 */ + 0x02000001, 0x800f0800, 0x80e40000, /* mov oC0, r0 */ + 0x0000ffff, /* end */ + }; + static const DWORD ps_code_texldb[] = + { + /* Test texldb : Fetch4 uses the same D3D state as LOD bias, therefore disables LOD. + * Same result as texld */ + /* NOTE: Radeon HD 5700 driver 8.17.10.1404 disables Fetch4 on texldb */ + 0xffff0300, /* ps_3_0 */ + 0x0200001f, 0x80000005, 0x900f0000, /* dcl_texcoord v0 */ + 0x0200001f, 0x90000000, 0xa00f0800, /* dcl_2d s0 */ + 0x05000051, 0xa00f0000, 0x00000000, 0x00000000, 0x40a00000, 0x40a00000, /* def c0, 0.0, 0.0, 5.0, 5.0 */ + 0x03000002, 0x800f0000, 0x90e40000, 0xa0e40000, /* add r0, v0, c0 */ + 0x03020042, 0x800f0000, 0x80e40000, 0xa0e40800, /* texldb r0, r0, s0 */ + 0x02000001, 0x800f0800, 0x80e40000, /* mov oC0, r0 */ + 0x0000ffff, /* end */ + }; + static const DWORD ps_code_texldl[] = + { + /* Test texldl : Fetch4 uses the same D3D state as LOD bias, therefore disables LOD. + * The explicit LOD level is then ignored. 
Same result as texld */ + /* NOTE: Radeon HD 5700 driver 8.17.10.1404 disables Fetch4 on texldl */ + 0xffff0300, /* ps_3_0 */ + 0x0200001f, 0x80000005, 0x900f0000, /* dcl_texcoord v0 */ + 0x0200001f, 0x90000000, 0xa00f0800, /* dcl_2d s0 */ + 0x05000051, 0xa00f0000, 0x00000000, 0x00000000, 0x3f000000, 0x3f000000, /* def c0, 0.0, 0.0, 0.5, 0.5 */ + 0x03000002, 0x800f0000, 0x90e40000, 0xa0e40000, /* add r0, v0, c0 */ + 0x0300005f, 0x800f0000, 0x80e40000, 0xa0e40800, /* texldl r0, r0, s0 */ + 0x02000001, 0x800f0800, 0x80e40000, /* mov oC0, r0 */ + 0x0000ffff, /* end */ + }; + static const DWORD ps_code_3d[] = + { + 0xffff0300, /* ps_3_0 */ + 0x0200001f, 0x80000005, 0x900f0000, /* dcl_texcoord v0 */ + 0x0200001f, 0xa0000000, 0xa00f0800, /* dcl_volume s0 */ + 0x03000042, 0x800f0000, 0x90e40000, 0xa0e40800, /* texld r0, v0, s0 */ + 0x02000001, 0x800f0800, 0x80e40000, /* mov oC0, r0 */ + 0x0000ffff /* end */ + }; + static const DWORD ps_code_swizzle[] = + { + /* Test texld when sampling with a swizzle .yzwx*/ + 0xffff0300, /* ps_3_0 */ + 0x0200001f, 0x80000005, 0x900f0000, /* dcl_texcoord v0 */ + 0x0200001f, 0x90000000, 0xa00f0800, /* dcl_2d s0 */ + 0x03000042, 0x800f0000, 0x90e40000, 0xa0390800, /* texld r0, v0, s0.yzwx */ + 0x02000001, 0x800f0800, 0x80e40000, /* mov oC0, r0 */ + 0x0000ffff /* end */ + }; + + static const struct + { + struct vec3 position; + struct vec3 texcoord; + } + quad[] = + { + /* Tilted on Z axis to get a depth gradient in the depth test */ + /* NOTE: Using 0.55f-0.6f to avoid rounding errors on depth tests */ + {{-1.0f, 1.0f, 1.0f}, {0.0f,0.0f,0.6f} }, + {{ 1.0f, 1.0f, 0.0f}, {1.0f,0.0f,0.6f} }, + {{-1.0f,-1.0f, 0.0f}, {0.0f,1.0f,0.6f} }, + {{ 1.0f,-1.0f, 0.0f}, {1.0f,1.0f,0.6f} } + }; + + static const struct + { + UINT x[4], y[4]; /* Matrix Sampling positions */ + D3DCOLOR color_amd[16]; /* AMD original implementation swizzle with -0.5 texel coord */ + D3DCOLOR color_intel[16]; /* Intel UHD 620 implementation swizzle with no texel coord 
correction */ + /* Wine follows the AMD immplementation, and consider an error the Intel one results + * However, the test will accept as valid the intel only if running on windows */ + D3DCOLOR color_3d_fetch4_off[16]; + D3DCOLOR color_fetch4_off[16]; + } + expected_colors = + { + { 40, 200, 360, 520}, + { 30, 150, 270, 390}, + /* AMD implementation - Wine implementation */ + {0x131202f2, 0x1211f2f1, 0x1110f101, 0x10130102, + 0x02f204f4, 0xf2f1f4f3, 0xf101f303, 0x01020304, + 0x04f42322, 0xf4f32221, 0xf3032120, 0x03042023, + 0x23221312, 0x22211211, 0x21201110, 0x20231013}, + /* Intel UHD 620 implementation */ + {0x23102013, 0x22132312, 0x21122211, 0x20112110, + 0x13011002, 0x120213f2, 0x11f212f1, 0x10f11101, + 0x02030104, 0xf20402f4, 0xf1f4f2f3, 0x01f3f103, + 0x04200323, 0xf4230422, 0xf322f421, 0x0321f320}, + /* Fetch4 off on 3D textures */ + {0xff020202, 0xfff2f2f2, 0xfff1f1f1, 0xff010101, + 0xff050505, 0xfff4f4f4, 0xfff3f3f3, 0xff030303, + 0xff232323, 0xff222222, 0xff212121, 0xff202020, + 0xff131313, 0xff121212, 0xff111111, 0xff101010}, + /* Fetch4 off on 2D texture */ + {0x13131313, 0x12121212, 0x11111111, 0x10101010, + 0x02020202, 0xf2f2f2f2, 0xf1f1f1f1, 0x01010101, + 0x04040404, 0xf4f4f4f4, 0xf3f3f3f3, 0x03030303, + 0x23232323, 0x22222222, 0x21212121, 0x20202020} + }; + + static const DWORD fetch4_data[] = {0x10111213, + 0x01f1f202, + 0x03f3f404, + 0x20212223}; + + static struct + { + IDirect3DVertexShader9 *vs; + IDirect3DPixelShader9 *ps; + const DWORD *ps_code; + const char *name; + BOOL projection; /* The results should be projected (zoomed by 2) */ + BOOL swizzled; /* The result will be shifted by the swizzle */ + } + shaders[] = + { + {NULL, NULL, NULL, "FFP", FALSE, FALSE}, + {NULL, NULL, ps_code_texld, "texld", FALSE, FALSE}, + {NULL, NULL, ps_code_texldp, "texldp", TRUE, FALSE}, + {NULL, NULL, ps_code_texldd, "texldd", FALSE, FALSE}, + {NULL, NULL, ps_code_texldb, "texldb", FALSE, FALSE}, + {NULL, NULL, ps_code_texldl, "texldl", FALSE, FALSE}, + 
{NULL, NULL, ps_code_swizzle, "texld_swizzle", FALSE, TRUE} + }; + + static const struct + { + BOOL todo_in_wine; /* Do not check thorougly on wine because is known to be wrong */ + const char *name; /* The name of the texture format */ + D3DFORMAT format; /* The format of the texture */ + DWORD data; /* The data we will write to the first line */ + UINT x, y; /* Where we expect the color to be */ + UINT w, h; /* Texture size */ + D3DCOLOR color_amd[3]; /* Wine results. Results on AMD swizzle + texture offset */ + D3DCOLOR color_intel[3]; /* Results with intel UHD 620, intel swizzle + no texel offset */ + } + format_tests[] = + { + /* Enabled formats */ + {FALSE, "L8", D3DFMT_L8, + 0xff804010, 360, 270, 2, 2, + {0x00004010, 0x00004010, 0x10400000}, + {0x40001000, 0x40001000, 0x40001000} + }, + {FALSE, "L16", D3DFMT_L16, + 0xff804010, 360, 270, 2, 2, + {0x0000ff40, 0x0000ff40, 0x40ff0000}, + {0xff004000, 0xff004000, 0xff004000} + }, + {FALSE, "R16F", D3DFMT_R16F, + 0x38003c00, 360, 270, 2, 2, + {0x000080ff, 0x000080ff, 0xff800000}, + {0x8000ff00, 0x8000ff00, 0x8000ff00} + }, + {FALSE, "R32F", D3DFMT_R32F, + 0x3f000000, 360, 270, 2, 2, + {0x00000080, 0x00000080, 0x80000000}, + {0x00008000, 0x00008000, 0x00008000} + }, + {FALSE, "ATI1", MAKEFOURCC('A','T','I','1'), + 0xb97700ff, 360, 60, 4, 4, + {0x6d6d6d6d, 0x6d6d6d6d, 0x49494949}, + {0xff6d00ff, 0xff6d00ff, 0xff4900ff} + }, + + /* Disabled format on Intel, enabled on AMD, broken on wine + * since it is implemented with GL_ALPHA, and fetch4 will fetch RED value */ + {TRUE, "A8", D3DFMT_A8, + 0xff804010, 360, 270, 2, 2, + {0x00004010, 0x00004010, 0x10400000}, + {0x00000000, 0x00000000, 0x00000000} + }, + + /* Disabled format */ + {FALSE, "A8R8G8B8", D3DFMT_A8R8G8B8, + 0xff804010, 360, 270, 2, 2, + {0x00000000, 0x00000000, 0xff804010}, + {0x00000000, 0x00000000, 0xff804010} + }, + }; + + static const struct + { + D3DCOLOR color_off, color_amd, color_intel; + UINT x, y; + } + expected_depth[][4] = + { + { + /* This is 
the expected result for shadow samplers */ + {0xffffffff,0xffffffff,0xffffffff, 20, 15}, + {0xffffffff,0xffffffff,0xffffffff,260, 15}, + {0x00000000,0x00000000,0x00000000, 20,255}, + {0x00000000,0x00000000,0x00000000,260,135}, + }, + { + /* This is the expected result with DF16 */ + {0xfffe0000,0xfedfdfbf,0x202000ff, 20, 15}, + {0xff9f0000,0x9f7f7f5f,0x00bf009f,260, 15}, + {0xff800000,0x7f5f5f3f,0x9f000080, 20,255}, + {0xff600000,0x5f3f3f1f,0x80809f60,260,135}, + }, + { + /* This is the expected result with DF24 */ + {0xffff0000,0xffdfdfbf,0x202000ff, 20, 15}, + {0xff9f0000,0x9f7f7f5f,0x00bf009f,260, 15}, + {0xff800000,0x7f5f5f3f,0x9f000080, 20,255}, + {0xff600000,0x5f3f3f1f,0x80809f60,260,135}, + }, + { + /* This is the expected result with INTZ */ + {0xffffffff,0xffdfdfbf,0x202000ff, 20, 15}, + {0x9f9f9f9f,0x9f7f7f5f,0x00bf009f,260, 15}, + {0x7f7f7f7f,0x7f5f5f3f,0x9f000080, 20,255}, + {0x5f5f5f5f,0x5f3f3f1f,0x80809f60,260,135}, + } + }; + + static const struct + { + D3DFORMAT format; + const char *name; + UINT index; + } + depth_tests[] = + { + {D3DFMT_D16_LOCKABLE, "D16_LOCKABLE", 0}, + {D3DFMT_D32, "D32", 0}, + {D3DFMT_D15S1, "D15S1", 0}, + {D3DFMT_D24S8, "D24S8", 0}, + {D3DFMT_D24X8, "D24X8", 0}, + {D3DFMT_D24X4S4, "D24X4S4", 0}, + {D3DFMT_D16, "D16", 0}, + {D3DFMT_D32F_LOCKABLE, "D32F_LOCKABLE", 0}, + {D3DFMT_D24FS8, "D24FS8", 0}, + {MAKEFOURCC('D','F','1','6'), "DF16", 1}, + {MAKEFOURCC('D','F','2','4'), "DF24", 2}, + {MAKEFOURCC('I','N','T','Z'), "INTZ", 3}, + }; + + IDirect3DSurface9 *original_ds, *original_rt, *rt; + IDirect3DVolumeTexture9 *texture3D; + IDirect3DPixelShader9 *ps_3d; + struct surface_readback rb; + IDirect3DVertexShader9 *vs; + IDirect3DTexture9 *texture; + IDirect3DDevice9 *device; + D3DLOCKED_RECT lr; + D3DLOCKED_BOX lb; + IDirect3D9 *d3d; + ULONG refcount; + D3DCAPS9 caps; + UINT i, j, k; + HWND window; + HRESULT hr; + + window = create_window(); + d3d = Direct3DCreate9(D3D_SDK_VERSION); + ok(!!d3d, "Failed to create a D3D object.\n"); 
+ if (FAILED(IDirect3D9_CheckDeviceFormat(d3d, D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL, + D3DFMT_X8R8G8B8, D3DUSAGE_DEPTHSTENCIL, D3DRTYPE_TEXTURE, MAKEFOURCC('D','F','2','4')))) + { + skip("No DF24 support, skipping FETCH4 test.\n"); + goto done; + } + if (!(device = create_device(d3d, window, window, TRUE))) + { + skip("Failed to create a D3D device, skipping tests.\n"); + goto done; + } + + hr = IDirect3DDevice9_GetDeviceCaps(device, &caps); + ok(SUCCEEDED(hr), "GetDeviceCaps failed, hr %#x.\n", hr); + if (caps.PixelShaderVersion < D3DPS_VERSION(3, 0)) + { + skip("No pixel shader 3.0 support, skipping FETCH4 test.\n"); + IDirect3DDevice9_Release(device); + goto done; + } + hr = IDirect3DDevice9_GetRenderTarget(device, 0, &original_rt); + ok(SUCCEEDED(hr), "GetRenderTarget failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_GetDepthStencilSurface(device, &original_ds); + ok(SUCCEEDED(hr), "GetDepthStencilSurface failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_CreateRenderTarget(device, 8, 8, D3DFMT_A8R8G8B8, + D3DMULTISAMPLE_NONE, 0, FALSE, &rt, NULL); + ok(SUCCEEDED(hr), "CreateRenderTarget failed, hr %#x.\n", hr); + + /* Create our texture for FETCH4 shader testing */ + hr = IDirect3DDevice9_CreateTexture(device, 4, 4, 1, 0, D3DFMT_L8, D3DPOOL_MANAGED, &texture, NULL); + ok(hr == D3D_OK, "Failed to create texture, hr %#x.\n", hr); + hr = IDirect3DTexture9_LockRect(texture, 0, &lr, NULL, 0); + ok(hr == D3D_OK, "Failed to lock texture, hr %#x.\n", hr); + for (i = 0; i < ARRAY_SIZE(fetch4_data); ++i) + memcpy((BYTE *)lr.pBits + i*lr.Pitch, &fetch4_data[i], sizeof(fetch4_data[i])); + hr = IDirect3DTexture9_UnlockRect(texture, 0); + ok(hr == D3D_OK, "Failed to unlock texture, hr %#x.\n", hr); + + /* Create vertex shader */ + hr = IDirect3DDevice9_CreateVertexShader(device, vs_code, &vs); + ok(hr == D3D_OK, "IDirect3DDevice9_CreateVertexShader returned %08x\n", hr); + /* Prepare the pixel shaders */ + for (i = 0; i < ARRAY_SIZE(shaders); ++i) + { + if (shaders[i].ps_code) + 
{ + hr = IDirect3DDevice9_CreatePixelShader(device, shaders[i].ps_code, &shaders[i].ps); + ok(SUCCEEDED(hr), "CreatePixelShader failed, hr %#x.\n", hr); + /* Copy vertex shader pointer if a PS is present */ + shaders[i].vs = vs; + } + } + hr = IDirect3DDevice9_CreatePixelShader(device, ps_code_3d, &ps_3d); + ok(SUCCEEDED(hr), "CreatePixelShader failed, hr %#x.\n", hr); + + hr = IDirect3DDevice9_SetFVF(device, D3DFVF_XYZ | D3DFVF_TEX1 | D3DFVF_TEXCOORDSIZE3(0)); + ok(SUCCEEDED(hr), "SetFVF failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetRenderState(device, D3DRS_ZENABLE, D3DZB_TRUE); + ok(SUCCEEDED(hr), "SetRenderState failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetRenderState(device, D3DRS_ZFUNC, D3DCMP_ALWAYS); + ok(SUCCEEDED(hr), "SetRenderState failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetRenderState(device, D3DRS_ZWRITEENABLE, TRUE); + ok(SUCCEEDED(hr), "SetRenderState failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetRenderState(device, D3DRS_LIGHTING, FALSE); + ok(SUCCEEDED(hr), "SetRenderState failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetTexture(device, 0, (IDirect3DBaseTexture9 *)texture); + ok(hr == D3D_OK, "Failed to set texture, hr %#x.\n", hr); + + /* According to the spec, FETCH4 is enabled when D3DSAMP_MIPMAPLODBIAS == GET4 and also + * D3DSAMP_MAGFILTER == D3DTEXF_POINT. But apparently only GET4 is needed for it to get active. + * However, AMD HW r500 samples always as if POINT (nearest filtering) is selected with FETCH4 + * the driver later on corrected this by adding -0.5 texel coord. 
*/ + hr = IDirect3DDevice9_SetSamplerState(device, 0, D3DSAMP_MIPMAPLODBIAS, MAKEFOURCC('G','E','T','4')); + ok(SUCCEEDED(hr), "SetSamplerState failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetSamplerState(device, 0, D3DSAMP_MAGFILTER, D3DTEXF_POINT); + ok(SUCCEEDED(hr), "SetSamplerState failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetSamplerState(device, 0, D3DSAMP_MINFILTER, D3DTEXF_POINT); + ok(SUCCEEDED(hr), "SetSamplerState failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetSamplerState(device, 0, D3DSAMP_MIPFILTER, D3DTEXF_POINT); + ok(SUCCEEDED(hr), "SetSamplerState failed, hr %#x.\n", hr); + + /*********************************************************************** + * Tests for FFP/PS correctness when using L8 texture with fetch4. * + ***********************************************************************/ + + /* Render with fetch4 and test if we obtain proper results for all sampler FFP/PS instructions */ + for (i = 0; i < ARRAY_SIZE(shaders); ++i) + { + hr = IDirect3DDevice9_SetVertexShader(device, shaders[i].vs); + ok(SUCCEEDED(hr), "Failed to set vertex shader, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetPixelShader(device, shaders[i].ps); + ok(SUCCEEDED(hr), "SetPixelShader failed, hr %#x.\n", hr); + + hr = IDirect3DDevice9_Clear(device, 0, NULL, D3DCLEAR_TARGET | D3DCLEAR_ZBUFFER, 0, 0.0f, 0); + ok(SUCCEEDED(hr), "Clear failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_BeginScene(device); + ok(SUCCEEDED(hr), "BeginScene failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_DrawPrimitiveUP(device, D3DPT_TRIANGLESTRIP, 2, quad, sizeof(*quad)); + ok(SUCCEEDED(hr), "DrawPrimitiveUP failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_EndScene(device); + ok(SUCCEEDED(hr), "EndScene failed, hr %#x.\n", hr); + + get_rt_readback(original_rt, &rb); + for (j = 0; j < ARRAY_SIZE(expected_colors.color_amd); ++j) + { + UINT x = expected_colors.x[j % 4]; + UINT y = expected_colors.y[j / 4]; + D3DCOLOR color = get_readback_color(&rb, x, y); + D3DCOLOR color_amd = 
expected_colors.color_amd[shaders[i].projection ? (j/4/2*4 + (j%4)/2) : j]; + if (shaders[i].swizzled) + color_amd = ((color_amd << 8) & 0xffffff00) + ((color_amd >> 24) & 0x000000ff); + D3DCOLOR color_intel = expected_colors.color_intel[j]; + D3DCOLOR color_off = expected_colors.color_intel[j]; + /* In some AMD cards (HD5700) texldd/texldl have fetch4 off on windows */ + ok(color_match(color, color_amd, 1) || broken(color_match(color, color_intel, 1)) + || broken(color_match(color, color_off, 1)), + "Test %s Expected color 0x%08x at (%u, %u), got 0x%08x.\n", shaders[i].name, + color_amd, x, y, color); + } + release_surface_readback(&rb); + + hr = IDirect3DDevice9_Present(device, NULL, NULL, NULL, NULL); + ok(SUCCEEDED(hr), "Present failed, hr %#x.\n", hr); + } + + /*************************************************************************** + * Tests for fetch4 enable/disable with different texture formats in FFP/PS. * + ***************************************************************************/ + + /* Create the textures to test FETCH4 does work/not work there as expected */ + for (i = 0; i < ARRAY_SIZE(format_tests); ++i) + { + IDirect3DTexture9 *tex; + hr = IDirect3DDevice9_CreateTexture(device, format_tests[i].w, format_tests[i].h, + 1, 0, format_tests[i].format, D3DPOOL_MANAGED, &tex, NULL); + ok(hr == D3D_OK, "Failed to create texture, hr %#x.\n", hr); + hr = IDirect3DTexture9_LockRect(tex, 0, &lr, NULL, 0); + ok(hr == D3D_OK, "Failed to lock texture, hr %#x.\n", hr); + memcpy(lr.pBits, &format_tests[i].data, 4); + hr = IDirect3DTexture9_UnlockRect(tex, 0); + ok(hr == D3D_OK, "Failed to unlock texture, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetTexture(device, 0, (IDirect3DBaseTexture9 *)tex); + ok(hr == D3D_OK, "Failed to set texture, hr %#x.\n", hr); + + /* Test if FETCH4 is enabled/disabled when different textures are used with FFP/texld/texldp */ + for (j = 0; j < ARRAY_SIZE(format_tests[i].color_amd); ++j) + { + hr = 
IDirect3DDevice9_SetVertexShader(device, shaders[j].vs); + ok(SUCCEEDED(hr), "Failed to set vertex shader, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetPixelShader(device, shaders[j].ps); + ok(SUCCEEDED(hr), "SetPixelShader failed, hr %#x.\n", hr); + + hr = IDirect3DDevice9_Clear(device, 0, NULL, D3DCLEAR_TARGET | D3DCLEAR_ZBUFFER, 0, 0.0f, 0); + ok(SUCCEEDED(hr), "Clear failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_BeginScene(device); + ok(SUCCEEDED(hr), "BeginScene failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_DrawPrimitiveUP(device, D3DPT_TRIANGLESTRIP, 2, quad, sizeof(*quad)); + ok(SUCCEEDED(hr), "DrawPrimitiveUP failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_EndScene(device); + ok(SUCCEEDED(hr), "EndScene failed, hr %#x.\n", hr); + + get_rt_readback(original_rt, &rb); + D3DCOLOR color = get_readback_color(&rb, format_tests[i].x, format_tests[i].y); + D3DCOLOR color_amd = format_tests[i].color_amd[j]; + D3DCOLOR color_intel = format_tests[i].color_intel[j]; + /* On windows just test the R channel, since G/B might be 0xff or 0x00 */ + todo_wine_if(format_tests[i].todo_in_wine) ok(color_match(color, color_amd, 1) + || broken(color_match(color, color_intel, 1)) + || broken(color_match(color & 0x00ff0000, color_amd & 0x00ff0000, 1)), + "Test %s on %s expected color 0x%08x at (%u, %u), got 0x%08x.\n", format_tests[i].name, + shaders[j].name, color_amd, format_tests[i].x, format_tests[i].y, color); + release_surface_readback(&rb); + + hr = IDirect3DDevice9_Present(device, NULL, NULL, NULL, NULL); + ok(SUCCEEDED(hr), "Present failed, hr %#x.\n", hr); + } + IDirect3DTexture9_Release(tex); + } + + /************************************************** + * Tests that fetch4 works with 3D textures. 
* + **************************************************/ + + /* Create volume (3D) texture */ + IDirect3DDevice9_CreateVolumeTexture(device, 4, 4, 2, 1, 0, D3DFMT_L8, D3DPOOL_MANAGED, &texture3D, NULL ); + ok(hr == D3D_OK, "Failed to create volume texture, hr %#x.\n", hr); + hr = IDirect3DVolumeTexture9_LockBox(texture3D, 0, &lb, NULL, 0); + ok(hr == D3D_OK, "Failed to lock texture3D, hr %#x.\n", hr); + for (i = 0; i < ARRAY_SIZE(fetch4_data); ++i) + { + memcpy((BYTE *)lb.pBits + i*lb.RowPitch, &fetch4_data[i], sizeof(fetch4_data[i])); + /* Shift the lower level, to keep it different */ + memcpy((BYTE *)lb.pBits + i*lb.RowPitch + lb.SlicePitch, &fetch4_data[(i+1)%4], sizeof(fetch4_data[i])); + } + hr = IDirect3DVolumeTexture9_UnlockBox(texture3D, 0); + ok(hr == D3D_OK, "Failed to unlock texture3D, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetTexture(device, 0, (IDirect3DBaseTexture9 *)texture3D); + ok(hr == D3D_OK, "Failed to set texture3D, hr %#x.\n", hr); + + /* Test FFP and texld with dcl_volume (ps_3d) */ + for (i = 0; i < 2; ++i) + { + hr = IDirect3DDevice9_SetVertexShader(device, i ? vs : NULL); + ok(SUCCEEDED(hr), "Failed to set vertex shader, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetPixelShader(device, i ? 
ps_3d : NULL); + ok(SUCCEEDED(hr), "SetPixelShader failed, hr %#x.\n", hr); + + hr = IDirect3DDevice9_Clear(device, 0, NULL, D3DCLEAR_TARGET | D3DCLEAR_ZBUFFER, 0, 0.0f, 0); + ok(SUCCEEDED(hr), "Clear failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_BeginScene(device); + ok(SUCCEEDED(hr), "BeginScene failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_DrawPrimitiveUP(device, D3DPT_TRIANGLESTRIP, 2, quad, sizeof(*quad)); + ok(SUCCEEDED(hr), "DrawPrimitiveUP failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_EndScene(device); + ok(SUCCEEDED(hr), "EndScene failed, hr %#x.\n", hr); + get_rt_readback(original_rt, &rb); + for (j = 0; j < ARRAY_SIZE(expected_colors.color_amd); ++j) + { + UINT x = expected_colors.x[j % 4]; + UINT y = expected_colors.y[j / 4]; + D3DCOLOR color = get_readback_color(&rb, x, y); + D3DCOLOR color_amd = expected_colors.color_amd[j]; + D3DCOLOR color_intel = expected_colors.color_intel[j]; + D3DCOLOR color_off = expected_colors.color_3d_fetch4_off[j]; + D3DCOLOR color_zround = expected_colors.color_amd[(j+4) % ARRAY_SIZE(expected_colors.color_amd)]; + /* Note: Fetch4 on 3D textures have different results based on the vendor/driver + * - AMD "HD 5700" rounds to nearest "z" texel, and does fetch4 normally on .xy + * - AMD "R500" has fetch4 disabled + * - AMD "R580" has fetch4 enabled sampling at .xy0 + * - Intel UHD 620 sample with fetch4 at .xy0 + * Currently unimplemented on wine due to lack of GL functionality to cast 3D->2DArray + * Wine produces same results as if fetch4 is not enabled (which probably is better) + * Test will pass on windows if either one of the allowed results is returned */ + ok(color_match(color, color_off, 2) + || broken(color_match(color, color_zround, 2) || color_match(color, color_off, 2) + || color_match(color, color_intel, 2) || color_match(color, color_amd, 2)), + "Test 3D %s Expected color 0x%08x at (%u, %u), got 0x%08x.\n", shaders[i].name, + color_off, x, y, color); + } + release_surface_readback(&rb); + hr = 
IDirect3DDevice9_Present(device, NULL, NULL, NULL, NULL); + ok(SUCCEEDED(hr), "Present failed, hr %#x.\n", hr); + } + + /******************************************************** + * Tests for fetch4 enable/disable with depth textures. * + ********************************************************/ + + for (i = 0; i < ARRAY_SIZE(depth_tests); ++i) + { + D3DFORMAT format = depth_tests[i].format; + IDirect3DTexture9 *depth_texture; + IDirect3DSurface9 *ds; + + if (FAILED(IDirect3D9_CheckDeviceFormat(d3d, D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL, + D3DFMT_X8R8G8B8, D3DUSAGE_DEPTHSTENCIL, D3DRTYPE_TEXTURE, format))) + { + skip("Skipping %s depth test, unsuported format.\n", depth_tests[i].name); + continue; + } + + hr = IDirect3DDevice9_CreateTexture(device, 8, 8, 1, + D3DUSAGE_DEPTHSTENCIL, format, D3DPOOL_DEFAULT, &depth_texture, NULL); + ok(SUCCEEDED(hr), "CreateTexture failed, hr %#x.\n", hr); + hr = IDirect3DTexture9_GetSurfaceLevel(depth_texture, 0, &ds); + ok(SUCCEEDED(hr), "GetSurfaceLevel failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetDepthStencilSurface(device, ds); + ok(SUCCEEDED(hr), "SetDepthStencilSurface failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetRenderTarget(device, 0, rt); + ok(SUCCEEDED(hr), "SetRenderTarget failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetVertexShader(device, NULL); + ok(SUCCEEDED(hr), "Failed to set vertex shader, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetPixelShader(device, NULL); + ok(SUCCEEDED(hr), "SetPixelShader failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetTexture(device, 0, (IDirect3DBaseTexture9 *)texture); + ok(hr == D3D_OK, "Failed to set texture3D, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetSamplerState(device, 0, + D3DSAMP_MIPMAPLODBIAS, MAKEFOURCC('G','E','T','1')); + ok(SUCCEEDED(hr), "SetSamplerState failed, hr %#x.\n", hr); + + /* Setup the depth/stencil surface. 
*/ + hr = IDirect3DDevice9_Clear(device, 0, NULL, D3DCLEAR_ZBUFFER, 0, 0.0f, 0); + ok(SUCCEEDED(hr), "Clear failed, hr %#x.\n", hr); + + /* Render to the depth surface */ + hr = IDirect3DDevice9_BeginScene(device); + ok(SUCCEEDED(hr), "BeginScene failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_DrawPrimitiveUP(device, D3DPT_TRIANGLESTRIP, 2, quad, sizeof(*quad)); + ok(SUCCEEDED(hr), "DrawPrimitiveUP failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_EndScene(device); + ok(SUCCEEDED(hr), "EndScene failed, hr %#x.\n", hr); + + hr = IDirect3DDevice9_SetDepthStencilSurface(device, NULL); + ok(SUCCEEDED(hr), "SetDepthStencilSurface failed, hr %#x.\n", hr); + IDirect3DSurface9_Release(ds); + hr = IDirect3DDevice9_SetRenderTarget(device, 0, original_rt); + ok(SUCCEEDED(hr), "SetRenderTarget failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetTexture(device, 0, (IDirect3DBaseTexture9 *)depth_texture); + ok(SUCCEEDED(hr), "SetTexture failed, hr %#x.\n", hr); + + /* Set a shader for depth sampling, otherwise windows does not show anything */ + hr = IDirect3DDevice9_SetVertexShader(device, vs); + ok(SUCCEEDED(hr), "Failed to set vertex shader, hr %#x.\n", hr); + hr = IDirect3DDevice9_SetPixelShader(device, shaders[1].ps); /* same as texld */ + ok(SUCCEEDED(hr), "SetPixelShader failed, hr %#x.\n", hr); + + for (j = 0; j < 2; ++j){ + hr = IDirect3DDevice9_SetSamplerState(device, 0, + D3DSAMP_MIPMAPLODBIAS, MAKEFOURCC('G','E','T', j ? '4' : '1' )); + ok(SUCCEEDED(hr), "SetSamplerState failed, hr %#x.\n", hr); + + /* Do the actual shadow mapping. 
*/ + hr = IDirect3DDevice9_BeginScene(device); + ok(SUCCEEDED(hr), "BeginScene failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_DrawPrimitiveUP(device, D3DPT_TRIANGLESTRIP, 2, quad, sizeof(*quad)); + ok(SUCCEEDED(hr), "DrawPrimitiveUP failed, hr %#x.\n", hr); + hr = IDirect3DDevice9_EndScene(device); + ok(SUCCEEDED(hr), "EndScene failed, hr %#x.\n", hr); + + get_rt_readback(original_rt, &rb); + for (k = 0; k < ARRAY_SIZE(expected_depth[depth_tests[i].index]); ++k) + { + UINT x = expected_depth[depth_tests[i].index][k].x; + UINT y = expected_depth[depth_tests[i].index][k].y; + D3DCOLOR color_off = expected_depth[depth_tests[i].index][k].color_off; + D3DCOLOR color_amd = expected_depth[depth_tests[i].index][k].color_amd; + D3DCOLOR color_intel = expected_depth[depth_tests[i].index][k].color_intel; + D3DCOLOR color = get_readback_color(&rb, x, y); + /* When Fetch4 is OFF, ignore G and B channels on windows. + * Some implementations will copy R=G=B, some will set them to 0 */ + if(j == 0) + ok(color_match(color, color_off, 2) + || broken(color_match(color & 0x00ff0000, color_off & 0x00ff0000, 2)), + "Test OFF Expected color 0x%08x at (%u, %u) for format %s, got 0x%08x.\n", + color_off, x, y, depth_tests[i].name, color); + else + ok(color_match(color, color_amd, 2) + || broken(color_match(color, color_intel, 2)), + "Test ON Expected colors 0x%08x at (%u, %u) for format %s, got 0x%08x.\n", + color_amd, x, y, depth_tests[i].name, color); + } + release_surface_readback(&rb); + + hr = IDirect3DDevice9_Present(device, NULL, NULL, NULL, NULL); + ok(SUCCEEDED(hr), "Present failed, hr %#x.\n", hr); + } + + hr = IDirect3DDevice9_SetTexture(device, 0, NULL); + ok(SUCCEEDED(hr), "SetTexture failed, hr %#x.\n", hr); + IDirect3DTexture9_Release(depth_texture); + } + + IDirect3DVolumeTexture9_Release(texture3D); + IDirect3DTexture9_Release(texture); + for (i = 0; i < ARRAY_SIZE(shaders); ++i) + if (shaders[i].ps) + IDirect3DPixelShader9_Release(shaders[i].ps); + 
IDirect3DPixelShader9_Release(ps_3d); + IDirect3DVertexShader9_Release(vs); + IDirect3DSurface9_Release(rt); + IDirect3DSurface9_Release(original_ds); + IDirect3DSurface9_Release(original_rt); + refcount = IDirect3DDevice9_Release(device); + ok(!refcount, "Device has %u references left.\n", refcount); +done: + IDirect3D9_Release(d3d); + DestroyWindow(window); +} + static void shadow_test(void) { static const DWORD ps_code[] = @@ -24660,6 +25338,7 @@ START_TEST(visual) depth_buffer2_test(); depth_blit_test(); intz_test(); + fetch4_test(); shadow_test(); fp_special_test(); depth_bounds_test();
- Add a format flag to indicate FETCH4 support in textures - The implementation follows the AMD implementation: the result is swizzled, projection is allowed, and a 0.5 texel offset is added
Signed-off-by: Daniel Ansorregui [email protected] --- dlls/wined3d/glsl_shader.c | 43 ++++++++++++++++++++++++++++------ dlls/wined3d/utils.c | 27 +++++++++++++++++++++ dlls/wined3d/wined3d_private.h | 4 +++- 3 files changed, 66 insertions(+), 8 deletions(-)
diff --git a/dlls/wined3d/glsl_shader.c b/dlls/wined3d/glsl_shader.c index 3298a604fd..1950db06a9 100644 --- a/dlls/wined3d/glsl_shader.c +++ b/dlls/wined3d/glsl_shader.c @@ -9711,6 +9711,8 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * shader_addline(buffer, "#extension GL_ARB_shading_language_420pack : enable\n"); if (gl_info->supported[ARB_TEXTURE_RECTANGLE]) shader_addline(buffer, "#extension GL_ARB_texture_rectangle : enable\n"); + if (gl_info->supported[ARB_TEXTURE_GATHER]) + shader_addline(buffer, "#extension GL_ARB_texture_gather : enable\n");
if (!needs_legacy_glsl_syntax(gl_info)) { @@ -9851,6 +9853,9 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * for (stage = 0; stage < MAX_TEXTURES && settings->op[stage].cop != WINED3D_TOP_DISABLE; ++stage) { const char *texture_function, *coord_mask; + struct wined3d_string_buffer offset; + BOOL fetch4 = settings->op[stage].fetch4; + BOOL fetch4_proj = FALSE; BOOL proj;
if (!(tex_map & (1u << stage))) @@ -9870,7 +9875,6 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * FIXME("Unexpected projection mode %d\n", settings->op[stage].projected); proj = TRUE; } - if (settings->op[stage].tex_type == WINED3D_GL_RES_TYPE_TEX_CUBE) proj = FALSE;
@@ -9879,6 +9883,7 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * case WINED3D_GL_RES_TYPE_TEX_1D: texture_function = "texture1D"; coord_mask = "x"; + fetch4 = FALSE; break; case WINED3D_GL_RES_TYPE_TEX_2D: texture_function = "texture2D"; @@ -9887,6 +9892,9 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * case WINED3D_GL_RES_TYPE_TEX_3D: texture_function = "texture3D"; coord_mask = "xyz"; + if (fetch4) + FIXME("Unsupported Fetch4 and texture3D sampling"); + fetch4 = FALSE; break; case WINED3D_GL_RES_TYPE_TEX_CUBE: texture_function = "textureCube"; @@ -9901,11 +9909,24 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * texture_function = ""; coord_mask = "xyzw"; proj = FALSE; + fetch4 = FALSE; break; } if (!legacy_syntax) texture_function = "texture";
+ string_buffer_init(&offset); + if (fetch4) + { + texture_function = "textureGather"; + /* Apply a 0.5 texel offset as in AMD implementation */ + shader_addline(&offset, " + (vec2(0.5) / textureSize(ps_sampler%u, 0).xy)", stage); + + /* When projection is needed on fetch4 we have to apply it manually by dividing .w */ + fetch4_proj = proj; + proj = FALSE; + } + if (stage > 0 && (settings->op[stage - 1].cop == WINED3D_TOP_BUMPENVMAP || settings->op[stage - 1].cop == WINED3D_TOP_BUMPENVMAP_LUMINANCE)) @@ -9936,8 +9957,8 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * shader_addline(buffer, "ret = ffp_texcoord[%u] + ret.xyxy;\n", stage); }
- shader_addline(buffer, "tex%u = %s%s(ps_sampler%u, ret.%s%s);\n", - stage, texture_function, proj ? "Proj" : "", stage, coord_mask, proj ? "w" : ""); + shader_addline(buffer, "tex%u = %s%s(ps_sampler%u, ret.%s%s%s%s);\n", stage, texture_function, + proj ? "Proj" : "", stage, coord_mask, proj ? "w" : "", fetch4_proj ? " / ret.w" : "", offset.buffer);
if (settings->op[stage - 1].cop == WINED3D_TOP_BUMPENVMAP_LUMINANCE) shader_addline(buffer, "tex%u *= clamp(tex%u.z * bumpenv_lum_scale%u + bumpenv_lum_offset%u, 0.0, 1.0);\n", @@ -9945,14 +9966,22 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * } else if (settings->op[stage].projected == WINED3D_PROJECTION_COUNT3) { - shader_addline(buffer, "tex%u = %s%s(ps_sampler%u, ffp_texcoord[%u].xyz);\n", - stage, texture_function, proj ? "Proj" : "", stage, stage); + shader_addline(buffer, "tex%u = %s%s(ps_sampler%u, ffp_texcoord[%u].xyz%s);\n", stage, + texture_function, proj ? "Proj" : "", stage, stage, offset.buffer); } else { - shader_addline(buffer, "tex%u = %s%s(ps_sampler%u, ffp_texcoord[%u].%s%s);\n", - stage, texture_function, proj ? "Proj" : "", stage, stage, coord_mask, proj ? "w" : ""); + shader_addline(buffer, "tex%u = %s%s(ps_sampler%u, ffp_texcoord[%u].%s%s", stage, + texture_function, proj ? "Proj" : "", stage, stage, coord_mask, proj ? "w" : ""); + if (fetch4_proj) + shader_addline(buffer, " / ffp_texcoord[%u].w", stage); + shader_addline(buffer, "%s);\n", offset.buffer); } + string_buffer_clear(&offset); + + /* Match FETCH4 swizzle with textureGather swizzle */ + if (fetch4) + shader_addline(buffer, "tex%u = tex%u.zxyw;\n", stage, stage);
string_buffer_sprintf(tex_reg_name, "tex%u", stage); shader_glsl_color_correction_ext(buffer, tex_reg_name->buffer, WINED3DSP_WRITEMASK_ALL, diff --git a/dlls/wined3d/utils.c b/dlls/wined3d/utils.c index 7b42202213..b0cae022c6 100644 --- a/dlls/wined3d/utils.c +++ b/dlls/wined3d/utils.c @@ -342,6 +342,19 @@ static const struct wined3d_format_base_flags format_base_flags[] = {WINED3DFMT_RESZ, WINED3DFMT_FLAG_EXTENSION}, };
+/* List of textures were fetch4 can be enabled. + * Only available if ARB_TEXTURE_GATHER extension is present */ +static const enum wined3d_format_id wined3d_format_fetch4_enabled[] = +{ + WINED3DFMT_L8_UNORM, + WINED3DFMT_L16_UNORM, + WINED3DFMT_R16_FLOAT, + WINED3DFMT_R16, + WINED3DFMT_R32_FLOAT, + WINED3DFMT_A8_UNORM, + WINED3DFMT_INTZ, +}; + static void rgb888_from_rgb565(WORD rgb565, BYTE *r, BYTE *g, BYTE *b) { BYTE c; @@ -2120,6 +2133,15 @@ static BOOL init_format_base_info(struct wined3d_adapter *adapter) format_set_flag(format, format_base_flags[i].flags); }
+ if (adapter->gl_info.supported[ARB_TEXTURE_GATHER]) + for (i = 0; i < ARRAY_SIZE(wined3d_format_fetch4_enabled); ++i) + { + if (!(format = get_format_internal(adapter, wined3d_format_fetch4_enabled[i]))) + return FALSE; + + format_set_flag(format, WINED3DFMT_FLAG_ALLOW_FETCH4); + } + return TRUE; }
@@ -5780,6 +5802,7 @@ void gen_ffp_frag_op(const struct wined3d_context *context, const struct wined3d settings->op[i].tmp_dst = 0; settings->op[i].tex_type = WINED3D_GL_RES_TYPE_TEX_1D; settings->op[i].projected = WINED3D_PROJECTION_NONE; + settings->op[i].fetch4 = FALSE; i++; break; } @@ -5923,6 +5946,10 @@ void gen_ffp_frag_op(const struct wined3d_context *context, const struct wined3d settings->op[i].aarg1 = aarg1; settings->op[i].aarg2 = aarg2; settings->op[i].tmp_dst = state->texture_states[i][WINED3D_TSS_RESULT_ARG] == WINED3DTA_TEMP; + settings->op[i].fetch4 = (state->textures[i] + && state->sampler_states[i][WINED3D_SAMP_MIPMAP_LOD_BIAS] == MAKEFOURCC('G','E','T','4') + && state->textures[i]->resource.format_flags & WINED3DFMT_FLAG_ALLOW_FETCH4 + && settings->op[i].tex_type & (WINED3D_GL_RES_TYPE_TEX_2D | WINED3D_GL_RES_TYPE_TEX_RECT)); }
/* Clear unsupported stages */ diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h index 1e3ec28d6b..4224461142 100644 --- a/dlls/wined3d/wined3d_private.h +++ b/dlls/wined3d/wined3d_private.h @@ -2747,7 +2747,8 @@ struct texture_stage_op unsigned tex_type : 3; unsigned tmp_dst : 1; unsigned projected : 2; - unsigned padding : 10; + unsigned fetch4 : 1; + unsigned padding : 9; };
struct ffp_frag_settings @@ -4434,6 +4435,7 @@ extern enum wined3d_format_id pixelformat_for_depth(DWORD depth) DECLSPEC_HIDDEN #define WINED3DFMT_FLAG_VERTEX_ATTRIBUTE 0x01000000 #define WINED3DFMT_FLAG_BLIT 0x02000000 #define WINED3DFMT_FLAG_MAPPABLE 0x04000000 +#define WINED3DFMT_FLAG_ALLOW_FETCH4 0x08000000
struct wined3d_rational {
Am 11.02.2019 um 14:48 schrieb Daniel Ansorregui [email protected]:
@@ -9879,6 +9883,7 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * case WINED3D_GL_RES_TYPE_TEX_1D: texture_function = "texture1D"; coord_mask = "x";
fetch4 = FALSE; break; case WINED3D_GL_RES_TYPE_TEX_2D: texture_function = "texture2D";
@@ -9887,6 +9892,9 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * case WINED3D_GL_RES_TYPE_TEX_3D: texture_function = "texture3D"; coord_mask = "xyz";
if (fetch4)
FIXME("Unsupported Fetch4 and texture3D sampling");
fetch4 = FALSE; break; case WINED3D_GL_RES_TYPE_TEX_CUBE: texture_function = "textureCube";
@@ -9901,11 +9909,24 @@ static GLuint shader_glsl_generate_ffp_fragment_shader(struct shader_glsl_priv * texture_function = ""; coord_mask = "xyzw"; proj = FALSE;
fetch4 = FALSE;
I think this is dead code, now that you are filtering for WINED3D_GL_RES_TYPE_TEX_2D or WINED3D_GL_RES_TYPE_TEX_RECT in gen_ffp_frag_op?
else if (settings->op[stage].projected == WINED3D_PROJECTION_COUNT3) {
shader_addline(buffer, "tex%u = %s%s(ps_sampler%u, ffp_texcoord[%u].xyz);\n",
stage, texture_function, proj ? "Proj" : "", stage, stage);
shader_addline(buffer, "tex%u = %s%s(ps_sampler%u, ffp_texcoord[%u].xyz%s);\n", stage,
texture_function, proj ? "Proj" : "", stage, stage, offset.buffer); } else {
shader_addline(buffer, "tex%u = %s%s(ps_sampler%u, ffp_texcoord[%u].%s%s);\n",
stage, texture_function, proj ? "Proj" : "", stage, stage, coord_mask, proj ? "w" : "");
shader_addline(buffer, "tex%u = %s%s(ps_sampler%u, ffp_texcoord[%u].%s%s", stage,
texture_function, proj ? "Proj" : "", stage, stage, coord_mask, proj ? "w" : "");
if (fetch4_proj)
shader_addline(buffer, " / ffp_texcoord[%u].w", stage);
shader_addline(buffer, "%s);\n", offset.buffer);
Why don't you have to do the manual divide in the WINED3D_PROJECTION_COUNT3 case too (in that case, dividing by .z)? Your code may be right, I am not 100% sure about it.
- Implement shader generation for texld/texldp/texldd/texldb/texldl - FIXME: Vertex texldl is not implemented yet, since it is not possible to access ps_compile_args. Maybe move it to another place. It probably does not work on Windows anyway - Trigger PS re-generation on FETCH4 state change, by storing a context flag - Add a ps_compile_args flag for fetch4
Signed-off-by: Daniel Ansorregui [email protected] --- dlls/wined3d/cs.c | 5 ++++- dlls/wined3d/device.c | 4 ++++ dlls/wined3d/glsl_shader.c | 41 +++++++++++++++++++++++++++++++--- dlls/wined3d/shader.c | 10 +++++++++ dlls/wined3d/state.c | 12 ++++++++++ dlls/wined3d/wined3d_private.h | 15 +++++++++++-- 6 files changed, 81 insertions(+), 6 deletions(-)
diff --git a/dlls/wined3d/cs.c b/dlls/wined3d/cs.c index 7471d24e7e..c97083ab8e 100644 --- a/dlls/wined3d/cs.c +++ b/dlls/wined3d/cs.c @@ -1372,7 +1372,10 @@ static void wined3d_cs_exec_set_texture(struct wined3d_cs *cs, const void *data) if (!prev || wined3d_texture_gl(op->texture)->target != wined3d_texture_gl(prev)->target || (!is_same_fixup(new_format->color_fixup, old_format->color_fixup) && !(can_use_texture_swizzle(gl_info, new_format) && can_use_texture_swizzle(gl_info, old_format))) - || (new_fmt_flags & WINED3DFMT_FLAG_SHADOW) != (old_fmt_flags & WINED3DFMT_FLAG_SHADOW)) + || (new_fmt_flags & WINED3DFMT_FLAG_SHADOW) != (old_fmt_flags & WINED3DFMT_FLAG_SHADOW) + || ((new_fmt_flags & WINED3DFMT_FLAG_ALLOW_FETCH4) != (old_fmt_flags & WINED3DFMT_FLAG_ALLOW_FETCH4) + && cs->state.sampler_states[op->texture->sampler][WINED3D_SAMP_MIPMAP_LOD_BIAS] + == MAKEFOURCC('G','E','T','4'))) device_invalidate_state(cs->device, STATE_SHADER(WINED3D_SHADER_TYPE_PIXEL));
if (!prev && op->stage < d3d_info->limits.ffp_blend_stages) diff --git a/dlls/wined3d/device.c b/dlls/wined3d/device.c index dee99dcde2..99248ffb1b 100644 --- a/dlls/wined3d/device.c +++ b/dlls/wined3d/device.c @@ -2082,7 +2082,11 @@ void CDECL wined3d_device_set_sampler_state(struct wined3d_device *device, device, sampler_idx, debug_d3dsamplerstate(state), value);
if (sampler_idx >= WINED3DVERTEXTEXTURESAMPLER0 && sampler_idx <= WINED3DVERTEXTEXTURESAMPLER3) + { sampler_idx -= (WINED3DVERTEXTEXTURESAMPLER0 - MAX_FRAGMENT_SAMPLERS); + if (state == WINED3D_SAMP_MIPMAP_LOD_BIAS && value == MAKEFOURCC('G','E','T','4')) + FIXME("Unsupported FETCH4 and Vertex Texture Sampler"); + }
if (sampler_idx >= ARRAY_SIZE(device->state.sampler_states)) { diff --git a/dlls/wined3d/glsl_shader.c b/dlls/wined3d/glsl_shader.c index 1950db06a9..074f9f0ab7 100644 --- a/dlls/wined3d/glsl_shader.c +++ b/dlls/wined3d/glsl_shader.c @@ -104,6 +104,8 @@ struct glsl_sample_function enum wined3d_data_type data_type; BOOL output_single_component; unsigned int offset_size; + BOOL fetch4_enabled; + BOOL fetch4_projected; };
enum heap_node_op @@ -3613,6 +3615,7 @@ static void shader_glsl_get_sample_function(const struct wined3d_shader_context BOOL lod = flags & WINED3D_GLSL_SAMPLE_LOD; BOOL grad = flags & WINED3D_GLSL_SAMPLE_GRAD; BOOL offset = flags & WINED3D_GLSL_SAMPLE_OFFSET; + BOOL gather = !shadow && priv->cur_ps_args && priv->cur_ps_args->fetch4 & (1u << sampler_idx); const char *base = "texture", *type_part = "", *suffix = ""; unsigned int coord_size, deriv_size;
@@ -3658,6 +3661,16 @@ static void shader_glsl_get_sample_function(const struct wined3d_shader_context type_part = ""; }
+ sample_function->fetch4_projected = projected; + sample_function->fetch4_enabled = gather; + if (gather) + { + base = "texture"; + type_part = "Gather"; + suffix = ""; + projected = lod = grad = offset = FALSE; + } + sample_function->name = string_buffer_get(priv->string_buffers); string_buffer_sprintf(sample_function->name, "%s%s%s%s%s%s", base, type_part, projected ? "Proj" : "", lod ? "Lod" : grad ? "Grad" : "", offset ? "Offset" : "", suffix); @@ -3792,13 +3805,18 @@ static void PRINTF_ATTR(9, 10) shader_glsl_gen_sample_code(const struct wined3d_ const char *coord_reg_fmt, ...) { const struct wined3d_shader_version *version = &ins->ctx->reg_maps->shader_version; + const struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data; + DWORD merged_swizzle = swizzle; char dst_swizzle[6]; struct color_fixup_desc fixup; BOOL np2_fixup = FALSE; va_list args; int ret;
- shader_glsl_swizzle_to_str(swizzle, FALSE, ins->dst[0].write_mask, dst_swizzle); + /* Merge swizzle requested with the fetch4 swizzle */ + if (sample_function->fetch4_enabled) + merged_swizzle = wined3d_merge_swizzle(WINED3DSP_FETCH4_SWIZZLE, swizzle); + shader_glsl_swizzle_to_str(merged_swizzle, FALSE, ins->dst[0].write_mask, dst_swizzle);
/* If ARB_texture_swizzle is supported we don't need to do anything here. * We actually rely on it for vertex shaders and SM4+. */ @@ -3836,7 +3854,6 @@ static void PRINTF_ATTR(9, 10) shader_glsl_gen_sample_code(const struct wined3d_
if (np2_fixup) { - const struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data; const unsigned char idx = priv->cur_np2fixup_info->idx[sampler_bind_idx];
switch (shader_glsl_get_write_mask_size(sample_function->coord_mask)) @@ -3859,7 +3876,21 @@ static void PRINTF_ATTR(9, 10) shader_glsl_gen_sample_code(const struct wined3d_ break; } } - if (dx && dy) + if (sample_function->fetch4_enabled) + { + if (sample_function->fetch4_projected) + { + struct wined3d_string_buffer *reg_name = string_buffer_get(priv->string_buffers); + shader_glsl_get_register_name(&ins->src[0].reg, ins->src[0].reg.data_type, reg_name, NULL, ins->ctx); + shader_addline(ins->ctx->buffer, " / %s.w", reg_name->buffer); + string_buffer_release(priv->string_buffers, reg_name); + } + + /* Correct the fetch4 0.5 texel offset */ + shader_addline(ins->ctx->buffer, " + (vec2(0.5) / textureSize(%s_sampler%u, 0).xy)", + shader_glsl_get_prefix(version->type), sampler_bind_idx); + } + else if (dx && dy) shader_addline(ins->ctx->buffer, ", %s, %s", dx, dy); else if (bias) shader_addline(ins->ctx->buffer, ", %s", bias); @@ -5397,6 +5428,10 @@ static void shader_glsl_tex(const struct wined3d_shader_instruction *ins) } }
+ /* When fetch4 is active, projection is done manually */ + if (priv->cur_ps_args->fetch4 & (1u << resource_idx)) + mask = 0; + shader_glsl_get_sample_function(ins->ctx, resource_idx, resource_idx, sample_flags, &sample_function); mask |= sample_function.coord_mask; sample_function.coord_mask = mask; diff --git a/dlls/wined3d/shader.c b/dlls/wined3d/shader.c index e11a37cf07..ede51cfa7d 100644 --- a/dlls/wined3d/shader.c +++ b/dlls/wined3d/shader.c @@ -4027,6 +4027,16 @@ void find_ps_compile_args(const struct wined3d_state *state, const struct wined3 } }
+ for (i = 0; i < MAX_FRAGMENT_SAMPLERS; ++i) + { + if (state->sampler_states[i][WINED3D_SAMP_MIPMAP_LOD_BIAS] == MAKEFOURCC('G','E','T','4') + && state->textures[i]->resource.format_flags & WINED3DFMT_FLAG_ALLOW_FETCH4 + && shader->reg_maps.resource_info[i].type == WINED3D_SHADER_RESOURCE_TEXTURE_2D) + args->fetch4 |= 1 << i; + else + args->fetch4 &= ~(1 << i); + } + if (context->d3d_info->limits.varying_count < wined3d_max_compat_varyings(context->gl_info)) { const struct wined3d_shader *vs = state->shader[WINED3D_SHADER_TYPE_VERTEX]; diff --git a/dlls/wined3d/state.c b/dlls/wined3d/state.c index 8708aa09b3..03d623725e 100644 --- a/dlls/wined3d/state.c +++ b/dlls/wined3d/state.c @@ -3616,6 +3616,8 @@ static void sampler(struct wined3d_context *context, const struct wined3d_state { struct wined3d_texture_gl *texture_gl = wined3d_texture_gl(state->textures[sampler_idx]); BOOL srgb = state->sampler_states[sampler_idx][WINED3D_SAMP_SRGB_TEXTURE]; + BOOL fetch4 = state->sampler_states[sampler_idx][WINED3D_SAMP_MIPMAP_LOD_BIAS] + == MAKEFOURCC('G','E','T','4'); const DWORD *sampler_states = state->sampler_states[sampler_idx]; struct wined3d_device *device = context->device; struct wined3d_sampler_desc desc; @@ -3650,6 +3652,16 @@ static void sampler(struct wined3d_context *context, const struct wined3d_state /* Trigger shader constant reloading (for NP2 texcoord fixup) */ if (!(texture_gl->t.flags & WINED3D_TEXTURE_POW2_MAT_IDENT)) context->constant_update_mask |= WINED3D_SHADER_CONST_PS_NP2_FIXUP; + + /* Trigger pixel shader recompilation for FETCH4 changes */ + if (((context->last_was_fetch4 >> sampler_idx) & 0x1) ^ fetch4) + { + if (fetch4) + context->last_was_fetch4 |= 1 << sampler_idx; + else + context->last_was_fetch4 &= ~(1 << sampler_idx); + context->shader_update_mask = (1u << WINED3D_SHADER_TYPE_PIXEL); + } } else { diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h index 4224461142..24696ed477 100644 --- 
a/dlls/wined3d/wined3d_private.h +++ b/dlls/wined3d/wined3d_private.h @@ -521,7 +521,16 @@ enum wined3d_immconst_type WINED3D_IMMCONST_VEC4, };
-#define WINED3DSP_NOSWIZZLE (0u | (1u << 2) | (2u << 4) | (3u << 6)) +#define WINED3DSP_NOSWIZZLE (0u | (1u << 2) | (2u << 4) | (3u << 6)) +#define WINED3DSP_FETCH4_SWIZZLE (2u | (0u << 2) | (1u << 4) | (3u << 6)) /* zxyw */ + +static inline DWORD wined3d_merge_swizzle(const DWORD a, const DWORD b) +{ + return (((a >> ((b >> 0) & 0x3) * 2) & 0x3) << 0) + + (((a >> ((b >> 2) & 0x3) * 2) & 0x3) << 2) + + (((a >> ((b >> 4) & 0x3) * 2) & 0x3) << 4) + + (((a >> ((b >> 6) & 0x3) * 2) & 0x3) << 6); +}
enum wined3d_shader_src_modifier { @@ -1358,7 +1367,8 @@ struct ps_compile_args DWORD alpha_test_func : 3; DWORD render_offscreen : 1; DWORD rt_alpha_swizzle : 8; /* MAX_RENDER_TARGET_VIEWS, 8 */ - DWORD padding : 18; + WORD fetch4 : 16; /* MAX_FRAGMENT_SAMPLERS, 16 */ + DWORD padding : 2; };
enum fog_src_type @@ -1893,6 +1903,7 @@ struct wined3d_context DWORD last_was_ffp_blit : 1; DWORD last_was_blit : 1; DWORD last_was_ckey : 1; + DWORD last_was_fetch4 : 16; /* MAX_FRAGMENT_SAMPLERS */ DWORD fog_coord : 1; DWORD fog_enabled : 1; DWORD current : 1;
- This assumes FETCH4 is already supported by Wine, and checks for ARB_texture_gather support before exposing DF24
Signed-off-by: Daniel Ansorregui [email protected] --- dlls/wined3d/directx.c | 6 ++++++ dlls/wined3d/utils.c | 29 +++++++++++++++++++++++++++++ include/wine/wined3d.h | 2 ++ 3 files changed, 37 insertions(+)
diff --git a/dlls/wined3d/directx.c b/dlls/wined3d/directx.c index 75622be80c..be0c514a78 100644 --- a/dlls/wined3d/directx.c +++ b/dlls/wined3d/directx.c @@ -1723,6 +1723,12 @@ HRESULT CDECL wined3d_check_device_format(const struct wined3d *wined3d, UINT ad return WINED3DOK_NOMIPGEN; }
+ if ((check_format_id == WINED3DFMT_DF24) && !adapter->gl_info.supported[ARB_TEXTURE_GATHER]) + { + TRACE("No Support for Fetch4 disabling DF24 support.\n"); + return WINED3DERR_NOTAVAILABLE; + } + return WINED3D_OK; }
diff --git a/dlls/wined3d/utils.c b/dlls/wined3d/utils.c index b0cae022c6..d8fd3ee6dd 100644 --- a/dlls/wined3d/utils.c +++ b/dlls/wined3d/utils.c @@ -65,6 +65,8 @@ format_index_remap[] = {WINED3DFMT_R16, WINED3D_FORMAT_FOURCC_BASE + 20}, {WINED3DFMT_AL16, WINED3D_FORMAT_FOURCC_BASE + 21}, {WINED3DFMT_NV12, WINED3D_FORMAT_FOURCC_BASE + 22}, + {WINED3DFMT_DF16, WINED3D_FORMAT_FOURCC_BASE + 23}, + {WINED3DFMT_DF24, WINED3D_FORMAT_FOURCC_BASE + 24}, };
#define WINED3D_FORMAT_COUNT (WINED3D_FORMAT_FOURCC_BASE + ARRAY_SIZE(format_index_remap)) @@ -141,6 +143,8 @@ static const struct wined3d_format_channels formats[] = {WINED3DFMT_NVHU, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0}, {WINED3DFMT_NVHS, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0}, {WINED3DFMT_NULL, 8, 8, 8, 8, 0, 8, 16, 24, 4, 0, 0}, + {WINED3DFMT_DF16, 0, 0, 0, 0, 0, 0, 0, 0, 2, 16, 0}, + {WINED3DFMT_DF24, 0, 0, 0, 0, 0, 0, 0, 0, 4, 24, 0}, /* Unsure about them, could not find a Windows driver that supports them */ {WINED3DFMT_R16, 16, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0}, {WINED3DFMT_AL16, 0, 0, 0, 16, 0, 0, 0, 16, 4, 0, 0}, @@ -352,6 +356,8 @@ static const enum wined3d_format_id wined3d_format_fetch4_enabled[] = WINED3DFMT_R16, WINED3DFMT_R32_FLOAT, WINED3DFMT_A8_UNORM, + WINED3DFMT_DF16, + WINED3DFMT_DF24, WINED3DFMT_INTZ, };
@@ -1901,6 +1907,24 @@ static const struct wined3d_format_texture_info format_texture_info[] = WINED3DFMT_FLAG_TEXTURE | WINED3DFMT_FLAG_POSTPIXELSHADER_BLENDING | WINED3DFMT_FLAG_FILTERING | WINED3DFMT_FLAG_DEPTH | WINED3DFMT_FLAG_STENCIL, ARB_FRAMEBUFFER_OBJECT, NULL}, + {WINED3DFMT_DF16, GL_DEPTH_COMPONENT, GL_DEPTH_COMPONENT, 0, + GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, 0, + WINED3DFMT_FLAG_DEPTH, + WINED3D_GL_EXT_NONE, NULL}, + {WINED3DFMT_DF16, GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT16, 0, + GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, 0, + WINED3DFMT_FLAG_TEXTURE | WINED3DFMT_FLAG_POSTPIXELSHADER_BLENDING | WINED3DFMT_FLAG_FILTERING + | WINED3DFMT_FLAG_DEPTH, + ARB_DEPTH_TEXTURE, NULL}, + {WINED3DFMT_DF24, GL_DEPTH_COMPONENT, GL_DEPTH_COMPONENT, 0, + GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, 0, + WINED3DFMT_FLAG_DEPTH, + WINED3D_GL_EXT_NONE, NULL}, + {WINED3DFMT_DF24, GL_DEPTH_COMPONENT24, GL_DEPTH_COMPONENT24, 0, + GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, 0, + WINED3DFMT_FLAG_TEXTURE | WINED3DFMT_FLAG_POSTPIXELSHADER_BLENDING | WINED3DFMT_FLAG_FILTERING + | WINED3DFMT_FLAG_DEPTH, + ARB_DEPTH_TEXTURE, NULL}, {WINED3DFMT_NULL, 0, 0, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, 0, WINED3DFMT_FLAG_TEXTURE | WINED3DFMT_FLAG_RENDERTARGET | WINED3DFMT_FLAG_FBO_ATTACHABLE, @@ -3549,6 +3573,9 @@ static void apply_format_fixups(struct wined3d_adapter *adapter, struct wined3d_ {WINED3DFMT_INTZ, "XXXX", FALSE, WINED3D_GL_EXT_NONE}, {WINED3DFMT_INTZ, "XYZW", FALSE, WINED3D_GL_LEGACY_CONTEXT},
+ {WINED3DFMT_DF16, "X001", FALSE, WINED3D_GL_EXT_NONE}, + {WINED3DFMT_DF24, "X001", FALSE, WINED3D_GL_EXT_NONE}, + {WINED3DFMT_L8_UNORM, "XXX1", FALSE, ARB_TEXTURE_RG}, };
@@ -4403,6 +4430,8 @@ const char *debug_d3dformat(enum wined3d_format_id format_id) FMT_TO_STR(WINED3DFMT_R16); FMT_TO_STR(WINED3DFMT_AL16); FMT_TO_STR(WINED3DFMT_NV12); + FMT_TO_STR(WINED3DFMT_DF16); + FMT_TO_STR(WINED3DFMT_DF24); #undef FMT_TO_STR default: { diff --git a/include/wine/wined3d.h b/include/wine/wined3d.h index e09e4e1fce..e6c3c1b802 100644 --- a/include/wine/wined3d.h +++ b/include/wine/wined3d.h @@ -271,6 +271,8 @@ enum wined3d_format_id WINED3DFMT_R16 = WINEMAKEFOURCC(' ','R','1','6'), WINED3DFMT_AL16 = WINEMAKEFOURCC('A','L','1','6'), WINED3DFMT_NV12 = WINEMAKEFOURCC('N','V','1','2'), + WINED3DFMT_DF16 = WINEMAKEFOURCC('D','F','1','6'), + WINED3DFMT_DF24 = WINEMAKEFOURCC('D','F','2','4'),
WINED3DFMT_FORCE_DWORD = 0xffffffff };
Hi,
Some quick Windows test results:
r500 fails because the returned color for ATI1N is off by 1 beyond the allowed slop. See r500.txt.
evergreen still fails, presumably because there's a copy-paste typo in this line:
D3DCOLOR color_off = expected_colors.color_intel[j];
See evergreen.txt. Changing it to expected_colors.color_fetch4_off[j] makes evergreen happy.
I'll look at the implementation later and double-check the results on my third AMD card.
Stefan
Am 11.02.19 um 14:48 schrieb Daniel Ansorregui:
- Test texld/texldp/texldd/texldb/texldl in PS and FFP
- Test supported/unsupported texture formats on FFP/texld/texldp
- Test 3dtextures (Disabled, each platform has different results)
- Test depth textures DF16/DF24 with fetch4 on PS (FFP is broken on windows)
Signed-off-by: Daniel Ansorregui [email protected]
dlls/d3d9/tests/visual.c | 679 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 679 insertions(+)
diff --git a/dlls/d3d9/tests/visual.c b/dlls/d3d9/tests/visual.c index c06acb77d4..5ccced40be 100644 --- a/dlls/d3d9/tests/visual.c +++ b/dlls/d3d9/tests/visual.c @@ -15234,6 +15234,684 @@ done: DestroyWindow(window); }
+static void fetch4_test(void) +{
- static const DWORD vs_code[] =
- {
0xfffe0300, /* vs_3_0 */
0x0200001f, 0x80000000, 0x900f0000, /* dcl_position v0 */
0x0200001f, 0x80000005, 0x900f0001, /* dcl_texcoord v1 */
0x0200001f, 0x80000000, 0xe00f0000, /* dcl_position o0 */
0x0200001f, 0x80000005, 0xe00f0001, /* dcl_texcoord o1 */
0x02000001, 0xe00f0000, 0x90e40000, /* mov o0, v0 */
0x02000001, 0xe00f0001, 0x90e40001, /* mov o1, v1 */
0x0000ffff
- };
- static const DWORD ps_code_texld[] =
- {
/* Test texld */
0xffff0300, /* ps_3_0 */
0x0200001f, 0x80000005, 0x900f0000, /* dcl_texcoord v0 */
0x0200001f, 0x90000000, 0xa00f0800, /* dcl_2d s0 */
0x03000042, 0x800f0000, 0x90e40000, 0xa0e40800, /* texld r0, v0, s0 */
0x02000001, 0x800f0800, 0x80e40000, /* mov oC0, r0 */
0x0000ffff /* end */
- };
- static const DWORD ps_code_texldp[] =
- {
/* Test texldp : AMD and Wine uses the projection on Fetch4, Intel UHD 620 does not apply it */
0xffff0300, /* ps_3_0 */
0x0200001f, 0x80000005, 0x900f0000, /* dcl_texcoord v0 */
0x0200001f, 0x90000000, 0xa00f0800, /* dcl_2d s0 */
0x05000051, 0xa00f0000, 0x00000000, 0x00000000, 0x00000000, 0x40000000, /* def c0, 0.0, 0.0, 0.0, 2.0 */
0x02000001, 0x80030000, 0x90540000, /* mov r0.xy, v0.xyyy */
0x02000001, 0x800c0000, 0xa0fe0000, /* mov r0.zw, c0.zwww */
0x03010042, 0x800f0000, 0x80e40000, 0xa0e40800, /* texldp r0, r0, s0 */
0x02000001, 0x800f0800, 0x80e40000, /* mov oC0, r0 */
0x0000ffff, /* end */
- };
- static const DWORD ps_code_texldd[] =
- {
/* Test texldd : Fetch4 uses the same D3D state as LOD bias, therefore disables LOD.
* Sampling LOD gradient should be ignored. Same result as texld */
/* NOTE: Radeon HD 5700 driver 8.17.10.1404 disables Fetch4 on texldb */
0xffff0300, /* ps_3_0 */
0x0200001f, 0x80000005, 0x900f0000, /* dcl_texcoord v0 */
0x0200001f, 0x90000000, 0xa00f0800, /* dcl_2d s0