This patchset implements delayed clears in the Vulkan renderer with arbitrary colors. The Vulkan side of it is fairly simple - most of the patches extend the CPU fallback path to support those colors.
The final patch adds a d3d9 test to exercise the fallback code. The Vk renderer is functional enough in d3d9 to run this particular test. I don't see a way to reliably test this codepath through d3d10/11 without quirky assumptions about CopyResource. Dynamic and staging resources can't be written by the GPU, and default resources can't be read by the CPU.
Stefan Dösinger (9):
  wined3d: Separate mapping from data writing in surface_cpu_blt_colour_fill.
  wined3d: Prepare wined3d_format_convert_from_float for > 32bpp formats.
  wined3d: Add support for sysmem-clearing float32 formats.
  wined3d: Support CPU clear of float16 formats.
  wined3d: Add 16 bit per channel UNORM formats to wined3d_format_convert_from_float.
  wined3d: Add a memory_colour_fill for clearing the entire level to zero.
  wined3d: Prepare sysmem LOCATION_CLEARED handling for non-zero colors.
  wined3d: Store clear colors in subresources.
  d3d9/tests: Extend color_fill_test.
 dlls/d3d9/tests/visual.c       | 132 ++++++++++++++++++++++------
 dlls/wined3d/context_vk.c      |  57 ++++++++++--
 dlls/wined3d/device.c          |   7 +-
 dlls/wined3d/resource.c        | 112 +++++++++++++++++++++++
 dlls/wined3d/surface.c         | 139 +----------------------------
 dlls/wined3d/texture.c         |  52 +++++++----
 dlls/wined3d/utils.c           | 156 +++++++++++++++++++++++++--------
 dlls/wined3d/wined3d_private.h |  91 +++++++++++++++++--
 8 files changed, 514 insertions(+), 232 deletions(-)
Signed-off-by: Stefan Dösinger stefan@codeweavers.com --- dlls/wined3d/resource.c | 90 ++++++++++++++++++++++++++++++++++ dlls/wined3d/surface.c | 76 +--------------------------- dlls/wined3d/wined3d_private.h | 3 ++ 3 files changed, 95 insertions(+), 74 deletions(-)
diff --git a/dlls/wined3d/resource.c b/dlls/wined3d/resource.c index 437f9c9da69..a21949e79fd 100644 --- a/dlls/wined3d/resource.c +++ b/dlls/wined3d/resource.c @@ -613,3 +613,93 @@ void *resource_offset_map_pointer(struct wined3d_resource *resource, unsigned in + (box->left * format->byte_count); } } + +void wined3d_resource_memory_colour_fill(struct wined3d_resource *resource, + unsigned int level, const struct wined3d_map_desc *map, + const struct wined3d_color *colour, const struct wined3d_box *box) +{ + const struct wined3d_format *format = resource->format; + unsigned int w, h, d, x, y, z, bpp; + struct wined3d_box level_box; + uint8_t *dst, *dst2; + DWORD c; + + if (resource->type == WINED3D_RTYPE_BUFFER) + { + level_box.left = 0; level_box.top = 0; level_box.front = 0; + level_box.right = resource->width; level_box.bottom = 1; level_box.back = 1; + } + else + { + wined3d_texture_get_level_box(texture_from_resource(resource), level, &level_box); + } + + w = min(box->right, level_box.right) - min(box->left, level_box.right); + h = min(box->bottom, level_box.bottom) - min(box->top, level_box.bottom); + if (resource->type != WINED3D_RTYPE_TEXTURE_3D) + d = 1; + else + d = min(box->back, level_box.back) - min(box->front, level_box.back); + + dst = (BYTE *)map->data + + (box->front * map->slice_pitch) + + ((box->top / format->block_height) * map->row_pitch) + + ((box->left / format->block_width) * format->block_byte_count); + + c = wined3d_format_convert_from_float(format, colour); + bpp = format->byte_count; + + switch (bpp) + { + case 1: + for (x = 0; x < w; ++x) + { + dst[x] = c; + } + break; + + case 2: + for (x = 0; x < w; ++x) + { + ((WORD *)dst)[x] = c; + } + break; + + case 3: + { + dst2 = dst; + for (x = 0; x < w; ++x, dst += 3) + { + dst2[0] = (c ) & 0xff; + dst2[1] = (c >> 8) & 0xff; + dst2[2] = (c >> 16) & 0xff; + } + break; + } + case 4: + for (x = 0; x < w; ++x) + { + ((DWORD *)dst)[x] = c; + } + break; + + default: + FIXME("Not implemented for bpp 
%u.\n", bpp); + return; + } + + dst2 = dst; + for (y = 1; y < h; ++y) + { + dst2 += map->row_pitch; + memcpy(dst2, dst, w * bpp); + } + + dst2 = dst; + for (z = 1; z < d; ++z) + { + dst2 += map->slice_pitch; + memcpy(dst2, dst, w * h * bpp); + } + +} diff --git a/dlls/wined3d/surface.c b/dlls/wined3d/surface.c index 5feeba3be2d..afe6ea6e5ba 100644 --- a/dlls/wined3d/surface.c +++ b/dlls/wined3d/surface.c @@ -1241,15 +1241,13 @@ static void surface_cpu_blt_colour_fill(struct wined3d_rendertarget_view *view, const struct wined3d_box *box, const struct wined3d_color *colour) { struct wined3d_device *device = view->resource->device; - unsigned int x, y, z, w, h, d, bpp, level; struct wined3d_context *context; struct wined3d_texture *texture; struct wined3d_bo_address data; struct wined3d_map_desc map; struct wined3d_range range; + unsigned int level; DWORD map_binding; - uint8_t *dst; - DWORD c;
TRACE("view %p, box %s, colour %s.\n", view, debug_box(box), debug_color(colour));
@@ -1274,20 +1272,6 @@ static void surface_cpu_blt_colour_fill(struct wined3d_rendertarget_view *view, texture = texture_from_resource(view->resource); level = view->sub_resource_idx % texture->level_count;
- c = wined3d_format_convert_from_float(view->format, colour); - bpp = view->format->byte_count; - w = min(box->right, view->width) - min(box->left, view->width); - h = min(box->bottom, view->height) - min(box->top, view->height); - if (view->resource->type != WINED3D_RTYPE_TEXTURE_3D) - { - d = 1; - } - else - { - d = wined3d_texture_get_level_depth(texture, level); - d = min(box->back, d) - min(box->front, d); - } - map_binding = texture->resource.map_binding; if (!wined3d_texture_load_location(texture, view->sub_resource_idx, context, map_binding)) ERR("Failed to load the sub-resource into %s.\n", wined3d_debug_location(map_binding)); @@ -1296,66 +1280,10 @@ static void surface_cpu_blt_colour_fill(struct wined3d_rendertarget_view *view, wined3d_texture_get_bo_address(texture, view->sub_resource_idx, &data, map_binding); map.data = wined3d_context_map_bo_address(context, &data, texture->sub_resources[view->sub_resource_idx].size, WINED3D_MAP_WRITE); - map.data = (BYTE *)map.data - + (box->front * map.slice_pitch) - + ((box->top / view->format->block_height) * map.row_pitch) - + ((box->left / view->format->block_width) * view->format->block_byte_count); range.offset = 0; range.size = texture->sub_resources[view->sub_resource_idx].size;
- switch (bpp) - { - case 1: - for (x = 0; x < w; ++x) - { - ((BYTE *)map.data)[x] = c; - } - break; - - case 2: - for (x = 0; x < w; ++x) - { - ((WORD *)map.data)[x] = c; - } - break; - - case 3: - { - dst = map.data; - for (x = 0; x < w; ++x, dst += 3) - { - dst[0] = (c ) & 0xff; - dst[1] = (c >> 8) & 0xff; - dst[2] = (c >> 16) & 0xff; - } - break; - } - case 4: - for (x = 0; x < w; ++x) - { - ((DWORD *)map.data)[x] = c; - } - break; - - default: - FIXME("Not implemented for bpp %u.\n", bpp); - wined3d_resource_unmap(view->resource, view->sub_resource_idx); - return; - } - - dst = map.data; - for (y = 1; y < h; ++y) - { - dst += map.row_pitch; - memcpy(dst, map.data, w * bpp); - } - - dst = map.data; - for (z = 1; z < d; ++z) - { - dst += map.slice_pitch; - memcpy(dst, map.data, w * h * bpp); - } + wined3d_resource_memory_colour_fill(view->resource, level, &map, colour, box);
wined3d_context_unmap_bo_address(context, &data, 1, &range); context_release(context); diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h index 3a12f5ef261..e19eb476153 100644 --- a/dlls/wined3d/wined3d_private.h +++ b/dlls/wined3d/wined3d_private.h @@ -4366,6 +4366,9 @@ GLbitfield wined3d_resource_gl_storage_flags(const struct wined3d_resource *reso BOOL wined3d_resource_is_offscreen(struct wined3d_resource *resource) DECLSPEC_HIDDEN; BOOL wined3d_resource_prepare_sysmem(struct wined3d_resource *resource) DECLSPEC_HIDDEN; void wined3d_resource_update_draw_binding(struct wined3d_resource *resource) DECLSPEC_HIDDEN; +void wined3d_resource_memory_colour_fill(struct wined3d_resource *resource, + unsigned int level, const struct wined3d_map_desc *map, + const struct wined3d_color *colour, const struct wined3d_box *box) DECLSPEC_HIDDEN;
/* Tests show that the start address of resources is 32 byte aligned */ #define RESOURCE_ALIGNMENT 16
On Sun, 1 May 2022 at 20:39, Stefan Dösinger stefan@codeweavers.com wrote:
+void wined3d_resource_memory_colour_fill(struct wined3d_resource *resource,
+        unsigned int level, const struct wined3d_map_desc *map,
+        const struct wined3d_color *colour, const struct wined3d_box *box)
+{
+    const struct wined3d_format *format = resource->format;
+    unsigned int w, h, d, x, y, z, bpp;
+    struct wined3d_box level_box;
+    uint8_t *dst, *dst2;
+    DWORD c;
+    if (resource->type == WINED3D_RTYPE_BUFFER)
+    {
+        level_box.left = 0; level_box.top = 0; level_box.front = 0;
+        level_box.right = resource->width; level_box.bottom = 1; level_box.back = 1;
+    }
+    else
+    {
+        wined3d_texture_get_level_box(texture_from_resource(resource), level, &level_box);
+    }
+    w = min(box->right, level_box.right) - min(box->left, level_box.right);
+    h = min(box->bottom, level_box.bottom) - min(box->top, level_box.bottom);
+    if (resource->type != WINED3D_RTYPE_TEXTURE_3D)
+        d = 1;
+    else
+        d = min(box->back, level_box.back) - min(box->front, level_box.back);
This (i.e., clipping the supplied box against the resource dimensions) doesn't belong here; the caller should simply supply the correct box. Arguably it doesn't belong in surface_cpu_blt_colour_fill() either; the most appropriate place would be cpu_blitter_clear().
+    dst = (BYTE *)map->data
+            + (box->front * map->slice_pitch)
+            + ((box->top / format->block_height) * map->row_pitch)
+            + ((box->left / format->block_width) * format->block_byte_count);
We might as well cast to "uint8_t *". (And use uint16_t and uint32_t further below.)
+    c = wined3d_format_convert_from_float(format, colour);
+    bpp = format->byte_count;
+    switch (bpp)
+    {
+        case 1:
+            for (x = 0; x < w; ++x)
+            {
+                dst[x] = c;
+            }
+            break;
+        case 2:
+            for (x = 0; x < w; ++x)
+            {
+                ((WORD *)dst)[x] = c;
+            }
+            break;
+        case 3:
+        {
+            dst2 = dst;
+            for (x = 0; x < w; ++x, dst += 3)
+            {
+                dst2[0] = (c ) & 0xff;
+                dst2[1] = (c >> 8) & 0xff;
+                dst2[2] = (c >> 16) & 0xff;
+            }
+            break;
+        }
Does that do the right thing? We update "dst", but not "dst2".
Signed-off-by: Stefan Dösinger stefan@codeweavers.com --- dlls/wined3d/device.c | 7 +++-- dlls/wined3d/resource.c | 16 +++++----- dlls/wined3d/utils.c | 55 +++++++++++++++++++++++----------- dlls/wined3d/wined3d_private.h | 4 +-- 4 files changed, 53 insertions(+), 29 deletions(-)
diff --git a/dlls/wined3d/device.c b/dlls/wined3d/device.c index 8937c7fc0fc..3e16370a580 100644 --- a/dlls/wined3d/device.c +++ b/dlls/wined3d/device.c @@ -3436,6 +3436,7 @@ static HRESULT process_vertices_strided(const struct wined3d_device *device, DWO unsigned int vertex_size; BOOL do_clip, lighting; float min_z, max_z; + DWORD argb_color[4]; unsigned int i; BYTE *dest_ptr; HRESULT hr; @@ -3689,7 +3690,8 @@ static HRESULT process_vertices_strided(const struct wined3d_device *device, DWO diffuse_colour = material_diffuse; } wined3d_color_clamp(&diffuse_colour, &diffuse_colour, 0.0f, 1.0f); - *((DWORD *)dest_ptr) = wined3d_format_convert_from_float(output_colour_format, &diffuse_colour); + wined3d_format_convert_from_float(output_colour_format, &diffuse_colour, argb_color); + *((DWORD *)dest_ptr) = argb_color[0]; dest_ptr += sizeof(DWORD); }
@@ -3713,7 +3715,8 @@ static HRESULT process_vertices_strided(const struct wined3d_device *device, DWO } update_fog_factor(&specular_colour.a, &ls); wined3d_color_clamp(&specular_colour, &specular_colour, 0.0f, 1.0f); - *((DWORD *)dest_ptr) = wined3d_format_convert_from_float(output_colour_format, &specular_colour); + wined3d_format_convert_from_float(output_colour_format, &specular_colour, argb_color); + *((DWORD *)dest_ptr) = argb_color[0]; dest_ptr += sizeof(DWORD); }
diff --git a/dlls/wined3d/resource.c b/dlls/wined3d/resource.c index a21949e79fd..cfbb60c3606 100644 --- a/dlls/wined3d/resource.c +++ b/dlls/wined3d/resource.c @@ -622,7 +622,7 @@ void wined3d_resource_memory_colour_fill(struct wined3d_resource *resource, unsigned int w, h, d, x, y, z, bpp; struct wined3d_box level_box; uint8_t *dst, *dst2; - DWORD c; + DWORD c[4];
if (resource->type == WINED3D_RTYPE_BUFFER) { @@ -646,7 +646,7 @@ void wined3d_resource_memory_colour_fill(struct wined3d_resource *resource, + ((box->top / format->block_height) * map->row_pitch) + ((box->left / format->block_width) * format->block_byte_count);
- c = wined3d_format_convert_from_float(format, colour); + wined3d_format_convert_from_float(format, colour, c); bpp = format->byte_count;
switch (bpp) @@ -654,14 +654,14 @@ void wined3d_resource_memory_colour_fill(struct wined3d_resource *resource, case 1: for (x = 0; x < w; ++x) { - dst[x] = c; + dst[x] = c[0]; } break;
case 2: for (x = 0; x < w; ++x) { - ((WORD *)dst)[x] = c; + ((WORD *)dst)[x] = c[0]; } break;
@@ -670,16 +670,16 @@ void wined3d_resource_memory_colour_fill(struct wined3d_resource *resource, dst2 = dst; for (x = 0; x < w; ++x, dst += 3) { - dst2[0] = (c ) & 0xff; - dst2[1] = (c >> 8) & 0xff; - dst2[2] = (c >> 16) & 0xff; + dst2[0] = (c[0] ) & 0xff; + dst2[1] = (c[0] >> 8) & 0xff; + dst2[2] = (c[0] >> 16) & 0xff; } break; } case 4: for (x = 0; x < w; ++x) { - ((DWORD *)dst)[x] = c; + ((DWORD *)dst)[x] = c[0]; } break;
diff --git a/dlls/wined3d/utils.c b/dlls/wined3d/utils.c index 401d815a365..23d5a0e30a7 100644 --- a/dlls/wined3d/utils.c +++ b/dlls/wined3d/utils.c @@ -6000,8 +6000,12 @@ uint32_t wined3d_format_pack(const struct wined3d_format *format, const struct w
/* Note: It's the caller's responsibility to ensure values can be expressed * in the requested format. UNORM formats for example can only express values - * in the range 0.0f -> 1.0f. */ -DWORD wined3d_format_convert_from_float(const struct wined3d_format *format, const struct wined3d_color *color) + * in the range 0.0f -> 1.0f. + * + * The code below assumes that no component crosses the 32 bit boundary (like + * e.g. a hypothetical, and totally braindead, B30G30R4 format would.) */ +void wined3d_format_convert_from_float(const struct wined3d_format *format, const struct wined3d_color *color, + DWORD ret[4]) { static const struct { @@ -6044,10 +6048,11 @@ DWORD wined3d_format_convert_from_float(const struct wined3d_format *format, con }; enum wined3d_format_id format_id = format->id; struct wined3d_color colour_srgb; + struct wined3d_uvec4 idx, shift; unsigned int i; - DWORD ret;
TRACE("Converting colour %s to format %s.\n", debug_color(color), debug_d3dformat(format_id)); + memset(ret, 0, sizeof(DWORD) * 4);
for (i = 0; i < ARRAY_SIZE(format_srgb_info); ++i) { @@ -6065,14 +6070,23 @@ DWORD wined3d_format_convert_from_float(const struct wined3d_format *format, con if (format_id != float_conv[i].format_id) continue;
- ret = ((DWORD)((color->r * float_conv[i].mul.x) + 0.5f)) << float_conv[i].shift.x; - ret |= ((DWORD)((color->g * float_conv[i].mul.y) + 0.5f)) << float_conv[i].shift.y; - ret |= ((DWORD)((color->b * float_conv[i].mul.z) + 0.5f)) << float_conv[i].shift.z; - ret |= ((DWORD)((color->a * float_conv[i].mul.w) + 0.5f)) << float_conv[i].shift.w; + idx.x = float_conv[i].shift.x / 32; + idx.y = float_conv[i].shift.y / 32; + idx.z = float_conv[i].shift.z / 32; + idx.w = float_conv[i].shift.w / 32; + shift.x = float_conv[i].shift.x % 32; + shift.y = float_conv[i].shift.y % 32; + shift.z = float_conv[i].shift.z % 32; + shift.w = float_conv[i].shift.w % 32; + + ret[idx.x] = ((DWORD)((color->r * float_conv[i].mul.x) + 0.5f)) << shift.x; + ret[idx.y] |= ((DWORD)((color->g * float_conv[i].mul.y) + 0.5f)) << shift.y; + ret[idx.z] |= ((DWORD)((color->b * float_conv[i].mul.z) + 0.5f)) << shift.z; + ret[idx.w] |= ((DWORD)((color->a * float_conv[i].mul.w) + 0.5f)) << shift.w;
- TRACE("Returning 0x%08x.\n", ret); + TRACE("Returning 0x%08x 0x%08x 0x%08x 0x%08x.\n", ret[0], ret[1], ret[2], ret[3]);
- return ret; + return; }
for (i = 0; i < ARRAY_SIZE(double_conv); ++i) @@ -6080,19 +6094,26 @@ DWORD wined3d_format_convert_from_float(const struct wined3d_format *format, con if (format_id != double_conv[i].format_id) continue;
- ret = ((DWORD)((color->r * double_conv[i].mul.x) + 0.5)) << double_conv[i].shift.x; - ret |= ((DWORD)((color->g * double_conv[i].mul.y) + 0.5)) << double_conv[i].shift.y; - ret |= ((DWORD)((color->b * double_conv[i].mul.z) + 0.5)) << double_conv[i].shift.z; - ret |= ((DWORD)((color->a * double_conv[i].mul.w) + 0.5)) << double_conv[i].shift.w; + idx.x = float_conv[i].shift.x / 32; + idx.y = float_conv[i].shift.y / 32; + idx.z = float_conv[i].shift.z / 32; + idx.w = float_conv[i].shift.w / 32; + shift.x = float_conv[i].shift.x % 32; + shift.y = float_conv[i].shift.y % 32; + shift.z = float_conv[i].shift.z % 32; + shift.w = float_conv[i].shift.w % 32; + + ret[idx.x] = ((DWORD)((color->r * double_conv[i].mul.x) + 0.5)) << double_conv[i].shift.x; + ret[idx.y] |= ((DWORD)((color->g * double_conv[i].mul.y) + 0.5)) << double_conv[i].shift.y; + ret[idx.z] |= ((DWORD)((color->b * double_conv[i].mul.z) + 0.5)) << double_conv[i].shift.z; + ret[idx.w] |= ((DWORD)((color->a * double_conv[i].mul.w) + 0.5)) << double_conv[i].shift.w;
- TRACE("Returning 0x%08x.\n", ret); + TRACE("Returning 0x%08x 0x%08x 0x%08x 0x%08x.\n", ret[0], ret[1], ret[2], ret[3]);
- return ret; + return; }
FIXME("Conversion for format %s not implemented.\n", debug_d3dformat(format_id)); - - return 0; }
static float color_to_float(DWORD color, DWORD size, DWORD offset) diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h index e19eb476153..07275fe6359 100644 --- a/dlls/wined3d/wined3d_private.h +++ b/dlls/wined3d/wined3d_private.h @@ -6149,8 +6149,8 @@ void wined3d_format_calculate_pitch(const struct wined3d_format *format, unsigne unsigned int width, unsigned int height, unsigned int *row_pitch, unsigned int *slice_pitch) DECLSPEC_HIDDEN; UINT wined3d_format_calculate_size(const struct wined3d_format *format, UINT alignment, UINT width, UINT height, UINT depth) DECLSPEC_HIDDEN; -DWORD wined3d_format_convert_from_float(const struct wined3d_format *format, - const struct wined3d_color *color) DECLSPEC_HIDDEN; +void wined3d_format_convert_from_float(const struct wined3d_format *format, + const struct wined3d_color *color, DWORD ret[4]) DECLSPEC_HIDDEN; void wined3d_format_copy_data(const struct wined3d_format *format, const uint8_t *src, unsigned int src_row_pitch, unsigned int src_slice_pitch, uint8_t *dst, unsigned int dst_row_pitch, unsigned int dst_slice_pitch, unsigned int w, unsigned int h, unsigned int d) DECLSPEC_HIDDEN;
On Sun, 1 May 2022 at 20:39, Stefan Dösinger stefan@codeweavers.com wrote:
@@ -3689,7 +3690,8 @@ static HRESULT process_vertices_strided(const struct wined3d_device *device, DWO
                 diffuse_colour = material_diffuse;
             }
             wined3d_color_clamp(&diffuse_colour, &diffuse_colour, 0.0f, 1.0f);
-            *((DWORD *)dest_ptr) = wined3d_format_convert_from_float(output_colour_format, &diffuse_colour);
+            wined3d_format_convert_from_float(output_colour_format, &diffuse_colour, argb_color);
+            *((DWORD *)dest_ptr) = argb_color[0];
             dest_ptr += sizeof(DWORD);
         }
Why not "wined3d_format_convert_from_float(output_colour_format, &diffuse_colour, (uint32_t *)dest_ptr);"? This patch always writes 4 uint32_t's in wined3d_format_convert_from_float(), but that could easily be avoided. And arguably, if we're going to write to an output pointer in wined3d_format_convert_from_float(), that doesn't need to be a pointer to a 32-bit value either. (I.e., compare the upload/download/decompress operations from struct wined3d_format.)
Am Montag, 2. Mai 2022, 18:36:25 EAT schrieb Henri Verbeet:
> This patch always writes 4 uint32_t's in wined3d_format_convert_from_float(), but that could easily be avoided.
I made it write 16 bytes unconditionally on purpose - I don't like the idea of the caller magically knowing which format requires how many output bytes. I am not married to that though, since in practice the caller has to put the returned result somewhere anyhow.
In practice writing format->byte_count would do, as long as we don't add support for block based formats. I am not sure it would really make things nicer though.
On Thu, 5 May 2022 at 15:23, Stefan Dösinger stefandoesinger@gmail.com wrote:
> Am Montag, 2. Mai 2022, 18:36:25 EAT schrieb Henri Verbeet:
>> This patch always writes 4 uint32_t's in wined3d_format_convert_from_float(), but that could easily be avoided.
> I made it write 16 bytes unconditionally on purpose - I don't like the idea of the caller magically knowing which format requires how many output bytes. I am not married to that though, since in practice the caller has to put the returned result somewhere anyhow.
> In practice writing format->byte_count would do, as long as we don't add support for block based formats. I am not sure it would really make things nicer though.
Well, it allows using wined3d_format_convert_from_float() directly on the destination buffer, instead of always having to copy it from an intermediate variable. And of course the caller then still needs to know the size of the output to do that copy. I.e., "*((DWORD *)dest_ptr) = argb_color[0];" in the bit of code I was replying to only works because we know we have a 32-bit format.
Signed-off-by: Stefan Dösinger stefan@codeweavers.com --- dlls/wined3d/resource.c | 6 ++++++ dlls/wined3d/utils.c | 29 +++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+)
diff --git a/dlls/wined3d/resource.c b/dlls/wined3d/resource.c index cfbb60c3606..8c3ca59bedc 100644 --- a/dlls/wined3d/resource.c +++ b/dlls/wined3d/resource.c @@ -682,6 +682,12 @@ void wined3d_resource_memory_colour_fill(struct wined3d_resource *resource, ((DWORD *)dst)[x] = c[0]; } break; + case 8: + case 12: + case 16: + for (x = 0; x < w; ++x) + memcpy(((uint8_t *)map->data) + x * bpp, c, bpp); + break;
default: FIXME("Not implemented for bpp %u.\n", bpp); diff --git a/dlls/wined3d/utils.c b/dlls/wined3d/utils.c index 23d5a0e30a7..0820184ce05 100644 --- a/dlls/wined3d/utils.c +++ b/dlls/wined3d/utils.c @@ -6046,6 +6046,14 @@ void wined3d_format_convert_from_float(const struct wined3d_format *format, cons {WINED3DFMT_X8D24_UNORM, { 16777215.0, 0.0, 0.0, 0.0}, {0, 0, 0, 0}}, {WINED3DFMT_D32_UNORM, {4294967295.0, 0.0, 0.0, 0.0}, {0, 0, 0, 0}}, }; + enum wined3d_format_id float32_copy[] = + { + WINED3DFMT_D32_FLOAT, + WINED3DFMT_R32_FLOAT, + WINED3DFMT_R32G32_FLOAT, + WINED3DFMT_R32G32B32_FLOAT, + WINED3DFMT_R32G32B32A32_FLOAT, + }; enum wined3d_format_id format_id = format->id; struct wined3d_color colour_srgb; struct wined3d_uvec4 idx, shift; @@ -6113,6 +6121,27 @@ void wined3d_format_convert_from_float(const struct wined3d_format *format, cons return; }
+ for (i = 0; i < ARRAY_SIZE(float32_copy); ++i) + { + if (format_id != float32_copy[i]) + continue; + + switch(format->byte_count) + { + case 16: ((float *)ret)[3] = color->a; + case 12: ((float *)ret)[2] = color->b; + case 8: ((float *)ret)[1] = color->g; + case 4: ((float *)ret)[0] = color->r; + break; + + default: + ERR("Unexpected byte count %u: Format %s\n", format->byte_count, debug_d3dformat(format_id)); + break; + } + + return; + } + FIXME("Conversion for format %s not implemented.\n", debug_d3dformat(format_id)); }
On Sun, 1 May 2022 at 20:39, Stefan Dösinger stefan@codeweavers.com wrote:
@@ -6113,6 +6121,27 @@ void wined3d_format_convert_from_float(const struct wined3d_format *format, cons return; }
+    for (i = 0; i < ARRAY_SIZE(float32_copy); ++i)
+    {
+        if (format_id != float32_copy[i])
+            continue;
+        switch(format->byte_count)
Missing space after "switch".
Could we just check for WINED3DFMT_FLAG_FLOAT and the component sizes instead of listing individual formats?
Signed-off-by: Stefan Dösinger stefan@codeweavers.com --- dlls/wined3d/surface.c | 63 ---------------------------- dlls/wined3d/utils.c | 33 +++++++++++++++ dlls/wined3d/wined3d_private.h | 75 ++++++++++++++++++++++++++++++---- 3 files changed, 101 insertions(+), 70 deletions(-)
diff --git a/dlls/wined3d/surface.c b/dlls/wined3d/surface.c index afe6ea6e5ba..1cdbc465691 100644 --- a/dlls/wined3d/surface.c +++ b/dlls/wined3d/surface.c @@ -39,69 +39,6 @@ static void get_color_masks(const struct wined3d_format *format, uint32_t *masks masks[2] = wined3d_mask_from_size(format->blue_size) << format->blue_offset; }
-/* See also float_16_to_32() in wined3d_private.h */ -static inline unsigned short float_32_to_16(const float *in) -{ - int exp = 0; - float tmp = fabsf(*in); - unsigned int mantissa; - unsigned short ret; - - /* Deal with special numbers */ - if (*in == 0.0f) - return 0x0000; - if (isnan(*in)) - return 0x7c01; - if (isinf(*in)) - return (*in < 0.0f ? 0xfc00 : 0x7c00); - - if (tmp < (float)(1u << 10)) - { - do - { - tmp = tmp * 2.0f; - exp--; - } while (tmp < (float)(1u << 10)); - } - else if (tmp >= (float)(1u << 11)) - { - do - { - tmp /= 2.0f; - exp++; - } while (tmp >= (float)(1u << 11)); - } - - mantissa = (unsigned int)tmp; - if (tmp - mantissa >= 0.5f) - ++mantissa; /* Round to nearest, away from zero. */ - - exp += 10; /* Normalize the mantissa. */ - exp += 15; /* Exponent is encoded with excess 15. */ - - if (exp > 30) /* too big */ - { - ret = 0x7c00; /* INF */ - } - else if (exp <= 0) - { - /* exp == 0: Non-normalized mantissa. Returns 0x0000 (=0.0) for too small numbers. */ - while (exp <= 0) - { - mantissa = mantissa >> 1; - ++exp; - } - ret = mantissa & 0x3ff; - } - else - { - ret = (exp << 10) | (mantissa & 0x3ff); - } - - ret |= ((*in < 0.0f ? 
1 : 0) << 15); /* Add the sign */ - return ret; -} - static void convert_r32_float_r16_float(const BYTE *src, BYTE *dst, DWORD pitch_in, DWORD pitch_out, unsigned int w, unsigned int h) { diff --git a/dlls/wined3d/utils.c b/dlls/wined3d/utils.c index 0820184ce05..2bbf57ebff4 100644 --- a/dlls/wined3d/utils.c +++ b/dlls/wined3d/utils.c @@ -6054,6 +6054,13 @@ void wined3d_format_convert_from_float(const struct wined3d_format *format, cons WINED3DFMT_R32G32B32_FLOAT, WINED3DFMT_R32G32B32A32_FLOAT, }; + enum wined3d_format_id float16_conv[] = + { + WINED3DFMT_D32_FLOAT, + WINED3DFMT_R16_FLOAT, + WINED3DFMT_R16G16_FLOAT, + WINED3DFMT_R16G16B16A16_FLOAT, + }; enum wined3d_format_id format_id = format->id; struct wined3d_color colour_srgb; struct wined3d_uvec4 idx, shift; @@ -6142,6 +6149,32 @@ void wined3d_format_convert_from_float(const struct wined3d_format *format, cons return; }
+ for (i = 0; i < ARRAY_SIZE(float16_conv); ++i) + { + if (format_id != float16_conv[i]) + continue; + + switch(format->byte_count) + { + case 8: + ((short *)ret)[3] = float_32_to_16(&color->a); + ((short *)ret)[2] = float_32_to_16(&color->b); + /* fall through */ + case 4: + ((short *)ret)[1] = float_32_to_16(&color->g); + /* fall through */ + case 2: + ((short *)ret)[0] = float_32_to_16(&color->r); + break; + + default: + ERR("Unexpected byte count %u: Format %s\n", format->byte_count, debug_d3dformat(format_id)); + break; + } + + return; + } + FIXME("Conversion for format %s not implemented.\n", debug_d3dformat(format_id)); }
diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h index 07275fe6359..98a19e0fd65 100644 --- a/dlls/wined3d/wined3d_private.h +++ b/dlls/wined3d/wined3d_private.h @@ -354,13 +354,12 @@ static inline GLenum wined3d_gl_min_mip_filter(enum wined3d_texture_filter_type return minMipLookup[min_filter].mip[mip_filter]; }
-/* float_16_to_32() and float_32_to_16() (see implementation in - * surface_base.c) convert 16 bit floats in the FLOAT16 data type - * to standard C floats and vice versa. They do not depend on the encoding - * of the C float, so they are platform independent, but slow. On x86 and - * other IEEE 754 compliant platforms the conversion can be accelerated by - * bit shifting the exponent and mantissa. There are also some SSE-based - * assembly routines out there. +/* float_16_to_32() and float_32_to_16() convert 16 bit floats in the + * FLOAT16 data type to standard C floats and vice versa. They do not + * depend on the encoding of the C float, so they are platform independent, + * but slow. On x86 and other IEEE 754 compliant platforms the conversion + * can be accelerated by bit shifting the exponent and mantissa. There are + * also some SSE-based assembly routines out there. * * See GL_NV_half_float for a reference of the FLOAT16 / GL_HALF format */ @@ -404,6 +403,68 @@ static inline float float_24_to_32(DWORD in) } }
+static inline unsigned short float_32_to_16(const float *in) +{ + int exp = 0; + float tmp = fabsf(*in); + unsigned int mantissa; + unsigned short ret; + + /* Deal with special numbers */ + if (*in == 0.0f) + return 0x0000; + if (isnan(*in)) + return 0x7c01; + if (isinf(*in)) + return (*in < 0.0f ? 0xfc00 : 0x7c00); + + if (tmp < (float)(1u << 10)) + { + do + { + tmp = tmp * 2.0f; + exp--; + } while (tmp < (float)(1u << 10)); + } + else if (tmp >= (float)(1u << 11)) + { + do + { + tmp /= 2.0f; + exp++; + } while (tmp >= (float)(1u << 11)); + } + + mantissa = (unsigned int)tmp; + if (tmp - mantissa >= 0.5f) + ++mantissa; /* Round to nearest, away from zero. */ + + exp += 10; /* Normalize the mantissa. */ + exp += 15; /* Exponent is encoded with excess 15. */ + + if (exp > 30) /* too big */ + { + ret = 0x7c00; /* INF */ + } + else if (exp <= 0) + { + /* exp == 0: Non-normalized mantissa. Returns 0x0000 (=0.0) for too small numbers. */ + while (exp <= 0) + { + mantissa = mantissa >> 1; + ++exp; + } + ret = mantissa & 0x3ff; + } + else + { + ret = (exp << 10) | (mantissa & 0x3ff); + } + + ret |= ((*in < 0.0f ? 1 : 0) << 15); /* Add the sign */ + return ret; +} + static inline unsigned int wined3d_popcount(unsigned int x) { #if defined(__MINGW32__)
On Sun, 1 May 2022 at 20:39, Stefan Dösinger stefan@codeweavers.com wrote:
+    for (i = 0; i < ARRAY_SIZE(float16_conv); ++i)
+    {
+        if (format_id != float16_conv[i])
+            continue;
+        switch(format->byte_count)
+        {
+            case 8:
+                ((short *)ret)[3] = float_32_to_16(&color->a);
+                ((short *)ret)[2] = float_32_to_16(&color->b);
+                /* fall through */
+            case 4:
+                ((short *)ret)[1] = float_32_to_16(&color->g);
+                /* fall through */
+            case 2:
+                ((short *)ret)[0] = float_32_to_16(&color->r);
+                break;
+            default:
+                ERR("Unexpected byte count %u: Format %s\n", format->byte_count, debug_d3dformat(format_id));
+                break;
+        }
+        return;
+    }
Like 3/9, it seems preferable to check for WINED3DFMT_FLAG_FLOAT and component sizes. There's a missing space after "switch" here as well.
Signed-off-by: Stefan Dösinger stefan@codeweavers.com --- dlls/wined3d/utils.c | 39 +++++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 18 deletions(-)
diff --git a/dlls/wined3d/utils.c b/dlls/wined3d/utils.c index 2bbf57ebff4..e53335c6744 100644 --- a/dlls/wined3d/utils.c +++ b/dlls/wined3d/utils.c @@ -6015,24 +6015,27 @@ void wined3d_format_convert_from_float(const struct wined3d_format *format, cons } float_conv[] = { - {WINED3DFMT_B8G8R8A8_UNORM, { 255.0f, 255.0f, 255.0f, 255.0f}, {16, 8, 0, 24}}, - {WINED3DFMT_B8G8R8X8_UNORM, { 255.0f, 255.0f, 255.0f, 255.0f}, {16, 8, 0, 24}}, - {WINED3DFMT_B8G8R8_UNORM, { 255.0f, 255.0f, 255.0f, 255.0f}, {16, 8, 0, 24}}, - {WINED3DFMT_B5G6R5_UNORM, { 31.0f, 63.0f, 31.0f, 0.0f}, {11, 5, 0, 0}}, - {WINED3DFMT_B5G5R5A1_UNORM, { 31.0f, 31.0f, 31.0f, 1.0f}, {10, 5, 0, 15}}, - {WINED3DFMT_B5G5R5X1_UNORM, { 31.0f, 31.0f, 31.0f, 1.0f}, {10, 5, 0, 15}}, - {WINED3DFMT_R8_UNORM, { 255.0f, 0.0f, 0.0f, 0.0f}, { 0, 0, 0, 0}}, - {WINED3DFMT_A8_UNORM, { 0.0f, 0.0f, 0.0f, 255.0f}, { 0, 0, 0, 0}}, - {WINED3DFMT_B4G4R4A4_UNORM, { 15.0f, 15.0f, 15.0f, 15.0f}, { 8, 4, 0, 12}}, - {WINED3DFMT_B4G4R4X4_UNORM, { 15.0f, 15.0f, 15.0f, 15.0f}, { 8, 4, 0, 12}}, - {WINED3DFMT_B2G3R3_UNORM, { 7.0f, 7.0f, 3.0f, 0.0f}, { 5, 2, 0, 0}}, - {WINED3DFMT_R8G8B8A8_UNORM, { 255.0f, 255.0f, 255.0f, 255.0f}, { 0, 8, 16, 24}}, - {WINED3DFMT_R8G8B8X8_UNORM, { 255.0f, 255.0f, 255.0f, 255.0f}, { 0, 8, 16, 24}}, - {WINED3DFMT_B10G10R10A2_UNORM, { 1023.0f, 1023.0f, 1023.0f, 3.0f}, {20, 10, 0, 30}}, - {WINED3DFMT_R10G10B10A2_UNORM, { 1023.0f, 1023.0f, 1023.0f, 3.0f}, { 0, 10, 20, 30}}, - {WINED3DFMT_P8_UINT, { 0.0f, 0.0f, 0.0f, 255.0f}, { 0, 0, 0, 0}}, - {WINED3DFMT_S1_UINT_D15_UNORM, { 32767.0f, 0.0f, 0.0f, 0.0f}, { 0, 0, 0, 0}}, - {WINED3DFMT_D16_UNORM, { 65535.0f, 0.0f, 0.0f, 0.0f}, { 0, 0, 0, 0}}, + {WINED3DFMT_B8G8R8A8_UNORM, { 255.0f, 255.0f, 255.0f, 255.0f}, {16, 8, 0, 24}}, + {WINED3DFMT_B8G8R8X8_UNORM, { 255.0f, 255.0f, 255.0f, 255.0f}, {16, 8, 0, 24}}, + {WINED3DFMT_B8G8R8_UNORM, { 255.0f, 255.0f, 255.0f, 255.0f}, {16, 8, 0, 24}}, + {WINED3DFMT_B5G6R5_UNORM, { 31.0f, 63.0f, 31.0f, 0.0f}, {11, 5, 0, 0}}, + 
{WINED3DFMT_B5G5R5A1_UNORM, { 31.0f, 31.0f, 31.0f, 1.0f}, {10, 5, 0, 15}}, + {WINED3DFMT_B5G5R5X1_UNORM, { 31.0f, 31.0f, 31.0f, 1.0f}, {10, 5, 0, 15}}, + {WINED3DFMT_R8_UNORM, { 255.0f, 0.0f, 0.0f, 0.0f}, { 0, 0, 0, 0}}, + {WINED3DFMT_A8_UNORM, { 0.0f, 0.0f, 0.0f, 255.0f}, { 0, 0, 0, 0}}, + {WINED3DFMT_B4G4R4A4_UNORM, { 15.0f, 15.0f, 15.0f, 15.0f}, { 8, 4, 0, 12}}, + {WINED3DFMT_B4G4R4X4_UNORM, { 15.0f, 15.0f, 15.0f, 15.0f}, { 8, 4, 0, 12}}, + {WINED3DFMT_B2G3R3_UNORM, { 7.0f, 7.0f, 3.0f, 0.0f}, { 5, 2, 0, 0}}, + {WINED3DFMT_R8G8B8A8_UNORM, { 255.0f, 255.0f, 255.0f, 255.0f}, { 0, 8, 16, 24}}, + {WINED3DFMT_R8G8B8X8_UNORM, { 255.0f, 255.0f, 255.0f, 255.0f}, { 0, 8, 16, 24}}, + {WINED3DFMT_B10G10R10A2_UNORM, { 1023.0f, 1023.0f, 1023.0f, 3.0f}, {20, 10, 0, 30}}, + {WINED3DFMT_R10G10B10A2_UNORM, { 1023.0f, 1023.0f, 1023.0f, 3.0f}, { 0, 10, 20, 30}}, + {WINED3DFMT_R16_UNORM, { 65535.0f, 0.0f, 0.0f, 0.0f}, { 0, 0, 0, 0}}, + {WINED3DFMT_R16G16_UNORM, { 65535.0f, 65535.0f, 0.0f, 0.0f}, { 0, 16, 0, 0}}, + {WINED3DFMT_R16G16B16A16_UNORM,{ 65535.0f, 65535.0f, 65535.0f,65535.0f}, { 0, 16, 32, 48}}, + {WINED3DFMT_P8_UINT, { 0.0f, 0.0f, 0.0f, 255.0f}, { 0, 0, 0, 0}}, + {WINED3DFMT_S1_UINT_D15_UNORM, { 32767.0f, 0.0f, 0.0f, 0.0f}, { 0, 0, 0, 0}}, + {WINED3DFMT_D16_UNORM, { 65535.0f, 0.0f, 0.0f, 0.0f}, { 0, 0, 0, 0}}, }; static const struct {
On Sun, 1 May 2022 at 20:39, Stefan Dösinger stefan@codeweavers.com wrote:
@@ -6015,24 +6015,27 @@ void wined3d_format_convert_from_float(const struct wined3d_format *format, cons } float_conv[] = {
{WINED3DFMT_B8G8R8A8_UNORM, { 255.0f, 255.0f, 255.0f, 255.0f}, {16, 8, 0, 24}},
{WINED3DFMT_B8G8R8X8_UNORM, { 255.0f, 255.0f, 255.0f, 255.0f}, {16, 8, 0, 24}},
{WINED3DFMT_B8G8R8_UNORM, { 255.0f, 255.0f, 255.0f, 255.0f}, {16, 8, 0, 24}},
{WINED3DFMT_B5G6R5_UNORM, { 31.0f, 63.0f, 31.0f, 0.0f}, {11, 5, 0, 0}},
{WINED3DFMT_B5G5R5A1_UNORM, { 31.0f, 31.0f, 31.0f, 1.0f}, {10, 5, 0, 15}},
{WINED3DFMT_B5G5R5X1_UNORM, { 31.0f, 31.0f, 31.0f, 1.0f}, {10, 5, 0, 15}},
{WINED3DFMT_R8_UNORM, { 255.0f, 0.0f, 0.0f, 0.0f}, { 0, 0, 0, 0}},
{WINED3DFMT_A8_UNORM, { 0.0f, 0.0f, 0.0f, 255.0f}, { 0, 0, 0, 0}},
{WINED3DFMT_B4G4R4A4_UNORM, { 15.0f, 15.0f, 15.0f, 15.0f}, { 8, 4, 0, 12}},
{WINED3DFMT_B4G4R4X4_UNORM, { 15.0f, 15.0f, 15.0f, 15.0f}, { 8, 4, 0, 12}},
{WINED3DFMT_B2G3R3_UNORM, { 7.0f, 7.0f, 3.0f, 0.0f}, { 5, 2, 0, 0}},
{WINED3DFMT_R8G8B8A8_UNORM, { 255.0f, 255.0f, 255.0f, 255.0f}, { 0, 8, 16, 24}},
{WINED3DFMT_R8G8B8X8_UNORM, { 255.0f, 255.0f, 255.0f, 255.0f}, { 0, 8, 16, 24}},
{WINED3DFMT_B10G10R10A2_UNORM, { 1023.0f, 1023.0f, 1023.0f, 3.0f}, {20, 10, 0, 30}},
{WINED3DFMT_R10G10B10A2_UNORM, { 1023.0f, 1023.0f, 1023.0f, 3.0f}, { 0, 10, 20, 30}},
{WINED3DFMT_P8_UINT, { 0.0f, 0.0f, 0.0f, 255.0f}, { 0, 0, 0, 0}},
{WINED3DFMT_S1_UINT_D15_UNORM, { 32767.0f, 0.0f, 0.0f, 0.0f}, { 0, 0, 0, 0}},
{WINED3DFMT_D16_UNORM, { 65535.0f, 0.0f, 0.0f, 0.0f}, { 0, 0, 0, 0}},
{WINED3DFMT_B8G8R8A8_UNORM, { 255.0f, 255.0f, 255.0f, 255.0f}, {16, 8, 0, 24}},
{WINED3DFMT_B8G8R8X8_UNORM, { 255.0f, 255.0f, 255.0f, 255.0f}, {16, 8, 0, 24}},
{WINED3DFMT_B8G8R8_UNORM, { 255.0f, 255.0f, 255.0f, 255.0f}, {16, 8, 0, 24}},
{WINED3DFMT_B5G6R5_UNORM, { 31.0f, 63.0f, 31.0f, 0.0f}, {11, 5, 0, 0}},
{WINED3DFMT_B5G5R5A1_UNORM, { 31.0f, 31.0f, 31.0f, 1.0f}, {10, 5, 0, 15}},
{WINED3DFMT_B5G5R5X1_UNORM, { 31.0f, 31.0f, 31.0f, 1.0f}, {10, 5, 0, 15}},
{WINED3DFMT_R8_UNORM, { 255.0f, 0.0f, 0.0f, 0.0f}, { 0, 0, 0, 0}},
{WINED3DFMT_A8_UNORM, { 0.0f, 0.0f, 0.0f, 255.0f}, { 0, 0, 0, 0}},
{WINED3DFMT_B4G4R4A4_UNORM, { 15.0f, 15.0f, 15.0f, 15.0f}, { 8, 4, 0, 12}},
{WINED3DFMT_B4G4R4X4_UNORM, { 15.0f, 15.0f, 15.0f, 15.0f}, { 8, 4, 0, 12}},
{WINED3DFMT_B2G3R3_UNORM, { 7.0f, 7.0f, 3.0f, 0.0f}, { 5, 2, 0, 0}},
{WINED3DFMT_R8G8B8A8_UNORM, { 255.0f, 255.0f, 255.0f, 255.0f}, { 0, 8, 16, 24}},
{WINED3DFMT_R8G8B8X8_UNORM, { 255.0f, 255.0f, 255.0f, 255.0f}, { 0, 8, 16, 24}},
{WINED3DFMT_B10G10R10A2_UNORM, { 1023.0f, 1023.0f, 1023.0f, 3.0f}, {20, 10, 0, 30}},
{WINED3DFMT_R10G10B10A2_UNORM, { 1023.0f, 1023.0f, 1023.0f, 3.0f}, { 0, 10, 20, 30}},
{WINED3DFMT_R16_UNORM, { 65535.0f, 0.0f, 0.0f, 0.0f}, { 0, 0, 0, 0}},
{WINED3DFMT_R16G16_UNORM, { 65535.0f, 65535.0f, 0.0f, 0.0f}, { 0, 16, 0, 0}},
{WINED3DFMT_R16G16B16A16_UNORM,{ 65535.0f, 65535.0f, 65535.0f,65535.0f}, { 0, 16, 32, 48}},
{WINED3DFMT_P8_UINT, { 0.0f, 0.0f, 0.0f, 255.0f}, { 0, 0, 0, 0}},
{WINED3DFMT_S1_UINT_D15_UNORM, { 32767.0f, 0.0f, 0.0f, 0.0f}, { 0, 0, 0, 0}},
{WINED3DFMT_D16_UNORM, { 65535.0f, 0.0f, 0.0f, 0.0f}, { 0, 0, 0, 0}},
};
I think this is fine, although it's perhaps also worth pointing out that between WINED3DFMT_FLAG_NORMALISED and the component sizes and offsets in struct wined3d_format, we actually already have the required information for most of these. The main exception would be odd formats like WINED3DFMT_P8_UINT.
Signed-off-by: Stefan Dösinger stefan@codeweavers.com --- dlls/wined3d/resource.c | 10 ++++++++++ 1 file changed, 10 insertions(+)
diff --git a/dlls/wined3d/resource.c b/dlls/wined3d/resource.c index 8c3ca59bedc..9dab928dbc0 100644 --- a/dlls/wined3d/resource.c +++ b/dlls/wined3d/resource.c @@ -634,6 +634,16 @@ void wined3d_resource_memory_colour_fill(struct wined3d_resource *resource, wined3d_texture_get_level_box(texture_from_resource(resource), level, &level_box); }
+ /* Fast and simple path for setting everything to zero. The C library's memset is + * more sophisticated than our code below. Also this works for block formats, which + * we still need to zero-initialize for newly created resources. */ + if (!colour->r && !colour->g && !colour->b && !colour->a && + !memcmp(box, &level_box, sizeof(*box))) + { + memset(map->data, 0, map->slice_pitch * box->back); + return; + } + w = min(box->right, level_box.right) - min(box->left, level_box.right); h = min(box->bottom, level_box.bottom) - min(box->top, level_box.bottom); if (resource->type != WINED3D_RTYPE_TEXTURE_3D)
On Sun, 1 May 2022 at 20:39, Stefan Dösinger stefan@codeweavers.com wrote:
+ /* Fast and simple path for setting everything to zero. The C library's memset is
+  * more sophisticated than our code below. Also this works for block formats, which
+  * we still need to zero-initialize for newly created resources. */
+ if (!colour->r && !colour->g && !colour->b && !colour->a &&
+         !memcmp(box, &level_box, sizeof(*box)))
+ {
The "&&" should go to the start of the next line.
Signed-off-by: Stefan Dösinger stefan@codeweavers.com
---
This patch probably conflicts with 234450. I can resend as required. --- dlls/wined3d/resource.c | 6 ++++++ dlls/wined3d/texture.c | 16 ++++++++++++---- 2 files changed, 18 insertions(+), 4 deletions(-)
diff --git a/dlls/wined3d/resource.c b/dlls/wined3d/resource.c index 9dab928dbc0..34bbd4ffcbb 100644 --- a/dlls/wined3d/resource.c +++ b/dlls/wined3d/resource.c @@ -644,6 +644,12 @@ void wined3d_resource_memory_colour_fill(struct wined3d_resource *resource, return; }
+ if (resource->format_flags & WINED3DFMT_FLAG_BLOCKS) + { + FIXME("Not implemented for format %s.\n", debug_d3dformat(resource->format->id)); + return; + } + w = min(box->right, level_box.right) - min(box->left, level_box.right); h = min(box->bottom, level_box.bottom) - min(box->top, level_box.bottom); if (resource->type != WINED3D_RTYPE_TEXTURE_3D) diff --git a/dlls/wined3d/texture.c b/dlls/wined3d/texture.c index 5fd38b49132..6756d1c31e1 100644 --- a/dlls/wined3d/texture.c +++ b/dlls/wined3d/texture.c @@ -815,7 +815,6 @@ BOOL wined3d_texture_load_location(struct wined3d_texture *texture, { struct wined3d_bo_address source, destination; struct wined3d_range range; - void *map_ptr;
if (!wined3d_texture_prepare_location(texture, sub_resource_idx, context, location)) return FALSE; @@ -824,12 +823,21 @@ BOOL wined3d_texture_load_location(struct wined3d_texture *texture, range.size = texture->sub_resources[sub_resource_idx].size; if (current & WINED3D_LOCATION_CLEARED) { + static const struct wined3d_color black; + unsigned int level_idx = sub_resource_idx % texture->level_count; + struct wined3d_map_desc map; + struct wined3d_box box; + + wined3d_texture_get_pitch(texture, level_idx, &map.row_pitch, &map.slice_pitch); if (destination.buffer_object) - map_ptr = wined3d_context_map_bo_address(context, &destination, range.size, + map.data = wined3d_context_map_bo_address(context, &destination, range.size, WINED3D_MAP_WRITE | WINED3D_MAP_DISCARD); else - map_ptr = destination.addr; - memset(map_ptr, 0, range.size); + map.data = destination.addr; + + wined3d_texture_get_level_box(texture, level_idx, &box); + wined3d_resource_memory_colour_fill(&texture->resource, level_idx, &map, &black, &box); + if (destination.buffer_object) wined3d_context_unmap_bo_address(context, &destination, 1, &range); }
Signed-off-by: Stefan Dösinger stefan@codeweavers.com --- dlls/wined3d/context_vk.c | 57 +++++++++++++++++++++++++++++++--- dlls/wined3d/texture.c | 40 ++++++++++++++---------- dlls/wined3d/wined3d_private.h | 9 ++++++ 3 files changed, 85 insertions(+), 21 deletions(-)
diff --git a/dlls/wined3d/context_vk.c b/dlls/wined3d/context_vk.c index b19220c2e34..b9d3375a008 100644 --- a/dlls/wined3d/context_vk.c +++ b/dlls/wined3d/context_vk.c @@ -2494,7 +2494,7 @@ static bool wined3d_context_vk_begin_render_pass(struct wined3d_context_vk *cont VkCommandBuffer vk_command_buffer, const struct wined3d_state *state, const struct wined3d_vk_info *vk_info) { struct wined3d_device_vk *device_vk = wined3d_device_vk(context_vk->c.device); - static const VkClearValue clear_values[WINED3D_MAX_RENDER_TARGETS + 1]; + VkClearValue clear_values[WINED3D_MAX_RENDER_TARGETS + 1]; VkImageView vk_views[WINED3D_MAX_RENDER_TARGETS + 1]; unsigned int fb_width, fb_height, fb_layer_count; struct wined3d_rendertarget_view_vk *rtv_vk; @@ -2503,6 +2503,7 @@ static bool wined3d_context_vk_begin_render_pass(struct wined3d_context_vk *cont struct wined3d_query_vk *query_vk; VkRenderPassBeginInfo begin_info; unsigned int attachment_count, i; + struct wined3d_texture *texture; VkFramebufferCreateInfo fb_desc; VkResult vr;
@@ -2519,6 +2520,7 @@ static bool wined3d_context_vk_begin_render_pass(struct wined3d_context_vk *cont begin_info.clearValueCount = 0; for (i = 0; i < ARRAY_SIZE(state->fb.render_targets); ++i) { + if (!(view = state->fb.render_targets[i]) || view->format->id == WINED3DFMT_NULL) continue;
@@ -2534,10 +2536,40 @@ static bool wined3d_context_vk_begin_render_pass(struct wined3d_context_vk *cont if (view->layer_count < fb_layer_count) fb_layer_count = view->layer_count; context_vk->rt_count = i + 1; - ++attachment_count;
if (wined3d_rendertarget_view_get_locations(view) & WINED3D_LOCATION_CLEARED) - begin_info.clearValueCount = attachment_count; + { + VkClearColorValue *c = &clear_values[attachment_count].color; + const struct wined3d_color *colour; + + if (view->resource->type == WINED3D_RTYPE_BUFFER) + { + static const struct wined3d_color zero; + colour = &zero; + } + else + { + texture = texture_from_resource(view->resource); + colour = &texture->sub_resources[view->sub_resource_idx].clear_value.colour; + } + + if (view->format_flags & WINED3DFMT_FLAG_INTEGER) + { + c->int32[0] = colour->r; + c->int32[1] = colour->g; + c->int32[2] = colour->b; + c->int32[3] = colour->a; + } + else + { + c->float32[0] = colour->r; + c->float32[1] = colour->g; + c->float32[2] = colour->b; + c->float32[3] = colour->a; + } + begin_info.clearValueCount = attachment_count + 1; + } + ++attachment_count; }
if ((view = state->fb.depth_stencil)) @@ -2553,10 +2585,25 @@ static bool wined3d_context_vk_begin_render_pass(struct wined3d_context_vk *cont fb_height = view->height; if (view->layer_count < fb_layer_count) fb_layer_count = view->layer_count; - ++attachment_count;
if (wined3d_rendertarget_view_get_locations(view) & WINED3D_LOCATION_CLEARED) - begin_info.clearValueCount = attachment_count; + { + VkClearDepthStencilValue *c = &clear_values[attachment_count].depthStencil; + + if (view->resource->type == WINED3D_RTYPE_BUFFER) + { + c->depth = 0.0f; + c->stencil = 0; + } + else + { + texture = texture_from_resource(view->resource); + c->depth = texture->sub_resources[view->sub_resource_idx].clear_value.depth; + c->stencil = texture->sub_resources[view->sub_resource_idx].clear_value.stencil; + } + begin_info.clearValueCount = attachment_count + 1; + } + ++attachment_count; }
if (!(context_vk->vk_render_pass = wined3d_context_vk_get_render_pass(context_vk, &state->fb, diff --git a/dlls/wined3d/texture.c b/dlls/wined3d/texture.c index 6756d1c31e1..dd7f225a2ca 100644 --- a/dlls/wined3d/texture.c +++ b/dlls/wined3d/texture.c @@ -823,10 +823,22 @@ BOOL wined3d_texture_load_location(struct wined3d_texture *texture, range.size = texture->sub_resources[sub_resource_idx].size; if (current & WINED3D_LOCATION_CLEARED) { - static const struct wined3d_color black; unsigned int level_idx = sub_resource_idx % texture->level_count; struct wined3d_map_desc map; struct wined3d_box box; + struct wined3d_color c; + + if (texture->resource.format->flags[WINED3D_GL_RES_TYPE_TEX_2D] + & WINED3DFMT_FLAG_DEPTH_STENCIL) + { + c.r = texture->sub_resources[sub_resource_idx].clear_value.depth; + c.g = texture->sub_resources[sub_resource_idx].clear_value.stencil; + c.b = c.a = 0.0f; + } + else + { + c = texture->sub_resources[sub_resource_idx].clear_value.colour; + }
wined3d_texture_get_pitch(texture, level_idx, &map.row_pitch, &map.slice_pitch); if (destination.buffer_object) @@ -836,7 +848,7 @@ BOOL wined3d_texture_load_location(struct wined3d_texture *texture, map.data = destination.addr;
wined3d_texture_get_level_box(texture, level_idx, &box); - wined3d_resource_memory_colour_fill(&texture->resource, level_idx, &map, &black, &box); + wined3d_resource_memory_colour_fill(&texture->resource, level_idx, &map, &c, &box);
if (destination.buffer_object) wined3d_context_unmap_bo_address(context, &destination, 1, &range); @@ -6607,18 +6619,12 @@ static void vk_blitter_clear_rendertargets(struct wined3d_context_vk *context_vk
if (is_full_clear(view, draw_rect, clear_rects)) { - if (!colour->r && !colour->g && !colour->b && !colour->a) - { - wined3d_rendertarget_view_validate_location(view, WINED3D_LOCATION_CLEARED); - wined3d_rendertarget_view_invalidate_location(view, ~WINED3D_LOCATION_CLEARED); - delay_count++; - continue; - } - else - { - TRACE_(d3d_perf)("non-zero clear\n"); - wined3d_rendertarget_view_prepare_location(view, &context_vk->c, view->resource->draw_binding); - } + struct wined3d_texture *texture = texture_from_resource(view->resource); + wined3d_rendertarget_view_validate_location(view, WINED3D_LOCATION_CLEARED); + wined3d_rendertarget_view_invalidate_location(view, ~WINED3D_LOCATION_CLEARED); + texture->sub_resources[view->sub_resource_idx].clear_value.colour = *colour; + delay_count++; + continue; } else { @@ -6669,8 +6675,7 @@ static void vk_blitter_clear_rendertargets(struct wined3d_context_vk *context_vk if (view->format->stencil_size) full_flags |= WINED3DCLEAR_STENCIL;
- if (!is_full_clear(view, draw_rect, clear_rects) - || depth || stencil || (flags & full_flags) != full_flags) + if (!is_full_clear(view, draw_rect, clear_rects) || (flags & full_flags) != full_flags) { wined3d_rendertarget_view_load_location(view, &context_vk->c, view->resource->draw_binding); wined3d_rendertarget_view_validate_location(view, view->resource->draw_binding); @@ -6691,6 +6696,9 @@ static void vk_blitter_clear_rendertargets(struct wined3d_context_vk *context_vk } else { + struct wined3d_texture *texture = texture_from_resource(view->resource); + texture->sub_resources[view->sub_resource_idx].clear_value.depth = depth; + texture->sub_resources[view->sub_resource_idx].clear_value.stencil = stencil; wined3d_rendertarget_view_validate_location(view, WINED3D_LOCATION_CLEARED); wined3d_rendertarget_view_invalidate_location(view, ~WINED3D_LOCATION_CLEARED); flags &= ~(WINED3DCLEAR_ZBUFFER | WINED3DCLEAR_STENCIL); diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h index 98a19e0fd65..a7a26ce1626 100644 --- a/dlls/wined3d/wined3d_private.h +++ b/dlls/wined3d/wined3d_private.h @@ -4560,6 +4560,15 @@ struct wined3d_texture uint32_t map_flags; DWORD locations; struct wined3d_bo *bo; + union + { + struct wined3d_color colour; + struct + { + float depth; + unsigned int stencil; + }; + } clear_value;
void *user_memory; } *sub_resources;
On Sun, 1 May 2022 at 20:39, Stefan Dösinger stefan@codeweavers.com wrote:
@@ -2519,6 +2520,7 @@ static bool wined3d_context_vk_begin_render_pass(struct wined3d_context_vk *cont begin_info.clearValueCount = 0; for (i = 0; i < ARRAY_SIZE(state->fb.render_targets); ++i) {
if (!(view = state->fb.render_targets[i]) || view->format->id == WINED3DFMT_NULL) continue;
Stray newline.
So it covers more formats and provokes the clear-to-sysmem path with the Vulkan renderer.
Signed-off-by: Stefan Dösinger stefan@codeweavers.com
---
Yes, the d3d9 support in the Vulkan renderer is good enough for this test to run and produce useful results for map-after-clear. --- dlls/d3d9/tests/visual.c | 132 +++++++++++++++++++++++++++++++-------- 1 file changed, 106 insertions(+), 26 deletions(-)
diff --git a/dlls/d3d9/tests/visual.c b/dlls/d3d9/tests/visual.c index f20f1ecb369..70114c0ed85 100644 --- a/dlls/d3d9/tests/visual.c +++ b/dlls/d3d9/tests/visual.c @@ -1548,10 +1548,10 @@ done:
static void color_fill_test(void) { + unsigned int fill_a, expected_a; IDirect3DSurface9 *surface; IDirect3DTexture9 *texture; D3DCOLOR fill_color, color; - DWORD fill_a, expected_a; IDirect3DDevice9 *device; IDirect3D9 *d3d; ULONG refcount; @@ -1580,20 +1580,39 @@ static void color_fill_test(void) { CHECK_FILL_VALUE = 0x1, BLOCKS = 0x2, + FLOAT_VALUES = 0x4, } flags; - DWORD fill_value; + unsigned int fill_i[4]; + float fill_f[4]; } formats[] = { - {D3DFMT_A8R8G8B8, "D3DFMT_A8R8G8B8", CHECK_FILL_VALUE, 0xdeadbeef}, + {D3DFMT_A8R8G8B8, "D3DFMT_A8R8G8B8", CHECK_FILL_VALUE, + {0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef}}, /* D3DFMT_X8R8G8B8 either set X = A or X = 0, depending on the driver. */ - {D3DFMT_R5G6B5, "D3DFMT_R5G6B5", CHECK_FILL_VALUE, 0xadfdadfd}, - {D3DFMT_G16R16, "D3DFMT_G16R16", CHECK_FILL_VALUE, 0xbebeadad}, + {D3DFMT_R5G6B5, "D3DFMT_R5G6B5", CHECK_FILL_VALUE, + {0xadfdadfd, 0xadfdadfd, 0xadfdadfd, 0xadfdadfd}}, + {D3DFMT_G16R16, "D3DFMT_G16R16", CHECK_FILL_VALUE, + {0xbebeadad, 0xbebeadad, 0xbebeadad, 0xbebeadad}}, + {D3DFMT_A16B16G16R16, "D3DFMT_A16B16G16R16", CHECK_FILL_VALUE, + {0xbebeadad, 0xdedeefef, 0xbebeadad, 0xdedeefef}}, /* Real hardware reliably fills the surface with the blue channel but * the testbot fills it with 0x00. Wine incorrectly uses the alpha * channel. Don't bother checking the result because P8 surfaces are * essentially useless in d3d9. */ - {D3DFMT_P8, "D3DFMT_P8", 0, 0xefefefef}, + {D3DFMT_P8, "D3DFMT_P8", 0, + {0xefefefef, 0xefefefef, 0xefefefef, 0xefefefef}}, + /* Float formats. 
*/ + {D3DFMT_R32F, "D3DFMT_R32F", CHECK_FILL_VALUE | FLOAT_VALUES, + {0, 0, 0, 0}, {0xad / 255.0f, 0xad / 255.0f, 0xad / 255.0f, 0xad / 255.0f}}, + {D3DFMT_A32B32G32R32F, "D3DFMT_A32B32G32R32F", CHECK_FILL_VALUE | FLOAT_VALUES, + {0, 0, 0, 0}, {0xad / 255.0f, 0xbe / 255.0f, 0xef / 255.0f, 0xde / 255.0f}}, + {D3DFMT_R16F, "D3DFMT_R16F", CHECK_FILL_VALUE, + {0x396d396d, 0x396d396d, 0x396d396d, 0x396d396d}}, + {D3DFMT_G16R16F, "D3DFMT_G16R16F", CHECK_FILL_VALUE, + {0x39f5396d, 0x39f5396d, 0x39f5396d, 0x39f5396d}}, + {D3DFMT_A16B16G16R16F, "D3DFMT_A16B16G16R16F", CHECK_FILL_VALUE, + {0x39f5396d, 0x3af63b7f, 0x39f5396d, 0x3af63b7f}}, /* Windows drivers produce different results for these formats. * No driver produces a YUV value that matches the input RGB * value, and no driver produces a proper DXT compression block. @@ -1603,17 +1622,16 @@ static void color_fill_test(void) * * The YUV tests are disabled because they produce a driver-dependent * result on Wine. - * {D3DFMT_YUY2, "D3DFMT_YUY2", BLOCKS, 0}, - * {D3DFMT_UYVY, "D3DFMT_UYVY", BLOCKS, 0}, */ - {D3DFMT_DXT1, "D3DFMT_DXT1", BLOCKS, 0x00000000}, + * {D3DFMT_YUY2, "D3DFMT_YUY2", BLOCKS}, + * {D3DFMT_UYVY, "D3DFMT_UYVY", BLOCKS}, */ + {D3DFMT_DXT1, "D3DFMT_DXT1", BLOCKS}, /* Vendor-specific formats like ATI2N are a non-issue here since they're not * supported as offscreen plain surfaces and do not support D3DUSAGE_RENDERTARGET * when created as texture. */ }; - unsigned int i; - D3DLOCKED_RECT locked_rect; - DWORD *surface_data; static const RECT rect = {4, 4, 8, 8}, rect2 = {5, 5, 7, 7}; + D3DLOCKED_RECT locked_rect; + unsigned int i, j;
window = create_window(); d3d = Direct3DCreate9(D3D_SDK_VERSION); @@ -1732,22 +1750,84 @@ static void color_fill_test(void) ok(SUCCEEDED(hr), "Failed to color fill, hr %#x, fmt=%s.\n", hr, formats[i].name); }
- if (formats[i].flags & CHECK_FILL_VALUE) + if (!(formats[i].flags & CHECK_FILL_VALUE)) { - hr = IDirect3DSurface9_LockRect(surface, &locked_rect, NULL, D3DLOCK_READONLY); - ok(SUCCEEDED(hr), "Failed to lock surface, hr %#x, fmt=%s.\n", hr, formats[i].name); - surface_data = locked_rect.pBits; - fill_a = (surface_data[0] & 0xff000000) >> 24; - expected_a = (formats[i].fill_value & 0xff000000) >> 24; - /* Windows drivers disagree on how to promote the 8 bit per channel - * input argument to 16 bit for D3DFMT_G16R16. */ - ok(color_match(surface_data[0], formats[i].fill_value, 2) && - compare_uint(expected_a, fill_a, 2), - "Expected clear value 0x%08x, got 0x%08x, fmt=%s.\n", - formats[i].fill_value, surface_data[0], formats[i].name); - hr = IDirect3DSurface9_UnlockRect(surface); - ok(SUCCEEDED(hr), "Failed to unlock surface, hr %#x, fmt=%s.\n", hr, formats[i].name); + IDirect3DSurface9_Release(surface); + continue; + } + + hr = IDirect3DSurface9_LockRect(surface, &locked_rect, NULL, 0); + ok(SUCCEEDED(hr), "Failed to lock surface, hr %#x, fmt=%s.\n", hr, formats[i].name); + /* Windows drivers disagree on how to promote the 8 bit per channel + * input argument to 16 bit for D3DFMT_G16R16. 
*/ + if (formats[i].flags & FLOAT_VALUES) + { + const struct vec4 *surface_data = locked_rect.pBits; + ok(compare_vec4(surface_data, formats[i].fill_f[0], formats[i].fill_f[1], + formats[i].fill_f[2], formats[i].fill_f[3], 1), + "Expected clear values %f %f %f %f, got %f %f %f %f, fmt=%s\n", + formats[i].fill_f[0], formats[i].fill_f[1], + formats[i].fill_f[2], formats[i].fill_f[3], + surface_data->x, surface_data->y, surface_data->z, surface_data->w, + formats[i].name); } + else + { + const unsigned int *surface_data = locked_rect.pBits; + for (j = 0; j < 4; ++j) + { + fill_a = (surface_data[j] & 0xff000000) >> 24; + expected_a = (formats[i].fill_i[j] & 0xff000000) >> 24; + ok(color_match(surface_data[j], formats[i].fill_i[j], 2) && + compare_uint(expected_a, fill_a, 2), + "Expected clear value 0x%08x, got 0x%08x, fmt=%s, j=%u.\n", + formats[i].fill_i[j], surface_data[j], formats[i].name, j); + } + } + + /* Fill the surface with something else to make sure the test below doesn't pass + * due to stale contents by accident. */ + memset(locked_rect.pBits, 0x55, locked_rect.Pitch * 32); + + hr = IDirect3DSurface9_UnlockRect(surface); + ok(SUCCEEDED(hr), "Failed to unlock surface, hr %#x, fmt=%s.\n", hr, formats[i].name); + + /* Test clearing "to sysmem". Wined3d's delayed clear will perform the actual clear + * in the lock call and try to fill the sysmem buffer instead of clearing on the + * GPU and downloading it. 
*/ + hr = IDirect3DDevice9_ColorFill(device, surface, NULL, 0xdeadbeef); + ok(SUCCEEDED(hr), "Failed to color fill, hr %#x, fmt=%s.\n", hr, formats[i].name); + hr = IDirect3DSurface9_LockRect(surface, &locked_rect, NULL, D3DLOCK_READONLY); + ok(SUCCEEDED(hr), "Failed to lock surface, hr %#x, fmt=%s.\n", hr, formats[i].name); + + if (formats[i].flags & FLOAT_VALUES) + { + const struct vec4 *surface_data = locked_rect.pBits; + ok(compare_vec4(surface_data, formats[i].fill_f[0], formats[i].fill_f[1], + formats[i].fill_f[2], formats[i].fill_f[3], 1), + "Expected clear values %f %f %f %f, got %f %f %f %f, fmt=%s\n", + formats[i].fill_f[0], formats[i].fill_f[1], + formats[i].fill_f[2], formats[i].fill_f[3], + surface_data->x, surface_data->y, surface_data->z, surface_data->w, + formats[i].name); + } + else + { + const unsigned int *surface_data = locked_rect.pBits; + for (j = 0; j < 4; ++j) + { + fill_a = (surface_data[j] & 0xff000000) >> 24; + expected_a = (formats[i].fill_i[j] & 0xff000000) >> 24; + ok(color_match(surface_data[j], formats[i].fill_i[j], 2) && + compare_uint(expected_a, fill_a, 2), + "Expected clear value 0x%08x, got 0x%08x, fmt=%s, j=%u.\n", + formats[i].fill_i[j], surface_data[j], formats[i].name, j); + } + } + + + hr = IDirect3DSurface9_UnlockRect(surface); + ok(SUCCEEDED(hr), "Failed to unlock surface, hr %#x, fmt=%s.\n", hr, formats[i].name);
IDirect3DSurface9_Release(surface); }
Hi,
While running your changed tests, I think I found new failures. Being a bot and all I'm not very good at pattern recognition, so I might be wrong, but could you please double-check?
Full results can be found at: https://testbot.winehq.org/JobDetails.pl?Key=113898
Your paranoid android.
=== w8adm (32 bit report) ===
d3d9: visual.c:8907: Test failed: Input test: Quad 3(2crd-wrongidx) returned color 0x00ff00ff, expected 0x00ff0080