If the source and destination slice pitches don't match the slice size and there's more than one slice to copy, we can't use a single memcpy. Fixes blinking objects in Hellblade: Senua's Sacrifice.
Signed-off-by: Jan Sikorski jsikorski@codeweavers.com --- dlls/wined3d/utils.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/dlls/wined3d/utils.c b/dlls/wined3d/utils.c index 9df51872d42..1e1c899523d 100644 --- a/dlls/wined3d/utils.c +++ b/dlls/wined3d/utils.c @@ -5981,17 +5981,19 @@ void wined3d_format_copy_data(const struct wined3d_format *format, const uint8_t unsigned int dst_slice_pitch, unsigned int w, unsigned int h, unsigned int d) { unsigned int row_block_count, row_count, row_size, slice, row; - unsigned int slice_count = d; + unsigned int slice_count = d, slice_size; const uint8_t *src_row; uint8_t *dst_row;
row_block_count = (w + format->block_width - 1) / format->block_width; row_count = (h + format->block_height - 1) / format->block_height; row_size = row_block_count * format->block_byte_count; + slice_size = row_size * row_count;
- if (src_row_pitch == row_size && dst_row_pitch == row_size && src_slice_pitch == dst_slice_pitch) + if (src_row_pitch == row_size && dst_row_pitch == row_size + && ((src_slice_pitch == slice_size && dst_slice_pitch == slice_size) || slice_count == 1)) { - memcpy(dst, src, slice_count * row_count * row_size); + memcpy(dst, src, slice_count * slice_size); return; }
Signed-off-by: Jan Sikorski jsikorski@codeweavers.com --- dlls/wined3d/texture.c | 56 ++++++++++++++++++++++++++++-------------- 1 file changed, 38 insertions(+), 18 deletions(-)
diff --git a/dlls/wined3d/texture.c b/dlls/wined3d/texture.c index a4148b166f2..09f2fd766c5 100644 --- a/dlls/wined3d/texture.c +++ b/dlls/wined3d/texture.c @@ -2137,7 +2137,7 @@ HRESULT CDECL wined3d_texture_add_dirty_region(struct wined3d_texture *texture, }
static void wined3d_texture_gl_upload_bo(const struct wined3d_format *src_format, GLenum target, - unsigned int level, unsigned int src_row_pitch, unsigned int dst_x, unsigned int dst_y, + unsigned int level, unsigned int src_row_pitch, unsigned int src_slice_pitch, unsigned int dst_x, unsigned int dst_y, unsigned int dst_z, unsigned int update_w, unsigned int update_h, unsigned int update_d, const BYTE *addr, BOOL srgb, struct wined3d_texture *dst_texture, const struct wined3d_gl_info *gl_info) @@ -2213,7 +2213,9 @@ static void wined3d_texture_gl_upload_bo(const struct wined3d_format *src_format } else { - unsigned int y, y_count; + unsigned int slice_to_row_pitch = src_slice_pitch / src_row_pitch; + unsigned int y, y_count, z, z_count; + const BYTE *upload_addr;
TRACE("Uploading data, target %#x, level %u, x %u, y %u, z %u, " "w %u, h %u, d %u, format %#x, type %#x, addr %p.\n", @@ -2231,25 +2233,43 @@ static void wined3d_texture_gl_upload_bo(const struct wined3d_format *src_format update_h = 1; }
- for (y = 0; y < y_count; ++y) + if (src_slice_pitch && slice_to_row_pitch * src_row_pitch == src_slice_pitch) { - if (target == GL_TEXTURE_2D_ARRAY || target == GL_TEXTURE_3D) - { - GL_EXTCALL(glTexSubImage3D(target, level, dst_x, dst_y + y, dst_z, - update_w, update_h, update_d, format_gl->format, format_gl->type, addr)); - } - else if (target == GL_TEXTURE_1D) - { - gl_info->gl_ops.gl.p_glTexSubImage1D(target, level, dst_x, - update_w, format_gl->format, format_gl->type, addr); - } - else + gl_info->gl_ops.gl.p_glPixelStorei(GL_UNPACK_IMAGE_HEIGHT, slice_to_row_pitch); + z_count = 1; + } + else + { + z_count = update_d; + update_d = 1; + } + + for (z = 0; z < z_count; ++z) + { + for (y = 0; y < y_count; ++y) { - gl_info->gl_ops.gl.p_glTexSubImage2D(target, level, dst_x, dst_y + y, - update_w, update_h, format_gl->format, format_gl->type, addr); + upload_addr = addr; + if (target == GL_TEXTURE_2D_ARRAY || target == GL_TEXTURE_3D) + { + GL_EXTCALL(glTexSubImage3D(target, level, dst_x, dst_y + y, dst_z + z, + update_w, update_h, update_d, format_gl->format, format_gl->type, upload_addr)); + } + else if (target == GL_TEXTURE_1D) + { + gl_info->gl_ops.gl.p_glTexSubImage1D(target, level, dst_x, + update_w, format_gl->format, format_gl->type, upload_addr); + } + else + { + gl_info->gl_ops.gl.p_glTexSubImage2D(target, level, dst_x, dst_y + y, + update_w, update_h, format_gl->format, format_gl->type, upload_addr); + } + upload_addr += src_row_pitch; } + addr += src_slice_pitch; } gl_info->gl_ops.gl.p_glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); + gl_info->gl_ops.gl.p_glPixelStorei(GL_UNPACK_IMAGE_HEIGHT, 0); checkGLcall("Upload texture data"); } } @@ -2462,7 +2482,7 @@ static void wined3d_texture_gl_upload_data(struct wined3d_context *context, src_format->upload(src_mem, converted_mem, src_row_pitch, src_slice_pitch, dst_row_pitch, dst_slice_pitch, update_w, update_h, 1);
- wined3d_texture_gl_upload_bo(src_format, target, level, dst_row_pitch, dst_x, dst_y, + wined3d_texture_gl_upload_bo(src_format, target, level, dst_row_pitch, dst_slice_pitch, dst_x, dst_y, dst_z + z, update_w, update_h, 1, converted_mem, srgb, dst_texture, gl_info); }
@@ -2477,7 +2497,7 @@ static void wined3d_texture_gl_upload_data(struct wined3d_context *context, checkGLcall("glBindBuffer"); }
- wined3d_texture_gl_upload_bo(src_format, target, level, src_row_pitch, dst_x, dst_y, + wined3d_texture_gl_upload_bo(src_format, target, level, src_row_pitch, src_slice_pitch, dst_x, dst_y, dst_z, update_w, update_h, update_d, bo.addr, srgb, dst_texture, gl_info);
if (bo.buffer_object)
On Wed, 10 Mar 2021 at 15:11, Jan Sikorski jsikorski@codeweavers.com wrote:
static void wined3d_texture_gl_upload_bo(const struct wined3d_format *src_format, GLenum target,
unsigned int level, unsigned int src_row_pitch, unsigned int dst_x, unsigned int dst_y,
unsigned int level, unsigned int src_row_pitch, unsigned int src_slice_pitch, unsigned int dst_x, unsigned int dst_y, unsigned int dst_z, unsigned int update_w, unsigned int update_h, unsigned int update_d, const BYTE *addr, BOOL srgb, struct wined3d_texture *dst_texture, const struct wined3d_gl_info *gl_info)
That line is a bit on the long side. (For reference, there's a soft limit at 100 columns, and a hard limit at 120 columns.)
@@ -2213,7 +2213,9 @@ static void wined3d_texture_gl_upload_bo(const struct wined3d_format *src_format } else {
unsigned int y, y_count;
unsigned int slice_to_row_pitch = src_slice_pitch / src_row_pitch;
"src_row_pitch" may be 0.
if (src_slice_pitch && slice_to_row_pitch * src_row_pitch == src_slice_pitch) {
if (target == GL_TEXTURE_2D_ARRAY || target == GL_TEXTURE_3D)
{
GL_EXTCALL(glTexSubImage3D(target, level, dst_x, dst_y + y, dst_z,
update_w, update_h, update_d, format_gl->format, format_gl->type, addr));
}
else if (target == GL_TEXTURE_1D)
{
gl_info->gl_ops.gl.p_glTexSubImage1D(target, level, dst_x,
update_w, format_gl->format, format_gl->type, addr);
}
else
gl_info->gl_ops.gl.p_glPixelStorei(GL_UNPACK_IMAGE_HEIGHT, slice_to_row_pitch);
z_count = 1;
}
So essentially,
if (src_slice_pitch && src_row_pitch && !(src_slice_pitch % src_row_pitch)) { gl_info->gl_ops.gl.p_glPixelStorei(GL_UNPACK_IMAGE_HEIGHT, src_slice_pitch / src_row_pitch); ... }
right? The row pitch probably needs a similar alignment check, although unaligned pitches are perhaps rare enough that we've never run into them.
for (y = 0; y < y_count; ++y) {
gl_info->gl_ops.gl.p_glTexSubImage2D(target, level, dst_x, dst_y + y,
update_w, update_h, format_gl->format, format_gl->type, addr);
upload_addr = addr;
if (target == GL_TEXTURE_2D_ARRAY || target == GL_TEXTURE_3D)
{
GL_EXTCALL(glTexSubImage3D(target, level, dst_x, dst_y + y, dst_z + z,
update_w, update_h, update_d, format_gl->format, format_gl->type, upload_addr));
}
else if (target == GL_TEXTURE_1D)
{
gl_info->gl_ops.gl.p_glTexSubImage1D(target, level, dst_x,
update_w, format_gl->format, format_gl->type, upload_addr);
}
else
{
gl_info->gl_ops.gl.p_glTexSubImage2D(target, level, dst_x, dst_y + y,
update_w, update_h, format_gl->format, format_gl->type, upload_addr);
}
upload_addr += src_row_pitch; }
Incrementing "upload_addr" does nothing here; it's reset to "addr" again at the start of each loop iteration. It would probably be best to simply use "&addr[z * src_slice_pitch + y * src_row_pitch]".
You'd also need to handle slice pitches for compressed formats.
Signed-off-by: Jan Sikorski jsikorski@codeweavers.com --- dlls/d3d11/tests/d3d11.c | 122 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 122 insertions(+)
diff --git a/dlls/d3d11/tests/d3d11.c b/dlls/d3d11/tests/d3d11.c index 6cdac34c0cd..944e4d2ef57 100644 --- a/dlls/d3d11/tests/d3d11.c +++ b/dlls/d3d11/tests/d3d11.c @@ -204,6 +204,12 @@ static void set_box(D3D11_BOX *box, UINT left, UINT top, UINT front, UINT right, box->back = back; }
+static BOOL is_inside_box(D3D11_BOX *box, UINT x, UINT y, UINT z) +{ + return x >= box->left && x < box->right && y >= box->top && y < box->bottom + && z >= box->front && z < box->back; +} + static ULONG get_refcount(void *iface) { IUnknown *unknown = iface; @@ -13832,6 +13838,121 @@ static void test_update_subresource(void) release_test_context(&test_context); }
+static void test_update_subresource_3d(void) +{ + int x, y, z, left, right, top, bottom, front, back, i; + struct d3d11_test_context test_context; + D3D11_TEXTURE3D_DESC texture_desc; + ID3D11DeviceContext *context; + struct resource_readback rb; + DWORD color, expected_color; + ID3D11Texture3D *texture; + D3D11_BOX box, clear_box; + ID3D11Device *device; + HRESULT hr; + + static const DWORD black_data[3 * 3 * 3] = {0}; + + static const DWORD color_data_packed[] = + { + 0xff000000, 0x0ff00000, 0x00ff0000, + 0x000ff000, 0x0000ff00, 0x00000ff0, + 0x000000ff, 0xf0000000, 0x0f000000, + + 0x00f00000, 0x000f0000, 0x0000f000, + 0x00000f00, 0x000000f0, 0x0000000f, + 0xfff00000, 0x0fff0000, 0x00fff000, + + 0x000fff00, 0x0000fff0, 0x00000fff, + 0xffff0000, 0x0ffff000, 0x00ffff00, + 0x000ffff0, 0x0000ffff, 0xfffff000, + }; + + static const DWORD color_data_with_junk[] = + { + 0xff000000, 0x0ff00000, 0x00ff0000, + 0x000ff000, 0x0000ff00, 0x00000ff0, + 0x000000ff, 0xf0000000, 0x0f000000, + 0xdead1111, /* Junk between slices */ + 0x00f00000, 0x000f0000, 0x0000f000, + 0x00000f00, 0x000000f0, 0x0000000f, + 0xfff00000, 0x0fff0000, 0x00fff000, + 0xdead2222, /* Junk between slices */ + 0x000fff00, 0x0000fff0, 0x00000fff, + 0xffff0000, 0x0ffff000, 0x00ffff00, + 0x000ffff0, 0x0000ffff, 0xfffff000, + }; + + static const DWORD *color_data_array[] = {color_data_packed, color_data_with_junk}; + static const unsigned int slice_pitches[] = {9, 10}; + + if (!init_test_context(&test_context, NULL)) + return; + + device = test_context.device; + context = test_context.immediate_context; + + texture_desc.Width = 3; + texture_desc.Height = 3; + texture_desc.Depth = 3; + texture_desc.MipLevels = 1; + texture_desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + texture_desc.Usage = D3D11_USAGE_DEFAULT; + texture_desc.BindFlags = D3D11_BIND_SHADER_RESOURCE; + texture_desc.CPUAccessFlags = 0; + texture_desc.MiscFlags = 0; + + set_box(&clear_box, 0, 0, 0, 3, 3, 3); + + hr = ID3D11Device_CreateTexture3D(device, 
&texture_desc, NULL, &texture); + ok(SUCCEEDED(hr), "Failed to create 2d texture, hr %#x.\n", hr); + + for (i = 0; i < ARRAY_SIZE(color_data_array); ++i) + { + const DWORD *color_data = color_data_array[i]; + unsigned int slice_pitch = slice_pitches[i]; + + for (left = 0; left < 3; ++left) + for (right = left + 1; right <= 3; ++right) + for (top = 0; top < 3; ++top) + for (bottom = top + 1; bottom <= 3; ++bottom) + for (front = 0; front < 3; ++front) + for (back = front + 1; back <= 3; ++back) + { + ID3D11DeviceContext_UpdateSubresource(context, (ID3D11Resource *)texture, 0, &clear_box, + black_data, 0, 0); + + set_box(&box, left, top, front, right, bottom, back); + ID3D11DeviceContext_UpdateSubresource(context, (ID3D11Resource *)texture, 0, &box, + color_data + left + 3 * top + slice_pitch * front, 3 * sizeof(*color_data), + slice_pitch * sizeof(*color_data)); + + get_texture3d_readback(texture, 0, &rb); + + for (z = 0; z < 3; ++z) + { + for (y = 0; y < 3; ++y) + { + for (x = 0; x < 3; ++x) + { + color = get_readback_color(&rb, x, y, z); + expected_color = 0; + if (is_inside_box(&box, x, y, z)) + expected_color = color_data[x + 3 * y + slice_pitch * z]; + ok(compare_uint(color, expected_color, 0), "Box (%d,%d,%d)-(%d,%d,%d): Expected %x, got %x at %d %d %d.\n", + left, top, front, right, bottom, back, expected_color, color, x, y, z); + } + } + } + + release_resource_readback(&rb); + } + } + + ID3D11Texture3D_Release(texture); + release_test_context(&test_context); +} + static void test_copy_subresource_region(void) { ID3D11Texture2D *dst_texture, *src_texture; @@ -32197,6 +32318,7 @@ START_TEST(d3d11) queue_test(test_fragment_coords); queue_test(test_initial_texture_data); queue_test(test_update_subresource); + queue_test(test_update_subresource_3d); queue_test(test_copy_subresource_region); queue_test(test_copy_subresource_region_1d); queue_test(test_copy_subresource_region_3d);
On Wed, 10 Mar 2021 at 15:11, Jan Sikorski jsikorski@codeweavers.com wrote:
dlls/d3d11/tests/d3d11.c | 122 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 122 insertions(+)
Do you have an equivalent d3d10core test as well?
+static void test_update_subresource_3d(void) +{
- int x, y, z, left, right, top, bottom, front, back, i;
These should be unsigned int.
- static const DWORD color_data_with_junk[] =
- {
0xff000000, 0x0ff00000, 0x00ff0000,
0x000ff000, 0x0000ff00, 0x00000ff0,
0x000000ff, 0xf0000000, 0x0f000000,
0xdead1111, /* Junk between slices */
0x00f00000, 0x000f0000, 0x0000f000,
0x00000f00, 0x000000f0, 0x0000000f,
0xfff00000, 0x0fff0000, 0x00fff000,
0xdead2222, /* Junk between slices */
0x000fff00, 0x0000fff0, 0x00000fff,
0xffff0000, 0x0ffff000, 0x00ffff00,
0x000ffff0, 0x0000ffff, 0xfffff000,
- };
The formal term is "padding", I think.
- for (i = 0; i < ARRAY_SIZE(color_data_array); ++i)
- {
...
for (left = 0; left < 3; ++left)
for (right = left + 1; right <= 3; ++right)
for (top = 0; top < 3; ++top)
for (bottom = top + 1; bottom <= 3; ++bottom)
for (front = 0; front < 3; ++front)
for (back = front + 1; back <= 3; ++back)
{
...
for (z = 0; z < 3; ++z)
{
for (y = 0; y < 3; ++y)
{
for (x = 0; x < 3; ++x)
{
That's a lot of nested loops. Do we really need all of them?
ok(compare_uint(color, expected_color, 0), "Box (%d,%d,%d)-(%d,%d,%d): Expected %x, got %x at %d %d %d.\n",
left, top, front, right, bottom, back, expected_color, color, x, y, z);
Please use 0x%08x for colours and %u for the rest. Line continuations should use a double indent.