Signed-off-by: Matteo Bruni <mbruni@codeweavers.com>
---
It turns out that commit 7654d58b047be3a54d814d890bf1a26374559c83 caused a pretty sizeable performance regression (which becomes a lot more significant once wined3d performance improves - I have patches in that area). Let's start fixing this.
dlls/wined3d/device.c | 151 ++++++++++++++++++++---------------------- 1 file changed, 72 insertions(+), 79 deletions(-)
diff --git a/dlls/wined3d/device.c b/dlls/wined3d/device.c index 2297d7de916..87684dc1c46 100644 --- a/dlls/wined3d/device.c +++ b/dlls/wined3d/device.c @@ -3830,12 +3830,67 @@ struct wined3d_texture * CDECL wined3d_device_get_texture(const struct wined3d_d return device->state.textures[stage]; }
+/* Common signature used to call the per-type shader constant setters:
+ * "count" constants starting at index "start_idx", read from "constants". */
+typedef HRESULT (CDECL *wined3d_device_shader_constant_setter)(struct wined3d_device *device,
+        unsigned int start_idx, unsigned int count, const void *constants);
+
+/* Walk a "changed" bitmap and pass every run of consecutive set bits to
+ * shader_constant_setter() as a single (start, count) range.
+ *
+ * device                  Device passed through to the setter.
+ * state                   Not referenced by this helper; kept so existing
+ *                         call sites stay valid.
+ * bitmap                  Array of DWORDs, one bit per constant; at least
+ *                         (bit_count rounded up to a whole DWORD) bits are read.
+ * bit_count               Number of valid bits. Bits at or above this index
+ *                         are ignored, so no range handed to the setter ever
+ *                         extends past bit_count.
+ * data / stride           Constant storage and the size in bytes of one
+ *                         constant; the setter receives data + start * stride.
+ * shader_constant_setter  Called once per run; its HRESULT is not checked.
+ *
+ * ~0u is used as the "no open run" marker for start / last. */
+static void device_apply_shader_constants(struct wined3d_device *device,
+        const struct wined3d_stateblock_state *state,
+        DWORD *bitmap, unsigned int bit_count, const void *data, unsigned int stride,
+        wined3d_device_shader_constant_setter shader_constant_setter)
+{
+    const unsigned int word_bit_count = sizeof(DWORD) * CHAR_BIT;
+    const unsigned int word_count = (bit_count + word_bit_count - 1) / word_bit_count;
+    const unsigned int tail_bit_count = bit_count % word_bit_count;
+    unsigned int i, j, idx, start, last;
+    const BYTE *byte_data = data;
+    DWORD map;
+
+    start = last = ~0u;
+    for (i = 0; i < word_count; ++i)
+    {
+        map = bitmap[i];
+
+        /* The final word may only be partially covered by bit_count. Drop
+         * the bits beyond it; otherwise stale high bits (or an all-ones
+         * word taking the fast path below) would produce a range that
+         * reaches past bit_count. */
+        if (tail_bit_count && i == word_count - 1)
+            map &= ((DWORD)1 << tail_bit_count) - 1;
+
+        if (map == ~0u)
+        {
+            /* Fast path: the whole word is set. Flush the open run if it
+             * does not end right before this word, then start or extend a
+             * run spanning the entire word. */
+            if (last != ~0u && last != i * word_bit_count - 1)
+            {
+                shader_constant_setter(device, start, last - start + 1, &byte_data[start * stride]);
+                start = i * word_bit_count;
+            }
+            if (start == ~0u)
+                start = i * word_bit_count;
+            last = i * word_bit_count + word_bit_count - 1;
+            continue;
+        }
+        while (map)
+        {
+            /* NOTE(review): wined3d_bit_scan() is presumably "return the
+             * lowest set bit and clear it in map"; the run detection below
+             * relies on indices arriving in ascending order - confirm. */
+            j = wined3d_bit_scan(&map);
+            idx = i * word_bit_count + j;
+
+            if (start == ~0u)
+            {
+                /* First set bit seen: open a run. */
+                start = last = idx;
+            }
+            else if (last != idx - 1)
+            {
+                /* Gap since the previous set bit: flush and reopen. */
+                shader_constant_setter(device, start, last - start + 1, &byte_data[start * stride]);
+                start = last = idx;
+            }
+            else
+            {
+                last = idx;
+            }
+        }
+    }
+    /* Flush the run that is still open, if any. */
+    if (start != ~0u)
+        shader_constant_setter(device, start, last - start + 1, &byte_data[start * stride]);
+}
+
 void CDECL wined3d_device_apply_stateblock(struct wined3d_device *device,
         struct wined3d_stateblock *stateblock)
 {
     const struct wined3d_d3d_info *d3d_info = &stateblock->device->adapter->d3d_info;
     const struct wined3d_stateblock_state *state = &stateblock->stateblock_state;
-    unsigned int i, j, count;
+    unsigned int i, j;
+    DWORD map;
TRACE("device %p, stateblock %p.\n", device, stateblock);
@@ -3844,89 +3899,27 @@ void CDECL wined3d_device_apply_stateblock(struct wined3d_device *device, if (stateblock->changed.pixelShader) wined3d_device_set_pixel_shader(device, state->ps);
- count = 0; - for (i = 0; i < d3d_info->limits.vs_uniform_count; ++i) - { - if (stateblock->changed.vs_consts_f[i >> 5] & (1u << (i & 0x1f))) - ++count; - else if (count) - { - wined3d_device_set_vs_consts_f(device, i - count, count, state->vs_consts_f + i - count); - count = 0; - } - } - if (count) - wined3d_device_set_vs_consts_f(device, i - count, count, state->vs_consts_f + i - count); + device_apply_shader_constants(device, state, stateblock->changed.vs_consts_f, d3d_info->limits.vs_uniform_count, + state->vs_consts_f, sizeof(*state->vs_consts_f), (wined3d_device_shader_constant_setter)wined3d_device_set_vs_consts_f);
- count = 0; - for (i = 0; i < WINED3D_MAX_CONSTS_B; ++i) - { - if (stateblock->changed.vertexShaderConstantsB & (1u << i)) - ++count; - else if (count) - { - wined3d_device_set_vs_consts_b(device, i - count, count, state->vs_consts_b + i - count); - count = 0; - } - } - if (count) - wined3d_device_set_vs_consts_b(device, i - count, count, state->vs_consts_b + i - count); + map = stateblock->changed.vertexShaderConstantsB; + device_apply_shader_constants(device, state, &map, WINED3D_MAX_CONSTS_B, state->vs_consts_b, sizeof(*state->vs_consts_b), + (wined3d_device_shader_constant_setter)wined3d_device_set_vs_consts_b);
- count = 0; - for (i = 0; i < WINED3D_MAX_CONSTS_I; ++i) - { - if (stateblock->changed.vertexShaderConstantsI & (1u << i)) - ++count; - else if (count) - { - wined3d_device_set_vs_consts_i(device, i - count, count, state->vs_consts_i + i - count); - count = 0; - } - } - if (count) - wined3d_device_set_vs_consts_i(device, i - count, count, state->vs_consts_i + i - count); + map = stateblock->changed.vertexShaderConstantsI; + device_apply_shader_constants(device, state, &map, WINED3D_MAX_CONSTS_I, state->vs_consts_i, sizeof(*state->vs_consts_i), + (wined3d_device_shader_constant_setter)wined3d_device_set_vs_consts_i);
- count = 0; - for (i = 0; i < d3d_info->limits.ps_uniform_count; ++i) - { - if (stateblock->changed.ps_consts_f[i >> 5] & (1u << (i & 0x1f))) - ++count; - else if (count) - { - wined3d_device_set_ps_consts_f(device, i - count, count, state->ps_consts_f + i - count); - count = 0; - } - } - if (count) - wined3d_device_set_ps_consts_f(device, i - count, count, state->ps_consts_f + i - count); + device_apply_shader_constants(device, state, stateblock->changed.ps_consts_f, d3d_info->limits.ps_uniform_count, + state->ps_consts_f, sizeof(*state->ps_consts_f), (wined3d_device_shader_constant_setter)wined3d_device_set_ps_consts_f);
- count = 0; - for (i = 0; i < WINED3D_MAX_CONSTS_B; ++i) - { - if (stateblock->changed.pixelShaderConstantsB & (1u << i)) - ++count; - else if (count) - { - wined3d_device_set_ps_consts_b(device, i - count, count, state->ps_consts_b + i - count); - count = 0; - } - } - if (count) - wined3d_device_set_ps_consts_b(device, i - count, count, state->ps_consts_b + i - count); + map = stateblock->changed.pixelShaderConstantsB; + device_apply_shader_constants(device, state, &map, WINED3D_MAX_CONSTS_B, state->ps_consts_b, sizeof(*state->ps_consts_b), + (wined3d_device_shader_constant_setter)wined3d_device_set_ps_consts_b);
- count = 0; - for (i = 0; i < WINED3D_MAX_CONSTS_I; ++i) - { - if (stateblock->changed.pixelShaderConstantsI & (1u << i)) - ++count; - else if (count) - { - wined3d_device_set_ps_consts_i(device, i - count, count, state->ps_consts_i + i - count); - count = 0; - } - } - if (count) - wined3d_device_set_ps_consts_i(device, i - count, count, state->ps_consts_i + i - count); + map = stateblock->changed.pixelShaderConstantsI; + device_apply_shader_constants(device, state, &map, WINED3D_MAX_CONSTS_I, state->ps_consts_i, sizeof(*state->ps_consts_i), + (wined3d_device_shader_constant_setter)wined3d_device_set_ps_consts_i);
for (i = 0; i < ARRAY_SIZE(state->light_state->light_map); ++i) {