Signed-off-by: Matteo Bruni mbruni@codeweavers.com --- v2: Simplify setting first / last DWORD in wined3d_bitmap_set_bits(). Rename most variables in there while at it.
dlls/wined3d/device.c | 5 +-- dlls/wined3d/stateblock.c | 69 ++++++++++++++++++++++++---------- dlls/wined3d/wined3d_private.h | 4 +- 3 files changed, 54 insertions(+), 24 deletions(-)
diff --git a/dlls/wined3d/device.c b/dlls/wined3d/device.c index 17d71b3b1a3..3cf621ff46e 100644 --- a/dlls/wined3d/device.c +++ b/dlls/wined3d/device.c @@ -27,7 +27,6 @@ #include "config.h" #include "wine/port.h"
-#include <stdio.h> #ifdef HAVE_FLOAT_H # include <float.h> #endif @@ -3854,7 +3853,7 @@ void CDECL wined3d_device_apply_stateblock(struct wined3d_device *device, count = 0; for (i = 0; i < d3d_info->limits.vs_uniform_count; ++i) { - if (changed->vs_consts_f[i]) + if (wined3d_bitmap_is_set(changed->vs_consts_f, i)) ++count; else if (count) { @@ -3896,7 +3895,7 @@ void CDECL wined3d_device_apply_stateblock(struct wined3d_device *device, count = 0; for (i = 0; i < d3d_info->limits.ps_uniform_count; ++i) { - if (changed->ps_consts_f[i]) + if (wined3d_bitmap_is_set(changed->ps_consts_f, i)) ++count; else if (count) { diff --git a/dlls/wined3d/stateblock.c b/dlls/wined3d/stateblock.c index d787eef0d99..0f5e2d70ea2 100644 --- a/dlls/wined3d/stateblock.c +++ b/dlls/wined3d/stateblock.c @@ -189,7 +189,7 @@ static const DWORD vertex_states_sampler[] = WINED3D_SAMP_DMAP_OFFSET, };
-static inline void stateblock_set_bits(DWORD *map, UINT map_size) +static inline void stateblock_set_all_bits(DWORD *map, UINT map_size) { DWORD mask = (1u << (map_size & 0x1f)) - 1; memset(map, 0xff, (map_size >> 5) * sizeof(*map)); @@ -201,7 +201,6 @@ static void stateblock_savedstates_set_all(struct wined3d_saved_states *states, { unsigned int i;
- /* Single values */ states->indices = 1; states->material = 1; states->viewport = 1; @@ -211,12 +210,11 @@ static void stateblock_savedstates_set_all(struct wined3d_saved_states *states, states->scissorRect = 1; states->blend_state = 1;
- /* Fixed size arrays */ states->streamSource = 0xffff; states->streamFreq = 0xffff; states->textures = 0xfffff; - stateblock_set_bits(states->transform, WINED3D_HIGHEST_TRANSFORM_STATE + 1); - stateblock_set_bits(states->renderState, WINEHIGHEST_RENDER_STATE + 1); + stateblock_set_all_bits(states->transform, WINED3D_HIGHEST_TRANSFORM_STATE + 1); + stateblock_set_all_bits(states->renderState, WINEHIGHEST_RENDER_STATE + 1); for (i = 0; i < WINED3D_MAX_TEXTURES; ++i) states->textureState[i] = 0x3ffff; for (i = 0; i < WINED3D_MAX_COMBINED_SAMPLERS; ++i) states->samplerState[i] = 0x3ffe; states->clipplane = (1u << WINED3D_MAX_CLIP_DISTANCES) - 1; @@ -225,9 +223,8 @@ static void stateblock_savedstates_set_all(struct wined3d_saved_states *states, states->vertexShaderConstantsB = 0xffff; states->vertexShaderConstantsI = 0xffff;
- /* Dynamically sized arrays */ - memset(states->ps_consts_f, TRUE, sizeof(BOOL) * ps_consts); - memset(states->vs_consts_f, TRUE, sizeof(BOOL) * vs_consts); + memset(states->ps_consts_f, 0xffu, sizeof(states->ps_consts_f)); + memset(states->vs_consts_f, 0xffu, sizeof(states->vs_consts_f)); }
static void stateblock_savedstates_set_pixel(struct wined3d_saved_states *states, const DWORD num_constants) @@ -253,7 +250,7 @@ static void stateblock_savedstates_set_pixel(struct wined3d_saved_states *states states->pixelShaderConstantsB = 0xffff; states->pixelShaderConstantsI = 0xffff;
- memset(states->ps_consts_f, TRUE, sizeof(BOOL) * num_constants); + memset(states->ps_consts_f, 0xffu, sizeof(states->ps_consts_f)); }
static void stateblock_savedstates_set_vertex(struct wined3d_saved_states *states, const DWORD num_constants) @@ -281,12 +278,12 @@ static void stateblock_savedstates_set_vertex(struct wined3d_saved_states *state states->vertexShaderConstantsB = 0xffff; states->vertexShaderConstantsI = 0xffff;
- memset(states->vs_consts_f, TRUE, sizeof(BOOL) * num_constants); + memset(states->vs_consts_f, 0xffu, sizeof(states->vs_consts_f)); }
void CDECL wined3d_stateblock_init_contained_states(struct wined3d_stateblock *stateblock) { - const struct wined3d_d3d_info *d3d_info = &stateblock->device->adapter->d3d_info; + const unsigned int word_bit_count = sizeof(*stateblock->changed.vs_consts_f) * CHAR_BIT; unsigned int i, j;
for (i = 0; i <= WINEHIGHEST_RENDER_STATE >> 5; ++i) @@ -313,11 +310,14 @@ void CDECL wined3d_stateblock_init_contained_states(struct wined3d_stateblock *s } }
- for (i = 0; i < d3d_info->limits.vs_uniform_count; ++i) + for (i = 0; i < ARRAY_SIZE(stateblock->changed.vs_consts_f); ++i) { - if (stateblock->changed.vs_consts_f[i]) + DWORD bitmask = stateblock->changed.vs_consts_f[i]; + + while (bitmask) { - stateblock->contained_vs_consts_f[stateblock->num_contained_vs_consts_f] = i; + j = wined3d_bit_scan(&bitmask); + stateblock->contained_vs_consts_f[stateblock->num_contained_vs_consts_f] = i * word_bit_count + j; ++stateblock->num_contained_vs_consts_f; } } @@ -340,11 +340,14 @@ void CDECL wined3d_stateblock_init_contained_states(struct wined3d_stateblock *s } }
- for (i = 0; i < d3d_info->limits.ps_uniform_count; ++i) + for (i = 0; i < ARRAY_SIZE(stateblock->changed.ps_consts_f); ++i) { - if (stateblock->changed.ps_consts_f[i]) + DWORD bitmask = stateblock->changed.ps_consts_f[i]; + + while (bitmask) { - stateblock->contained_ps_consts_f[stateblock->num_contained_ps_consts_f] = i; + j = wined3d_bit_scan(&bitmask); + stateblock->contained_ps_consts_f[stateblock->num_contained_ps_consts_f] = i * word_bit_count + j; ++stateblock->num_contained_ps_consts_f; } } @@ -1333,6 +1336,34 @@ void CDECL wined3d_stateblock_set_vertex_shader(struct wined3d_stateblock *state stateblock->changed.vertexShader = TRUE; }
+static void wined3d_bitmap_set_bits(uint32_t *bitmap, unsigned int start, unsigned int count) +{ + const unsigned int word_bit_count = sizeof(*bitmap) * CHAR_BIT; + const unsigned int shift = start % word_bit_count; + uint32_t mask, last_mask; + unsigned int mask_size; + + bitmap += start / word_bit_count; + mask = ~0u << shift; + mask_size = word_bit_count - shift; + last_mask = (1u << (start + count) % word_bit_count) - 1; + if (mask_size <= count) + { + *bitmap |= mask; + ++bitmap; + count -= mask_size; + mask = ~0u; + } + if (count >= word_bit_count) + { + memset(bitmap, 0xffu, count / word_bit_count * sizeof(*bitmap)); + bitmap += count / word_bit_count; + count = count % word_bit_count; + } + if (count) + *bitmap |= mask & last_mask; +} + HRESULT CDECL wined3d_stateblock_set_vs_consts_f(struct wined3d_stateblock *stateblock, unsigned int start_idx, unsigned int count, const struct wined3d_vec4 *constants) { @@ -1343,7 +1374,7 @@ HRESULT CDECL wined3d_stateblock_set_vs_consts_f(struct wined3d_stateblock *stat return WINED3DERR_INVALIDCALL;
memcpy(&stateblock->stateblock_state.vs_consts_f[start_idx], constants, count * sizeof(*constants)); - memset(&stateblock->changed.vs_consts_f[start_idx], 1, count * sizeof(*stateblock->changed.vs_consts_f)); + wined3d_bitmap_set_bits(stateblock->changed.vs_consts_f, start_idx, count); return WINED3D_OK; }
@@ -1409,7 +1440,7 @@ HRESULT CDECL wined3d_stateblock_set_ps_consts_f(struct wined3d_stateblock *stat return WINED3DERR_INVALIDCALL;
memcpy(&stateblock->stateblock_state.ps_consts_f[start_idx], constants, count * sizeof(*constants)); - memset(&stateblock->changed.ps_consts_f[start_idx], 1, count * sizeof(*stateblock->changed.ps_consts_f)); + wined3d_bitmap_set_bits(stateblock->changed.ps_consts_f, start_idx, count); return WINED3D_OK; }
diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h index d29dfc312b2..af372055dfb 100644 --- a/dlls/wined3d/wined3d_private.h +++ b/dlls/wined3d/wined3d_private.h @@ -3923,10 +3923,10 @@ struct wined3d_saved_states DWORD clipplane; /* WINED3D_MAX_USER_CLIP_PLANES, 32 */ WORD pixelShaderConstantsB; /* WINED3D_MAX_CONSTS_B, 16 */ WORD pixelShaderConstantsI; /* WINED3D_MAX_CONSTS_I, 16 */ - BOOL ps_consts_f[WINED3D_MAX_PS_CONSTS_F]; + DWORD ps_consts_f[WINED3D_MAX_PS_CONSTS_F >> 5]; WORD vertexShaderConstantsB; /* WINED3D_MAX_CONSTS_B, 16 */ WORD vertexShaderConstantsI; /* WINED3D_MAX_CONSTS_I, 16 */ - BOOL vs_consts_f[WINED3D_MAX_VS_CONSTS_F]; + DWORD vs_consts_f[WINED3D_MAX_VS_CONSTS_F >> 5]; DWORD textures : 20; /* WINED3D_MAX_COMBINED_SAMPLERS, 20 */ DWORD indices : 1; DWORD material : 1;
Signed-off-by: Matteo Bruni mbruni@codeweavers.com --- dlls/wined3d/stateblock.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/dlls/wined3d/stateblock.c b/dlls/wined3d/stateblock.c index 0f5e2d70ea2..6e8bebd719a 100644 --- a/dlls/wined3d/stateblock.c +++ b/dlls/wined3d/stateblock.c @@ -1367,10 +1367,13 @@ static void wined3d_bitmap_set_bits(uint32_t *bitmap, unsigned int start, unsign HRESULT CDECL wined3d_stateblock_set_vs_consts_f(struct wined3d_stateblock *stateblock, unsigned int start_idx, unsigned int count, const struct wined3d_vec4 *constants) { + const struct wined3d_d3d_info *d3d_info = &stateblock->device->adapter->d3d_info; + TRACE("stateblock %p, start_idx %u, count %u, constants %p.\n", stateblock, start_idx, count, constants);
- if (!constants || start_idx >= WINED3D_MAX_VS_CONSTS_F || count > WINED3D_MAX_VS_CONSTS_F - start_idx) + if (!constants || start_idx >= d3d_info->limits.vs_uniform_count + || count > d3d_info->limits.vs_uniform_count - start_idx) return WINED3DERR_INVALIDCALL;
memcpy(&stateblock->stateblock_state.vs_consts_f[start_idx], constants, count * sizeof(*constants)); @@ -1433,10 +1436,13 @@ void CDECL wined3d_stateblock_set_pixel_shader(struct wined3d_stateblock *stateb HRESULT CDECL wined3d_stateblock_set_ps_consts_f(struct wined3d_stateblock *stateblock, unsigned int start_idx, unsigned int count, const struct wined3d_vec4 *constants) { + const struct wined3d_d3d_info *d3d_info = &stateblock->device->adapter->d3d_info; + TRACE("stateblock %p, start_idx %u, count %u, constants %p.\n", stateblock, start_idx, count, constants);
- if (!constants || start_idx >= WINED3D_MAX_PS_CONSTS_F || count > WINED3D_MAX_PS_CONSTS_F - start_idx) + if (!constants || start_idx >= d3d_info->limits.ps_uniform_count + || count > d3d_info->limits.ps_uniform_count - start_idx) return WINED3DERR_INVALIDCALL;
memcpy(&stateblock->stateblock_state.ps_consts_f[start_idx], constants, count * sizeof(*constants));
Signed-off-by: Henri Verbeet hverbeet@codeweavers.com
Signed-off-by: Matteo Bruni mbruni@codeweavers.com --- v2: Split the changes to wined3d_stateblock_set_*s_consts_f() to a separate patch.
dlls/wined3d/stateblock.c | 169 +++++++++++++-------------------- dlls/wined3d/wined3d_private.h | 12 --- 2 files changed, 66 insertions(+), 115 deletions(-)
diff --git a/dlls/wined3d/stateblock.c b/dlls/wined3d/stateblock.c index 6e8bebd719a..69210e1fba2 100644 --- a/dlls/wined3d/stateblock.c +++ b/dlls/wined3d/stateblock.c @@ -283,7 +283,6 @@ static void stateblock_savedstates_set_vertex(struct wined3d_saved_states *state
void CDECL wined3d_stateblock_init_contained_states(struct wined3d_stateblock *stateblock) { - const unsigned int word_bit_count = sizeof(*stateblock->changed.vs_consts_f) * CHAR_BIT; unsigned int i, j;
for (i = 0; i <= WINEHIGHEST_RENDER_STATE >> 5; ++i) @@ -310,66 +309,6 @@ void CDECL wined3d_stateblock_init_contained_states(struct wined3d_stateblock *s } }
- for (i = 0; i < ARRAY_SIZE(stateblock->changed.vs_consts_f); ++i) - { - DWORD bitmask = stateblock->changed.vs_consts_f[i]; - - while (bitmask) - { - j = wined3d_bit_scan(&bitmask); - stateblock->contained_vs_consts_f[stateblock->num_contained_vs_consts_f] = i * word_bit_count + j; - ++stateblock->num_contained_vs_consts_f; - } - } - - for (i = 0; i < WINED3D_MAX_CONSTS_I; ++i) - { - if (stateblock->changed.vertexShaderConstantsI & (1u << i)) - { - stateblock->contained_vs_consts_i[stateblock->num_contained_vs_consts_i] = i; - ++stateblock->num_contained_vs_consts_i; - } - } - - for (i = 0; i < WINED3D_MAX_CONSTS_B; ++i) - { - if (stateblock->changed.vertexShaderConstantsB & (1u << i)) - { - stateblock->contained_vs_consts_b[stateblock->num_contained_vs_consts_b] = i; - ++stateblock->num_contained_vs_consts_b; - } - } - - for (i = 0; i < ARRAY_SIZE(stateblock->changed.ps_consts_f); ++i) - { - DWORD bitmask = stateblock->changed.ps_consts_f[i]; - - while (bitmask) - { - j = wined3d_bit_scan(&bitmask); - stateblock->contained_ps_consts_f[stateblock->num_contained_ps_consts_f] = i * word_bit_count + j; - ++stateblock->num_contained_ps_consts_f; - } - } - - for (i = 0; i < WINED3D_MAX_CONSTS_I; ++i) - { - if (stateblock->changed.pixelShaderConstantsI & (1u << i)) - { - stateblock->contained_ps_consts_i[stateblock->num_contained_ps_consts_i] = i; - ++stateblock->num_contained_ps_consts_i; - } - } - - for (i = 0; i < WINED3D_MAX_CONSTS_B; ++i) - { - if (stateblock->changed.pixelShaderConstantsB & (1u << i)) - { - stateblock->contained_ps_consts_b[stateblock->num_contained_ps_consts_b] = i; - ++stateblock->num_contained_ps_consts_b; - } - } - for (i = 0; i < WINED3D_MAX_TEXTURES; ++i) { DWORD map = stateblock->changed.textureState[i]; @@ -778,8 +717,9 @@ static void wined3d_state_record_lights(struct wined3d_light_state *dst_state, void CDECL wined3d_stateblock_capture(struct wined3d_stateblock *stateblock, const struct wined3d_stateblock *device_state) { + const 
unsigned int word_bit_count = sizeof(*stateblock->changed.vs_consts_f) * CHAR_BIT; const struct wined3d_stateblock_state *state = &device_state->stateblock_state; - unsigned int i; + unsigned int i, j, idx; DWORD map;
TRACE("stateblock %p, device_state %p.\n", stateblock, device_state); @@ -795,30 +735,34 @@ void CDECL wined3d_stateblock_capture(struct wined3d_stateblock *stateblock, stateblock->stateblock_state.vs = state->vs; }
- /* Vertex shader float constants. */ - for (i = 0; i < stateblock->num_contained_vs_consts_f; ++i) + for (i = 0; i < ARRAY_SIZE(stateblock->changed.vs_consts_f); ++i) { - unsigned int idx = stateblock->contained_vs_consts_f[i]; + map = stateblock->changed.vs_consts_f[i];
- TRACE("Setting vs_consts_f[%u] to %s.\n", idx, debug_vec4(&state->vs_consts_f[idx])); + while (map) + { + j = wined3d_bit_scan(&map); + idx = i * word_bit_count + j;
- stateblock->stateblock_state.vs_consts_f[idx] = state->vs_consts_f[idx]; + TRACE("Setting vs_consts_f[%u] to %s.\n", idx, debug_vec4(&state->vs_consts_f[idx])); + stateblock->stateblock_state.vs_consts_f[idx] = state->vs_consts_f[idx]; + } }
- /* Vertex shader integer constants. */ - for (i = 0; i < stateblock->num_contained_vs_consts_i; ++i) + map = stateblock->changed.vertexShaderConstantsI; + while (map) { - unsigned int idx = stateblock->contained_vs_consts_i[i]; + idx = wined3d_bit_scan(&map);
TRACE("Setting vs_consts_i[%u] to %s.\n", idx, debug_ivec4(&state->vs_consts_i[idx]));
stateblock->stateblock_state.vs_consts_i[idx] = state->vs_consts_i[idx]; }
- /* Vertex shader boolean constants. */ - for (i = 0; i < stateblock->num_contained_vs_consts_b; ++i) + map = stateblock->changed.vertexShaderConstantsB; + while (map) { - unsigned int idx = stateblock->contained_vs_consts_b[i]; + idx = wined3d_bit_scan(&map);
TRACE("Setting vs_consts_b[%u] to %s.\n", idx, state->vs_consts_b[idx] ? "TRUE" : "FALSE"); @@ -826,30 +770,34 @@ void CDECL wined3d_stateblock_capture(struct wined3d_stateblock *stateblock, stateblock->stateblock_state.vs_consts_b[idx] = state->vs_consts_b[idx]; }
- /* Pixel shader float constants. */ - for (i = 0; i < stateblock->num_contained_ps_consts_f; ++i) + for (i = 0; i < ARRAY_SIZE(stateblock->changed.ps_consts_f); ++i) { - unsigned int idx = stateblock->contained_ps_consts_f[i]; + map = stateblock->changed.ps_consts_f[i];
- TRACE("Setting ps_consts_f[%u] to %s.\n", idx, debug_vec4(&state->ps_consts_f[idx])); + while (map) + { + j = wined3d_bit_scan(&map); + idx = i * word_bit_count + j;
- stateblock->stateblock_state.ps_consts_f[idx] = state->ps_consts_f[idx]; + TRACE("Setting ps_consts_f[%u] to %s.\n", idx, debug_vec4(&state->ps_consts_f[idx])); + stateblock->stateblock_state.ps_consts_f[idx] = state->ps_consts_f[idx]; + } }
- /* Pixel shader integer constants. */ - for (i = 0; i < stateblock->num_contained_ps_consts_i; ++i) + map = stateblock->changed.pixelShaderConstantsI; + while (map) { - unsigned int idx = stateblock->contained_ps_consts_i[i]; + idx = wined3d_bit_scan(&map);
TRACE("Setting ps_consts_i[%u] to %s.\n", idx, debug_ivec4(&state->ps_consts_i[idx]));
stateblock->stateblock_state.ps_consts_i[idx] = state->ps_consts_i[idx]; }
- /* Pixel shader boolean constants. */ - for (i = 0; i < stateblock->num_contained_ps_consts_b; ++i) + map = stateblock->changed.pixelShaderConstantsB; + while (map) { - unsigned int idx = stateblock->contained_ps_consts_b[i]; + idx = wined3d_bit_scan(&map);
TRACE("Setting ps_consts_b[%u] to %s.\n", idx, state->ps_consts_b[idx] ? "TRUE" : "FALSE"); @@ -1051,12 +999,13 @@ void CDECL wined3d_stateblock_capture(struct wined3d_stateblock *stateblock, void CDECL wined3d_stateblock_apply(const struct wined3d_stateblock *stateblock, struct wined3d_stateblock *device_state) { + const unsigned int word_bit_count = sizeof(*stateblock->changed.vs_consts_f) * CHAR_BIT; struct wined3d_stateblock_state *state = &device_state->stateblock_state; struct wined3d_device *device = stateblock->device; struct wined3d_blend_state *blend_state; struct wined3d_color colour; + unsigned int i, j, idx; BOOL set_blend_state; - unsigned int i; DWORD map;
TRACE("stateblock %p, device_state %p.\n", stateblock, device_state); @@ -1071,24 +1020,31 @@ void CDECL wined3d_stateblock_apply(const struct wined3d_stateblock *stateblock, wined3d_device_set_vertex_shader(device, stateblock->stateblock_state.vs); }
- /* Vertex Shader Constants. */ - for (i = 0; i < stateblock->num_contained_vs_consts_f; ++i) + for (i = 0; i < ARRAY_SIZE(stateblock->changed.vs_consts_f); ++i) { - unsigned int idx = stateblock->contained_vs_consts_f[i]; + map = stateblock->changed.vs_consts_f[i];
- state->vs_consts_f[idx] = stateblock->stateblock_state.vs_consts_f[idx]; - wined3d_device_set_vs_consts_f(device, idx, 1, &stateblock->stateblock_state.vs_consts_f[idx]); + while (map) + { + j = wined3d_bit_scan(&map); + idx = i * word_bit_count + j; + + state->vs_consts_f[idx] = stateblock->stateblock_state.vs_consts_f[idx]; + wined3d_device_set_vs_consts_f(device, idx, 1, &stateblock->stateblock_state.vs_consts_f[idx]); + } } - for (i = 0; i < stateblock->num_contained_vs_consts_i; ++i) + map = stateblock->changed.vertexShaderConstantsI; + while (map) { - unsigned int idx = stateblock->contained_vs_consts_i[i]; + idx = wined3d_bit_scan(&map);
state->vs_consts_i[idx] = stateblock->stateblock_state.vs_consts_i[idx]; wined3d_device_set_vs_consts_i(device, idx, 1, &stateblock->stateblock_state.vs_consts_i[idx]); } - for (i = 0; i < stateblock->num_contained_vs_consts_b; ++i) + map = stateblock->changed.vertexShaderConstantsB; + while (map) { - unsigned int idx = stateblock->contained_vs_consts_b[i]; + idx = wined3d_bit_scan(&map);
state->vs_consts_b[idx] = stateblock->stateblock_state.vs_consts_b[idx]; wined3d_device_set_vs_consts_b(device, idx, 1, &stateblock->stateblock_state.vs_consts_b[idx]); @@ -1121,24 +1077,31 @@ void CDECL wined3d_stateblock_apply(const struct wined3d_stateblock *stateblock, wined3d_device_set_pixel_shader(device, stateblock->stateblock_state.ps); }
- /* Pixel Shader Constants. */ - for (i = 0; i < stateblock->num_contained_ps_consts_f; ++i) + for (i = 0; i < ARRAY_SIZE(stateblock->changed.ps_consts_f); ++i) { - unsigned int idx = stateblock->contained_ps_consts_f[i]; + map = stateblock->changed.ps_consts_f[i];
- state->ps_consts_f[idx] = stateblock->stateblock_state.ps_consts_f[idx]; - wined3d_device_set_ps_consts_f(device, idx, 1, &stateblock->stateblock_state.ps_consts_f[idx]); + while (map) + { + j = wined3d_bit_scan(&map); + idx = i * word_bit_count + j; + + state->ps_consts_f[idx] = stateblock->stateblock_state.ps_consts_f[idx]; + wined3d_device_set_ps_consts_f(device, idx, 1, &stateblock->stateblock_state.ps_consts_f[idx]); + } } - for (i = 0; i < stateblock->num_contained_ps_consts_i; ++i) + map = stateblock->changed.pixelShaderConstantsI; + while (map) { - unsigned int idx = stateblock->contained_ps_consts_i[i]; + idx = wined3d_bit_scan(&map);
state->ps_consts_i[idx] = stateblock->stateblock_state.ps_consts_i[idx]; wined3d_device_set_ps_consts_i(device, idx, 1, &stateblock->stateblock_state.ps_consts_i[idx]); } - for (i = 0; i < stateblock->num_contained_ps_consts_b; ++i) + map = stateblock->changed.pixelShaderConstantsB; + while (map) { - unsigned int idx = stateblock->contained_ps_consts_b[i]; + idx = wined3d_bit_scan(&map);
state->ps_consts_b[idx] = stateblock->stateblock_state.ps_consts_b[idx]; wined3d_device_set_ps_consts_b(device, idx, 1, &stateblock->stateblock_state.ps_consts_b[idx]); diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h index af372055dfb..f5edbe7edad 100644 --- a/dlls/wined3d/wined3d_private.h +++ b/dlls/wined3d/wined3d_private.h @@ -3961,18 +3961,6 @@ struct wined3d_stateblock unsigned int num_contained_render_states; DWORD contained_transform_states[WINED3D_HIGHEST_TRANSFORM_STATE + 1]; unsigned int num_contained_transform_states; - DWORD contained_vs_consts_i[WINED3D_MAX_CONSTS_I]; - unsigned int num_contained_vs_consts_i; - DWORD contained_vs_consts_b[WINED3D_MAX_CONSTS_B]; - unsigned int num_contained_vs_consts_b; - DWORD contained_vs_consts_f[WINED3D_MAX_VS_CONSTS_F]; - unsigned int num_contained_vs_consts_f; - DWORD contained_ps_consts_i[WINED3D_MAX_CONSTS_I]; - unsigned int num_contained_ps_consts_i; - DWORD contained_ps_consts_b[WINED3D_MAX_CONSTS_B]; - unsigned int num_contained_ps_consts_b; - DWORD contained_ps_consts_f[WINED3D_MAX_PS_CONSTS_F]; - unsigned int num_contained_ps_consts_f; struct StageState contained_tss_states[WINED3D_MAX_TEXTURES * (WINED3D_HIGHEST_TEXTURE_STATE + 1)]; unsigned int num_contained_tss_states; struct StageState contained_sampler_states[WINED3D_MAX_COMBINED_SAMPLERS * WINED3D_HIGHEST_SAMPLER_STATE];
Signed-off-by: Henri Verbeet hverbeet@codeweavers.com
Signed-off-by: Matteo Bruni mbruni@codeweavers.com --- v2: Introduce wined3d_bitmap_ffs() and wined3d_bitmap_ffz(), implement wined3d_apply_shader_constants() on top of them. Tweak a few additional details while at it, mostly to reduce the amount of changes necessary when using the same function for wined3d_stateblock_capture() and wined3d_stateblock_apply() in followup patches.
dlls/wined3d/device.c | 171 ++++++++++++++++++++++-------------------- 1 file changed, 91 insertions(+), 80 deletions(-)
diff --git a/dlls/wined3d/device.c b/dlls/wined3d/device.c index 3cf621ff46e..4520058eb60 100644 --- a/dlls/wined3d/device.c +++ b/dlls/wined3d/device.c @@ -3832,16 +3832,87 @@ struct wined3d_texture * CDECL wined3d_device_get_texture(const struct wined3d_d return device->state.textures[stage]; }
+/* Count is the total number of bits in the bitmap (i.e. it doesn't depend on start). */ +static unsigned int wined3d_bitmap_ffs(const uint32_t *bitmap, unsigned int start, unsigned int count) +{ + const unsigned int word_bit_count = sizeof(*bitmap) * CHAR_BIT; + const uint32_t *end = bitmap + (count + word_bit_count - 1) / word_bit_count; + const uint32_t *start_ptr = bitmap + start / word_bit_count; + const uint32_t *ptr = start_ptr; + uint32_t map, mask; + + if (ptr >= end) + return ~0u; + + mask = start % word_bit_count ? ~((1u << (start - 1) % word_bit_count) - 1) : 0xffffffffu; + while (!(map = *ptr & mask)) + { + if (++ptr == end) + return ~0u; + mask = ~0u; + } + return (ptr - bitmap) * word_bit_count + wined3d_bit_scan(&map); +} + +static unsigned int wined3d_bitmap_ffz(const uint32_t *bitmap, unsigned int start, unsigned int count) +{ + const unsigned int word_bit_count = sizeof(*bitmap) * CHAR_BIT; + const uint32_t *end = bitmap + (count + word_bit_count - 1) / word_bit_count; + const uint32_t *start_ptr = bitmap + start / word_bit_count; + const uint32_t *ptr = start_ptr; + uint32_t map, mask; + + if (ptr >= end) + return ~0u; + + mask = start % word_bit_count ? 
~((1u << (start - 1) % word_bit_count) - 1) : 0xffffffffu; + while (!(map = ~*ptr & mask)) + { + if (++ptr == end) + return ~0u; + mask = ~0u; + } + return (ptr - bitmap) * word_bit_count + wined3d_bit_scan(&map); +} + +typedef HRESULT (CDECL *wined3d_state_shader_constant_setter)(struct wined3d_device *device, + unsigned int start_idx, unsigned int count, void *constants); + +static void wined3d_apply_shader_constants(struct wined3d_device *device, + struct wined3d_stateblock_state *state, + const DWORD *bitmap, unsigned int bit_count, void *data, unsigned int stride, + wined3d_state_shader_constant_setter shader_constant_setter) +{ + BYTE *byte_data = data; + unsigned int start, end; + + start = 0; + for (;;) + { + start = wined3d_bitmap_ffs(bitmap, start, bit_count); + if (start == ~0u) + return; + + end = wined3d_bitmap_ffz(bitmap, start + 1, bit_count); + if (end == ~0u) + end = bit_count; + + shader_constant_setter(device, start, end - start, &byte_data[start * stride]); + + start = end + 1; + } +} + void CDECL wined3d_device_apply_stateblock(struct wined3d_device *device, struct wined3d_stateblock *stateblock) { - const struct wined3d_d3d_info *d3d_info = &stateblock->device->adapter->d3d_info; const struct wined3d_stateblock_state *state = &stateblock->stateblock_state; const struct wined3d_saved_states *changed = &stateblock->changed; struct wined3d_blend_state *blend_state; struct wined3d_color colour; - unsigned int i, j, count; BOOL set_blend_state; + unsigned int i, j; + DWORD map;
TRACE("device %p, stateblock %p.\n", device, stateblock);
@@ -3850,89 +3921,29 @@ void CDECL wined3d_device_apply_stateblock(struct wined3d_device *device, if (changed->pixelShader) wined3d_device_set_pixel_shader(device, state->ps);
- count = 0; - for (i = 0; i < d3d_info->limits.vs_uniform_count; ++i) - { - if (wined3d_bitmap_is_set(changed->vs_consts_f, i)) - ++count; - else if (count) - { - wined3d_device_set_vs_consts_f(device, i - count, count, state->vs_consts_f + i - count); - count = 0; - } - } - if (count) - wined3d_device_set_vs_consts_f(device, i - count, count, state->vs_consts_f + i - count); + wined3d_apply_shader_constants(device, NULL, changed->vs_consts_f, WINED3D_MAX_VS_CONSTS_F, + (void *)state->vs_consts_f, sizeof(*state->vs_consts_f), + (wined3d_state_shader_constant_setter)wined3d_device_set_vs_consts_f);
- count = 0; - for (i = 0; i < WINED3D_MAX_CONSTS_B; ++i) - { - if (changed->vertexShaderConstantsB & (1u << i)) - ++count; - else if (count) - { - wined3d_device_set_vs_consts_b(device, i - count, count, state->vs_consts_b + i - count); - count = 0; - } - } - if (count) - wined3d_device_set_vs_consts_b(device, i - count, count, state->vs_consts_b + i - count); + map = changed->vertexShaderConstantsB; + wined3d_apply_shader_constants(device, NULL, &map, WINED3D_MAX_CONSTS_B, (void *)state->vs_consts_b, + sizeof(*state->vs_consts_b), (wined3d_state_shader_constant_setter)wined3d_device_set_vs_consts_b);
- count = 0; - for (i = 0; i < WINED3D_MAX_CONSTS_I; ++i) - { - if (changed->vertexShaderConstantsI & (1u << i)) - ++count; - else if (count) - { - wined3d_device_set_vs_consts_i(device, i - count, count, state->vs_consts_i + i - count); - count = 0; - } - } - if (count) - wined3d_device_set_vs_consts_i(device, i - count, count, state->vs_consts_i + i - count); + map = changed->vertexShaderConstantsI; + wined3d_apply_shader_constants(device, NULL, &map, WINED3D_MAX_CONSTS_I, (void *)state->vs_consts_i, + sizeof(*state->vs_consts_i), (wined3d_state_shader_constant_setter)wined3d_device_set_vs_consts_i);
- count = 0; - for (i = 0; i < d3d_info->limits.ps_uniform_count; ++i) - { - if (wined3d_bitmap_is_set(changed->ps_consts_f, i)) - ++count; - else if (count) - { - wined3d_device_set_ps_consts_f(device, i - count, count, state->ps_consts_f + i - count); - count = 0; - } - } - if (count) - wined3d_device_set_ps_consts_f(device, i - count, count, state->ps_consts_f + i - count); + wined3d_apply_shader_constants(device, NULL, changed->ps_consts_f, WINED3D_MAX_PS_CONSTS_F, + (void *)state->ps_consts_f, sizeof(*state->ps_consts_f), + (wined3d_state_shader_constant_setter)wined3d_device_set_ps_consts_f);
- count = 0; - for (i = 0; i < WINED3D_MAX_CONSTS_B; ++i) - { - if (changed->pixelShaderConstantsB & (1u << i)) - ++count; - else if (count) - { - wined3d_device_set_ps_consts_b(device, i - count, count, state->ps_consts_b + i - count); - count = 0; - } - } - if (count) - wined3d_device_set_ps_consts_b(device, i - count, count, state->ps_consts_b + i - count); + map = changed->pixelShaderConstantsB; + wined3d_apply_shader_constants(device, NULL, &map, WINED3D_MAX_CONSTS_B, (void *)state->ps_consts_b, + sizeof(*state->ps_consts_b), (wined3d_state_shader_constant_setter)wined3d_device_set_ps_consts_b);
- count = 0; - for (i = 0; i < WINED3D_MAX_CONSTS_I; ++i) - { - if (changed->pixelShaderConstantsI & (1u << i)) - ++count; - else if (count) - { - wined3d_device_set_ps_consts_i(device, i - count, count, state->ps_consts_i + i - count); - count = 0; - } - } - if (count) - wined3d_device_set_ps_consts_i(device, i - count, count, state->ps_consts_i + i - count); + map = changed->pixelShaderConstantsI; + wined3d_apply_shader_constants(device, NULL, &map, WINED3D_MAX_CONSTS_I, (void *)state->ps_consts_i, + sizeof(*state->ps_consts_i), (wined3d_state_shader_constant_setter)wined3d_device_set_ps_consts_i);
for (i = 0; i < ARRAY_SIZE(state->light_state->light_map); ++i) {
On Fri, 21 Feb 2020 at 01:11, Matteo Bruni mbruni@codeweavers.com wrote:
+/* Count is the total number of bits in the bitmap (i.e. it doesn't depend on start). */ +static unsigned int wined3d_bitmap_ffs(const uint32_t *bitmap, unsigned int start, unsigned int count) +{
One way to make that more obvious would be to move the "count" parameter after "bitmap" instead of "start", and call it something like "bit_count".
- mask = start % word_bit_count ? ~((1u << (start - 1) % word_bit_count) - 1) : 0xffffffffu;
"mask = ~0u << (start % word_bit_count);", right?
- while (!(map = *ptr & mask))
- {
if (++ptr == end)
return ~0u;
mask = ~0u;
- }
Since the mask only does something on the first iteration, how about the following:
map = *ptr & (~0u << (start % word_bit_count)); while (!map) { if (++ptr == end) return ~0u; map = *ptr; }
- return (ptr - bitmap) * word_bit_count + wined3d_bit_scan(&map);
+}
This may not be a problem in practice, but note that this can potentially return a value >= "count" if "count" is not a multiple of "word_bit_count".
+static unsigned int wined3d_bitmap_ffz(const uint32_t *bitmap, unsigned int start, unsigned int count) +{
...
- while (!(map = ~*ptr & mask))
So this line is the main difference with wined3d_bitmap_ffs(). Assuming it wouldn't have any adverse performance effects, that could be unified by replacing "~*ptr" with "*ptr ^ xor_mask", with "xor_mask" being 0 for wined3d_bitmap_ffs() and ~0u for wined3d_bitmap_ffz().
- wined3d_apply_shader_constants(device, NULL, changed->vs_consts_f, WINED3D_MAX_VS_CONSTS_F,
(void *)state->vs_consts_f, sizeof(*state->vs_consts_f),
(wined3d_state_shader_constant_setter)wined3d_device_set_vs_consts_f);
This works, but is a little messy. How do you feel about the following:
struct wined3d_map_range range; ... for (start = 0; ; start = range.offset + range.size) { if (!wined3d_bitmap_get_range(changed->vs_consts_f, WINED3D_MAX_VS_CONSTS_F, start, &range)) break; wined3d_device_set_vs_consts_f(device, range.offset, range.size, &state->vs_consts_f[range.offset]); }
We could conceivably also introduce some kind of WINED3D_BITMAP_FOR_EACH_RANGE macro, although I suspect it may not be worth it.
Signed-off-by: Matteo Bruni mbruni@codeweavers.com --- dlls/wined3d/device.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-)
diff --git a/dlls/wined3d/device.c b/dlls/wined3d/device.c index 4520058eb60..beb58d5f068 100644 --- a/dlls/wined3d/device.c +++ b/dlls/wined3d/device.c @@ -3908,10 +3908,11 @@ void CDECL wined3d_device_apply_stateblock(struct wined3d_device *device, { const struct wined3d_stateblock_state *state = &stateblock->stateblock_state; const struct wined3d_saved_states *changed = &stateblock->changed; + const unsigned int word_bit_count = sizeof(DWORD) * CHAR_BIT; struct wined3d_blend_state *blend_state; struct wined3d_color colour; + unsigned int i, j, idx; BOOL set_blend_state; - unsigned int i, j; DWORD map;
TRACE("device %p, stateblock %p.\n", device, stateblock); @@ -4010,10 +4011,15 @@ void CDECL wined3d_device_apply_stateblock(struct wined3d_device *device, } }
- for (i = 0; i < ARRAY_SIZE(state->transforms); ++i) + for (i = 0; i < ARRAY_SIZE(changed->transform); ++i) { - if (wined3d_bitmap_is_set(changed->transform, i)) - wined3d_device_set_transform(device, i, &state->transforms[i]); + map = changed->transform[i]; + while (map) + { + j = wined3d_bit_scan(&map); + idx = i * word_bit_count + j; + wined3d_device_set_transform(device, idx, &state->transforms[idx]); + } }
if (changed->indices)