Signed-off-by: Matteo Bruni <mbruni@codeweavers.com>
---
 dlls/wined3d/device.c          |  5 ++-
 dlls/wined3d/stateblock.c      | 63 ++++++++++++++++++++++++----------
 dlls/wined3d/wined3d_private.h |  4 +--
 3 files changed, 48 insertions(+), 24 deletions(-)
diff --git a/dlls/wined3d/device.c b/dlls/wined3d/device.c index 84056437414..2297d7de916 100644 --- a/dlls/wined3d/device.c +++ b/dlls/wined3d/device.c @@ -27,7 +27,6 @@ #include "config.h" #include "wine/port.h"
-#include <stdio.h> #ifdef HAVE_FLOAT_H # include <float.h> #endif @@ -3848,7 +3847,7 @@ void CDECL wined3d_device_apply_stateblock(struct wined3d_device *device, count = 0; for (i = 0; i < d3d_info->limits.vs_uniform_count; ++i) { - if (stateblock->changed.vs_consts_f[i]) + if (stateblock->changed.vs_consts_f[i >> 5] & (1u << (i & 0x1f))) ++count; else if (count) { @@ -3890,7 +3889,7 @@ void CDECL wined3d_device_apply_stateblock(struct wined3d_device *device, count = 0; for (i = 0; i < d3d_info->limits.ps_uniform_count; ++i) { - if (stateblock->changed.ps_consts_f[i]) + if (stateblock->changed.ps_consts_f[i >> 5] & (1u << (i & 0x1f))) ++count; else if (count) { diff --git a/dlls/wined3d/stateblock.c b/dlls/wined3d/stateblock.c index 499d23fbbd3..6d1562cc60e 100644 --- a/dlls/wined3d/stateblock.c +++ b/dlls/wined3d/stateblock.c @@ -189,7 +189,7 @@ static const DWORD vertex_states_sampler[] = WINED3D_SAMP_DMAP_OFFSET, };
-static inline void stateblock_set_bits(DWORD *map, UINT map_size) +static inline void stateblock_set_all_bits(DWORD *map, UINT map_size) { DWORD mask = (1u << (map_size & 0x1f)) - 1; memset(map, 0xff, (map_size >> 5) * sizeof(*map)); @@ -201,7 +201,6 @@ static void stateblock_savedstates_set_all(struct wined3d_saved_states *states, { unsigned int i;
- /* Single values */ states->indices = 1; states->material = 1; states->viewport = 1; @@ -210,12 +209,11 @@ static void stateblock_savedstates_set_all(struct wined3d_saved_states *states, states->vertexShader = 1; states->scissorRect = 1;
- /* Fixed size arrays */ states->streamSource = 0xffff; states->streamFreq = 0xffff; states->textures = 0xfffff; - stateblock_set_bits(states->transform, WINED3D_HIGHEST_TRANSFORM_STATE + 1); - stateblock_set_bits(states->renderState, WINEHIGHEST_RENDER_STATE + 1); + stateblock_set_all_bits(states->transform, WINED3D_HIGHEST_TRANSFORM_STATE + 1); + stateblock_set_all_bits(states->renderState, WINEHIGHEST_RENDER_STATE + 1); for (i = 0; i < WINED3D_MAX_TEXTURES; ++i) states->textureState[i] = 0x3ffff; for (i = 0; i < WINED3D_MAX_COMBINED_SAMPLERS; ++i) states->samplerState[i] = 0x3ffe; states->clipplane = (1u << WINED3D_MAX_CLIP_DISTANCES) - 1; @@ -224,9 +222,8 @@ static void stateblock_savedstates_set_all(struct wined3d_saved_states *states, states->vertexShaderConstantsB = 0xffff; states->vertexShaderConstantsI = 0xffff;
- /* Dynamically sized arrays */ - memset(states->ps_consts_f, TRUE, sizeof(BOOL) * ps_consts); - memset(states->vs_consts_f, TRUE, sizeof(BOOL) * vs_consts); + memset(states->ps_consts_f, 0xffu, sizeof(states->ps_consts_f)); + memset(states->vs_consts_f, 0xffu, sizeof(states->vs_consts_f)); }
static void stateblock_savedstates_set_pixel(struct wined3d_saved_states *states, const DWORD num_constants) @@ -252,7 +249,7 @@ static void stateblock_savedstates_set_pixel(struct wined3d_saved_states *states states->pixelShaderConstantsB = 0xffff; states->pixelShaderConstantsI = 0xffff;
- memset(states->ps_consts_f, TRUE, sizeof(BOOL) * num_constants); + memset(states->ps_consts_f, 0xffu, sizeof(states->ps_consts_f)); }
static void stateblock_savedstates_set_vertex(struct wined3d_saved_states *states, const DWORD num_constants) @@ -279,12 +276,12 @@ static void stateblock_savedstates_set_vertex(struct wined3d_saved_states *state states->vertexShaderConstantsB = 0xffff; states->vertexShaderConstantsI = 0xffff;
- memset(states->vs_consts_f, TRUE, sizeof(BOOL) * num_constants); + memset(states->vs_consts_f, 0xffu, sizeof(states->vs_consts_f)); }
void CDECL wined3d_stateblock_init_contained_states(struct wined3d_stateblock *stateblock) { - const struct wined3d_d3d_info *d3d_info = &stateblock->device->adapter->d3d_info; + const unsigned int word_bit_count = sizeof(*stateblock->changed.vs_consts_f) * CHAR_BIT; unsigned int i, j;
for (i = 0; i <= WINEHIGHEST_RENDER_STATE >> 5; ++i) @@ -311,11 +308,14 @@ void CDECL wined3d_stateblock_init_contained_states(struct wined3d_stateblock *s } }
- for (i = 0; i < d3d_info->limits.vs_uniform_count; ++i) + for (i = 0; i < ARRAY_SIZE(stateblock->changed.vs_consts_f); ++i) { - if (stateblock->changed.vs_consts_f[i]) + DWORD bitmask = stateblock->changed.vs_consts_f[i]; + + while (bitmask) { - stateblock->contained_vs_consts_f[stateblock->num_contained_vs_consts_f] = i; + j = wined3d_bit_scan(&bitmask); + stateblock->contained_vs_consts_f[stateblock->num_contained_vs_consts_f] = i * word_bit_count + j; ++stateblock->num_contained_vs_consts_f; } } @@ -338,11 +338,14 @@ void CDECL wined3d_stateblock_init_contained_states(struct wined3d_stateblock *s } }
- for (i = 0; i < d3d_info->limits.ps_uniform_count; ++i) + for (i = 0; i < ARRAY_SIZE(stateblock->changed.ps_consts_f); ++i) { - if (stateblock->changed.ps_consts_f[i]) + DWORD bitmask = stateblock->changed.ps_consts_f[i]; + + while (bitmask) { - stateblock->contained_ps_consts_f[stateblock->num_contained_ps_consts_f] = i; + j = wined3d_bit_scan(&bitmask); + stateblock->contained_ps_consts_f[stateblock->num_contained_ps_consts_f] = i * word_bit_count + j; ++stateblock->num_contained_ps_consts_f; } } @@ -1280,6 +1283,28 @@ void CDECL wined3d_stateblock_set_vertex_shader(struct wined3d_stateblock *state stateblock->changed.vertexShader = TRUE; }
+static void wined3d_bitmask_set_bits(DWORD *bitmask, unsigned int offset, unsigned int count) +{ + const unsigned int word_bit_count = sizeof(*bitmask) * CHAR_BIT; + const unsigned int shift = offset % word_bit_count; + + bitmask += offset / word_bit_count; + *bitmask |= ~0u >> (word_bit_count - min(count, word_bit_count)) << shift; + ++bitmask; + count -= min(count, word_bit_count - shift); + if (!count) + return; + if (count >= word_bit_count) + { + memset(bitmask, 0xffu, count / CHAR_BIT); + bitmask += count / word_bit_count; + count = count % word_bit_count; + if (!count) + return; + } + *bitmask |= (1u << count) - 1; +} + HRESULT CDECL wined3d_stateblock_set_vs_consts_f(struct wined3d_stateblock *stateblock, unsigned int start_idx, unsigned int count, const struct wined3d_vec4 *constants) { @@ -1290,7 +1315,7 @@ HRESULT CDECL wined3d_stateblock_set_vs_consts_f(struct wined3d_stateblock *stat return WINED3DERR_INVALIDCALL;
memcpy(&stateblock->stateblock_state.vs_consts_f[start_idx], constants, count * sizeof(*constants)); - memset(&stateblock->changed.vs_consts_f[start_idx], 1, count * sizeof(*stateblock->changed.vs_consts_f)); + wined3d_bitmask_set_bits(stateblock->changed.vs_consts_f, start_idx, count); return WINED3D_OK; }
@@ -1356,7 +1381,7 @@ HRESULT CDECL wined3d_stateblock_set_ps_consts_f(struct wined3d_stateblock *stat return WINED3DERR_INVALIDCALL;
memcpy(&stateblock->stateblock_state.ps_consts_f[start_idx], constants, count * sizeof(*constants)); - memset(&stateblock->changed.ps_consts_f[start_idx], 1, count * sizeof(*stateblock->changed.ps_consts_f)); + wined3d_bitmask_set_bits(stateblock->changed.ps_consts_f, start_idx, count); return WINED3D_OK; }
diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h index 0e1e55d52bf..c8b22832e65 100644 --- a/dlls/wined3d/wined3d_private.h +++ b/dlls/wined3d/wined3d_private.h @@ -3919,10 +3919,10 @@ struct wined3d_saved_states DWORD clipplane; /* WINED3D_MAX_USER_CLIP_PLANES, 32 */ WORD pixelShaderConstantsB; /* WINED3D_MAX_CONSTS_B, 16 */ WORD pixelShaderConstantsI; /* WINED3D_MAX_CONSTS_I, 16 */ - BOOL ps_consts_f[WINED3D_MAX_PS_CONSTS_F]; + DWORD ps_consts_f[WINED3D_MAX_PS_CONSTS_F >> 5]; WORD vertexShaderConstantsB; /* WINED3D_MAX_CONSTS_B, 16 */ WORD vertexShaderConstantsI; /* WINED3D_MAX_CONSTS_I, 16 */ - BOOL vs_consts_f[WINED3D_MAX_VS_CONSTS_F]; + DWORD vs_consts_f[WINED3D_MAX_VS_CONSTS_F >> 5]; DWORD textures : 20; /* WINED3D_MAX_COMBINED_SAMPLERS, 20 */ DWORD indices : 1; DWORD material : 1;
Signed-off-by: Matteo Bruni <mbruni@codeweavers.com>
---
 dlls/wined3d/stateblock.c      | 178 ++++++++++++++-------------------
 dlls/wined3d/wined3d_private.h |  12 ---
 2 files changed, 75 insertions(+), 115 deletions(-)
diff --git a/dlls/wined3d/stateblock.c b/dlls/wined3d/stateblock.c index 6d1562cc60e..c4927d53d36 100644 --- a/dlls/wined3d/stateblock.c +++ b/dlls/wined3d/stateblock.c @@ -281,7 +281,6 @@ static void stateblock_savedstates_set_vertex(struct wined3d_saved_states *state
void CDECL wined3d_stateblock_init_contained_states(struct wined3d_stateblock *stateblock) { - const unsigned int word_bit_count = sizeof(*stateblock->changed.vs_consts_f) * CHAR_BIT; unsigned int i, j;
for (i = 0; i <= WINEHIGHEST_RENDER_STATE >> 5; ++i) @@ -308,66 +307,6 @@ void CDECL wined3d_stateblock_init_contained_states(struct wined3d_stateblock *s } }
- for (i = 0; i < ARRAY_SIZE(stateblock->changed.vs_consts_f); ++i) - { - DWORD bitmask = stateblock->changed.vs_consts_f[i]; - - while (bitmask) - { - j = wined3d_bit_scan(&bitmask); - stateblock->contained_vs_consts_f[stateblock->num_contained_vs_consts_f] = i * word_bit_count + j; - ++stateblock->num_contained_vs_consts_f; - } - } - - for (i = 0; i < WINED3D_MAX_CONSTS_I; ++i) - { - if (stateblock->changed.vertexShaderConstantsI & (1u << i)) - { - stateblock->contained_vs_consts_i[stateblock->num_contained_vs_consts_i] = i; - ++stateblock->num_contained_vs_consts_i; - } - } - - for (i = 0; i < WINED3D_MAX_CONSTS_B; ++i) - { - if (stateblock->changed.vertexShaderConstantsB & (1u << i)) - { - stateblock->contained_vs_consts_b[stateblock->num_contained_vs_consts_b] = i; - ++stateblock->num_contained_vs_consts_b; - } - } - - for (i = 0; i < ARRAY_SIZE(stateblock->changed.ps_consts_f); ++i) - { - DWORD bitmask = stateblock->changed.ps_consts_f[i]; - - while (bitmask) - { - j = wined3d_bit_scan(&bitmask); - stateblock->contained_ps_consts_f[stateblock->num_contained_ps_consts_f] = i * word_bit_count + j; - ++stateblock->num_contained_ps_consts_f; - } - } - - for (i = 0; i < WINED3D_MAX_CONSTS_I; ++i) - { - if (stateblock->changed.pixelShaderConstantsI & (1u << i)) - { - stateblock->contained_ps_consts_i[stateblock->num_contained_ps_consts_i] = i; - ++stateblock->num_contained_ps_consts_i; - } - } - - for (i = 0; i < WINED3D_MAX_CONSTS_B; ++i) - { - if (stateblock->changed.pixelShaderConstantsB & (1u << i)) - { - stateblock->contained_ps_consts_b[stateblock->num_contained_ps_consts_b] = i; - ++stateblock->num_contained_ps_consts_b; - } - } - for (i = 0; i < WINED3D_MAX_TEXTURES; ++i) { DWORD map = stateblock->changed.textureState[i]; @@ -762,8 +701,9 @@ static void wined3d_state_record_lights(struct wined3d_light_state *dst_state, void CDECL wined3d_stateblock_capture(struct wined3d_stateblock *stateblock, const struct wined3d_stateblock *device_state) { + const 
unsigned int word_bit_count = sizeof(*stateblock->changed.vs_consts_f) * CHAR_BIT; const struct wined3d_stateblock_state *state = &device_state->stateblock_state; - unsigned int i; + unsigned int i, j, idx; DWORD map;
TRACE("stateblock %p, device_state %p.\n", stateblock, device_state); @@ -779,30 +719,34 @@ void CDECL wined3d_stateblock_capture(struct wined3d_stateblock *stateblock, stateblock->stateblock_state.vs = state->vs; }
- /* Vertex shader float constants. */ - for (i = 0; i < stateblock->num_contained_vs_consts_f; ++i) + for (i = 0; i < ARRAY_SIZE(stateblock->changed.vs_consts_f); ++i) { - unsigned int idx = stateblock->contained_vs_consts_f[i]; + map = stateblock->changed.vs_consts_f[i];
- TRACE("Setting vs_consts_f[%u] to %s.\n", idx, debug_vec4(&state->vs_consts_f[idx])); + while (map) + { + j = wined3d_bit_scan(&map); + idx = i * word_bit_count + j;
- stateblock->stateblock_state.vs_consts_f[idx] = state->vs_consts_f[idx]; + TRACE("Setting vs_consts_f[%u] to %s.\n", idx, debug_vec4(&state->vs_consts_f[idx])); + stateblock->stateblock_state.vs_consts_f[idx] = state->vs_consts_f[idx]; + } }
- /* Vertex shader integer constants. */ - for (i = 0; i < stateblock->num_contained_vs_consts_i; ++i) + map = stateblock->changed.vertexShaderConstantsI; + while (map) { - unsigned int idx = stateblock->contained_vs_consts_i[i]; + idx = wined3d_bit_scan(&map);
TRACE("Setting vs_consts_i[%u] to %s.\n", idx, debug_ivec4(&state->vs_consts_i[idx]));
stateblock->stateblock_state.vs_consts_i[idx] = state->vs_consts_i[idx]; }
- /* Vertex shader boolean constants. */ - for (i = 0; i < stateblock->num_contained_vs_consts_b; ++i) + map = stateblock->changed.vertexShaderConstantsB; + while (map) { - unsigned int idx = stateblock->contained_vs_consts_b[i]; + idx = wined3d_bit_scan(&map);
TRACE("Setting vs_consts_b[%u] to %s.\n", idx, state->vs_consts_b[idx] ? "TRUE" : "FALSE"); @@ -810,30 +754,34 @@ void CDECL wined3d_stateblock_capture(struct wined3d_stateblock *stateblock, stateblock->stateblock_state.vs_consts_b[idx] = state->vs_consts_b[idx]; }
- /* Pixel shader float constants. */ - for (i = 0; i < stateblock->num_contained_ps_consts_f; ++i) + for (i = 0; i < ARRAY_SIZE(stateblock->changed.ps_consts_f); ++i) { - unsigned int idx = stateblock->contained_ps_consts_f[i]; + map = stateblock->changed.ps_consts_f[i];
- TRACE("Setting ps_consts_f[%u] to %s.\n", idx, debug_vec4(&state->ps_consts_f[idx])); + while (map) + { + j = wined3d_bit_scan(&map); + idx = i * word_bit_count + j;
- stateblock->stateblock_state.ps_consts_f[idx] = state->ps_consts_f[idx]; + TRACE("Setting ps_consts_f[%u] to %s.\n", idx, debug_vec4(&state->ps_consts_f[idx])); + stateblock->stateblock_state.ps_consts_f[idx] = state->ps_consts_f[idx]; + } }
- /* Pixel shader integer constants. */ - for (i = 0; i < stateblock->num_contained_ps_consts_i; ++i) + map = stateblock->changed.pixelShaderConstantsI; + while (map) { - unsigned int idx = stateblock->contained_ps_consts_i[i]; + idx = wined3d_bit_scan(&map);
TRACE("Setting ps_consts_i[%u] to %s.\n", idx, debug_ivec4(&state->ps_consts_i[idx]));
stateblock->stateblock_state.ps_consts_i[idx] = state->ps_consts_i[idx]; }
- /* Pixel shader boolean constants. */ - for (i = 0; i < stateblock->num_contained_ps_consts_b; ++i) + map = stateblock->changed.pixelShaderConstantsB; + while (map) { - unsigned int idx = stateblock->contained_ps_consts_b[i]; + idx = wined3d_bit_scan(&map);
TRACE("Setting ps_consts_b[%u] to %s.\n", idx, state->ps_consts_b[idx] ? "TRUE" : "FALSE"); @@ -1026,9 +974,11 @@ void CDECL wined3d_stateblock_capture(struct wined3d_stateblock *stateblock, void CDECL wined3d_stateblock_apply(const struct wined3d_stateblock *stateblock, struct wined3d_stateblock *device_state) { + const unsigned int word_bit_count = sizeof(*stateblock->changed.vs_consts_f) * CHAR_BIT; struct wined3d_stateblock_state *state = &device_state->stateblock_state; struct wined3d_device *device = stateblock->device; - unsigned int i; + const struct wined3d_d3d_info *d3d_info = &device->adapter->d3d_info; + unsigned int i, j, idx; DWORD map;
TRACE("stateblock %p, device_state %p.\n", stateblock, device_state); @@ -1044,23 +994,31 @@ void CDECL wined3d_stateblock_apply(const struct wined3d_stateblock *stateblock, }
/* Vertex Shader Constants. */ - for (i = 0; i < stateblock->num_contained_vs_consts_f; ++i) + for (i = 0; i < d3d_info->limits.vs_uniform_count / word_bit_count; ++i) { - unsigned int idx = stateblock->contained_vs_consts_f[i]; + map = stateblock->changed.vs_consts_f[i]; + + while (map) + { + j = wined3d_bit_scan(&map); + idx = i * word_bit_count + j;
- state->vs_consts_f[idx] = stateblock->stateblock_state.vs_consts_f[idx]; - wined3d_device_set_vs_consts_f(device, idx, 1, &stateblock->stateblock_state.vs_consts_f[idx]); + state->vs_consts_f[idx] = stateblock->stateblock_state.vs_consts_f[idx]; + wined3d_device_set_vs_consts_f(device, idx, 1, &stateblock->stateblock_state.vs_consts_f[idx]); + } } - for (i = 0; i < stateblock->num_contained_vs_consts_i; ++i) + map = stateblock->changed.vertexShaderConstantsI; + while (map) { - unsigned int idx = stateblock->contained_vs_consts_i[i]; + idx = wined3d_bit_scan(&map);
state->vs_consts_i[idx] = stateblock->stateblock_state.vs_consts_i[idx]; wined3d_device_set_vs_consts_i(device, idx, 1, &stateblock->stateblock_state.vs_consts_i[idx]); } - for (i = 0; i < stateblock->num_contained_vs_consts_b; ++i) + map = stateblock->changed.vertexShaderConstantsB; + while (map) { - unsigned int idx = stateblock->contained_vs_consts_b[i]; + idx = wined3d_bit_scan(&map);
state->vs_consts_b[idx] = stateblock->stateblock_state.vs_consts_b[idx]; wined3d_device_set_vs_consts_b(device, idx, 1, &stateblock->stateblock_state.vs_consts_b[idx]); @@ -1094,23 +1052,31 @@ void CDECL wined3d_stateblock_apply(const struct wined3d_stateblock *stateblock, }
/* Pixel Shader Constants. */ - for (i = 0; i < stateblock->num_contained_ps_consts_f; ++i) + for (i = 0; i < d3d_info->limits.ps_uniform_count / word_bit_count; ++i) { - unsigned int idx = stateblock->contained_ps_consts_f[i]; + map = stateblock->changed.ps_consts_f[i]; + + while (map) + { + j = wined3d_bit_scan(&map); + idx = i * word_bit_count + j;
- state->ps_consts_f[idx] = stateblock->stateblock_state.ps_consts_f[idx]; - wined3d_device_set_ps_consts_f(device, idx, 1, &stateblock->stateblock_state.ps_consts_f[idx]); + state->ps_consts_f[idx] = stateblock->stateblock_state.ps_consts_f[idx]; + wined3d_device_set_ps_consts_f(device, idx, 1, &stateblock->stateblock_state.ps_consts_f[idx]); + } } - for (i = 0; i < stateblock->num_contained_ps_consts_i; ++i) + map = stateblock->changed.pixelShaderConstantsI; + while (map) { - unsigned int idx = stateblock->contained_ps_consts_i[i]; + idx = wined3d_bit_scan(&map);
state->ps_consts_i[idx] = stateblock->stateblock_state.ps_consts_i[idx]; wined3d_device_set_ps_consts_i(device, idx, 1, &stateblock->stateblock_state.ps_consts_i[idx]); } - for (i = 0; i < stateblock->num_contained_ps_consts_b; ++i) + map = stateblock->changed.pixelShaderConstantsB; + while (map) { - unsigned int idx = stateblock->contained_ps_consts_b[i]; + idx = wined3d_bit_scan(&map);
state->ps_consts_b[idx] = stateblock->stateblock_state.ps_consts_b[idx]; wined3d_device_set_ps_consts_b(device, idx, 1, &stateblock->stateblock_state.ps_consts_b[idx]); @@ -1308,10 +1274,13 @@ static void wined3d_bitmask_set_bits(DWORD *bitmask, unsigned int offset, unsign HRESULT CDECL wined3d_stateblock_set_vs_consts_f(struct wined3d_stateblock *stateblock, unsigned int start_idx, unsigned int count, const struct wined3d_vec4 *constants) { + const struct wined3d_d3d_info *d3d_info = &stateblock->device->adapter->d3d_info; + TRACE("stateblock %p, start_idx %u, count %u, constants %p.\n", stateblock, start_idx, count, constants);
- if (!constants || start_idx >= WINED3D_MAX_VS_CONSTS_F || count > WINED3D_MAX_VS_CONSTS_F - start_idx) + if (!constants || start_idx >= d3d_info->limits.vs_uniform_count + || count > d3d_info->limits.vs_uniform_count - start_idx) return WINED3DERR_INVALIDCALL;
memcpy(&stateblock->stateblock_state.vs_consts_f[start_idx], constants, count * sizeof(*constants)); @@ -1374,10 +1343,13 @@ void CDECL wined3d_stateblock_set_pixel_shader(struct wined3d_stateblock *stateb HRESULT CDECL wined3d_stateblock_set_ps_consts_f(struct wined3d_stateblock *stateblock, unsigned int start_idx, unsigned int count, const struct wined3d_vec4 *constants) { + const struct wined3d_d3d_info *d3d_info = &stateblock->device->adapter->d3d_info; + TRACE("stateblock %p, start_idx %u, count %u, constants %p.\n", stateblock, start_idx, count, constants);
- if (!constants || start_idx >= WINED3D_MAX_PS_CONSTS_F || count > WINED3D_MAX_PS_CONSTS_F - start_idx) + if (!constants || start_idx >= d3d_info->limits.ps_uniform_count + || count > d3d_info->limits.ps_uniform_count - start_idx) return WINED3DERR_INVALIDCALL;
memcpy(&stateblock->stateblock_state.ps_consts_f[start_idx], constants, count * sizeof(*constants)); diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h index c8b22832e65..41eaef86ab1 100644 --- a/dlls/wined3d/wined3d_private.h +++ b/dlls/wined3d/wined3d_private.h @@ -3956,18 +3956,6 @@ struct wined3d_stateblock unsigned int num_contained_render_states; DWORD contained_transform_states[WINED3D_HIGHEST_TRANSFORM_STATE + 1]; unsigned int num_contained_transform_states; - DWORD contained_vs_consts_i[WINED3D_MAX_CONSTS_I]; - unsigned int num_contained_vs_consts_i; - DWORD contained_vs_consts_b[WINED3D_MAX_CONSTS_B]; - unsigned int num_contained_vs_consts_b; - DWORD contained_vs_consts_f[WINED3D_MAX_VS_CONSTS_F]; - unsigned int num_contained_vs_consts_f; - DWORD contained_ps_consts_i[WINED3D_MAX_CONSTS_I]; - unsigned int num_contained_ps_consts_i; - DWORD contained_ps_consts_b[WINED3D_MAX_CONSTS_B]; - unsigned int num_contained_ps_consts_b; - DWORD contained_ps_consts_f[WINED3D_MAX_PS_CONSTS_F]; - unsigned int num_contained_ps_consts_f; struct StageState contained_tss_states[WINED3D_MAX_TEXTURES * (WINED3D_HIGHEST_TEXTURE_STATE + 1)]; unsigned int num_contained_tss_states; struct StageState contained_sampler_states[WINED3D_MAX_COMBINED_SAMPLERS * WINED3D_HIGHEST_SAMPLER_STATE];
On Mon, 10 Feb 2020 at 23:07, Matteo Bruni mbruni@codeweavers.com wrote:
@@ -1308,10 +1274,13 @@ static void wined3d_bitmask_set_bits(DWORD *bitmask, unsigned int offset, unsign HRESULT CDECL wined3d_stateblock_set_vs_consts_f(struct wined3d_stateblock *stateblock, unsigned int start_idx, unsigned int count, const struct wined3d_vec4 *constants) {
+    const struct wined3d_d3d_info *d3d_info = &stateblock->device->adapter->d3d_info;
+
     TRACE("stateblock %p, start_idx %u, count %u, constants %p.\n", stateblock, start_idx, count, constants);

-    if (!constants || start_idx >= WINED3D_MAX_VS_CONSTS_F || count > WINED3D_MAX_VS_CONSTS_F - start_idx)
+    if (!constants || start_idx >= d3d_info->limits.vs_uniform_count
+            || count > d3d_info->limits.vs_uniform_count - start_idx)
         return WINED3DERR_INVALIDCALL;
memcpy(&stateblock->stateblock_state.vs_consts_f[start_idx], constants, count * sizeof(*constants));
@@ -1374,10 +1343,13 @@ void CDECL wined3d_stateblock_set_pixel_shader(struct wined3d_stateblock *stateb HRESULT CDECL wined3d_stateblock_set_ps_consts_f(struct wined3d_stateblock *stateblock, unsigned int start_idx, unsigned int count, const struct wined3d_vec4 *constants) {
+    const struct wined3d_d3d_info *d3d_info = &stateblock->device->adapter->d3d_info;
+
     TRACE("stateblock %p, start_idx %u, count %u, constants %p.\n", stateblock, start_idx, count, constants);

-    if (!constants || start_idx >= WINED3D_MAX_PS_CONSTS_F || count > WINED3D_MAX_PS_CONSTS_F - start_idx)
+    if (!constants || start_idx >= d3d_info->limits.ps_uniform_count
+            || count > d3d_info->limits.ps_uniform_count - start_idx)
         return WINED3DERR_INVALIDCALL;
memcpy(&stateblock->stateblock_state.ps_consts_f[start_idx], constants, count * sizeof(*constants));
Those seem like unrelated changes.
On Tue, Feb 11, 2020 at 5:33 PM Henri Verbeet hverbeet@gmail.com wrote:
On Mon, 10 Feb 2020 at 23:07, Matteo Bruni mbruni@codeweavers.com wrote:
@@ -1308,10 +1274,13 @@ static void wined3d_bitmask_set_bits(DWORD *bitmask, unsigned int offset, unsign HRESULT CDECL wined3d_stateblock_set_vs_consts_f(struct wined3d_stateblock *stateblock, unsigned int start_idx, unsigned int count, const struct wined3d_vec4 *constants) {
+    const struct wined3d_d3d_info *d3d_info = &stateblock->device->adapter->d3d_info;
+
     TRACE("stateblock %p, start_idx %u, count %u, constants %p.\n", stateblock, start_idx, count, constants);

-    if (!constants || start_idx >= WINED3D_MAX_VS_CONSTS_F || count > WINED3D_MAX_VS_CONSTS_F - start_idx)
+    if (!constants || start_idx >= d3d_info->limits.vs_uniform_count
+            || count > d3d_info->limits.vs_uniform_count - start_idx)
         return WINED3DERR_INVALIDCALL;
memcpy(&stateblock->stateblock_state.vs_consts_f[start_idx], constants, count * sizeof(*constants));
@@ -1374,10 +1343,13 @@ void CDECL wined3d_stateblock_set_pixel_shader(struct wined3d_stateblock *stateb HRESULT CDECL wined3d_stateblock_set_ps_consts_f(struct wined3d_stateblock *stateblock, unsigned int start_idx, unsigned int count, const struct wined3d_vec4 *constants) {
+    const struct wined3d_d3d_info *d3d_info = &stateblock->device->adapter->d3d_info;
+
     TRACE("stateblock %p, start_idx %u, count %u, constants %p.\n", stateblock, start_idx, count, constants);

-    if (!constants || start_idx >= WINED3D_MAX_PS_CONSTS_F || count > WINED3D_MAX_PS_CONSTS_F - start_idx)
+    if (!constants || start_idx >= d3d_info->limits.ps_uniform_count
+            || count > d3d_info->limits.ps_uniform_count - start_idx)
         return WINED3DERR_INVALIDCALL;
memcpy(&stateblock->stateblock_state.ps_consts_f[start_idx], constants, count * sizeof(*constants));
Those seem like unrelated changes.
Somewhat. In this same patch I'm moving wined3d_stateblock_capture() to using ARRAY_SIZE(stateblock->changed.?s_consts_f); the limit checks in the setters make sure we're not flagging constants >= d3d_info->limits.?s_uniform_count but still < WINED3D_MAX_?S_CONSTS_F. It's certainly not a big deal; let me know how you prefer that I update the patch (options that I can think of: split these changes out to a separate patch, don't change the loop bounds in wined3d_stateblock_capture(), or drop these changes entirely).
On Tue, 11 Feb 2020 at 21:12, Matteo Bruni matteo.mystral@gmail.com wrote:
Somewhat. In this same patch I'm moving wined3d_stateblock_capture() to using ARRAY_SIZE(stateblock->changed.?s_consts_f), this makes sure we're not flagging constants >= d3d_info->limits.?s_uniform_count but still < WINED3D_MAX_?S_CONSTS_F. It's certainly not a big deal, let me know how you prefer that I update the patch (options that I can think of: split these changes out to a separate patch, don't change the loop bounds in wined3d_stateblock_capture(), drop these changes entirely).
I'd probably just put it in a separate patch. Although I also think that at least at the time these stateblock functions were being written, there was some hope that we could get rid of the "device" field in the stateblock, and just check these kinds of limits in d3d9 etc.
Signed-off-by: Matteo Bruni <mbruni@codeweavers.com>
---
It turns out 7654d58b047be3a54d814d890bf1a26374559c83 caused a pretty sizeable performance regression (which gets a lot more significant once wined3d performance gets better - I have patches in that area). Let's start fixing this.

 dlls/wined3d/device.c | 151 ++++++++++++++++++++----------------------
 1 file changed, 72 insertions(+), 79 deletions(-)
diff --git a/dlls/wined3d/device.c b/dlls/wined3d/device.c index 2297d7de916..87684dc1c46 100644 --- a/dlls/wined3d/device.c +++ b/dlls/wined3d/device.c @@ -3830,12 +3830,67 @@ struct wined3d_texture * CDECL wined3d_device_get_texture(const struct wined3d_d return device->state.textures[stage]; }
+typedef HRESULT (CDECL *wined3d_device_shader_constant_setter)(struct wined3d_device *device, + unsigned int start_idx, unsigned int count, const void *constants); + +static void device_apply_shader_constants(struct wined3d_device *device, + const struct wined3d_stateblock_state *state, + DWORD *bitmap, unsigned int bit_count, const void *data, unsigned int stride, + wined3d_device_shader_constant_setter shader_constant_setter) +{ + const unsigned int word_bit_count = sizeof(DWORD) * CHAR_BIT; + unsigned int i, j, idx, start, last; + const BYTE *byte_data = data; + DWORD map; + + start = last = ~0u; + for (i = 0; i < (bit_count + word_bit_count - 1) / word_bit_count; ++i) + { + map = bitmap[i]; + + if (map == ~0u) + { + if (last != ~0u && last != i * word_bit_count - 1) + { + shader_constant_setter(device, start, last - start + 1, &byte_data[start * stride]); + start = i * word_bit_count; + } + if (start == ~0u) + start = i * word_bit_count; + last = i * word_bit_count + word_bit_count - 1; + continue; + } + while (map) + { + j = wined3d_bit_scan(&map); + idx = i * word_bit_count + j; + + if (start == ~0u) + { + start = last = idx; + } + else if (last != idx - 1) + { + shader_constant_setter(device, start, last - start + 1, &byte_data[start * stride]); + start = last = idx; + } + else + { + last = idx; + } + } + } + if (start != ~0u) + shader_constant_setter(device, start, last - start + 1, &byte_data[start * stride]); +} + void CDECL wined3d_device_apply_stateblock(struct wined3d_device *device, struct wined3d_stateblock *stateblock) { const struct wined3d_d3d_info *d3d_info = &stateblock->device->adapter->d3d_info; const struct wined3d_stateblock_state *state = &stateblock->stateblock_state; - unsigned int i, j, count; + unsigned int i, j; + DWORD map;
TRACE("device %p, stateblock %p.\n", device, stateblock);
@@ -3844,89 +3899,27 @@ void CDECL wined3d_device_apply_stateblock(struct wined3d_device *device, if (stateblock->changed.pixelShader) wined3d_device_set_pixel_shader(device, state->ps);
- count = 0; - for (i = 0; i < d3d_info->limits.vs_uniform_count; ++i) - { - if (stateblock->changed.vs_consts_f[i >> 5] & (1u << (i & 0x1f))) - ++count; - else if (count) - { - wined3d_device_set_vs_consts_f(device, i - count, count, state->vs_consts_f + i - count); - count = 0; - } - } - if (count) - wined3d_device_set_vs_consts_f(device, i - count, count, state->vs_consts_f + i - count); + device_apply_shader_constants(device, state, stateblock->changed.vs_consts_f, d3d_info->limits.vs_uniform_count, + state->vs_consts_f, sizeof(*state->vs_consts_f), (wined3d_device_shader_constant_setter)wined3d_device_set_vs_consts_f);
- count = 0; - for (i = 0; i < WINED3D_MAX_CONSTS_B; ++i) - { - if (stateblock->changed.vertexShaderConstantsB & (1u << i)) - ++count; - else if (count) - { - wined3d_device_set_vs_consts_b(device, i - count, count, state->vs_consts_b + i - count); - count = 0; - } - } - if (count) - wined3d_device_set_vs_consts_b(device, i - count, count, state->vs_consts_b + i - count); + map = stateblock->changed.vertexShaderConstantsB; + device_apply_shader_constants(device, state, &map, WINED3D_MAX_CONSTS_B, state->vs_consts_b, sizeof(*state->vs_consts_b), + (wined3d_device_shader_constant_setter)wined3d_device_set_vs_consts_b);
- count = 0; - for (i = 0; i < WINED3D_MAX_CONSTS_I; ++i) - { - if (stateblock->changed.vertexShaderConstantsI & (1u << i)) - ++count; - else if (count) - { - wined3d_device_set_vs_consts_i(device, i - count, count, state->vs_consts_i + i - count); - count = 0; - } - } - if (count) - wined3d_device_set_vs_consts_i(device, i - count, count, state->vs_consts_i + i - count); + map = stateblock->changed.vertexShaderConstantsI; + device_apply_shader_constants(device, state, &map, WINED3D_MAX_CONSTS_I, state->vs_consts_i, sizeof(*state->vs_consts_i), + (wined3d_device_shader_constant_setter)wined3d_device_set_vs_consts_i);
- count = 0; - for (i = 0; i < d3d_info->limits.ps_uniform_count; ++i) - { - if (stateblock->changed.ps_consts_f[i >> 5] & (1u << (i & 0x1f))) - ++count; - else if (count) - { - wined3d_device_set_ps_consts_f(device, i - count, count, state->ps_consts_f + i - count); - count = 0; - } - } - if (count) - wined3d_device_set_ps_consts_f(device, i - count, count, state->ps_consts_f + i - count); + device_apply_shader_constants(device, state, stateblock->changed.ps_consts_f, d3d_info->limits.ps_uniform_count, + state->ps_consts_f, sizeof(*state->ps_consts_f), (wined3d_device_shader_constant_setter)wined3d_device_set_ps_consts_f);
- count = 0; - for (i = 0; i < WINED3D_MAX_CONSTS_B; ++i) - { - if (stateblock->changed.pixelShaderConstantsB & (1u << i)) - ++count; - else if (count) - { - wined3d_device_set_ps_consts_b(device, i - count, count, state->ps_consts_b + i - count); - count = 0; - } - } - if (count) - wined3d_device_set_ps_consts_b(device, i - count, count, state->ps_consts_b + i - count); + map = stateblock->changed.pixelShaderConstantsB; + device_apply_shader_constants(device, state, &map, WINED3D_MAX_CONSTS_B, state->ps_consts_b, sizeof(*state->ps_consts_b), + (wined3d_device_shader_constant_setter)wined3d_device_set_ps_consts_b);
- count = 0; - for (i = 0; i < WINED3D_MAX_CONSTS_I; ++i) - { - if (stateblock->changed.pixelShaderConstantsI & (1u << i)) - ++count; - else if (count) - { - wined3d_device_set_ps_consts_i(device, i - count, count, state->ps_consts_i + i - count); - count = 0; - } - } - if (count) - wined3d_device_set_ps_consts_i(device, i - count, count, state->ps_consts_i + i - count); + map = stateblock->changed.pixelShaderConstantsI; + device_apply_shader_constants(device, state, &map, WINED3D_MAX_CONSTS_I, state->ps_consts_i, sizeof(*state->ps_consts_i), + (wined3d_device_shader_constant_setter)wined3d_device_set_ps_consts_i);
for (i = 0; i < ARRAY_SIZE(state->light_state->light_map); ++i) {
On Mon, 10 Feb 2020 at 23:06, Matteo Bruni mbruni@codeweavers.com wrote:
+typedef HRESULT (CDECL *wined3d_device_shader_constant_setter)(struct wined3d_device *device,
+        unsigned int start_idx, unsigned int count, const void *constants);
+
+static void device_apply_shader_constants(struct wined3d_device *device,
+        const struct wined3d_stateblock_state *state,
+        DWORD *bitmap, unsigned int bit_count, const void *data, unsigned int stride,
+        wined3d_device_shader_constant_setter shader_constant_setter)
+{
+    const unsigned int word_bit_count = sizeof(DWORD) * CHAR_BIT;
+    unsigned int i, j, idx, start, last;
+    const BYTE *byte_data = data;
+    DWORD map;
+
+    start = last = ~0u;
+    for (i = 0; i < (bit_count + word_bit_count - 1) / word_bit_count; ++i)
+    {
+        map = bitmap[i];
+
+        if (map == ~0u)
+        {
+            if (last != ~0u && last != i * word_bit_count - 1)
+            {
+                shader_constant_setter(device, start, last - start + 1, &byte_data[start * stride]);
+                start = i * word_bit_count;
+            }
+            if (start == ~0u)
+                start = i * word_bit_count;
+            last = i * word_bit_count + word_bit_count - 1;
+            continue;
+        }
+        while (map)
+        {
+            j = wined3d_bit_scan(&map);
+            idx = i * word_bit_count + j;
+
+            if (start == ~0u)
+            {
+                start = last = idx;
+            }
+            else if (last != idx - 1)
+            {
+                shader_constant_setter(device, start, last - start + 1, &byte_data[start * stride]);
+                start = last = idx;
+            }
+            else
+            {
+                last = idx;
+            }
+        }
+    }
+    if (start != ~0u)
+        shader_constant_setter(device, start, last - start + 1, &byte_data[start * stride]);
+}
This looks like it's a fair bit more complicated than it needs to be. I think the primitives you want are something like the following:
unsigned int wined3d_bitmap_ffs(uint32_t *bitmap, unsigned int start, unsigned int count); unsigned int wined3d_bitmap_ffz(uint32_t *bitmap, unsigned int start, unsigned int count);
and then you can extract the ranges relatively trivially.
On Tue, Feb 11, 2020 at 5:33 PM Henri Verbeet hverbeet@gmail.com wrote:
On Mon, 10 Feb 2020 at 23:06, Matteo Bruni mbruni@codeweavers.com wrote:
+typedef HRESULT (CDECL *wined3d_device_shader_constant_setter)(struct wined3d_device *device,
unsigned int start_idx, unsigned int count, const void *constants);
+static void device_apply_shader_constants(struct wined3d_device *device,
const struct wined3d_stateblock_state *state,
DWORD *bitmap, unsigned int bit_count, const void *data, unsigned int stride,
wined3d_device_shader_constant_setter shader_constant_setter)
+{
- const unsigned int word_bit_count = sizeof(DWORD) * CHAR_BIT;
- unsigned int i, j, idx, start, last;
- const BYTE *byte_data = data;
- DWORD map;
- start = last = ~0u;
- for (i = 0; i < (bit_count + word_bit_count - 1) / word_bit_count; ++i)
- {
map = bitmap[i];
if (map == ~0u)
{
if (last != ~0u && last != i * word_bit_count - 1)
{
shader_constant_setter(device, start, last - start + 1, &byte_data[start * stride]);
start = i * word_bit_count;
}
if (start == ~0u)
start = i * word_bit_count;
last = i * word_bit_count + word_bit_count - 1;
continue;
}
while (map)
{
j = wined3d_bit_scan(&map);
idx = i * word_bit_count + j;
if (start == ~0u)
{
start = last = idx;
}
else if (last != idx - 1)
{
shader_constant_setter(device, start, last - start + 1, &byte_data[start * stride]);
start = last = idx;
}
else
{
last = idx;
}
}
- }
- if (start != ~0u)
shader_constant_setter(device, start, last - start + 1, &byte_data[start * stride]);
+}
This looks like it's a fair bit more complicated than it needs to be. I think the primitives you want are something like the following:
unsigned int wined3d_bitmap_ffs(uint32_t *bitmap, unsigned int start, unsigned int count);
unsigned int wined3d_bitmap_ffz(uint32_t *bitmap, unsigned int start, unsigned int count);
and then you can extract the ranges relatively trivially.
Oh, interesting. I'll update the patch along those lines and see what the end result looks like.
Signed-off-by: Matteo Bruni mbruni@codeweavers.com --- dlls/wined3d/device.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-)
diff --git a/dlls/wined3d/device.c b/dlls/wined3d/device.c index 87684dc1c46..d8992424797 100644 --- a/dlls/wined3d/device.c +++ b/dlls/wined3d/device.c @@ -3889,7 +3889,8 @@ void CDECL wined3d_device_apply_stateblock(struct wined3d_device *device, { const struct wined3d_d3d_info *d3d_info = &stateblock->device->adapter->d3d_info; const struct wined3d_stateblock_state *state = &stateblock->stateblock_state; - unsigned int i, j; + const unsigned int word_bit_count = sizeof(DWORD) * CHAR_BIT; + unsigned int i, j, idx; DWORD map;
TRACE("device %p, stateblock %p.\n", device, stateblock); @@ -3968,10 +3969,15 @@ void CDECL wined3d_device_apply_stateblock(struct wined3d_device *device, } }
- for (i = 0; i < ARRAY_SIZE(state->transforms); ++i) + for (i = 0; i < ARRAY_SIZE(stateblock->changed.transform); ++i) { - if (stateblock->changed.transform[i >> 5] & (1u << (i & 0x1f))) - wined3d_device_set_transform(device, i, &state->transforms[i]); + map = stateblock->changed.transform[i]; + while (map) + { + j = wined3d_bit_scan(&map); + idx = i * word_bit_count + j; + wined3d_device_set_transform(device, idx, &state->transforms[idx]); + } }
if (stateblock->changed.indices)
Signed-off-by: Matteo Bruni mbruni@codeweavers.com --- dlls/wined3d/device.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-)
diff --git a/dlls/wined3d/device.c b/dlls/wined3d/device.c index d8992424797..64b783395e4 100644 --- a/dlls/wined3d/device.c +++ b/dlls/wined3d/device.c @@ -3933,18 +3933,21 @@ void CDECL wined3d_device_apply_stateblock(struct wined3d_device *device, } }
- for (i = 0; i < ARRAY_SIZE(state->rs); ++i) + for (i = 0; i < ARRAY_SIZE(stateblock->changed.renderState); ++i) { - if (stateblock->changed.renderState[i >> 5] & (1u << (i & 0x1f))) + map = stateblock->changed.renderState[i]; + while (map) { - if (i == WINED3D_RS_BLENDFACTOR) + j = wined3d_bit_scan(&map); + idx = i * word_bit_count + j; + if (idx == WINED3D_RS_BLENDFACTOR) { struct wined3d_color color; - wined3d_color_from_d3dcolor(&color, state->rs[i]); + wined3d_color_from_d3dcolor(&color, state->rs[idx]); wined3d_device_set_blend_state(device, NULL, &color); } else - wined3d_device_set_render_state(device, i, state->rs[i]); + wined3d_device_set_render_state(device, idx, state->rs[idx]); } }
On Mon, 10 Feb 2020 at 23:06, Matteo Bruni mbruni@codeweavers.com wrote:
+static void wined3d_bitmask_set_bits(DWORD *bitmask, unsigned int offset, unsigned int count) +{
- const unsigned int word_bit_count = sizeof(*bitmask) * CHAR_BIT;
- const unsigned int shift = offset % word_bit_count;
- bitmask += offset / word_bit_count;
- *bitmask |= ~0u >> (word_bit_count - min(count, word_bit_count)) << shift;
- ++bitmask;
- count -= min(count, word_bit_count - shift);
- if (!count)
return;
- if (count >= word_bit_count)
- {
memset(bitmask, 0xffu, count / CHAR_BIT);
bitmask += count / word_bit_count;
count = count % word_bit_count;
if (!count)
return;
- }
- *bitmask |= (1u << count) - 1;
+}
Does this intentionally not handle 0 count? I also suspect this has some room for simplification.
On Tue, Feb 11, 2020 at 5:33 PM Henri Verbeet hverbeet@gmail.com wrote:
On Mon, 10 Feb 2020 at 23:06, Matteo Bruni mbruni@codeweavers.com wrote:
+static void wined3d_bitmask_set_bits(DWORD *bitmask, unsigned int offset, unsigned int count) +{
- const unsigned int word_bit_count = sizeof(*bitmask) * CHAR_BIT;
- const unsigned int shift = offset % word_bit_count;
- bitmask += offset / word_bit_count;
- *bitmask |= ~0u >> (word_bit_count - min(count, word_bit_count)) << shift;
- ++bitmask;
- count -= min(count, word_bit_count - shift);
- if (!count)
return;
- if (count >= word_bit_count)
- {
memset(bitmask, 0xffu, count / CHAR_BIT);
bitmask += count / word_bit_count;
count = count % word_bit_count;
if (!count)
return;
- }
- *bitmask |= (1u << count) - 1;
+}
Does this intentionally not handle 0 count? I also suspect this has some room for simplification.
I wrote this patch a long time ago and looked through it so many times that it's hard for me to see problems. That's where review helps, I guess :) I think I intentionally don't handle 0 count; I guess I could add an assert() at least. WRT simplification, this was originally written when I was looking into a game that sets most (or all) of the float constants in one go, so it somewhat reflects that (e.g. I think I started from the memset() part and then tacked the rest around it afterwards). I'll try to simplify it, but I might ask for more specific suggestions if I can't find anything substantial...
On Tue, 11 Feb 2020 at 21:12, Matteo Bruni matteo.mystral@gmail.com wrote:
On Tue, Feb 11, 2020 at 5:33 PM Henri Verbeet hverbeet@gmail.com wrote:
On Mon, 10 Feb 2020 at 23:06, Matteo Bruni mbruni@codeweavers.com wrote:
+static void wined3d_bitmask_set_bits(DWORD *bitmask, unsigned int offset, unsigned int count) +{
- const unsigned int word_bit_count = sizeof(*bitmask) * CHAR_BIT;
- const unsigned int shift = offset % word_bit_count;
- bitmask += offset / word_bit_count;
- *bitmask |= ~0u >> (word_bit_count - min(count, word_bit_count)) << shift;
- ++bitmask;
- count -= min(count, word_bit_count - shift);
- if (!count)
return;
- if (count >= word_bit_count)
- {
memset(bitmask, 0xffu, count / CHAR_BIT);
bitmask += count / word_bit_count;
count = count % word_bit_count;
if (!count)
return;
- }
- *bitmask |= (1u << count) - 1;
+}
Does this intentionally not handle 0 count? I also suspect this has some room for simplification.
I wrote this patch a long time ago and looked through it so many times that it's hard for me to see problems. That's where review helps, I guess :) I think I intentionally don't handle 0 count; I guess I could add an assert() at least. WRT simplification, this was originally written when I was looking into a game that sets most (or all) of the float constants in one go, so it somewhat reflects that (e.g. I think I started from the memset() part and then tacked the rest around it afterwards). I'll try to simplify it, but I might ask for more specific suggestions if I can't find anything substantial...
I was mostly thinking you could get rid of the min() bits at the start by doing something like the following:
    mask = ~0u << shift;
    mask_size = word_bit_count - shift;
    last_mask = (1u << ((start + count) & (word_bit_count - 1))) - 1;
    if (mask_size < count)
    {
        *bitmap |= mask;
        ++bitmap;
        count -= mask_size;
        mask = ~0u;
    }
    ...
    if (count)
        *bitmap |= (mask & last_mask);
It probably ends up being a little longer, but seems more straightforward and happens to handle 0 count as well.