Signed-off-by: Matteo Bruni <mbruni@codeweavers.com> --- v2: Introduce wined3d_bitmap_ffs() and wined3d_bitmap_ffz(), implement wined3d_apply_shader_constants() on top of them. Tweak a few additional details while at it, mostly to reduce the amount of changes necessary when using the same function for wined3d_stateblock_capture() and wined3d_stateblock_apply() in follow-up patches. v3: Introduce wined3d_bitmap_get_range(), get rid of wined3d_apply_shader_constants(), simplify wined3d_bitmap_ffs(), make both wined3d_bitmap_ffs() and wined3d_bitmap_ffz() use the same common code. (Thanks Henri!)
dlls/wined3d/device.c | 105 ++++++++++++--------------------- dlls/wined3d/wined3d_private.h | 74 +++++++++++++++++++---- 2 files changed, 101 insertions(+), 78 deletions(-)
diff --git a/dlls/wined3d/device.c b/dlls/wined3d/device.c index 3cf621ff46e..fa675f57290 100644 --- a/dlls/wined3d/device.c +++ b/dlls/wined3d/device.c @@ -3835,13 +3835,14 @@ struct wined3d_texture * CDECL wined3d_device_get_texture(const struct wined3d_d void CDECL wined3d_device_apply_stateblock(struct wined3d_device *device, struct wined3d_stateblock *stateblock) { - const struct wined3d_d3d_info *d3d_info = &stateblock->device->adapter->d3d_info; const struct wined3d_stateblock_state *state = &stateblock->stateblock_state; const struct wined3d_saved_states *changed = &stateblock->changed; struct wined3d_blend_state *blend_state; struct wined3d_color colour; - unsigned int i, j, count; + struct wined3d_range range; + unsigned int i, j, start; BOOL set_blend_state; + DWORD map;
TRACE("device %p, stateblock %p.\n", device, stateblock);
@@ -3850,89 +3851,57 @@ void CDECL wined3d_device_apply_stateblock(struct wined3d_device *device, if (changed->pixelShader) wined3d_device_set_pixel_shader(device, state->ps);
- count = 0; - for (i = 0; i < d3d_info->limits.vs_uniform_count; ++i) + for (start = 0; ; start = range.offset + range.size) { - if (wined3d_bitmap_is_set(changed->vs_consts_f, i)) - ++count; - else if (count) - { - wined3d_device_set_vs_consts_f(device, i - count, count, state->vs_consts_f + i - count); - count = 0; - } + if (!wined3d_bitmap_get_range(changed->vs_consts_f, WINED3D_MAX_VS_CONSTS_F, start, &range)) + break; + + wined3d_device_set_vs_consts_f(device, range.offset, range.size, &state->vs_consts_f[range.offset]); } - if (count) - wined3d_device_set_vs_consts_f(device, i - count, count, state->vs_consts_f + i - count);
- count = 0; - for (i = 0; i < WINED3D_MAX_CONSTS_B; ++i) + map = changed->vertexShaderConstantsI; + for (start = 0; ; start = range.offset + range.size) { - if (changed->vertexShaderConstantsB & (1u << i)) - ++count; - else if (count) - { - wined3d_device_set_vs_consts_b(device, i - count, count, state->vs_consts_b + i - count); - count = 0; - } + if (!wined3d_bitmap_get_range(&map, WINED3D_MAX_CONSTS_I, start, &range)) + break; + + wined3d_device_set_vs_consts_i(device, range.offset, range.size, &state->vs_consts_i[range.offset]); } - if (count) - wined3d_device_set_vs_consts_b(device, i - count, count, state->vs_consts_b + i - count);
- count = 0; - for (i = 0; i < WINED3D_MAX_CONSTS_I; ++i) + map = changed->vertexShaderConstantsB; + for (start = 0; ; start = range.offset + range.size) { - if (changed->vertexShaderConstantsI & (1u << i)) - ++count; - else if (count) - { - wined3d_device_set_vs_consts_i(device, i - count, count, state->vs_consts_i + i - count); - count = 0; - } + if (!wined3d_bitmap_get_range(&map, WINED3D_MAX_CONSTS_B, start, &range)) + break; + + wined3d_device_set_vs_consts_b(device, range.offset, range.size, &state->vs_consts_b[range.offset]); } - if (count) - wined3d_device_set_vs_consts_i(device, i - count, count, state->vs_consts_i + i - count);
- count = 0; - for (i = 0; i < d3d_info->limits.ps_uniform_count; ++i) + for (start = 0; ; start = range.offset + range.size) { - if (wined3d_bitmap_is_set(changed->ps_consts_f, i)) - ++count; - else if (count) - { - wined3d_device_set_ps_consts_f(device, i - count, count, state->ps_consts_f + i - count); - count = 0; - } + if (!wined3d_bitmap_get_range(changed->ps_consts_f, WINED3D_MAX_PS_CONSTS_F, start, &range)) + break; + + wined3d_device_set_ps_consts_f(device, range.offset, range.size, &state->ps_consts_f[range.offset]); } - if (count) - wined3d_device_set_ps_consts_f(device, i - count, count, state->ps_consts_f + i - count);
- count = 0; - for (i = 0; i < WINED3D_MAX_CONSTS_B; ++i) + map = changed->pixelShaderConstantsI; + for (start = 0; ; start = range.offset + range.size) { - if (changed->pixelShaderConstantsB & (1u << i)) - ++count; - else if (count) - { - wined3d_device_set_ps_consts_b(device, i - count, count, state->ps_consts_b + i - count); - count = 0; - } + if (!wined3d_bitmap_get_range(&map, WINED3D_MAX_CONSTS_I, start, &range)) + break; + + wined3d_device_set_ps_consts_i(device, range.offset, range.size, &state->ps_consts_i[range.offset]); } - if (count) - wined3d_device_set_ps_consts_b(device, i - count, count, state->ps_consts_b + i - count);
- count = 0; - for (i = 0; i < WINED3D_MAX_CONSTS_I; ++i) + map = changed->pixelShaderConstantsB; + for (start = 0; ; start = range.offset + range.size) { - if (changed->pixelShaderConstantsI & (1u << i)) - ++count; - else if (count) - { - wined3d_device_set_ps_consts_i(device, i - count, count, state->ps_consts_i + i - count); - count = 0; - } + if (!wined3d_bitmap_get_range(&map, WINED3D_MAX_CONSTS_B, start, &range)) + break; + + wined3d_device_set_ps_consts_b(device, range.offset, range.size, &state->ps_consts_b[range.offset]); } - if (count) - wined3d_device_set_ps_consts_i(device, i - count, count, state->ps_consts_i + i - count);
for (i = 0; i < ARRAY_SIZE(state->light_state->light_map); ++i) { diff --git a/dlls/wined3d/wined3d_private.h b/dlls/wined3d/wined3d_private.h index b6042cd6179..15e376c8328 100644 --- a/dlls/wined3d/wined3d_private.h +++ b/dlls/wined3d/wined3d_private.h @@ -3196,13 +3196,13 @@ struct wined3d_state struct wined3d_shader_resource_view *shader_resource_view[WINED3D_SHADER_TYPE_COUNT][MAX_SHADER_RESOURCE_VIEWS]; struct wined3d_unordered_access_view *unordered_access_view[WINED3D_PIPELINE_COUNT][MAX_UNORDERED_ACCESS_VIEWS];
- BOOL vs_consts_b[WINED3D_MAX_CONSTS_B]; - struct wined3d_ivec4 vs_consts_i[WINED3D_MAX_CONSTS_I]; struct wined3d_vec4 vs_consts_f[WINED3D_MAX_VS_CONSTS_F]; + struct wined3d_ivec4 vs_consts_i[WINED3D_MAX_CONSTS_I]; + BOOL vs_consts_b[WINED3D_MAX_CONSTS_B];
- BOOL ps_consts_b[WINED3D_MAX_CONSTS_B]; - struct wined3d_ivec4 ps_consts_i[WINED3D_MAX_CONSTS_I]; struct wined3d_vec4 ps_consts_f[WINED3D_MAX_PS_CONSTS_F]; + struct wined3d_ivec4 ps_consts_i[WINED3D_MAX_CONSTS_I]; + BOOL ps_consts_b[WINED3D_MAX_CONSTS_B];
struct wined3d_texture *textures[WINED3D_MAX_COMBINED_SAMPLERS]; DWORD sampler_states[WINED3D_MAX_COMBINED_SAMPLERS][WINED3D_HIGHEST_SAMPLER_STATE + 1]; @@ -3914,6 +3914,12 @@ struct wined3d_vertex_declaration
struct wined3d_saved_states { + DWORD vs_consts_f[WINED3D_MAX_VS_CONSTS_F >> 5]; + WORD vertexShaderConstantsI; /* WINED3D_MAX_CONSTS_I, 16 */ + WORD vertexShaderConstantsB; /* WINED3D_MAX_CONSTS_B, 16 */ + DWORD ps_consts_f[WINED3D_MAX_PS_CONSTS_F >> 5]; + WORD pixelShaderConstantsI; /* WINED3D_MAX_CONSTS_I, 16 */ + WORD pixelShaderConstantsB; /* WINED3D_MAX_CONSTS_B, 16 */ DWORD transform[(WINED3D_HIGHEST_TRANSFORM_STATE >> 5) + 1]; WORD streamSource; /* WINED3D_MAX_STREAMS, 16 */ WORD streamFreq; /* WINED3D_MAX_STREAMS, 16 */ @@ -3921,12 +3927,6 @@ struct wined3d_saved_states DWORD textureState[WINED3D_MAX_TEXTURES]; /* WINED3D_HIGHEST_TEXTURE_STATE + 1, 18 */ WORD samplerState[WINED3D_MAX_COMBINED_SAMPLERS]; /* WINED3D_HIGHEST_SAMPLER_STATE + 1, 14 */ DWORD clipplane; /* WINED3D_MAX_USER_CLIP_PLANES, 32 */ - WORD pixelShaderConstantsB; /* WINED3D_MAX_CONSTS_B, 16 */ - WORD pixelShaderConstantsI; /* WINED3D_MAX_CONSTS_I, 16 */ - DWORD ps_consts_f[WINED3D_MAX_PS_CONSTS_F >> 5]; - WORD vertexShaderConstantsB; /* WINED3D_MAX_CONSTS_B, 16 */ - WORD vertexShaderConstantsI; /* WINED3D_MAX_CONSTS_I, 16 */ - DWORD vs_consts_f[WINED3D_MAX_VS_CONSTS_F >> 5]; DWORD textures : 20; /* WINED3D_MAX_COMBINED_SAMPLERS, 20 */ DWORD indices : 1; DWORD material : 1; @@ -5325,6 +5325,60 @@ static inline BOOL wined3d_bitmap_is_set(const uint32_t *map, unsigned int idx) return map[idx >> 5] & (1u << (idx & 0x1f)); }
+static inline unsigned int wined3d_bitmap_ffs_xor(const uint32_t *bitmap, unsigned int bit_count, + unsigned int start, uint32_t xor_mask) +{ + const unsigned int word_bit_count = sizeof(*bitmap) * CHAR_BIT; + const uint32_t *ptr, *end_ptr; + uint32_t map, mask; + + assert(bit_count < word_bit_count || !(bit_count % word_bit_count)); + + ptr = bitmap + start / word_bit_count; + end_ptr = bitmap + (bit_count + word_bit_count - 1) / word_bit_count; + + if (ptr >= end_ptr) + return ~0u; + + mask = ~0u << start % word_bit_count; + map = (*ptr ^ xor_mask) & mask; + while (!map) + { + if (++ptr == end_ptr) + return ~0u; + map = *ptr ^ xor_mask; + } + return (ptr - bitmap) * word_bit_count + wined3d_bit_scan(&map); +} + +static inline unsigned int wined3d_bitmap_ffs(const uint32_t *bitmap, unsigned int bit_count, unsigned int start) +{ + return wined3d_bitmap_ffs_xor(bitmap, bit_count, start, 0); +} + +static inline unsigned int wined3d_bitmap_ffz(const uint32_t *bitmap, unsigned int bit_count, unsigned int start) +{ + return wined3d_bitmap_ffs_xor(bitmap, bit_count, start, ~0u); +} + +static inline BOOL wined3d_bitmap_get_range(const DWORD *bitmap, unsigned int bit_count, + unsigned int start, struct wined3d_range *range) +{ + unsigned int range_start, range_end; + + range_start = wined3d_bitmap_ffs(bitmap, bit_count, start); + if (range_start == ~0u) + return FALSE; + + range_end = wined3d_bitmap_ffz(bitmap, bit_count, range_start + 1); + if (range_end == ~0u) + range_end = bit_count; + + range->offset = range_start; + range->size = range_end - range_start; + return TRUE; +} + /* The WNDCLASS-Name for the fake window which we use to retrieve the GL capabilities */ #define WINED3D_OPENGL_WINDOW_CLASS_NAME "WineD3D_OpenGL"