On Fri, Apr 02, 2021 at 03:54:02PM +0300, Gabriel Ivăncescu wrote:
Usually, when there's performance issues with RGB into color table conversions, the same color table is used. So instead of generating it over and over, this caches it if it's frequently accessed (as should be the case when performance is an issue in the first place).
Wine-Bug: https://bugs.winehq.org/show_bug.cgi?id=50898 Signed-off-by: Gabriel Ivăncescu gabrielopcode@gmail.com
dlls/gdi32/dibdrv/primitives.c | 89 +++++++++++++++++++++++++++++++++- 1 file changed, 88 insertions(+), 1 deletion(-)
diff --git a/dlls/gdi32/dibdrv/primitives.c b/dlls/gdi32/dibdrv/primitives.c index 8947c60..6071280 100644 --- a/dlls/gdi32/dibdrv/primitives.c +++ b/dlls/gdi32/dibdrv/primitives.c @@ -3511,12 +3511,99 @@ struct rgb_lookup_colortable_ctx
static void rgb_lookup_colortable_init(const dib_info *dib, struct rgb_lookup_colortable_ctx *ctx) {
- unsigned r, g, b;
/*
Globally cache frequently generated maps, as most apps usually only use a few color tables often.
We keep the variables separated for better cache locality when looking them up (tables are large).
*/
static SRWLOCK global_cache_lock = SRWLOCK_INIT;
static volatile DWORD global_cache_sequence;
/* ticks when the cached map was last accessed or generated; we discard the oldest first */
static DWORD global_cache_ticks[4];
/* number of colors in the given cached color table */
static unsigned global_cache_color_table_size[ARRAY_SIZE(global_cache_ticks)];
/* the actual cached color tables to lookup and see if they're cached */
static RGBQUAD global_cache_color_table[ARRAY_SIZE(global_cache_ticks)][256];
/* the associated generated maps for the color tables above */
static BYTE global_cache_map[ARRAY_SIZE(global_cache_ticks)][ARRAY_SIZE(ctx->map)];
unsigned color_table_size = dib->color_table ? dib->color_table_size : 1 << dib->bit_count;
const RGBQUAD *color_table = get_dib_color_table(dib);
DWORD newest, oldest_entry, sequence;
unsigned i, r, g, b;
/* check the global cache first */
if (TryAcquireSRWLockShared(&global_cache_lock))
{
newest = global_cache_ticks[0];
for (i = 0; i < ARRAY_SIZE(global_cache_color_table_size); i++)
{
if (color_table_size == global_cache_color_table_size[i] &&
!memcmp(color_table, global_cache_color_table[i], color_table_size * sizeof(color_table[0])))
{
memcpy(ctx->map, global_cache_map[i], sizeof(ctx->map));
/* it's a shared lock, but the exact timestamp is unimportant, so it's OK */
global_cache_ticks[i] = GetTickCount();
ReleaseSRWLockShared(&global_cache_lock);
return;
}
if ((LONG)(newest - global_cache_ticks[i]) < 0)
newest = global_cache_ticks[i];
}
sequence = global_cache_sequence;
ReleaseSRWLockShared(&global_cache_lock);
}
else
{
newest = global_cache_ticks[0];
sequence = global_cache_sequence - 1; /* make sure we re-check the cache */
}
for (b = 4; b < 256; b += 1 << 3) for (g = 4; g < 256; g += 1 << 3) for (r = 4; r < 256; r += 1 << 3) ctx->map[r >> 3 | (g & ~7) << 2 | (b & ~7) << 7] = rgb_to_pixel_colortable(dib, r, g, b);
/* update the global cache if needed */
AcquireSRWLockExclusive(&global_cache_lock);
/* if the cache was updated underneath us, check it again */
if (sequence != global_cache_sequence)
{
for (i = 0; i < ARRAY_SIZE(global_cache_color_table_size); i++)
{
if ((LONG)(newest - global_cache_ticks[i]) <= 0 &&
color_table_size == global_cache_color_table_size[i] &&
!memcmp(color_table, global_cache_color_table[i], color_table_size * sizeof(color_table[0])))
{
global_cache_ticks[i] = GetTickCount();
ReleaseSRWLockExclusive(&global_cache_lock);
return;
}
}
}
global_cache_sequence++;
oldest_entry = 0;
for (i = 1; i < ARRAY_SIZE(global_cache_color_table_size); i++)
if ((LONG)(global_cache_ticks[oldest_entry] - global_cache_ticks[i]) > 0)
oldest_entry = i;
memcpy(global_cache_color_table[oldest_entry], color_table, color_table_size * sizeof(color_table[0]));
memcpy(global_cache_map[oldest_entry], ctx->map, sizeof(ctx->map));
global_cache_color_table_size[oldest_entry] = color_table_size;
global_cache_ticks[oldest_entry] = GetTickCount();
ReleaseSRWLockExclusive(&global_cache_lock);
}
Yeah, I'm not convinced, not even close. You're going to have to provide some pretty good evidence, with real world apps, that this is necessary over a local, lazy-init solution.
Huw.