[PATCH v3 1/2] gdi32: Move the loop through each clipped rectangle to the primitive blend funcs.
This is to prepare so we don't recalculate the lookup cache map for color tables on every clipped rect (which is expensive). Signed-off-by: Gabriel Ivăncescu <gabrielopcode(a)gmail.com> --- This is a no-op patch, it's needed for next patch. dlls/gdi32/dibdrv/bitblt.c | 14 +- dlls/gdi32/dibdrv/dibdrv.h | 18 +-- dlls/gdi32/dibdrv/primitives.c | 284 +++++++++++++++++++-------------- 3 files changed, 177 insertions(+), 139 deletions(-) diff --git a/dlls/gdi32/dibdrv/bitblt.c b/dlls/gdi32/dibdrv/bitblt.c index 8f67535..045c969 100644 --- a/dlls/gdi32/dibdrv/bitblt.c +++ b/dlls/gdi32/dibdrv/bitblt.c @@ -584,17 +584,15 @@ static void mask_rect( dib_info *dst, const RECT *dst_rect, const dib_info *src, static DWORD blend_rect( dib_info *dst, const RECT *dst_rect, const dib_info *src, const RECT *src_rect, HRGN clip, BLENDFUNCTION blend ) { - POINT origin; struct clipped_rects clipped_rects; - int i; + POINT offset; if (!get_clipped_rects( dst, dst_rect, clip, &clipped_rects )) return ERROR_SUCCESS; - for (i = 0; i < clipped_rects.count; i++) - { - origin.x = src_rect->left + clipped_rects.rects[i].left - dst_rect->left; - origin.y = src_rect->top + clipped_rects.rects[i].top - dst_rect->top; - dst->funcs->blend_rect( dst, &clipped_rects.rects[i], src, &origin, blend ); - } + + offset.x = src_rect->left - dst_rect->left; + offset.y = src_rect->top - dst_rect->top; + dst->funcs->blend_rect( dst, src, &offset, &clipped_rects, blend ); + free_clipped_rects( &clipped_rects ); return ERROR_SUCCESS; } diff --git a/dlls/gdi32/dibdrv/dibdrv.h b/dlls/gdi32/dibdrv/dibdrv.h index 88b4c62..7db825c 100644 --- a/dlls/gdi32/dibdrv/dibdrv.h +++ b/dlls/gdi32/dibdrv/dibdrv.h @@ -165,6 +165,13 @@ static inline dibdrv_physdev *get_dibdrv_pdev( PHYSDEV dev ) return (dibdrv_physdev *)dev; } +struct clipped_rects +{ + RECT *rects; + int count; + RECT buffer[32]; +}; + struct line_params { int err_start, err_add_1, err_add_2, bias; @@ -189,8 +196,8 @@ typedef struct primitive_funcs const dib_info 
*brush, const rop_mask_bits *bits); void (* copy_rect)(const dib_info *dst, const RECT *rc, const dib_info *src, const POINT *origin, int rop2, int overlap); - void (* blend_rect)(const dib_info *dst, const RECT *rc, const dib_info *src, - const POINT *origin, BLENDFUNCTION blend); + void (* blend_rect)(const dib_info *dst, const dib_info *src, const POINT *offset, + const struct clipped_rects *clipped_rects, BLENDFUNCTION blend); BOOL (* gradient_rect)(const dib_info *dib, const RECT *rc, const TRIVERTEX *v, int mode); void (* mask_rect)(const dib_info *dst, const RECT *rc, const dib_info *src, const POINT *origin, int rop2); @@ -240,13 +247,6 @@ typedef struct DWORD octant; } bres_params; -struct clipped_rects -{ - RECT *rects; - int count; - RECT buffer[32]; -}; - extern void get_rop_codes(INT rop, struct rop_codes *codes) DECLSPEC_HIDDEN; extern void reset_dash_origin(dibdrv_physdev *pdev) DECLSPEC_HIDDEN; extern void init_dib_info_from_bitmapinfo(dib_info *dib, const BITMAPINFO *info, void *bits) DECLSPEC_HIDDEN; diff --git a/dlls/gdi32/dibdrv/primitives.c b/dlls/gdi32/dibdrv/primitives.c index 01a1c7c..0a5f7e5 100644 --- a/dlls/gdi32/dibdrv/primitives.c +++ b/dlls/gdi32/dibdrv/primitives.c @@ -4651,199 +4651,239 @@ static inline DWORD blend_rgb( BYTE dst_r, BYTE dst_g, BYTE dst_b, DWORD src, BL blend_color( dst_r, src >> 16, blend.SourceConstantAlpha ) << 16); } -static void blend_rect_8888(const dib_info *dst, const RECT *rc, - const dib_info *src, const POINT *origin, BLENDFUNCTION blend) +static void blend_rect_8888(const dib_info *dst, const dib_info *src, const POINT *offset, + const struct clipped_rects *clipped_rects, BLENDFUNCTION blend) { - DWORD *src_ptr = get_pixel_ptr_32( src, origin->x, origin->y ); - DWORD *dst_ptr = get_pixel_ptr_32( dst, rc->left, rc->top ); - int x, y; + int i, x, y; - if (blend.AlphaFormat & AC_SRC_ALPHA) + for (i = 0; i < clipped_rects->count; i++) { - if (blend.SourceConstantAlpha == 255) - for (y = rc->top; y < 
rc->bottom; y++, dst_ptr += dst->stride / 4, src_ptr += src->stride / 4) - for (x = 0; x < rc->right - rc->left; x++) - dst_ptr[x] = blend_argb( dst_ptr[x], src_ptr[x] ); + const RECT *rc = &clipped_rects->rects[i]; + DWORD *src_ptr = get_pixel_ptr_32( src, rc->left + offset->x, rc->top + offset->y ); + DWORD *dst_ptr = get_pixel_ptr_32( dst, rc->left, rc->top ); + + if (blend.AlphaFormat & AC_SRC_ALPHA) + { + if (blend.SourceConstantAlpha == 255) + for (y = rc->top; y < rc->bottom; y++, dst_ptr += dst->stride / 4, src_ptr += src->stride / 4) + for (x = 0; x < rc->right - rc->left; x++) + dst_ptr[x] = blend_argb( dst_ptr[x], src_ptr[x] ); + else + for (y = rc->top; y < rc->bottom; y++, dst_ptr += dst->stride / 4, src_ptr += src->stride / 4) + for (x = 0; x < rc->right - rc->left; x++) + dst_ptr[x] = blend_argb_alpha( dst_ptr[x], src_ptr[x], blend.SourceConstantAlpha ); + } + else if (src->compression == BI_RGB) + for (y = rc->top; y < rc->bottom; y++, dst_ptr += dst->stride / 4, src_ptr += src->stride / 4) + for (x = 0; x < rc->right - rc->left; x++) + dst_ptr[x] = blend_argb_constant_alpha( dst_ptr[x], src_ptr[x], blend.SourceConstantAlpha ); else - for (y = rc->top; y < rc->bottom; y++, dst_ptr += dst->stride / 4, src_ptr += src->stride / 4) - for (x = 0; x < rc->right - rc->left; x++) - dst_ptr[x] = blend_argb_alpha( dst_ptr[x], src_ptr[x], blend.SourceConstantAlpha ); - } - else if (src->compression == BI_RGB) - for (y = rc->top; y < rc->bottom; y++, dst_ptr += dst->stride / 4, src_ptr += src->stride / 4) - for (x = 0; x < rc->right - rc->left; x++) - dst_ptr[x] = blend_argb_constant_alpha( dst_ptr[x], src_ptr[x], blend.SourceConstantAlpha ); - else - for (y = rc->top; y < rc->bottom; y++, dst_ptr += dst->stride / 4, src_ptr += src->stride / 4) - for (x = 0; x < rc->right - rc->left; x++) - dst_ptr[x] = blend_argb_no_src_alpha( dst_ptr[x], src_ptr[x], blend.SourceConstantAlpha ); + for (y = rc->top; y < rc->bottom; y++, dst_ptr += dst->stride / 4, src_ptr += 
src->stride / 4) + for (x = 0; x < rc->right - rc->left; x++) + dst_ptr[x] = blend_argb_no_src_alpha( dst_ptr[x], src_ptr[x], blend.SourceConstantAlpha ); + } } -static void blend_rect_32(const dib_info *dst, const RECT *rc, - const dib_info *src, const POINT *origin, BLENDFUNCTION blend) +static void blend_rect_32(const dib_info *dst, const dib_info *src, const POINT *offset, + const struct clipped_rects *clipped_rects, BLENDFUNCTION blend) { - DWORD *src_ptr = get_pixel_ptr_32( src, origin->x, origin->y ); - DWORD *dst_ptr = get_pixel_ptr_32( dst, rc->left, rc->top ); - int x, y; + int i, x, y; - if (dst->red_len == 8 && dst->green_len == 8 && dst->blue_len == 8) + for (i = 0; i < clipped_rects->count; i++) { - for (y = rc->top; y < rc->bottom; y++, dst_ptr += dst->stride / 4, src_ptr += src->stride / 4) + const RECT *rc = &clipped_rects->rects[i]; + DWORD *src_ptr = get_pixel_ptr_32( src, rc->left + offset->x, rc->top + offset->y ); + DWORD *dst_ptr = get_pixel_ptr_32( dst, rc->left, rc->top ); + + if (dst->red_len == 8 && dst->green_len == 8 && dst->blue_len == 8) { - for (x = 0; x < rc->right - rc->left; x++) + for (y = rc->top; y < rc->bottom; y++, dst_ptr += dst->stride / 4, src_ptr += src->stride / 4) { - DWORD val = blend_rgb( dst_ptr[x] >> dst->red_shift, - dst_ptr[x] >> dst->green_shift, - dst_ptr[x] >> dst->blue_shift, - src_ptr[x], blend ); - dst_ptr[x] = ((( val & 0xff) << dst->blue_shift) | - (((val >> 8) & 0xff) << dst->green_shift) | - (((val >> 16) & 0xff) << dst->red_shift)); + for (x = 0; x < rc->right - rc->left; x++) + { + DWORD val = blend_rgb( dst_ptr[x] >> dst->red_shift, + dst_ptr[x] >> dst->green_shift, + dst_ptr[x] >> dst->blue_shift, + src_ptr[x], blend ); + dst_ptr[x] = ((( val & 0xff) << dst->blue_shift) | + (((val >> 8) & 0xff) << dst->green_shift) | + (((val >> 16) & 0xff) << dst->red_shift)); + } } } - } - else - { - for (y = rc->top; y < rc->bottom; y++, dst_ptr += dst->stride / 4, src_ptr += src->stride / 4) + else { - for (x = 
0; x < rc->right - rc->left; x++) + for (y = rc->top; y < rc->bottom; y++, dst_ptr += dst->stride / 4, src_ptr += src->stride / 4) { - DWORD val = blend_rgb( get_field( dst_ptr[x], dst->red_shift, dst->red_len ), - get_field( dst_ptr[x], dst->green_shift, dst->green_len ), - get_field( dst_ptr[x], dst->blue_shift, dst->blue_len ), - src_ptr[x], blend ); - dst_ptr[x] = rgb_to_pixel_masks( dst, val >> 16, val >> 8, val ); + for (x = 0; x < rc->right - rc->left; x++) + { + DWORD val = blend_rgb( get_field( dst_ptr[x], dst->red_shift, dst->red_len ), + get_field( dst_ptr[x], dst->green_shift, dst->green_len ), + get_field( dst_ptr[x], dst->blue_shift, dst->blue_len ), + src_ptr[x], blend ); + dst_ptr[x] = rgb_to_pixel_masks( dst, val >> 16, val >> 8, val ); + } } } } } -static void blend_rect_24(const dib_info *dst, const RECT *rc, - const dib_info *src, const POINT *origin, BLENDFUNCTION blend) +static void blend_rect_24(const dib_info *dst, const dib_info *src, const POINT *offset, + const struct clipped_rects *clipped_rects, BLENDFUNCTION blend) { - DWORD *src_ptr = get_pixel_ptr_32( src, origin->x, origin->y ); - BYTE *dst_ptr = get_pixel_ptr_24( dst, rc->left, rc->top ); - int x, y; + int i, x, y; - for (y = rc->top; y < rc->bottom; y++, dst_ptr += dst->stride, src_ptr += src->stride / 4) + for (i = 0; i < clipped_rects->count; i++) { - for (x = 0; x < rc->right - rc->left; x++) + const RECT *rc = &clipped_rects->rects[i]; + DWORD *src_ptr = get_pixel_ptr_32( src, rc->left + offset->x, rc->top + offset->y ); + BYTE *dst_ptr = get_pixel_ptr_24( dst, rc->left, rc->top ); + + for (y = rc->top; y < rc->bottom; y++, dst_ptr += dst->stride, src_ptr += src->stride / 4) { - DWORD val = blend_rgb( dst_ptr[x * 3 + 2], dst_ptr[x * 3 + 1], dst_ptr[x * 3], - src_ptr[x], blend ); - dst_ptr[x * 3] = val; - dst_ptr[x * 3 + 1] = val >> 8; - dst_ptr[x * 3 + 2] = val >> 16; + for (x = 0; x < rc->right - rc->left; x++) + { + DWORD val = blend_rgb( dst_ptr[x * 3 + 2], dst_ptr[x * 3 + 
1], dst_ptr[x * 3], + src_ptr[x], blend ); + dst_ptr[x * 3] = val; + dst_ptr[x * 3 + 1] = val >> 8; + dst_ptr[x * 3 + 2] = val >> 16; + } } } } -static void blend_rect_555(const dib_info *dst, const RECT *rc, - const dib_info *src, const POINT *origin, BLENDFUNCTION blend) +static void blend_rect_555(const dib_info *dst, const dib_info *src, const POINT *offset, + const struct clipped_rects *clipped_rects, BLENDFUNCTION blend) { - DWORD *src_ptr = get_pixel_ptr_32( src, origin->x, origin->y ); - WORD *dst_ptr = get_pixel_ptr_16( dst, rc->left, rc->top ); - int x, y; + int i, x, y; - for (y = rc->top; y < rc->bottom; y++, dst_ptr += dst->stride / 2, src_ptr += src->stride / 4) + for (i = 0; i < clipped_rects->count; i++) { - for (x = 0; x < rc->right - rc->left; x++) + const RECT *rc = &clipped_rects->rects[i]; + DWORD *src_ptr = get_pixel_ptr_32( src, rc->left + offset->x, rc->top + offset->y ); + WORD *dst_ptr = get_pixel_ptr_16( dst, rc->left, rc->top ); + + for (y = rc->top; y < rc->bottom; y++, dst_ptr += dst->stride / 2, src_ptr += src->stride / 4) { - DWORD val = blend_rgb( ((dst_ptr[x] >> 7) & 0xf8) | ((dst_ptr[x] >> 12) & 0x07), - ((dst_ptr[x] >> 2) & 0xf8) | ((dst_ptr[x] >> 7) & 0x07), - ((dst_ptr[x] << 3) & 0xf8) | ((dst_ptr[x] >> 2) & 0x07), - src_ptr[x], blend ); - dst_ptr[x] = ((val >> 9) & 0x7c00) | ((val >> 6) & 0x03e0) | ((val >> 3) & 0x001f); + for (x = 0; x < rc->right - rc->left; x++) + { + DWORD val = blend_rgb( ((dst_ptr[x] >> 7) & 0xf8) | ((dst_ptr[x] >> 12) & 0x07), + ((dst_ptr[x] >> 2) & 0xf8) | ((dst_ptr[x] >> 7) & 0x07), + ((dst_ptr[x] << 3) & 0xf8) | ((dst_ptr[x] >> 2) & 0x07), + src_ptr[x], blend ); + dst_ptr[x] = ((val >> 9) & 0x7c00) | ((val >> 6) & 0x03e0) | ((val >> 3) & 0x001f); + } } } } -static void blend_rect_16(const dib_info *dst, const RECT *rc, - const dib_info *src, const POINT *origin, BLENDFUNCTION blend) +static void blend_rect_16(const dib_info *dst, const dib_info *src, const POINT *offset, + const struct clipped_rects 
*clipped_rects, BLENDFUNCTION blend) { - DWORD *src_ptr = get_pixel_ptr_32( src, origin->x, origin->y ); - WORD *dst_ptr = get_pixel_ptr_16( dst, rc->left, rc->top ); - int x, y; + int i, x, y; - for (y = rc->top; y < rc->bottom; y++, dst_ptr += dst->stride / 2, src_ptr += src->stride / 4) + for (i = 0; i < clipped_rects->count; i++) { - for (x = 0; x < rc->right - rc->left; x++) + const RECT *rc = &clipped_rects->rects[i]; + DWORD *src_ptr = get_pixel_ptr_32( src, rc->left + offset->x, rc->top + offset->y ); + WORD *dst_ptr = get_pixel_ptr_16( dst, rc->left, rc->top ); + + for (y = rc->top; y < rc->bottom; y++, dst_ptr += dst->stride / 2, src_ptr += src->stride / 4) { - DWORD val = blend_rgb( get_field( dst_ptr[x], dst->red_shift, dst->red_len ), - get_field( dst_ptr[x], dst->green_shift, dst->green_len ), - get_field( dst_ptr[x], dst->blue_shift, dst->blue_len ), - src_ptr[x], blend ); - dst_ptr[x] = rgb_to_pixel_masks( dst, val >> 16, val >> 8, val ); + for (x = 0; x < rc->right - rc->left; x++) + { + DWORD val = blend_rgb( get_field( dst_ptr[x], dst->red_shift, dst->red_len ), + get_field( dst_ptr[x], dst->green_shift, dst->green_len ), + get_field( dst_ptr[x], dst->blue_shift, dst->blue_len ), + src_ptr[x], blend ); + dst_ptr[x] = rgb_to_pixel_masks( dst, val >> 16, val >> 8, val ); + } } } } -static void blend_rect_8(const dib_info *dst, const RECT *rc, - const dib_info *src, const POINT *origin, BLENDFUNCTION blend) +static void blend_rect_8(const dib_info *dst, const dib_info *src, const POINT *offset, + const struct clipped_rects *clipped_rects, BLENDFUNCTION blend) { const RGBQUAD *color_table = get_dib_color_table( dst ); - DWORD *src_ptr = get_pixel_ptr_32( src, origin->x, origin->y ); - BYTE *dst_ptr = get_pixel_ptr_8( dst, rc->left, rc->top ); - int x, y; + int i, x, y; - for (y = rc->top; y < rc->bottom; y++, dst_ptr += dst->stride, src_ptr += src->stride / 4) + for (i = 0; i < clipped_rects->count; i++) { - for (x = 0; x < rc->right - rc->left; x++) 
+ const RECT *rc = &clipped_rects->rects[i]; + DWORD *src_ptr = get_pixel_ptr_32( src, rc->left + offset->x, rc->top + offset->y ); + BYTE *dst_ptr = get_pixel_ptr_8( dst, rc->left, rc->top ); + + for (y = rc->top; y < rc->bottom; y++, dst_ptr += dst->stride, src_ptr += src->stride / 4) { - RGBQUAD rgb = color_table[dst_ptr[x]]; - DWORD val = blend_rgb( rgb.rgbRed, rgb.rgbGreen, rgb.rgbBlue, src_ptr[x], blend ); - dst_ptr[x] = rgb_lookup_colortable( dst, val >> 16, val >> 8, val ); + for (x = 0; x < rc->right - rc->left; x++) + { + RGBQUAD rgb = color_table[dst_ptr[x]]; + DWORD val = blend_rgb( rgb.rgbRed, rgb.rgbGreen, rgb.rgbBlue, src_ptr[x], blend ); + dst_ptr[x] = rgb_lookup_colortable( dst, val >> 16, val >> 8, val ); + } } } } -static void blend_rect_4(const dib_info *dst, const RECT *rc, - const dib_info *src, const POINT *origin, BLENDFUNCTION blend) +static void blend_rect_4(const dib_info *dst, const dib_info *src, const POINT *offset, + const struct clipped_rects *clipped_rects, BLENDFUNCTION blend) { const RGBQUAD *color_table = get_dib_color_table( dst ); - DWORD *src_ptr = get_pixel_ptr_32( src, origin->x, origin->y ); - BYTE *dst_ptr = get_pixel_ptr_4( dst, rc->left, rc->top ); - int i, x, y; + int i, j, x, y; - for (y = rc->top; y < rc->bottom; y++, dst_ptr += dst->stride, src_ptr += src->stride / 4) + for (i = 0; i < clipped_rects->count; i++) { - for (i = 0, x = (dst->rect.left + rc->left) & 1; i < rc->right - rc->left; i++, x++) + const RECT *rc = &clipped_rects->rects[i]; + DWORD *src_ptr = get_pixel_ptr_32( src, rc->left + offset->x, rc->top + offset->y ); + BYTE *dst_ptr = get_pixel_ptr_4( dst, rc->left, rc->top ); + + for (y = rc->top; y < rc->bottom; y++, dst_ptr += dst->stride, src_ptr += src->stride / 4) { - DWORD val = ((x & 1) ? 
dst_ptr[x / 2] : (dst_ptr[x / 2] >> 4)) & 0x0f; - RGBQUAD rgb = color_table[val]; - val = blend_rgb( rgb.rgbRed, rgb.rgbGreen, rgb.rgbBlue, src_ptr[i], blend ); - val = rgb_lookup_colortable( dst, val >> 16, val >> 8, val ); - if (x & 1) - dst_ptr[x / 2] = val | (dst_ptr[x / 2] & 0xf0); - else - dst_ptr[x / 2] = (val << 4) | (dst_ptr[x / 2] & 0x0f); + for (j = 0, x = (dst->rect.left + rc->left) & 1; j < rc->right - rc->left; j++, x++) + { + DWORD val = ((x & 1) ? dst_ptr[x / 2] : (dst_ptr[x / 2] >> 4)) & 0x0f; + RGBQUAD rgb = color_table[val]; + val = blend_rgb( rgb.rgbRed, rgb.rgbGreen, rgb.rgbBlue, src_ptr[j], blend ); + val = rgb_lookup_colortable( dst, val >> 16, val >> 8, val ); + if (x & 1) + dst_ptr[x / 2] = val | (dst_ptr[x / 2] & 0xf0); + else + dst_ptr[x / 2] = (val << 4) | (dst_ptr[x / 2] & 0x0f); + } } } } -static void blend_rect_1(const dib_info *dst, const RECT *rc, - const dib_info *src, const POINT *origin, BLENDFUNCTION blend) +static void blend_rect_1(const dib_info *dst, const dib_info *src, const POINT *offset, + const struct clipped_rects *clipped_rects, BLENDFUNCTION blend) { const RGBQUAD *color_table = get_dib_color_table( dst ); - DWORD *src_ptr = get_pixel_ptr_32( src, origin->x, origin->y ); - BYTE *dst_ptr = get_pixel_ptr_1( dst, rc->left, rc->top ); - int i, x, y; + int i, j, x, y; - for (y = rc->top; y < rc->bottom; y++, dst_ptr += dst->stride, src_ptr += src->stride / 4) + for (i = 0; i < clipped_rects->count; i++) { - for (i = 0, x = (dst->rect.left + rc->left) & 7; i < rc->right - rc->left; i++, x++) + const RECT *rc = &clipped_rects->rects[i]; + DWORD *src_ptr = get_pixel_ptr_32( src, rc->left + offset->x, rc->top + offset->y ); + BYTE *dst_ptr = get_pixel_ptr_1( dst, rc->left, rc->top ); + + for (y = rc->top; y < rc->bottom; y++, dst_ptr += dst->stride, src_ptr += src->stride / 4) { - DWORD val = (dst_ptr[x / 8] & pixel_masks_1[x % 8]) ? 
1 : 0; - RGBQUAD rgb = color_table[val]; - val = blend_rgb( rgb.rgbRed, rgb.rgbGreen, rgb.rgbBlue, src_ptr[i], blend ); - val = rgb_to_pixel_colortable(dst, val >> 16, val >> 8, val) ? 0xff : 0; - dst_ptr[x / 8] = (dst_ptr[x / 8] & ~pixel_masks_1[x % 8]) | (val & pixel_masks_1[x % 8]); + for (j = 0, x = (dst->rect.left + rc->left) & 7; j < rc->right - rc->left; j++, x++) + { + DWORD val = (dst_ptr[x / 8] & pixel_masks_1[x % 8]) ? 1 : 0; + RGBQUAD rgb = color_table[val]; + val = blend_rgb( rgb.rgbRed, rgb.rgbGreen, rgb.rgbBlue, src_ptr[j], blend ); + val = rgb_to_pixel_colortable(dst, val >> 16, val >> 8, val) ? 0xff : 0; + dst_ptr[x / 8] = (dst_ptr[x / 8] & ~pixel_masks_1[x % 8]) | (val & pixel_masks_1[x % 8]); + } } } } -static void blend_rect_null(const dib_info *dst, const RECT *rc, - const dib_info *src, const POINT *origin, BLENDFUNCTION blend) +static void blend_rect_null(const dib_info *dst, const dib_info *src, const POINT *offset, + const struct clipped_rects *clipped_rects, BLENDFUNCTION blend) { } -- 2.30.0
Signed-off-by: Gabriel Ivăncescu <gabrielopcode(a)gmail.com> --- dlls/gdi32/dibdrv/primitives.c | 65 +++++++++++++++++++++++++--------- 1 file changed, 49 insertions(+), 16 deletions(-) diff --git a/dlls/gdi32/dibdrv/primitives.c b/dlls/gdi32/dibdrv/primitives.c index 0a5f7e5..be68058 100644 --- a/dlls/gdi32/dibdrv/primitives.c +++ b/dlls/gdi32/dibdrv/primitives.c @@ -3497,22 +3497,48 @@ static void convert_to_16(dib_info *dst, const dib_info *src, const RECT *src_re } } -static inline BOOL color_tables_match(const dib_info *d1, const dib_info *d2) +/* + * To lookup RGB values into nearest color in the color table, Windows uses 5-bits of the RGB + * at the "center" of the RGB cube, presumably to do a similar lookup cache. The lowest 3 bits + * of the color are thus set to halfway (0x04) and then it's used in the distance calculation + * to the exact color in the color table. We exploit this as well to create a lookup cache. +*/ +struct rgb_lookup_colortable_ctx +{ + const dib_info *dib; + BYTE map[32768]; + BYTE valid[32768 / 8]; +}; + +static void rgb_lookup_colortable_init(const dib_info *dib, struct rgb_lookup_colortable_ctx *ctx) { - if (!d1->color_table || !d2->color_table) return (!d1->color_table && !d2->color_table); - return !memcmp(d1->color_table, d2->color_table, (1 << d1->bit_count) * sizeof(d1->color_table[0])); + ctx->dib = dib; + memset(ctx->valid, 0, sizeof(ctx->valid)); } -static inline DWORD rgb_lookup_colortable(const dib_info *dst, BYTE r, BYTE g, BYTE b) +static inline BYTE rgb_lookup_colortable(struct rgb_lookup_colortable_ctx *ctx, BYTE r, BYTE g, BYTE b) { - /* Windows reduces precision to 5 bits, probably in order to build some sort of lookup cache */ - return rgb_to_pixel_colortable( dst, (r & ~7) + 4, (g & ~7) + 4, (b & ~7) + 4 ); + unsigned pos = (r >> 3) | (g & ~7) << 2 | (b & ~7) << 7; + + if (!(ctx->valid[pos / 8] & (1 << pos % 8))) + { + ctx->valid[pos / 8] |= 1 << pos % 8; + ctx->map[pos] = rgb_to_pixel_colortable(ctx->dib, (r & 
~7) + 4, (g & ~7) + 4, (b & ~7) + 4); + } + return ctx->map[pos]; +} + +static inline BOOL color_tables_match(const dib_info *d1, const dib_info *d2) +{ + if (!d1->color_table || !d2->color_table) return (!d1->color_table && !d2->color_table); + return !memcmp(d1->color_table, d2->color_table, (1 << d1->bit_count) * sizeof(d1->color_table[0])); } static void convert_to_8(dib_info *dst, const dib_info *src, const RECT *src_rect, BOOL dither) { BYTE *dst_start = get_pixel_ptr_8(dst, 0, 0), *dst_pixel; INT x, y, pad_size = ((dst->width + 3) & ~3) - (src_rect->right - src_rect->left); + struct rgb_lookup_colortable_ctx lookup_ctx; DWORD src_val; switch(src->bit_count) @@ -3521,6 +3547,7 @@ static void convert_to_8(dib_info *dst, const dib_info *src, const RECT *src_rec { DWORD *src_start = get_pixel_ptr_32(src, src_rect->left, src_rect->top), *src_pixel; + rgb_lookup_colortable_init(dst, &lookup_ctx); if(src->funcs == &funcs_8888) { for(y = src_rect->top; y < src_rect->bottom; y++) @@ -3530,7 +3557,7 @@ static void convert_to_8(dib_info *dst, const dib_info *src, const RECT *src_rec for(x = src_rect->left; x < src_rect->right; x++) { src_val = *src_pixel++; - *dst_pixel++ = rgb_lookup_colortable(dst, src_val >> 16, src_val >> 8, src_val ); + *dst_pixel++ = rgb_lookup_colortable(&lookup_ctx, src_val >> 16, src_val >> 8, src_val ); } if(pad_size) memset(dst_pixel, 0, pad_size); dst_start += dst->stride; @@ -3546,7 +3573,7 @@ static void convert_to_8(dib_info *dst, const dib_info *src, const RECT *src_rec for(x = src_rect->left; x < src_rect->right; x++) { src_val = *src_pixel++; - *dst_pixel++ = rgb_lookup_colortable(dst, + *dst_pixel++ = rgb_lookup_colortable(&lookup_ctx, src_val >> src->red_shift, src_val >> src->green_shift, src_val >> src->blue_shift ); @@ -3565,7 +3592,7 @@ static void convert_to_8(dib_info *dst, const dib_info *src, const RECT *src_rec for(x = src_rect->left; x < src_rect->right; x++) { src_val = *src_pixel++; - *dst_pixel++ = 
rgb_lookup_colortable(dst, + *dst_pixel++ = rgb_lookup_colortable(&lookup_ctx, get_field(src_val, src->red_shift, src->red_len), get_field(src_val, src->green_shift, src->green_len), get_field(src_val, src->blue_shift, src->blue_len)); @@ -3582,13 +3609,14 @@ static void convert_to_8(dib_info *dst, const dib_info *src, const RECT *src_rec { BYTE *src_start = get_pixel_ptr_24(src, src_rect->left, src_rect->top), *src_pixel; + rgb_lookup_colortable_init(dst, &lookup_ctx); for(y = src_rect->top; y < src_rect->bottom; y++) { dst_pixel = dst_start; src_pixel = src_start; for(x = src_rect->left; x < src_rect->right; x++, src_pixel += 3) { - *dst_pixel++ = rgb_lookup_colortable(dst, src_pixel[2], src_pixel[1], src_pixel[0] ); + *dst_pixel++ = rgb_lookup_colortable(&lookup_ctx, src_pixel[2], src_pixel[1], src_pixel[0] ); } if(pad_size) memset(dst_pixel, 0, pad_size); dst_start += dst->stride; @@ -3600,6 +3628,7 @@ static void convert_to_8(dib_info *dst, const dib_info *src, const RECT *src_rec case 16: { WORD *src_start = get_pixel_ptr_16(src, src_rect->left, src_rect->top), *src_pixel; + rgb_lookup_colortable_init(dst, &lookup_ctx); if(src->funcs == &funcs_555) { for(y = src_rect->top; y < src_rect->bottom; y++) @@ -3609,7 +3638,7 @@ static void convert_to_8(dib_info *dst, const dib_info *src, const RECT *src_rec for(x = src_rect->left; x < src_rect->right; x++) { src_val = *src_pixel++; - *dst_pixel++ = rgb_lookup_colortable(dst, + *dst_pixel++ = rgb_lookup_colortable(&lookup_ctx, ((src_val >> 7) & 0xf8) | ((src_val >> 12) & 0x07), ((src_val >> 2) & 0xf8) | ((src_val >> 7) & 0x07), ((src_val << 3) & 0xf8) | ((src_val >> 2) & 0x07) ); @@ -3628,7 +3657,7 @@ static void convert_to_8(dib_info *dst, const dib_info *src, const RECT *src_rec for(x = src_rect->left; x < src_rect->right; x++) { src_val = *src_pixel++; - *dst_pixel++ = rgb_lookup_colortable(dst, + *dst_pixel++ = rgb_lookup_colortable(&lookup_ctx, (((src_val >> src->red_shift) << 3) & 0xf8) | (((src_val >> 
src->red_shift) >> 2) & 0x07), (((src_val >> src->green_shift) << 3) & 0xf8) | @@ -3650,7 +3679,7 @@ static void convert_to_8(dib_info *dst, const dib_info *src, const RECT *src_rec for(x = src_rect->left; x < src_rect->right; x++) { src_val = *src_pixel++; - *dst_pixel++ = rgb_lookup_colortable(dst, + *dst_pixel++ = rgb_lookup_colortable(&lookup_ctx, (((src_val >> src->red_shift) << 3) & 0xf8) | (((src_val >> src->red_shift) >> 2) & 0x07), (((src_val >> src->green_shift) << 2) & 0xfc) | @@ -3672,7 +3701,7 @@ static void convert_to_8(dib_info *dst, const dib_info *src, const RECT *src_rec for(x = src_rect->left; x < src_rect->right; x++) { src_val = *src_pixel++; - *dst_pixel++ = rgb_lookup_colortable(dst, + *dst_pixel++ = rgb_lookup_colortable(&lookup_ctx, get_field(src_val, src->red_shift, src->red_len), get_field(src_val, src->green_shift, src->green_len), get_field(src_val, src->blue_shift, src->blue_len)); @@ -4807,8 +4836,10 @@ static void blend_rect_8(const dib_info *dst, const dib_info *src, const POINT * const struct clipped_rects *clipped_rects, BLENDFUNCTION blend) { const RGBQUAD *color_table = get_dib_color_table( dst ); + struct rgb_lookup_colortable_ctx lookup_ctx; int i, x, y; + rgb_lookup_colortable_init( dst, &lookup_ctx ); for (i = 0; i < clipped_rects->count; i++) { const RECT *rc = &clipped_rects->rects[i]; @@ -4821,7 +4852,7 @@ static void blend_rect_8(const dib_info *dst, const dib_info *src, const POINT * { RGBQUAD rgb = color_table[dst_ptr[x]]; DWORD val = blend_rgb( rgb.rgbRed, rgb.rgbGreen, rgb.rgbBlue, src_ptr[x], blend ); - dst_ptr[x] = rgb_lookup_colortable( dst, val >> 16, val >> 8, val ); + dst_ptr[x] = rgb_lookup_colortable( &lookup_ctx, val >> 16, val >> 8, val ); } } } @@ -4831,8 +4862,10 @@ static void blend_rect_4(const dib_info *dst, const dib_info *src, const POINT * const struct clipped_rects *clipped_rects, BLENDFUNCTION blend) { const RGBQUAD *color_table = get_dib_color_table( dst ); + struct rgb_lookup_colortable_ctx 
lookup_ctx; int i, j, x, y; + rgb_lookup_colortable_init( dst, &lookup_ctx ); for (i = 0; i < clipped_rects->count; i++) { const RECT *rc = &clipped_rects->rects[i]; @@ -4846,7 +4879,7 @@ static void blend_rect_4(const dib_info *dst, const dib_info *src, const POINT * DWORD val = ((x & 1) ? dst_ptr[x / 2] : (dst_ptr[x / 2] >> 4)) & 0x0f; RGBQUAD rgb = color_table[val]; val = blend_rgb( rgb.rgbRed, rgb.rgbGreen, rgb.rgbBlue, src_ptr[j], blend ); - val = rgb_lookup_colortable( dst, val >> 16, val >> 8, val ); + val = rgb_lookup_colortable( &lookup_ctx, val >> 16, val >> 8, val ); if (x & 1) dst_ptr[x / 2] = val | (dst_ptr[x / 2] & 0xf0); else -- 2.30.0
On Thu, Apr 08, 2021 at 04:11:04PM +0300, Gabriel Ivăncescu wrote:
Signed-off-by: Gabriel Ivăncescu <gabrielopcode(a)gmail.com> --- dlls/gdi32/dibdrv/primitives.c | 65 +++++++++++++++++++++++++--------- 1 file changed, 49 insertions(+), 16 deletions(-)
diff --git a/dlls/gdi32/dibdrv/primitives.c b/dlls/gdi32/dibdrv/primitives.c index 0a5f7e5..be68058 100644 --- a/dlls/gdi32/dibdrv/primitives.c +++ b/dlls/gdi32/dibdrv/primitives.c @@ -3497,22 +3497,48 @@ static void convert_to_16(dib_info *dst, const dib_info *src, const RECT *src_re } }
-static inline BOOL color_tables_match(const dib_info *d1, const dib_info *d2) +/* + * To lookup RGB values into nearest color in the color table, Windows uses 5-bits of the RGB + * at the "center" of the RGB cube, presumably to do a similar lookup cache. The lowest 3 bits + * of the color are thus set to halfway (0x04) and then it's used in the distance calculation + * to the exact color in the color table. We exploit this as well to create a lookup cache. +*/ +struct rgb_lookup_colortable_ctx +{ + const dib_info *dib; + BYTE map[32768]; + BYTE valid[32768 / 8]; +}; + +static void rgb_lookup_colortable_init(const dib_info *dib, struct rgb_lookup_colortable_ctx *ctx) { - if (!d1->color_table || !d2->color_table) return (!d1->color_table && !d2->color_table); - return !memcmp(d1->color_table, d2->color_table, (1 << d1->bit_count) * sizeof(d1->color_table[0])); + ctx->dib = dib; + memset(ctx->valid, 0, sizeof(ctx->valid)); }
-static inline DWORD rgb_lookup_colortable(const dib_info *dst, BYTE r, BYTE g, BYTE b) +static inline BYTE rgb_lookup_colortable(struct rgb_lookup_colortable_ctx *ctx, BYTE r, BYTE g, BYTE b) { - /* Windows reduces precision to 5 bits, probably in order to build some sort of lookup cache */ - return rgb_to_pixel_colortable( dst, (r & ~7) + 4, (g & ~7) + 4, (b & ~7) + 4 ); + unsigned pos = (r >> 3) | (g & ~7) << 2 | (b & ~7) << 7; + + if (!(ctx->valid[pos / 8] & (1 << pos % 8))) + { + ctx->valid[pos / 8] |= 1 << pos % 8; + ctx->map[pos] = rgb_to_pixel_colortable(ctx->dib, (r & ~7) + 4, (g & ~7) + 4, (b & ~7) + 4); + } + return ctx->map[pos]; +}
I've sent in v4 of this series in which I've tweaked this a bit to give the compiler more of a chance to optimize things as well as using a lookup for the pixel masks. With a 300x300 32-bpp -> 8-bpp BitBlt I'm getting performance slightly better than Windows if there aren't many distinct RGB values and a little worse if the entire map needs filling. Huw.
On 16/04/2021 13:17, Huw Davies wrote:
On Thu, Apr 08, 2021 at 04:11:04PM +0300, Gabriel Ivăncescu wrote:
Signed-off-by: Gabriel Ivăncescu <gabrielopcode(a)gmail.com> --- dlls/gdi32/dibdrv/primitives.c | 65 +++++++++++++++++++++++++--------- 1 file changed, 49 insertions(+), 16 deletions(-)
diff --git a/dlls/gdi32/dibdrv/primitives.c b/dlls/gdi32/dibdrv/primitives.c index 0a5f7e5..be68058 100644 --- a/dlls/gdi32/dibdrv/primitives.c +++ b/dlls/gdi32/dibdrv/primitives.c @@ -3497,22 +3497,48 @@ static void convert_to_16(dib_info *dst, const dib_info *src, const RECT *src_re } }
-static inline BOOL color_tables_match(const dib_info *d1, const dib_info *d2) +/* + * To lookup RGB values into nearest color in the color table, Windows uses 5-bits of the RGB + * at the "center" of the RGB cube, presumably to do a similar lookup cache. The lowest 3 bits + * of the color are thus set to halfway (0x04) and then it's used in the distance calculation + * to the exact color in the color table. We exploit this as well to create a lookup cache. +*/ +struct rgb_lookup_colortable_ctx +{ + const dib_info *dib; + BYTE map[32768]; + BYTE valid[32768 / 8]; +}; + +static void rgb_lookup_colortable_init(const dib_info *dib, struct rgb_lookup_colortable_ctx *ctx) { - if (!d1->color_table || !d2->color_table) return (!d1->color_table && !d2->color_table); - return !memcmp(d1->color_table, d2->color_table, (1 << d1->bit_count) * sizeof(d1->color_table[0])); + ctx->dib = dib; + memset(ctx->valid, 0, sizeof(ctx->valid)); }
-static inline DWORD rgb_lookup_colortable(const dib_info *dst, BYTE r, BYTE g, BYTE b) +static inline BYTE rgb_lookup_colortable(struct rgb_lookup_colortable_ctx *ctx, BYTE r, BYTE g, BYTE b) { - /* Windows reduces precision to 5 bits, probably in order to build some sort of lookup cache */ - return rgb_to_pixel_colortable( dst, (r & ~7) + 4, (g & ~7) + 4, (b & ~7) + 4 ); + unsigned pos = (r >> 3) | (g & ~7) << 2 | (b & ~7) << 7; + + if (!(ctx->valid[pos / 8] & (1 << pos % 8))) + { + ctx->valid[pos / 8] |= 1 << pos % 8; + ctx->map[pos] = rgb_to_pixel_colortable(ctx->dib, (r & ~7) + 4, (g & ~7) + 4, (b & ~7) + 4); + } + return ctx->map[pos]; +}
I've sent in v4 of this series in which I've tweaked this a bit to give the compiler more of a chance to optimize things as well as using a lookup for the pixel masks.
With a 300x300 32-bpp -> 8-bpp BitBlt I'm getting performance slightly better than Windows if there aren't many distinct RGB values and a little worse if the entire map needs filling.
Huw.
Looks good, thanks.
participants (2)
-
Gabriel Ivăncescu -
Huw Davies