This is to prepare so we don't recalculate the lookup cache map for color tables on every clipped rect (which is expensive).
Signed-off-by: Gabriel Ivăncescu gabrielopcode@gmail.com ---
This is a no-op patch, it's needed for next patch.
dlls/gdi32/dibdrv/bitblt.c | 14 +- dlls/gdi32/dibdrv/dibdrv.h | 18 +-- dlls/gdi32/dibdrv/primitives.c | 284 +++++++++++++++++++-------------- 3 files changed, 177 insertions(+), 139 deletions(-)
diff --git a/dlls/gdi32/dibdrv/bitblt.c b/dlls/gdi32/dibdrv/bitblt.c index 8f67535..045c969 100644 --- a/dlls/gdi32/dibdrv/bitblt.c +++ b/dlls/gdi32/dibdrv/bitblt.c @@ -584,17 +584,15 @@ static void mask_rect( dib_info *dst, const RECT *dst_rect, const dib_info *src, static DWORD blend_rect( dib_info *dst, const RECT *dst_rect, const dib_info *src, const RECT *src_rect, HRGN clip, BLENDFUNCTION blend ) { - POINT origin; struct clipped_rects clipped_rects; - int i; + POINT offset;
if (!get_clipped_rects( dst, dst_rect, clip, &clipped_rects )) return ERROR_SUCCESS; - for (i = 0; i < clipped_rects.count; i++) - { - origin.x = src_rect->left + clipped_rects.rects[i].left - dst_rect->left; - origin.y = src_rect->top + clipped_rects.rects[i].top - dst_rect->top; - dst->funcs->blend_rect( dst, &clipped_rects.rects[i], src, &origin, blend ); - } + + offset.x = src_rect->left - dst_rect->left; + offset.y = src_rect->top - dst_rect->top; + dst->funcs->blend_rect( dst, src, &offset, &clipped_rects, blend ); + free_clipped_rects( &clipped_rects ); return ERROR_SUCCESS; } diff --git a/dlls/gdi32/dibdrv/dibdrv.h b/dlls/gdi32/dibdrv/dibdrv.h index 88b4c62..7db825c 100644 --- a/dlls/gdi32/dibdrv/dibdrv.h +++ b/dlls/gdi32/dibdrv/dibdrv.h @@ -165,6 +165,13 @@ static inline dibdrv_physdev *get_dibdrv_pdev( PHYSDEV dev ) return (dibdrv_physdev *)dev; }
+struct clipped_rects +{ + RECT *rects; + int count; + RECT buffer[32]; +}; + struct line_params { int err_start, err_add_1, err_add_2, bias; @@ -189,8 +196,8 @@ typedef struct primitive_funcs const dib_info *brush, const rop_mask_bits *bits); void (* copy_rect)(const dib_info *dst, const RECT *rc, const dib_info *src, const POINT *origin, int rop2, int overlap); - void (* blend_rect)(const dib_info *dst, const RECT *rc, const dib_info *src, - const POINT *origin, BLENDFUNCTION blend); + void (* blend_rect)(const dib_info *dst, const dib_info *src, const POINT *offset, + const struct clipped_rects *clipped_rects, BLENDFUNCTION blend); BOOL (* gradient_rect)(const dib_info *dib, const RECT *rc, const TRIVERTEX *v, int mode); void (* mask_rect)(const dib_info *dst, const RECT *rc, const dib_info *src, const POINT *origin, int rop2); @@ -240,13 +247,6 @@ typedef struct DWORD octant; } bres_params;
-struct clipped_rects -{ - RECT *rects; - int count; - RECT buffer[32]; -}; - extern void get_rop_codes(INT rop, struct rop_codes *codes) DECLSPEC_HIDDEN; extern void reset_dash_origin(dibdrv_physdev *pdev) DECLSPEC_HIDDEN; extern void init_dib_info_from_bitmapinfo(dib_info *dib, const BITMAPINFO *info, void *bits) DECLSPEC_HIDDEN; diff --git a/dlls/gdi32/dibdrv/primitives.c b/dlls/gdi32/dibdrv/primitives.c index 01a1c7c..0a5f7e5 100644 --- a/dlls/gdi32/dibdrv/primitives.c +++ b/dlls/gdi32/dibdrv/primitives.c @@ -4651,199 +4651,239 @@ static inline DWORD blend_rgb( BYTE dst_r, BYTE dst_g, BYTE dst_b, DWORD src, BL blend_color( dst_r, src >> 16, blend.SourceConstantAlpha ) << 16); }
-static void blend_rect_8888(const dib_info *dst, const RECT *rc, - const dib_info *src, const POINT *origin, BLENDFUNCTION blend) +static void blend_rect_8888(const dib_info *dst, const dib_info *src, const POINT *offset, + const struct clipped_rects *clipped_rects, BLENDFUNCTION blend) { - DWORD *src_ptr = get_pixel_ptr_32( src, origin->x, origin->y ); - DWORD *dst_ptr = get_pixel_ptr_32( dst, rc->left, rc->top ); - int x, y; + int i, x, y;
- if (blend.AlphaFormat & AC_SRC_ALPHA) + for (i = 0; i < clipped_rects->count; i++) { - if (blend.SourceConstantAlpha == 255) - for (y = rc->top; y < rc->bottom; y++, dst_ptr += dst->stride / 4, src_ptr += src->stride / 4) - for (x = 0; x < rc->right - rc->left; x++) - dst_ptr[x] = blend_argb( dst_ptr[x], src_ptr[x] ); + const RECT *rc = &clipped_rects->rects[i]; + DWORD *src_ptr = get_pixel_ptr_32( src, rc->left + offset->x, rc->top + offset->y ); + DWORD *dst_ptr = get_pixel_ptr_32( dst, rc->left, rc->top ); + + if (blend.AlphaFormat & AC_SRC_ALPHA) + { + if (blend.SourceConstantAlpha == 255) + for (y = rc->top; y < rc->bottom; y++, dst_ptr += dst->stride / 4, src_ptr += src->stride / 4) + for (x = 0; x < rc->right - rc->left; x++) + dst_ptr[x] = blend_argb( dst_ptr[x], src_ptr[x] ); + else + for (y = rc->top; y < rc->bottom; y++, dst_ptr += dst->stride / 4, src_ptr += src->stride / 4) + for (x = 0; x < rc->right - rc->left; x++) + dst_ptr[x] = blend_argb_alpha( dst_ptr[x], src_ptr[x], blend.SourceConstantAlpha ); + } + else if (src->compression == BI_RGB) + for (y = rc->top; y < rc->bottom; y++, dst_ptr += dst->stride / 4, src_ptr += src->stride / 4) + for (x = 0; x < rc->right - rc->left; x++) + dst_ptr[x] = blend_argb_constant_alpha( dst_ptr[x], src_ptr[x], blend.SourceConstantAlpha ); else - for (y = rc->top; y < rc->bottom; y++, dst_ptr += dst->stride / 4, src_ptr += src->stride / 4) - for (x = 0; x < rc->right - rc->left; x++) - dst_ptr[x] = blend_argb_alpha( dst_ptr[x], src_ptr[x], blend.SourceConstantAlpha ); - } - else if (src->compression == BI_RGB) - for (y = rc->top; y < rc->bottom; y++, dst_ptr += dst->stride / 4, src_ptr += src->stride / 4) - for (x = 0; x < rc->right - rc->left; x++) - dst_ptr[x] = blend_argb_constant_alpha( dst_ptr[x], src_ptr[x], blend.SourceConstantAlpha ); - else - for (y = rc->top; y < rc->bottom; y++, dst_ptr += dst->stride / 4, src_ptr += src->stride / 4) - for (x = 0; x < rc->right - rc->left; x++) - dst_ptr[x] = blend_argb_no_src_alpha( dst_ptr[x], src_ptr[x], blend.SourceConstantAlpha ); + for (y = rc->top; y < rc->bottom; y++, dst_ptr += dst->stride / 4, src_ptr += src->stride / 4) + for (x = 0; x < rc->right - rc->left; x++) + dst_ptr[x] = blend_argb_no_src_alpha( dst_ptr[x], src_ptr[x], blend.SourceConstantAlpha ); + } }
-static void blend_rect_32(const dib_info *dst, const RECT *rc, - const dib_info *src, const POINT *origin, BLENDFUNCTION blend) +static void blend_rect_32(const dib_info *dst, const dib_info *src, const POINT *offset, + const struct clipped_rects *clipped_rects, BLENDFUNCTION blend) { - DWORD *src_ptr = get_pixel_ptr_32( src, origin->x, origin->y ); - DWORD *dst_ptr = get_pixel_ptr_32( dst, rc->left, rc->top ); - int x, y; + int i, x, y;
- if (dst->red_len == 8 && dst->green_len == 8 && dst->blue_len == 8) + for (i = 0; i < clipped_rects->count; i++) { - for (y = rc->top; y < rc->bottom; y++, dst_ptr += dst->stride / 4, src_ptr += src->stride / 4) + const RECT *rc = &clipped_rects->rects[i]; + DWORD *src_ptr = get_pixel_ptr_32( src, rc->left + offset->x, rc->top + offset->y ); + DWORD *dst_ptr = get_pixel_ptr_32( dst, rc->left, rc->top ); + + if (dst->red_len == 8 && dst->green_len == 8 && dst->blue_len == 8) { - for (x = 0; x < rc->right - rc->left; x++) + for (y = rc->top; y < rc->bottom; y++, dst_ptr += dst->stride / 4, src_ptr += src->stride / 4) { - DWORD val = blend_rgb( dst_ptr[x] >> dst->red_shift, - dst_ptr[x] >> dst->green_shift, - dst_ptr[x] >> dst->blue_shift, - src_ptr[x], blend ); - dst_ptr[x] = ((( val & 0xff) << dst->blue_shift) | - (((val >> 8) & 0xff) << dst->green_shift) | - (((val >> 16) & 0xff) << dst->red_shift)); + for (x = 0; x < rc->right - rc->left; x++) + { + DWORD val = blend_rgb( dst_ptr[x] >> dst->red_shift, + dst_ptr[x] >> dst->green_shift, + dst_ptr[x] >> dst->blue_shift, + src_ptr[x], blend ); + dst_ptr[x] = ((( val & 0xff) << dst->blue_shift) | + (((val >> 8) & 0xff) << dst->green_shift) | + (((val >> 16) & 0xff) << dst->red_shift)); + } } } - } - else - { - for (y = rc->top; y < rc->bottom; y++, dst_ptr += dst->stride / 4, src_ptr += src->stride / 4) + else { - for (x = 0; x < rc->right - rc->left; x++) + for (y = rc->top; y < rc->bottom; y++, dst_ptr += dst->stride / 4, src_ptr += src->stride / 4) { - DWORD val = blend_rgb( get_field( dst_ptr[x], dst->red_shift, dst->red_len ), - get_field( dst_ptr[x], dst->green_shift, dst->green_len ), - get_field( dst_ptr[x], dst->blue_shift, dst->blue_len ), - src_ptr[x], blend ); - dst_ptr[x] = rgb_to_pixel_masks( dst, val >> 16, val >> 8, val ); + for (x = 0; x < rc->right - rc->left; x++) + { + DWORD val = blend_rgb( get_field( dst_ptr[x], dst->red_shift, dst->red_len ), + get_field( dst_ptr[x], dst->green_shift, dst->green_len ), + get_field( dst_ptr[x], dst->blue_shift, dst->blue_len ), + src_ptr[x], blend ); + dst_ptr[x] = rgb_to_pixel_masks( dst, val >> 16, val >> 8, val ); + } } } } }
-static void blend_rect_24(const dib_info *dst, const RECT *rc, - const dib_info *src, const POINT *origin, BLENDFUNCTION blend) +static void blend_rect_24(const dib_info *dst, const dib_info *src, const POINT *offset, + const struct clipped_rects *clipped_rects, BLENDFUNCTION blend) { - DWORD *src_ptr = get_pixel_ptr_32( src, origin->x, origin->y ); - BYTE *dst_ptr = get_pixel_ptr_24( dst, rc->left, rc->top ); - int x, y; + int i, x, y;
- for (y = rc->top; y < rc->bottom; y++, dst_ptr += dst->stride, src_ptr += src->stride / 4) + for (i = 0; i < clipped_rects->count; i++) { - for (x = 0; x < rc->right - rc->left; x++) + const RECT *rc = &clipped_rects->rects[i]; + DWORD *src_ptr = get_pixel_ptr_32( src, rc->left + offset->x, rc->top + offset->y ); + BYTE *dst_ptr = get_pixel_ptr_24( dst, rc->left, rc->top ); + + for (y = rc->top; y < rc->bottom; y++, dst_ptr += dst->stride, src_ptr += src->stride / 4) { - DWORD val = blend_rgb( dst_ptr[x * 3 + 2], dst_ptr[x * 3 + 1], dst_ptr[x * 3], - src_ptr[x], blend ); - dst_ptr[x * 3] = val; - dst_ptr[x * 3 + 1] = val >> 8; - dst_ptr[x * 3 + 2] = val >> 16; + for (x = 0; x < rc->right - rc->left; x++) + { + DWORD val = blend_rgb( dst_ptr[x * 3 + 2], dst_ptr[x * 3 + 1], dst_ptr[x * 3], + src_ptr[x], blend ); + dst_ptr[x * 3] = val; + dst_ptr[x * 3 + 1] = val >> 8; + dst_ptr[x * 3 + 2] = val >> 16; + } } } }
-static void blend_rect_555(const dib_info *dst, const RECT *rc, - const dib_info *src, const POINT *origin, BLENDFUNCTION blend) +static void blend_rect_555(const dib_info *dst, const dib_info *src, const POINT *offset, + const struct clipped_rects *clipped_rects, BLENDFUNCTION blend) { - DWORD *src_ptr = get_pixel_ptr_32( src, origin->x, origin->y ); - WORD *dst_ptr = get_pixel_ptr_16( dst, rc->left, rc->top ); - int x, y; + int i, x, y;
- for (y = rc->top; y < rc->bottom; y++, dst_ptr += dst->stride / 2, src_ptr += src->stride / 4) + for (i = 0; i < clipped_rects->count; i++) { - for (x = 0; x < rc->right - rc->left; x++) + const RECT *rc = &clipped_rects->rects[i]; + DWORD *src_ptr = get_pixel_ptr_32( src, rc->left + offset->x, rc->top + offset->y ); + WORD *dst_ptr = get_pixel_ptr_16( dst, rc->left, rc->top ); + + for (y = rc->top; y < rc->bottom; y++, dst_ptr += dst->stride / 2, src_ptr += src->stride / 4) { - DWORD val = blend_rgb( ((dst_ptr[x] >> 7) & 0xf8) | ((dst_ptr[x] >> 12) & 0x07), - ((dst_ptr[x] >> 2) & 0xf8) | ((dst_ptr[x] >> 7) & 0x07), - ((dst_ptr[x] << 3) & 0xf8) | ((dst_ptr[x] >> 2) & 0x07), - src_ptr[x], blend ); - dst_ptr[x] = ((val >> 9) & 0x7c00) | ((val >> 6) & 0x03e0) | ((val >> 3) & 0x001f); + for (x = 0; x < rc->right - rc->left; x++) + { + DWORD val = blend_rgb( ((dst_ptr[x] >> 7) & 0xf8) | ((dst_ptr[x] >> 12) & 0x07), + ((dst_ptr[x] >> 2) & 0xf8) | ((dst_ptr[x] >> 7) & 0x07), + ((dst_ptr[x] << 3) & 0xf8) | ((dst_ptr[x] >> 2) & 0x07), + src_ptr[x], blend ); + dst_ptr[x] = ((val >> 9) & 0x7c00) | ((val >> 6) & 0x03e0) | ((val >> 3) & 0x001f); + } } } }
-static void blend_rect_16(const dib_info *dst, const RECT *rc, - const dib_info *src, const POINT *origin, BLENDFUNCTION blend) +static void blend_rect_16(const dib_info *dst, const dib_info *src, const POINT *offset, + const struct clipped_rects *clipped_rects, BLENDFUNCTION blend) { - DWORD *src_ptr = get_pixel_ptr_32( src, origin->x, origin->y ); - WORD *dst_ptr = get_pixel_ptr_16( dst, rc->left, rc->top ); - int x, y; + int i, x, y;
- for (y = rc->top; y < rc->bottom; y++, dst_ptr += dst->stride / 2, src_ptr += src->stride / 4) + for (i = 0; i < clipped_rects->count; i++) { - for (x = 0; x < rc->right - rc->left; x++) + const RECT *rc = &clipped_rects->rects[i]; + DWORD *src_ptr = get_pixel_ptr_32( src, rc->left + offset->x, rc->top + offset->y ); + WORD *dst_ptr = get_pixel_ptr_16( dst, rc->left, rc->top ); + + for (y = rc->top; y < rc->bottom; y++, dst_ptr += dst->stride / 2, src_ptr += src->stride / 4) { - DWORD val = blend_rgb( get_field( dst_ptr[x], dst->red_shift, dst->red_len ), - get_field( dst_ptr[x], dst->green_shift, dst->green_len ), - get_field( dst_ptr[x], dst->blue_shift, dst->blue_len ), - src_ptr[x], blend ); - dst_ptr[x] = rgb_to_pixel_masks( dst, val >> 16, val >> 8, val ); + for (x = 0; x < rc->right - rc->left; x++) + { + DWORD val = blend_rgb( get_field( dst_ptr[x], dst->red_shift, dst->red_len ), + get_field( dst_ptr[x], dst->green_shift, dst->green_len ), + get_field( dst_ptr[x], dst->blue_shift, dst->blue_len ), + src_ptr[x], blend ); + dst_ptr[x] = rgb_to_pixel_masks( dst, val >> 16, val >> 8, val ); + } } } }
-static void blend_rect_8(const dib_info *dst, const RECT *rc, - const dib_info *src, const POINT *origin, BLENDFUNCTION blend) +static void blend_rect_8(const dib_info *dst, const dib_info *src, const POINT *offset, + const struct clipped_rects *clipped_rects, BLENDFUNCTION blend) { const RGBQUAD *color_table = get_dib_color_table( dst ); - DWORD *src_ptr = get_pixel_ptr_32( src, origin->x, origin->y ); - BYTE *dst_ptr = get_pixel_ptr_8( dst, rc->left, rc->top ); - int x, y; + int i, x, y;
- for (y = rc->top; y < rc->bottom; y++, dst_ptr += dst->stride, src_ptr += src->stride / 4) + for (i = 0; i < clipped_rects->count; i++) { - for (x = 0; x < rc->right - rc->left; x++) + const RECT *rc = &clipped_rects->rects[i]; + DWORD *src_ptr = get_pixel_ptr_32( src, rc->left + offset->x, rc->top + offset->y ); + BYTE *dst_ptr = get_pixel_ptr_8( dst, rc->left, rc->top ); + + for (y = rc->top; y < rc->bottom; y++, dst_ptr += dst->stride, src_ptr += src->stride / 4) { - RGBQUAD rgb = color_table[dst_ptr[x]]; - DWORD val = blend_rgb( rgb.rgbRed, rgb.rgbGreen, rgb.rgbBlue, src_ptr[x], blend ); - dst_ptr[x] = rgb_lookup_colortable( dst, val >> 16, val >> 8, val ); + for (x = 0; x < rc->right - rc->left; x++) + { + RGBQUAD rgb = color_table[dst_ptr[x]]; + DWORD val = blend_rgb( rgb.rgbRed, rgb.rgbGreen, rgb.rgbBlue, src_ptr[x], blend ); + dst_ptr[x] = rgb_lookup_colortable( dst, val >> 16, val >> 8, val ); + } } } }
-static void blend_rect_4(const dib_info *dst, const RECT *rc, - const dib_info *src, const POINT *origin, BLENDFUNCTION blend) +static void blend_rect_4(const dib_info *dst, const dib_info *src, const POINT *offset, + const struct clipped_rects *clipped_rects, BLENDFUNCTION blend) { const RGBQUAD *color_table = get_dib_color_table( dst ); - DWORD *src_ptr = get_pixel_ptr_32( src, origin->x, origin->y ); - BYTE *dst_ptr = get_pixel_ptr_4( dst, rc->left, rc->top ); - int i, x, y; + int i, j, x, y;
- for (y = rc->top; y < rc->bottom; y++, dst_ptr += dst->stride, src_ptr += src->stride / 4) + for (i = 0; i < clipped_rects->count; i++) { - for (i = 0, x = (dst->rect.left + rc->left) & 1; i < rc->right - rc->left; i++, x++) + const RECT *rc = &clipped_rects->rects[i]; + DWORD *src_ptr = get_pixel_ptr_32( src, rc->left + offset->x, rc->top + offset->y ); + BYTE *dst_ptr = get_pixel_ptr_4( dst, rc->left, rc->top ); + + for (y = rc->top; y < rc->bottom; y++, dst_ptr += dst->stride, src_ptr += src->stride / 4) { - DWORD val = ((x & 1) ? dst_ptr[x / 2] : (dst_ptr[x / 2] >> 4)) & 0x0f; - RGBQUAD rgb = color_table[val]; - val = blend_rgb( rgb.rgbRed, rgb.rgbGreen, rgb.rgbBlue, src_ptr[i], blend ); - val = rgb_lookup_colortable( dst, val >> 16, val >> 8, val ); - if (x & 1) - dst_ptr[x / 2] = val | (dst_ptr[x / 2] & 0xf0); - else - dst_ptr[x / 2] = (val << 4) | (dst_ptr[x / 2] & 0x0f); + for (j = 0, x = (dst->rect.left + rc->left) & 1; j < rc->right - rc->left; j++, x++) + { + DWORD val = ((x & 1) ? dst_ptr[x / 2] : (dst_ptr[x / 2] >> 4)) & 0x0f; + RGBQUAD rgb = color_table[val]; + val = blend_rgb( rgb.rgbRed, rgb.rgbGreen, rgb.rgbBlue, src_ptr[j], blend ); + val = rgb_lookup_colortable( dst, val >> 16, val >> 8, val ); + if (x & 1) + dst_ptr[x / 2] = val | (dst_ptr[x / 2] & 0xf0); + else + dst_ptr[x / 2] = (val << 4) | (dst_ptr[x / 2] & 0x0f); + } } } }
-static void blend_rect_1(const dib_info *dst, const RECT *rc, - const dib_info *src, const POINT *origin, BLENDFUNCTION blend) +static void blend_rect_1(const dib_info *dst, const dib_info *src, const POINT *offset, + const struct clipped_rects *clipped_rects, BLENDFUNCTION blend) { const RGBQUAD *color_table = get_dib_color_table( dst ); - DWORD *src_ptr = get_pixel_ptr_32( src, origin->x, origin->y ); - BYTE *dst_ptr = get_pixel_ptr_1( dst, rc->left, rc->top ); - int i, x, y; + int i, j, x, y;
- for (y = rc->top; y < rc->bottom; y++, dst_ptr += dst->stride, src_ptr += src->stride / 4) + for (i = 0; i < clipped_rects->count; i++) { - for (i = 0, x = (dst->rect.left + rc->left) & 7; i < rc->right - rc->left; i++, x++) + const RECT *rc = &clipped_rects->rects[i]; + DWORD *src_ptr = get_pixel_ptr_32( src, rc->left + offset->x, rc->top + offset->y ); + BYTE *dst_ptr = get_pixel_ptr_1( dst, rc->left, rc->top ); + + for (y = rc->top; y < rc->bottom; y++, dst_ptr += dst->stride, src_ptr += src->stride / 4) { - DWORD val = (dst_ptr[x / 8] & pixel_masks_1[x % 8]) ? 1 : 0; - RGBQUAD rgb = color_table[val]; - val = blend_rgb( rgb.rgbRed, rgb.rgbGreen, rgb.rgbBlue, src_ptr[i], blend ); - val = rgb_to_pixel_colortable(dst, val >> 16, val >> 8, val) ? 0xff : 0; - dst_ptr[x / 8] = (dst_ptr[x / 8] & ~pixel_masks_1[x % 8]) | (val & pixel_masks_1[x % 8]); + for (j = 0, x = (dst->rect.left + rc->left) & 7; j < rc->right - rc->left; j++, x++) + { + DWORD val = (dst_ptr[x / 8] & pixel_masks_1[x % 8]) ? 1 : 0; + RGBQUAD rgb = color_table[val]; + val = blend_rgb( rgb.rgbRed, rgb.rgbGreen, rgb.rgbBlue, src_ptr[j], blend ); + val = rgb_to_pixel_colortable(dst, val >> 16, val >> 8, val) ? 0xff : 0; + dst_ptr[x / 8] = (dst_ptr[x / 8] & ~pixel_masks_1[x % 8]) | (val & pixel_masks_1[x % 8]); + } } } }
-static void blend_rect_null(const dib_info *dst, const RECT *rc, - const dib_info *src, const POINT *origin, BLENDFUNCTION blend) +static void blend_rect_null(const dib_info *dst, const dib_info *src, const POINT *offset, + const struct clipped_rects *clipped_rects, BLENDFUNCTION blend) { }
Signed-off-by: Gabriel Ivăncescu gabrielopcode@gmail.com --- dlls/gdi32/dibdrv/primitives.c | 65 +++++++++++++++++++++++++--------- 1 file changed, 49 insertions(+), 16 deletions(-)
diff --git a/dlls/gdi32/dibdrv/primitives.c b/dlls/gdi32/dibdrv/primitives.c index 0a5f7e5..be68058 100644 --- a/dlls/gdi32/dibdrv/primitives.c +++ b/dlls/gdi32/dibdrv/primitives.c @@ -3497,22 +3497,48 @@ static void convert_to_16(dib_info *dst, const dib_info *src, const RECT *src_re } }
-static inline BOOL color_tables_match(const dib_info *d1, const dib_info *d2) +/* + * To lookup RGB values into nearest color in the color table, Windows uses 5-bits of the RGB + * at the "center" of the RGB cube, presumably to do a similar lookup cache. The lowest 3 bits + * of the color are thus set to halfway (0x04) and then it's used in the distance calculation + * to the exact color in the color table. We exploit this as well to create a lookup cache. +*/ +struct rgb_lookup_colortable_ctx +{ + const dib_info *dib; + BYTE map[32768]; + BYTE valid[32768 / 8]; +}; + +static void rgb_lookup_colortable_init(const dib_info *dib, struct rgb_lookup_colortable_ctx *ctx) { - if (!d1->color_table || !d2->color_table) return (!d1->color_table && !d2->color_table); - return !memcmp(d1->color_table, d2->color_table, (1 << d1->bit_count) * sizeof(d1->color_table[0])); + ctx->dib = dib; + memset(ctx->valid, 0, sizeof(ctx->valid)); }
-static inline DWORD rgb_lookup_colortable(const dib_info *dst, BYTE r, BYTE g, BYTE b) +static inline BYTE rgb_lookup_colortable(struct rgb_lookup_colortable_ctx *ctx, BYTE r, BYTE g, BYTE b) { - /* Windows reduces precision to 5 bits, probably in order to build some sort of lookup cache */ - return rgb_to_pixel_colortable( dst, (r & ~7) + 4, (g & ~7) + 4, (b & ~7) + 4 ); + unsigned pos = (r >> 3) | (g & ~7) << 2 | (b & ~7) << 7; + + if (!(ctx->valid[pos / 8] & (1 << pos % 8))) + { + ctx->valid[pos / 8] |= 1 << pos % 8; + ctx->map[pos] = rgb_to_pixel_colortable(ctx->dib, (r & ~7) + 4, (g & ~7) + 4, (b & ~7) + 4); + } + return ctx->map[pos]; +} + +static inline BOOL color_tables_match(const dib_info *d1, const dib_info *d2) +{ + if (!d1->color_table || !d2->color_table) return (!d1->color_table && !d2->color_table); + return !memcmp(d1->color_table, d2->color_table, (1 << d1->bit_count) * sizeof(d1->color_table[0])); }
static void convert_to_8(dib_info *dst, const dib_info *src, const RECT *src_rect, BOOL dither) { BYTE *dst_start = get_pixel_ptr_8(dst, 0, 0), *dst_pixel; INT x, y, pad_size = ((dst->width + 3) & ~3) - (src_rect->right - src_rect->left); + struct rgb_lookup_colortable_ctx lookup_ctx; DWORD src_val;
switch(src->bit_count) @@ -3521,6 +3547,7 @@ static void convert_to_8(dib_info *dst, const dib_info *src, const RECT *src_rec { DWORD *src_start = get_pixel_ptr_32(src, src_rect->left, src_rect->top), *src_pixel;
+ rgb_lookup_colortable_init(dst, &lookup_ctx); if(src->funcs == &funcs_8888) { for(y = src_rect->top; y < src_rect->bottom; y++) @@ -3530,7 +3557,7 @@ static void convert_to_8(dib_info *dst, const dib_info *src, const RECT *src_rec for(x = src_rect->left; x < src_rect->right; x++) { src_val = *src_pixel++; - *dst_pixel++ = rgb_lookup_colortable(dst, src_val >> 16, src_val >> 8, src_val ); + *dst_pixel++ = rgb_lookup_colortable(&lookup_ctx, src_val >> 16, src_val >> 8, src_val ); } if(pad_size) memset(dst_pixel, 0, pad_size); dst_start += dst->stride; @@ -3546,7 +3573,7 @@ static void convert_to_8(dib_info *dst, const dib_info *src, const RECT *src_rec for(x = src_rect->left; x < src_rect->right; x++) { src_val = *src_pixel++; - *dst_pixel++ = rgb_lookup_colortable(dst, + *dst_pixel++ = rgb_lookup_colortable(&lookup_ctx, src_val >> src->red_shift, src_val >> src->green_shift, src_val >> src->blue_shift ); @@ -3565,7 +3592,7 @@ static void convert_to_8(dib_info *dst, const dib_info *src, const RECT *src_rec for(x = src_rect->left; x < src_rect->right; x++) { src_val = *src_pixel++; - *dst_pixel++ = rgb_lookup_colortable(dst, + *dst_pixel++ = rgb_lookup_colortable(&lookup_ctx, get_field(src_val, src->red_shift, src->red_len), get_field(src_val, src->green_shift, src->green_len), get_field(src_val, src->blue_shift, src->blue_len)); @@ -3582,13 +3609,14 @@ static void convert_to_8(dib_info *dst, const dib_info *src, const RECT *src_rec { BYTE *src_start = get_pixel_ptr_24(src, src_rect->left, src_rect->top), *src_pixel;
+ rgb_lookup_colortable_init(dst, &lookup_ctx); for(y = src_rect->top; y < src_rect->bottom; y++) { dst_pixel = dst_start; src_pixel = src_start; for(x = src_rect->left; x < src_rect->right; x++, src_pixel += 3) { - *dst_pixel++ = rgb_lookup_colortable(dst, src_pixel[2], src_pixel[1], src_pixel[0] ); + *dst_pixel++ = rgb_lookup_colortable(&lookup_ctx, src_pixel[2], src_pixel[1], src_pixel[0] ); } if(pad_size) memset(dst_pixel, 0, pad_size); dst_start += dst->stride; @@ -3600,6 +3628,7 @@ static void convert_to_8(dib_info *dst, const dib_info *src, const RECT *src_rec case 16: { WORD *src_start = get_pixel_ptr_16(src, src_rect->left, src_rect->top), *src_pixel; + rgb_lookup_colortable_init(dst, &lookup_ctx); if(src->funcs == &funcs_555) { for(y = src_rect->top; y < src_rect->bottom; y++) @@ -3609,7 +3638,7 @@ static void convert_to_8(dib_info *dst, const dib_info *src, const RECT *src_rec for(x = src_rect->left; x < src_rect->right; x++) { src_val = *src_pixel++; - *dst_pixel++ = rgb_lookup_colortable(dst, + *dst_pixel++ = rgb_lookup_colortable(&lookup_ctx, ((src_val >> 7) & 0xf8) | ((src_val >> 12) & 0x07), ((src_val >> 2) & 0xf8) | ((src_val >> 7) & 0x07), ((src_val << 3) & 0xf8) | ((src_val >> 2) & 0x07) ); @@ -3628,7 +3657,7 @@ static void convert_to_8(dib_info *dst, const dib_info *src, const RECT *src_rec for(x = src_rect->left; x < src_rect->right; x++) { src_val = *src_pixel++; - *dst_pixel++ = rgb_lookup_colortable(dst, + *dst_pixel++ = rgb_lookup_colortable(&lookup_ctx, (((src_val >> src->red_shift) << 3) & 0xf8) | (((src_val >> src->red_shift) >> 2) & 0x07), (((src_val >> src->green_shift) << 3) & 0xf8) | @@ -3650,7 +3679,7 @@ static void convert_to_8(dib_info *dst, const dib_info *src, const RECT *src_rec for(x = src_rect->left; x < src_rect->right; x++) { src_val = *src_pixel++; - *dst_pixel++ = rgb_lookup_colortable(dst, + *dst_pixel++ = rgb_lookup_colortable(&lookup_ctx, (((src_val >> src->red_shift) << 3) & 0xf8) | (((src_val >> src->red_shift) >> 2) & 0x07), (((src_val >> src->green_shift) << 2) & 0xfc) | @@ -3672,7 +3701,7 @@ static void convert_to_8(dib_info *dst, const dib_info *src, const RECT *src_rec for(x = src_rect->left; x < src_rect->right; x++) { src_val = *src_pixel++; - *dst_pixel++ = rgb_lookup_colortable(dst, + *dst_pixel++ = rgb_lookup_colortable(&lookup_ctx, get_field(src_val, src->red_shift, src->red_len), get_field(src_val, src->green_shift, src->green_len), get_field(src_val, src->blue_shift, src->blue_len)); @@ -4807,8 +4836,10 @@ static void blend_rect_8(const dib_info *dst, const dib_info *src, const POINT * const struct clipped_rects *clipped_rects, BLENDFUNCTION blend) { const RGBQUAD *color_table = get_dib_color_table( dst ); + struct rgb_lookup_colortable_ctx lookup_ctx; int i, x, y;
+ rgb_lookup_colortable_init( dst, &lookup_ctx ); for (i = 0; i < clipped_rects->count; i++) { const RECT *rc = &clipped_rects->rects[i]; @@ -4821,7 +4852,7 @@ static void blend_rect_8(const dib_info *dst, const dib_info *src, const POINT * { RGBQUAD rgb = color_table[dst_ptr[x]]; DWORD val = blend_rgb( rgb.rgbRed, rgb.rgbGreen, rgb.rgbBlue, src_ptr[x], blend ); - dst_ptr[x] = rgb_lookup_colortable( dst, val >> 16, val >> 8, val ); + dst_ptr[x] = rgb_lookup_colortable( &lookup_ctx, val >> 16, val >> 8, val ); } } } @@ -4831,8 +4862,10 @@ static void blend_rect_4(const dib_info *dst, const dib_info *src, const POINT * const struct clipped_rects *clipped_rects, BLENDFUNCTION blend) { const RGBQUAD *color_table = get_dib_color_table( dst ); + struct rgb_lookup_colortable_ctx lookup_ctx; int i, j, x, y;
+ rgb_lookup_colortable_init( dst, &lookup_ctx ); for (i = 0; i < clipped_rects->count; i++) { const RECT *rc = &clipped_rects->rects[i]; @@ -4846,7 +4879,7 @@ static void blend_rect_4(const dib_info *dst, const dib_info *src, const POINT * DWORD val = ((x & 1) ? dst_ptr[x / 2] : (dst_ptr[x / 2] >> 4)) & 0x0f; RGBQUAD rgb = color_table[val]; val = blend_rgb( rgb.rgbRed, rgb.rgbGreen, rgb.rgbBlue, src_ptr[j], blend ); - val = rgb_lookup_colortable( dst, val >> 16, val >> 8, val ); + val = rgb_lookup_colortable( &lookup_ctx, val >> 16, val >> 8, val ); if (x & 1) dst_ptr[x / 2] = val | (dst_ptr[x / 2] & 0xf0); else
On Thu, Apr 08, 2021 at 04:11:04PM +0300, Gabriel Ivăncescu wrote:
Signed-off-by: Gabriel Ivăncescu gabrielopcode@gmail.com
dlls/gdi32/dibdrv/primitives.c | 65 +++++++++++++++++++++++++--------- 1 file changed, 49 insertions(+), 16 deletions(-)
diff --git a/dlls/gdi32/dibdrv/primitives.c b/dlls/gdi32/dibdrv/primitives.c index 0a5f7e5..be68058 100644 --- a/dlls/gdi32/dibdrv/primitives.c +++ b/dlls/gdi32/dibdrv/primitives.c @@ -3497,22 +3497,48 @@ static void convert_to_16(dib_info *dst, const dib_info *src, const RECT *src_re } }
-static inline BOOL color_tables_match(const dib_info *d1, const dib_info *d2) +/*
- To lookup RGB values into nearest color in the color table, Windows uses 5-bits of the RGB
- at the "center" of the RGB cube, presumably to do a similar lookup cache. The lowest 3 bits
- of the color are thus set to halfway (0x04) and then it's used in the distance calculation
- to the exact color in the color table. We exploit this as well to create a lookup cache.
+*/ +struct rgb_lookup_colortable_ctx +{
- const dib_info *dib;
- BYTE map[32768];
- BYTE valid[32768 / 8];
+};
+static void rgb_lookup_colortable_init(const dib_info *dib, struct rgb_lookup_colortable_ctx *ctx) {
- if (!d1->color_table || !d2->color_table) return (!d1->color_table && !d2->color_table);
- return !memcmp(d1->color_table, d2->color_table, (1 << d1->bit_count) * sizeof(d1->color_table[0]));
- ctx->dib = dib;
- memset(ctx->valid, 0, sizeof(ctx->valid));
}
-static inline DWORD rgb_lookup_colortable(const dib_info *dst, BYTE r, BYTE g, BYTE b) +static inline BYTE rgb_lookup_colortable(struct rgb_lookup_colortable_ctx *ctx, BYTE r, BYTE g, BYTE b) {
- /* Windows reduces precision to 5 bits, probably in order to build some sort of lookup cache */
- return rgb_to_pixel_colortable( dst, (r & ~7) + 4, (g & ~7) + 4, (b & ~7) + 4 );
- unsigned pos = (r >> 3) | (g & ~7) << 2 | (b & ~7) << 7;
- if (!(ctx->valid[pos / 8] & (1 << pos % 8)))
- {
ctx->valid[pos / 8] |= 1 << pos % 8;
ctx->map[pos] = rgb_to_pixel_colortable(ctx->dib, (r & ~7) + 4, (g & ~7) + 4, (b & ~7) + 4);
- }
- return ctx->map[pos];
+}
I've sent in v4 of this series in which I've tweaked this a bit to give the compiler more of a chance to optimize things as well as using a lookup for the pixel masks.
With a 300x300 32-bpp -> 8-bpp BitBlt I'm getting performance slighty better than Windows if there aren't many distinct RGB values and a little worse if the entire map needs filling.
Huw.
On 16/04/2021 13:17, Huw Davies wrote:
On Thu, Apr 08, 2021 at 04:11:04PM +0300, Gabriel Ivăncescu wrote:
Signed-off-by: Gabriel Ivăncescu gabrielopcode@gmail.com
dlls/gdi32/dibdrv/primitives.c | 65 +++++++++++++++++++++++++--------- 1 file changed, 49 insertions(+), 16 deletions(-)
diff --git a/dlls/gdi32/dibdrv/primitives.c b/dlls/gdi32/dibdrv/primitives.c index 0a5f7e5..be68058 100644 --- a/dlls/gdi32/dibdrv/primitives.c +++ b/dlls/gdi32/dibdrv/primitives.c @@ -3497,22 +3497,48 @@ static void convert_to_16(dib_info *dst, const dib_info *src, const RECT *src_re } }
-static inline BOOL color_tables_match(const dib_info *d1, const dib_info *d2) +/*
- To lookup RGB values into nearest color in the color table, Windows uses 5-bits of the RGB
- at the "center" of the RGB cube, presumably to do a similar lookup cache. The lowest 3 bits
- of the color are thus set to halfway (0x04) and then it's used in the distance calculation
- to the exact color in the color table. We exploit this as well to create a lookup cache.
+*/ +struct rgb_lookup_colortable_ctx +{
- const dib_info *dib;
- BYTE map[32768];
- BYTE valid[32768 / 8];
+};
+static void rgb_lookup_colortable_init(const dib_info *dib, struct rgb_lookup_colortable_ctx *ctx) {
- if (!d1->color_table || !d2->color_table) return (!d1->color_table && !d2->color_table);
- return !memcmp(d1->color_table, d2->color_table, (1 << d1->bit_count) * sizeof(d1->color_table[0]));
- ctx->dib = dib;
- memset(ctx->valid, 0, sizeof(ctx->valid)); }
-static inline DWORD rgb_lookup_colortable(const dib_info *dst, BYTE r, BYTE g, BYTE b) +static inline BYTE rgb_lookup_colortable(struct rgb_lookup_colortable_ctx *ctx, BYTE r, BYTE g, BYTE b) {
- /* Windows reduces precision to 5 bits, probably in order to build some sort of lookup cache */
- return rgb_to_pixel_colortable( dst, (r & ~7) + 4, (g & ~7) + 4, (b & ~7) + 4 );
- unsigned pos = (r >> 3) | (g & ~7) << 2 | (b & ~7) << 7;
- if (!(ctx->valid[pos / 8] & (1 << pos % 8)))
- {
ctx->valid[pos / 8] |= 1 << pos % 8;
ctx->map[pos] = rgb_to_pixel_colortable(ctx->dib, (r & ~7) + 4, (g & ~7) + 4, (b & ~7) + 4);
- }
- return ctx->map[pos];
+}
I've sent in v4 of this series in which I've tweaked this a bit to give the compiler more of a chance to optimize things as well as using a lookup for the pixel masks.
With a 300x300 32-bpp -> 8-bpp BitBlt I'm getting performance slighty better than Windows if there aren't many distinct RGB values and a little worse if the entire map needs filling.
Huw.
Looks good, thanks.