-- v3: gdiplus: move pointer calculation outside inner loop to improve performance
From: Bartosz Kosiorek <gang65@poczta.onet.pl>
--- dlls/gdiplus/graphics.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-)
diff --git a/dlls/gdiplus/graphics.c b/dlls/gdiplus/graphics.c index e3b5661fd67..a2a9c3db186 100644 --- a/dlls/gdiplus/graphics.c +++ b/dlls/gdiplus/graphics.c @@ -422,28 +422,27 @@ static GpStatus alpha_blend_bmp_pixels(GpGraphics *graphics, INT dst_x, INT dst_
for (y=0; y<src_height; y++) { + ARGB *src_color = (ARGB*)(src + src_stride * y); for (x=0; x<src_width; x++) { - ARGB dst_color, src_color; - src_color = ((ARGB*)(src + src_stride * y))[x]; - if (comp_mode == CompositingModeSourceCopy) { - if (!(src_color & 0xff000000)) + if (!(src_color[x] & 0xff000000)) GdipBitmapSetPixel(dst_bitmap, x+dst_x, y+dst_y, 0); else - GdipBitmapSetPixel(dst_bitmap, x+dst_x, y+dst_y, src_color); + GdipBitmapSetPixel(dst_bitmap, x+dst_x, y+dst_y, src_color[x]); } else { - if (!(src_color & 0xff000000)) + ARGB dst_color; + if (!(src_color[x] & 0xff000000)) continue;
GdipBitmapGetPixel(dst_bitmap, x+dst_x, y+dst_y, &dst_color); if (fmt & PixelFormatPAlpha) - GdipBitmapSetPixel(dst_bitmap, x+dst_x, y+dst_y, color_over_fgpremult(dst_color, src_color)); + GdipBitmapSetPixel(dst_bitmap, x+dst_x, y+dst_y, color_over_fgpremult(dst_color, src_color[x])); else - GdipBitmapSetPixel(dst_bitmap, x+dst_x, y+dst_y, color_over(dst_color, src_color)); + GdipBitmapSetPixel(dst_bitmap, x+dst_x, y+dst_y, color_over(dst_color, src_color[x])); } } }
On Fri Aug 11 08:34:38 2023 +0000, Bartosz Kosiorek wrote:
In the previous implementation, the `src_color` position was recalculated on every iteration of the inner `for (x ...)` loop:
for (x=0; x<src_width; x++) { ARGB dst_color, src_color; src_color = ((ARGB*)(src + src_stride * y))[x];
With the new implementation, the `src_color` position is calculated only once per row (outside the `x` loop), and the loop simply advances the pointer with `src_color++`:
ARGB *src_color = ((ARGB*)(src + src_stride * y)); for (x=0; x<src_width; x++) { ... src_color++; }
This is faster because it avoids unnecessary multiplications, especially for wide images.
I asked because usually claiming something is faster also needs some performance numbers.
On Fri Aug 11 09:01:05 2023 +0000, Bartosz Kosiorek wrote:
It seems that the Linux32 tests were failing with the previous changes, while the Linux64 tests were passing.
I have replaced it with array