-- v2: gdiplus: Optimize GDI32_GdipDrawPath (via prepare_dc function)
From: Bartosz Kosiorek gang65@poczta.onet.pl
--- dlls/gdiplus/graphics.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-)
diff --git a/dlls/gdiplus/graphics.c b/dlls/gdiplus/graphics.c index 65b33cdc960..58b2de213a6 100644 --- a/dlls/gdiplus/graphics.c +++ b/dlls/gdiplus/graphics.c @@ -268,15 +268,13 @@ static INT prepare_dc(GpGraphics *graphics, GpPen *pen) width = pen->width; } else{ + REAL scale_x, scale_y; /* Get an estimate for the amount the pen width is affected by the world * transform. (This is similar to what some of the wine drivers do.) */ - pt[0].X = 0.0; - pt[0].Y = 0.0; - pt[1].X = 1.0; - pt[1].Y = 1.0; - GdipTransformMatrixPoints(&graphics->worldtrans, pt, 2); - width = sqrt((pt[1].X - pt[0].X) * (pt[1].X - pt[0].X) + - (pt[1].Y - pt[0].Y) * (pt[1].Y - pt[0].Y)) / sqrt(2.0); + scale_x = graphics->worldtrans.matrix[0] + graphics->worldtrans.matrix[2]; + scale_y = graphics->worldtrans.matrix[1] + graphics->worldtrans.matrix[3]; + + width = sqrt(scale_x * scale_x + scale_y * scale_y) / sqrt(2.0);
width *= units_to_pixels(pen->width, pen->unit == UnitWorld ? graphics->unit : pen->unit, graphics->xres, graphics->printer_display);
On Sat Oct 14 21:44:43 2023 +0000, Bartosz Kosiorek wrote:
changed this line in [version 2 of the diff](/wine/wine/-/merge_requests/3971/diffs?diff_id=76255&start_sha=dc35791e81776b58b2473c7dc67b827fdf5be8c9#aaf93e29527c1cbc47ec54679da5828ebd679128_279_277)
fixed
On Sat Oct 14 21:45:52 2023 +0000, Nikolay Sivov wrote:
What does this optimize for? Extra function call made once, outside of the loop?
I have removed that code.
On Sat Oct 14 21:46:03 2023 +0000, Jeffrey Smith wrote:
Is the commit title accurate? I don't see changes that directly affect `GdipDrawImagePointsRect`.
Fixed
Bartosz Kosiorek (@gang65) commented about dlls/gdiplus/graphics.c:
pt[0].X = 0.0; pt[0].Y = 0.0; pt[1].X = 1.0; pt[1].Y = 1.0;
We could optimize even more, by calculating only one sqrt, with: ``` scale_x = graphics->worldtrans.matrix[0] + graphics->worldtrans.matrix[2]; scale_y = graphics->worldtrans.matrix[1] + graphics->worldtrans.matrix[3];
width *= units_to_pixels(pen->width, pen->unit == UnitWorld ? graphics->unit : pen->unit, graphics->xres, graphics->printer_display); width *= graphics->scale;
pt[0].X = 0.0; pt[0].Y = 0.0; pt[1].X = scale_x; pt[1].Y = scale_y;
gdip_transform_points(graphics, WineCoordinateSpaceGdiDevice, CoordinateSpaceDevice, pt, 2); width *= sqrt((pt[1].X - pt[0].X) * (pt[1].X - pt[0].X) + (pt[1].Y - pt[0].Y) * (pt[1].Y - pt[0].Y)) / sqrt(2.0); ```
On Sat Oct 14 21:50:56 2023 +0000, Bartosz Kosiorek wrote:
We could optimize even more, by calculating only one `sqrt`, with:
scale_x = graphics->worldtrans.matrix[0] + graphics->worldtrans.matrix[2]; scale_y = graphics->worldtrans.matrix[1] + graphics->worldtrans.matrix[3]; pt[0].X = 0.0; pt[0].Y = 0.0; pt[1].X = scale_x; pt[1].Y = scale_y; gdip_transform_points(graphics, WineCoordinateSpaceGdiDevice, CoordinateSpaceDevice, pt, 2); width = sqrt((pt[1].X - pt[0].X) * (pt[1].X - pt[0].X) + (pt[1].Y - pt[0].Y) * (pt[1].Y - pt[0].Y)) / sqrt(2.0); width *= units_to_pixels(pen->width, pen->unit == UnitWorld ? graphics->unit : pen->unit, graphics->xres, graphics->printer_display); width *= graphics->scale;
I think it makes more sense to have some inline helper if needed, to avoid calling public functions internally, if that's actually a problem at all (do we have any kind of performance numbers?) rather than unrolling.
On Sat Oct 14 21:51:50 2023 +0000, Nikolay Sivov wrote:
I think it makes more sense to have some inline helper if needed, to avoid calling public functions internally, if that's actually a problem at all (do we have any kind of performance numbers?) rather than unrolling.
With previous implementation we have 2*4 float multiplication inside `GdipTransformMatrixPoints` function: ``` pts[i].X = x * matrix->matrix[0] + y * matrix->matrix[2] + matrix->matrix[4]; pts[i].Y = x * matrix->matrix[1] + y * matrix->matrix[3] + matrix->matrix[5]; ```
Here is the full source code: ```
GpStatus WINGDIPAPI GdipTransformMatrixPoints(GpMatrix *matrix, GpPointF *pts, INT count) { REAL x, y; INT i;
TRACE("(%s, %p, %d)\n", debugstr_matrix(matrix), pts, count);
if(!matrix || !pts || count <= 0) return InvalidParameter;
for(i = 0; i < count; i++) { x = pts[i].X; y = pts[i].Y;
pts[i].X = x * matrix->matrix[0] + y * matrix->matrix[2] + matrix->matrix[4]; pts[i].Y = x * matrix->matrix[1] + y * matrix->matrix[3] + matrix->matrix[5]; }
return Ok; } ```
As the vector is (0,0) and (1, 1), we could replace invocation of this function by simple assigment (no need to multiple anything): ``` scale_x = graphics->worldtrans.matrix[0] + graphics->worldtrans.matrix[2]; scale_y = graphics->worldtrans.matrix[1] + graphics->worldtrans.matrix[3]; ```
We could also optimize it more and calculate width via `sqrt` only once.