-- v5: gdiplus: improve performance of matrix multiplication by unrolling loop. gdiplus: improve performance of GdipScaleMatrix
From: Bartosz Kosiorek gang65@poczta.onet.pl
Wine-Bug: https://bugs.winehq.org/show_bug.cgi?id=53947 --- dlls/gdiplus/matrix.c | 25 +++++++++++++---------- dlls/gdiplus/tests/matrix.c | 40 +++++++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+), 11 deletions(-)
diff --git a/dlls/gdiplus/matrix.c b/dlls/gdiplus/matrix.c index 40abbc93e21..13e7aa05e11 100644 --- a/dlls/gdiplus/matrix.c +++ b/dlls/gdiplus/matrix.c @@ -287,24 +287,27 @@ GpStatus WINGDIPAPI GdipRotateMatrix(GpMatrix *matrix, REAL angle, GpStatus WINGDIPAPI GdipScaleMatrix(GpMatrix *matrix, REAL scaleX, REAL scaleY, GpMatrixOrder order) { - REAL scale[6]; - TRACE("(%p, %.2f, %.2f, %d)\n", matrix, scaleX, scaleY, order);
if(!matrix) return InvalidParameter;
- scale[0] = scaleX; - scale[1] = 0.0; - scale[2] = 0.0; - scale[3] = scaleY; - scale[4] = 0.0; - scale[5] = 0.0; - if(order == MatrixOrderAppend) - matrix_multiply(matrix->matrix, scale, matrix->matrix); + { + matrix->matrix[0] *= scaleX; + matrix->matrix[1] *= scaleY; + matrix->matrix[2] *= scaleX; + matrix->matrix[3] *= scaleY; + matrix->matrix[4] *= scaleX; + matrix->matrix[5] *= scaleY; + } else if (order == MatrixOrderPrepend) - matrix_multiply(scale, matrix->matrix, matrix->matrix); + { + matrix->matrix[0] *= scaleX; + matrix->matrix[1] *= scaleX; + matrix->matrix[2] *= scaleY; + matrix->matrix[3] *= scaleY; + } else return InvalidParameter;
diff --git a/dlls/gdiplus/tests/matrix.c b/dlls/gdiplus/tests/matrix.c index 5ca0209f6a9..a2f6916fd66 100644 --- a/dlls/gdiplus/tests/matrix.c +++ b/dlls/gdiplus/tests/matrix.c @@ -110,6 +110,45 @@ static void test_transform(void) GdipDeleteMatrix(matrix); }
+static void test_scale(void) +{ + GpStatus status; + GpMatrix *matrix = NULL; + REAL elems[6]; + + static const REAL expected_elem[] = {3.0, -4.0, 90.0, 80.0, -1500.0, 1200.0}; + static const REAL expected_elem2[] = {3.0, -6.0, 60.0, 80.0, -500.0, 600.0}; + + GdipCreateMatrix2(1.0, -2.0, 30.0, 40.0, -500.0, 600.0, &matrix); + + status = GdipScaleMatrix(NULL, 3, 2, MatrixOrderAppend); + expect(InvalidParameter, status); + status = GdipScaleMatrix(matrix, 3, 2, MatrixOrderAppend); + expect(Ok, status); + + status = GdipGetMatrixElements(matrix, elems); + expect(Ok, status); + GdipDeleteMatrix(matrix); + + for(INT i = 0; i < 6; i++) + ok(expected_elem[i] == elems[i], "Expected #%d to be (%.1f) but got (%.1f)\n", i, + expected_elem[i], elems[i]); + + GdipCreateMatrix2(1.0, -2.0, 30.0, 40.0, -500.0, 600.0, &matrix); + + status = GdipScaleMatrix(matrix, 3, 2, MatrixOrderPrepend); + expect(Ok, status); + + status = GdipGetMatrixElements(matrix, elems); + expect(Ok, status); + GdipDeleteMatrix(matrix); + + for(INT i = 0; i < 6; i++) + ok(expected_elem2[i] == elems[i], "Expected #%d to be (%.1f) but got (%.1f)\n", i, + expected_elem2[i], elems[i]); + +} + static void test_isinvertible(void) { GpStatus status; @@ -390,6 +429,7 @@ START_TEST(matrix) GdiplusStartup(&gdiplusToken, &gdiplusStartupInput, NULL);
test_constructor_destructor(); + test_scale(); test_transform(); test_isinvertible(); test_invert();
From: Bartosz Kosiorek gang65@poczta.onet.pl
--- dlls/gdiplus/matrix.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-)
diff --git a/dlls/gdiplus/matrix.c b/dlls/gdiplus/matrix.c index 13e7aa05e11..a1adef9f9a0 100644 --- a/dlls/gdiplus/matrix.c +++ b/dlls/gdiplus/matrix.c @@ -42,14 +42,12 @@ WINE_DEFAULT_DEBUG_CHANNEL(gdiplus); static void matrix_multiply(GDIPCONST REAL * left, GDIPCONST REAL * right, REAL * out) { REAL temp[6]; - int i, odd; - - for(i = 0; i < 6; i++){ - odd = i % 2; - temp[i] = left[i - odd] * right[odd] + left[i - odd + 1] * right[odd + 2] + - (i >= 4 ? right[odd + 4] : 0.0); - } - + temp[0] = left[0] * right[0] + left[1] * right[2]; + temp[1] = left[0] * right[1] + left[1] * right[3]; + temp[2] = left[2] * right[0] + left[3] * right[2]; + temp[3] = left[2] * right[1] + left[3] * right[3]; + temp[4] = left[4] * right[0] + left[5] * right[2] + right[4]; + temp[5] = left[4] * right[1] + left[5] * right[3] + right[5]; memcpy(out, temp, 6 * sizeof(REAL)); }
On Fri Dec 2 15:48:11 2022 +0000, Bartosz Kosiorek wrote:
The performance improvement is quite significant: Running Times on Linux 64 bit: Native (Windows gdiplus.dll)
- 500 Matrix Scaling time (seconds): 0.84s
- 700 Matrix Multipling time (seconds): 1.22s
Wine gdiplus.dll without optimizations:
- 500 Matrix Scaling time (seconds): 0.28s
- 700 Matrix Multipling time (seconds): 0.35s
Wine gdiplus.dll with optimizations:
- 500 Matrix Scaling time (seconds): 0.13s
- 700 Matrix Multipling time (seconds): 0.17s
Application which I am using for testing: [gdiplusdisplay.exe](/uploads/c5c790eaef4824c2125d56b95714b05e/gdiplusdisplay.exe)
What I'm saying is that maybe just inlining that function is enough already. Enrolling might be happening automatically, have you checked compiler output?
On Fri Dec 2 15:59:22 2022 +0000, Nikolay Sivov wrote:
What I'm saying is that maybe just inlining that function is enough already. Enrolling might be happening automatically, have you checked compiler output?
I will check that. I think inlining should be applied anyway. Thanks for suggesting.
Still with enrolling I don't need to calculate the modulo every loop and array indexes are hardcoded (no need to calculate it).
Unfortunately I am not expert with checking compiler output. Could you please give some advice regarding that?