[PATCH v8 0/1] MR10528: oleaut32: Add fast paths for VarAdd and VarCmp with integer operands.
Add fast paths for `VarAdd` and `VarCmp` when both operands are simple integer types (VT_I2, VT_I4, VT_EMPTY), skipping the `VariantChangeType` / `VariantCopyInd` pipeline and the locale grab/free cycle in ucrtbase.

`VarAdd` and `VarCmp` are called on every iteration of an integer For-loop (counter increment) and on every conditional comparison. Profiling VBScript workloads showed that the full VARIANT conversion path dominated that cost.

### Benchmarks

The suite skews heavily toward integer For-loops — most scenarios (conditionals, local reads, class property access) are measured *inside* a tight `For i = 1 To N` loop, so the loop counter itself contributes to the speedup. The numbers below are the median of 3 runs on Wine, with current `master` as the baseline.

| Scenario | Before | After | Speedup |
|----------|--------|-------|---------|
| Integer For-loop, empty body (10M) | 2678 ms | 364 ms | 7.4× |
| If-condition inside loop (10M) | 3678 ms | 718 ms | 5.1× |
| Local variable reads in loop (1M × 10) | 1247 ms | 403 ms | 3.1× |
| Class property read in loop (500K) | 289 ms | 164 ms | 1.8× |
| For R8 counter (not fast-pathed) | 2910 ms | 2764 ms | ~same |
| String concatenation (no loop counter) | 70 ms | 70 ms | ~same |

The headline numbers are a ceiling for integer-arithmetic-heavy code. Scripts where the dominant cost is string manipulation, COM dispatch, or I/O will see little change — as the last two rows show.

### Real-world impact

Most visual pinball tables see a 10-23% tick speedup. Tables with heavier VarAdd/VarCmp integer usage benefit most.

--
v8: oleaut32: Add fast paths for VarAdd and VarCmp with integer operands.

https://gitlab.winehq.org/wine/wine/-/merge_requests/10528
From: Francis De Brabandere <francisdb@gmail.com> Skip the expensive VariantChangeType/VariantCopyInd machinery when both operands are simple integer types (VT_I2, VT_I4, VT_EMPTY). For VarAdd, handle overflow promotion (I2+I2 that overflows to I4, I4+I4 that overflows to R8) matching native Windows behavior. For VarCmp, compare via LONGLONG to handle mixed I2/I4 correctly. This eliminates the dominant cost in VBScript For-loop step/compare operations, where VarAdd and VarCmp are called every iteration with trivial I2 operands but previously went through the full VARIANT conversion pipeline. --- dlls/oleaut32/variant.c | 98 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) diff --git a/dlls/oleaut32/variant.c b/dlls/oleaut32/variant.c index 8b5e41ec66d..f9a51b41556 100644 --- a/dlls/oleaut32/variant.c +++ b/dlls/oleaut32/variant.c @@ -2784,6 +2784,34 @@ HRESULT WINAPI VarCmp(LPVARIANT left, LPVARIANT right, LCID lcid, DWORD flags) TRACE("%s, %s, %#lx, %#lx.\n", debugstr_variant(left), debugstr_variant(right), lcid, flags); + /* Fast path for common integer comparisons: skip expensive VariantChangeType */ + if (V_VT(left) == VT_I2 && V_VT(right) == VT_I2) + return (V_I2(left) == V_I2(right)) ? VARCMP_EQ : + (V_I2(left) < V_I2(right)) ? VARCMP_LT : VARCMP_GT; + + if (!(V_VT(left) & ~VT_TYPEMASK) && !(V_VT(right) & ~VT_TYPEMASK)) + { + LONGLONG lval, rval; + BOOL left_i = TRUE, right_i = TRUE; + + switch (V_VT(left)) + { + case VT_I4: lval = V_I4(left); break; + case VT_I2: lval = V_I2(left); break; + case VT_EMPTY: lval = 0; break; + default: left_i = FALSE; break; + } + switch (V_VT(right)) + { + case VT_I4: rval = V_I4(right); break; + case VT_I2: rval = V_I2(right); break; + case VT_EMPTY: rval = 0; break; + default: right_i = FALSE; break; + } + if (left_i && right_i) + return (lval == rval) ? VARCMP_EQ : (lval < rval) ? 
VARCMP_LT : VARCMP_GT; + } + lvt = V_VT(left) & VT_TYPEMASK; rvt = V_VT(right) & VT_TYPEMASK; xmask = (1 << lvt) | (1 << rvt); @@ -3241,6 +3269,76 @@ HRESULT WINAPI VarAdd(LPVARIANT left, LPVARIANT right, LPVARIANT result) TRACE("(%s,%s,%p)\n", debugstr_variant(left), debugstr_variant(right), result); + /* Fast path for common integer additions: skip expensive VariantChangeType */ + if (V_VT(left) == VT_I2 && V_VT(right) == VT_I2) + { + int sum = (int)V_I2(left) + (int)V_I2(right); + if (sum >= -32768 && sum <= 32767) + { + V_VT(result) = VT_I2; + V_I2(result) = sum; + } + else + { + V_VT(result) = VT_I4; + V_I4(result) = sum; + } + return S_OK; + } + if (!(V_VT(left) & ~VT_TYPEMASK) && !(V_VT(right) & ~VT_TYPEMASK)) + { + LONGLONG lval, rval; + BOOL left_i = TRUE, right_i = TRUE; + + switch (V_VT(left)) + { + case VT_I4: lval = V_I4(left); break; + case VT_I2: lval = V_I2(left); break; + case VT_EMPTY: lval = 0; break; + default: left_i = FALSE; break; + } + switch (V_VT(right)) + { + case VT_I4: rval = V_I4(right); break; + case VT_I2: rval = V_I2(right); break; + case VT_EMPTY: rval = 0; break; + default: right_i = FALSE; break; + } + if (left_i && right_i) + { + LONGLONG sum = lval + rval; + if (V_VT(left) == VT_I4 || V_VT(right) == VT_I4) + { + if (sum >= (LONGLONG)LONG_MIN && sum <= (LONGLONG)LONG_MAX) + { + V_VT(result) = VT_I4; + V_I4(result) = sum; + } + else + { + /* I4 overflow promotes to R8 (not I8) per Windows behavior */ + V_VT(result) = VT_R8; + V_R8(result) = (double)sum; + } + } + else + { + /* Both are I2 or EMPTY: result is I2, overflow promotes to I4 */ + if (sum >= -32768 && sum <= 32767) + { + V_VT(result) = VT_I2; + V_I2(result) = sum; + } + else + { + V_VT(result) = VT_I4; + V_I4(result) = sum; + } + } + return S_OK; + } + } + VariantInit(&lv); VariantInit(&rv); VariantInit(&tv); -- GitLab https://gitlab.winehq.org/wine/wine/-/merge_requests/10528
participants (2)
-
Francis De Brabandere -
Francis De Brabandere (@francisdb)