[PATCH v2 0/1] MR10528: oleaut32: Add fast paths for VarAdd and VarCmp with integer operands.
Add fast paths for `VarAdd` and `VarCmp` when both operands are simple integer types (VT_I2, VT_I4, VT_EMPTY), skipping the expensive `VariantChangeType`/`VariantCopyInd` machinery.

Profiling VBScript For-loops showed that `VarAdd`/`VarCmp` are called on every iteration for the loop counter, and that the full VARIANT conversion pipeline (including locale allocation in ucrtbase) was the dominant cost.

With this patch, a VBScript For-loop inside a Sub goes from 3,041 ms to 1,712 ms for 10M iterations (**1.8× faster**).

Benchmark results (VBScript `cscript`, 10M iterations unless noted):

| Benchmark | Before | After | Speedup |
|-----------|--------|-------|---------|
| For-loop in Sub | 3,041 ms | 1,712 ms | **1.8×** |
| Empty For (global) | 6,125 ms | 4,792 ms | **1.3×** |
| For Step 2 (5M) | 3,058 ms | 2,404 ms | **1.3×** |
| For R8 counter | 10,824 ms | 10,878 ms | ~same (R8 not fast-pathed) |

Global-scope loops show smaller gains because VBScript's string-based identifier lookup still dominates there (see !10515). Combined with !10515, integer For-loops reach 1.2–1.5× of native Windows performance (up to 51× faster than current master).

--
v2: oleaut32: Add fast paths for VarAdd and VarCmp with integer operands.

https://gitlab.winehq.org/wine/wine/-/merge_requests/10528
From: Francis De Brabandere <francisdb@gmail.com>

Skip the expensive VariantChangeType/VariantCopyInd machinery when both operands are simple integer types (VT_I2, VT_I4, VT_EMPTY).

For VarAdd, handle overflow promotion (I2+I2 that overflows to I4, I4+I4 that overflows to R8) matching native Windows behavior. For VarCmp, compare via LONGLONG to handle mixed I2/I4 correctly.

This eliminates the dominant cost in VBScript For-loop step/compare operations, where VarAdd and VarCmp are called every iteration with trivial I2 operands but previously went through the full VARIANT conversion pipeline.
---
 dlls/oleaut32/variant.c | 98 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 98 insertions(+)

diff --git a/dlls/oleaut32/variant.c b/dlls/oleaut32/variant.c
index 48851ba05ec..e1f2e7f0776 100644
--- a/dlls/oleaut32/variant.c
+++ b/dlls/oleaut32/variant.c
@@ -2784,6 +2784,34 @@ HRESULT WINAPI VarCmp(LPVARIANT left, LPVARIANT right, LCID lcid, DWORD flags)
     TRACE("%s, %s, %#lx, %#lx.\n", debugstr_variant(left), debugstr_variant(right), lcid, flags);
 
+    /* Fast path for common integer comparisons: skip expensive VariantChangeType */
+    if (V_VT(left) == VT_I2 && V_VT(right) == VT_I2)
+        return (V_I2(left) == V_I2(right)) ? VARCMP_EQ :
+               (V_I2(left) < V_I2(right)) ? VARCMP_LT : VARCMP_GT;
+
+    if (!(V_VT(left) & ~VT_TYPEMASK) && !(V_VT(right) & ~VT_TYPEMASK))
+    {
+        LONGLONG lval, rval;
+        BOOL left_i = TRUE, right_i = TRUE;
+
+        switch (V_VT(left))
+        {
+        case VT_I4: lval = V_I4(left); break;
+        case VT_I2: lval = V_I2(left); break;
+        case VT_EMPTY: lval = 0; break;
+        default: left_i = FALSE; break;
+        }
+        switch (V_VT(right))
+        {
+        case VT_I4: rval = V_I4(right); break;
+        case VT_I2: rval = V_I2(right); break;
+        case VT_EMPTY: rval = 0; break;
+        default: right_i = FALSE; break;
+        }
+        if (left_i && right_i)
+            return (lval == rval) ? VARCMP_EQ : (lval < rval) ? VARCMP_LT : VARCMP_GT;
+    }
+
     lvt = V_VT(left) & VT_TYPEMASK;
     rvt = V_VT(right) & VT_TYPEMASK;
     xmask = (1 << lvt) | (1 << rvt);
@@ -3224,6 +3252,76 @@ HRESULT WINAPI VarAdd(LPVARIANT left, LPVARIANT right, LPVARIANT result)
     TRACE("(%s,%s,%p)\n", debugstr_variant(left), debugstr_variant(right), result);
 
+    /* Fast path for common integer additions: skip expensive VariantChangeType */
+    if (V_VT(left) == VT_I2 && V_VT(right) == VT_I2)
+    {
+        int sum = (int)V_I2(left) + (int)V_I2(right);
+        if (sum >= -32768 && sum <= 32767)
+        {
+            V_VT(result) = VT_I2;
+            V_I2(result) = sum;
+        }
+        else
+        {
+            V_VT(result) = VT_I4;
+            V_I4(result) = sum;
+        }
+        return S_OK;
+    }
+    if (!(V_VT(left) & ~VT_TYPEMASK) && !(V_VT(right) & ~VT_TYPEMASK))
+    {
+        LONGLONG lval, rval;
+        BOOL left_i = TRUE, right_i = TRUE;
+
+        switch (V_VT(left))
+        {
+        case VT_I4: lval = V_I4(left); break;
+        case VT_I2: lval = V_I2(left); break;
+        case VT_EMPTY: lval = 0; break;
+        default: left_i = FALSE; break;
+        }
+        switch (V_VT(right))
+        {
+        case VT_I4: rval = V_I4(right); break;
+        case VT_I2: rval = V_I2(right); break;
+        case VT_EMPTY: rval = 0; break;
+        default: right_i = FALSE; break;
+        }
+        if (left_i && right_i)
+        {
+            LONGLONG sum = lval + rval;
+            if (V_VT(left) == VT_I4 || V_VT(right) == VT_I4)
+            {
+                if (sum >= (LONGLONG)LONG_MIN && sum <= (LONGLONG)LONG_MAX)
+                {
+                    V_VT(result) = VT_I4;
+                    V_I4(result) = sum;
+                }
+                else
+                {
+                    /* I4 overflow promotes to R8 (not I8) per Windows behavior */
+                    V_VT(result) = VT_R8;
+                    V_R8(result) = (double)sum;
+                }
+            }
+            else
+            {
+                /* Both are I2 or EMPTY: result is I2, overflow promotes to I4 */
+                if (sum >= -32768 && sum <= 32767)
+                {
+                    V_VT(result) = VT_I2;
+                    V_I2(result) = sum;
+                }
+                else
+                {
+                    V_VT(result) = VT_I4;
+                    V_I4(result) = sum;
+                }
+            }
+            return S_OK;
+        }
+    }
+
     VariantInit(&lv);
     VariantInit(&rv);
     VariantInit(&tv);
--
GitLab

https://gitlab.winehq.org/wine/wine/-/merge_requests/10528
participants (2)
-
Francis De Brabandere -
Francis De Brabandere (@francisdb)