STRD is an ARMv7 instruction that stores two consecutive 32-bit words in one operation. A single STRD saves one clock cycle compared to two STRs. Shortened code also leads to less time spent on instruction fetch and more efficient I-cache utilization.
Running llvm-mca --timeline on the old code reports:

                    0123
Index     0123456789

[0,0]     DeeeeER   .  .   ldr.w r1, [r1, #472]
[0,1]     D====eER  .  .   add.w r0, r1, #16
[0,2]     D=====eeeeeER.   stm.w r0, {r4, r5, r6, r7, r8, r9, r10, r11, r12, lr}
[0,3]     .DeE--------R.   add r2, sp, #16
[0,4]     .D=====eE---R.   str r2, [r1, #56]
[0,5]     .D======eE--R.   str r3, [r1, #60]
[0,6]     . D---------R.   mrs r0, apsr
[0,7]     . DeeE------R.   bfi r0, lr, #5, #1
[0,8]     . D======eE-R.   str r0, [r1, #64]
[0,9]     . DeE-------R.   mov.w r0, #0
[0,10]    .  D======eER.   str r0, [r1, #68]
[0,11]    .  D======E-R.   vmrs r0, fpscr
[0,12]    .  D=======eER   str r0, [r1, #72]
[0,13]    .  D=eE------R   add.w r0, r1, #96
Running llvm-mca --timeline on the new code reports:

                    012
Index     0123456789

[0,0]     DeeeeER   . .   ldr.w r1, [r1, #472]
[0,1]     D====eER  . .   add.w r0, r1, #16
[0,2]     D=====eeeeeER   stm.w r0, {r4, r5, r6, r7, r8, r9, r10, r11, r12, lr}
[0,3]     .DeE--------R   add r2, sp, #16
[0,4]     .D=====eE---R   strd r2, r3, [r1, #56]
[0,5]     .D----------R   mrs r0, apsr
[0,6]     .DeeE-------R   bfi r0, lr, #5, #1
[0,7]     . D=====eE--R   str r0, [r1, #64]
[0,8]     . DeE-------R   mov.w r0, #0
[0,9]     . D======eE-R   str r0, [r1, #68]
[0,10]    .  D=====E--R   vmrs r0, fpscr
[0,11]    .  D======eER   str r0, [r1, #72]
[0,12]    .  D=eE-----R   add.w r0, r1, #96
Signed-off-by: Jinoh Kang <jinoh.kang.kr@gmail.com>
---
 dlls/ntdll/unix/signal_arm.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/dlls/ntdll/unix/signal_arm.c b/dlls/ntdll/unix/signal_arm.c index a1bcb0ddd32..8c8750153df 100644 --- a/dlls/ntdll/unix/signal_arm.c +++ b/dlls/ntdll/unix/signal_arm.c @@ -1157,8 +1157,7 @@ __ASM_GLOBAL_FUNC( __wine_syscall_dispatcher, "add r0, r1, #0x10\n\t" "stm r0, {r4-r12,lr}\n\t" "add r2, sp, #0x10\n\t" - "str r2, [r1, #0x38]\n\t" - "str r3, [r1, #0x3c]\n\t" + "strd r2, r3, [r1, #0x38]\n\t" "mrs r0, CPSR\n\t" "bfi r0, lr, #5, #1\n\t" /* set thumb bit */ "str r0, [r1, #0x40]\n\t"