1.29. SIMD
jbe short loc_D6
mov ebx, [esp+10h+ar2]
mov [esp+10h+var_10], ecx
mov ecx, [esp+10h+ar1]
xor esi, esi
loc_C1: ; CODE XREF: f(int,int *,int *,int *)+CD
mov edx, [ecx+esi*4]
add edx, [ebx+esi*4]
mov [eax+esi*4], edx
inc esi
cmp esi, edi
jb short loc_C1
mov ecx, [esp+10h+var_10]
mov edx, [esp+10h+sz]
loc_D6: ; CODE XREF: f(int,int *,int *,int *)+B2
mov esi, [esp+10h+ar2]
lea esi, [esi+edi*4] ; is ar2+i*4 16-byte aligned?
test esi, 0Fh
jz short loc_109 ; yes!
mov ebx, [esp+10h+ar1]
mov esi, [esp+10h+ar2]
loc_ED: ; CODE XREF: f(int,int *,int *,int *)+105
movdqu xmm1, xmmword ptr [ebx+edi*4] ; ar1+i*4
movdqu xmm0, xmmword ptr [esi+edi*4] ; ar2+i*4 is not 16-byte aligned, so load it to XMM0
paddd xmm1, xmm0
movdqa xmmword ptr [eax+edi*4], xmm1 ; ar3+i*4
add edi, 4
cmp edi, ecx
jb short loc_ED
jmp short loc_127
loc_109: ; CODE XREF: f(int,int *,int *,int *)+E3
mov ebx, [esp+10h+ar1]
mov esi, [esp+10h+ar2]
loc_111: ; CODE XREF: f(int,int *,int *,int *)+125
movdqu xmm0, xmmword ptr [ebx+edi*4]
paddd xmm0, xmmword ptr [esi+edi*4]
movdqa xmmword ptr [eax+edi*4], xmm0
add edi, 4
cmp edi, ecx
jb short loc_111
loc_127: ; CODE XREF: f(int,int *,int *,int *)+107
; f(int,int *,int *,int *)+164
cmp ecx, edx
jnb short loc_15B
mov esi, [esp+10h+ar1]
mov edi, [esp+10h+ar2]
loc_133: ; CODE XREF: f(int,int *,int *,int *)+13F
mov ebx, [esi+ecx*4]
add ebx, [edi+ecx*4]
mov [eax+ecx*4], ebx
inc ecx
cmp ecx, edx
jb short loc_133
jmp short loc_15B
loc_143: ; CODE XREF: f(int,int *,int *,int *)+17
; f(int,int *,int *,int *)+3A ...
mov esi, [esp+10h+ar1]
mov edi, [esp+10h+ar2]
xor ecx, ecx
loc_14D: ; CODE XREF: f(int,int *,int *,int *)+159