Assembly Language for Beginners

(nextflipdebug2) #1

1.29. SIMD


jbe short loc_D6
mov ebx, [esp+10h+ar2]
mov [esp+10h+var_10], ecx
mov ecx, [esp+10h+ar1]
xor esi, esi

loc_C1: ; CODE XREF: f(int,int *,int *,int *)+CD
mov edx, [ecx+esi*4]
add edx, [ebx+esi*4]
mov [eax+esi*4], edx
inc esi
cmp esi, edi
jb short loc_C1
mov ecx, [esp+10h+var_10]
mov edx, [esp+10h+sz]


loc_D6: ; CODE XREF: f(int,int *,int *,int *)+B2
mov esi, [esp+10h+ar2]
lea esi, [esi+edi*4] ; is ar2+i*4 16-byte aligned?
test esi, 0Fh
jz short loc_109 ; yes!
mov ebx, [esp+10h+ar1]
mov esi, [esp+10h+ar2]


loc_ED: ; CODE XREF: f(int,int *,int *,int *)+105
movdqu xmm1, xmmword ptr [ebx+edi*4] ; ar1+i*4
movdqu xmm0, xmmword ptr [esi+edi*4] ; ar2+i*4 is not 16-byte aligned, so load it to XMM0
paddd xmm1, xmm0
movdqa xmmword ptr [eax+edi*4], xmm1 ; ar3+i*4
add edi, 4
cmp edi, ecx
jb short loc_ED
jmp short loc_127


loc_109: ; CODE XREF: f(int,int *,int *,int *)+E3
mov ebx, [esp+10h+ar1]
mov esi, [esp+10h+ar2]


loc_111: ; CODE XREF: f(int,int *,int *,int *)+125
movdqu xmm0, xmmword ptr [ebx+edi*4]
paddd xmm0, xmmword ptr [esi+edi*4]
movdqa xmmword ptr [eax+edi*4], xmm0
add edi, 4
cmp edi, ecx
jb short loc_111


loc_127: ; CODE XREF: f(int,int *,int *,int *)+107
; f(int,int *,int *,int *)+164
cmp ecx, edx
jnb short loc_15B
mov esi, [esp+10h+ar1]
mov edi, [esp+10h+ar2]


loc_133: ; CODE XREF: f(int,int *,int *,int *)+13F
mov ebx, [esi+ecx*4]
add ebx, [edi+ecx*4]
mov [eax+ecx*4], ebx
inc ecx
cmp ecx, edx
jb short loc_133
jmp short loc_15B


loc_143: ; CODE XREF: f(int,int *,int *,int *)+17
; f(int,int *,int *,int *)+3A ...
mov esi, [esp+10h+ar1]
mov edi, [esp+10h+ar2]
xor ecx, ecx


loc_14D: ; CODE XREF: f(int,int *,int *,int *)+159

Free download pdf