Reverse Engineering for Beginners

(avery) #1

CHAPTER 25. SIMD


movdqu xmm0, xmmword ptr [esi+edi*4] ; ar2+i*4 is not 16-byte aligned, so load it to XMM0
paddd xmm1, xmm0
movdqa xmmword ptr [eax+edi*4], xmm1 ; ar3+i*4
add edi, 4
cmp edi, ecx
jb short loc_ED
jmp short loc_127

loc_109: ; CODE XREF: f(int,int *,int *,int *)+E3
mov ebx, [esp+10h+ar1]
mov esi, [esp+10h+ar2]


loc_111: ; CODE XREF: f(int,int *,int *,int *)+125
movdqu xmm0, xmmword ptr [ebx+edi*4]
paddd xmm0, xmmword ptr [esi+edi*4]
movdqa xmmword ptr [eax+edi*4], xmm0
add edi, 4
cmp edi, ecx
jb short loc_111


loc_127: ; CODE XREF: f(int,int *,int *,int *)+107
; f(int,int *,int *,int *)+164
cmp ecx, edx
jnb short loc_15B
mov esi, [esp+10h+ar1]
mov edi, [esp+10h+ar2]


loc_133: ; CODE XREF: f(int,int *,int *,int *)+13F
mov ebx, [esi+ecx*4]
add ebx, [edi+ecx*4]
mov [eax+ecx*4], ebx
inc ecx
cmp ecx, edx
jb short loc_133
jmp short loc_15B


loc_143: ; CODE XREF: f(int,int *,int *,int *)+17
; f(int,int *,int *,int *)+3A ...
mov esi, [esp+10h+ar1]
mov edi, [esp+10h+ar2]
xor ecx, ecx


loc_14D: ; CODE XREF: f(int,int *,int *,int *)+159
mov ebx, [esi+ecx*4]
add ebx, [edi+ecx*4]
mov [eax+ecx*4], ebx
inc ecx
cmp ecx, edx
jb short loc_14D


loc_15B: ; CODE XREF: f(int,int *,int *,int *)+A
; f(int,int *,int *,int *)+129 ...
xor eax, eax
pop ecx
pop ebx
pop esi
pop edi
retn


loc_162: ; CODE XREF: f(int,int *,int *,int *)+8C
; f(int,int *,int *,int *)+9F
xor ecx, ecx
jmp short loc_127
?f@@YAHHPAH00@Z endp


The SSE2-related instructions are:



  • MOVDQU (Move Unaligned Double Quadword) — just loads 16 bytes from memory into an XMM-register.

  • PADDD (Add Packed Integers) — adds 4 pairs of 32-bit numbers and leaves the result in the first operand. By the way, no

Free download pdf