Reverse Engineering for Beginners

(avery) #1

CHAPTER 25. SIMD CHAPTER 25. SIMD


add eax, 1
add edi, 4
add edx, [esi]
add esi, 4
mov [ebx], edx
add ebx, 4
cmp ecx, eax
jg short loc_8048558
add esp, 0Ch
xor eax, eax
pop ebx
pop esi
pop edi
pop ebp
retn

loc_8048578: ; CODE XREF: f(int,int ,int ,int *)+52
cmp eax, esi
jnb loc_80484C1
jmp loc_80484F8
_Z1fiPiSS endp


Almost the same, however, not as meticulously as Intel C++.


25.1.2 Memory copy example


Let’s revisit the simple memcpy() example (14.2 on page 183):


#include <stdio.h>


void my_memcpy (unsigned char dst, unsigned char src, size_t cnt)
{
size_t i;
for (i=0; i<cnt; i++)
dst[i]=src[i];
};


And that’s what optimizations GCC 4.9.1 did:


Listing 25.1: Optimizing GCC 4.9.1 x64

my_memcpy:
; RDI = destination address
; RSI = source address
; RDX = size of block
test rdx, rdx
je .L41
lea rax, [rdi+16]
cmp rsi, rax
lea rax, [rsi+16]
setae cl
cmp rdi, rax
setae al
or cl, al
je .L13
cmp rdx, 22
jbe .L13
mov rcx, rsi
push rbp
push rbx
neg rcx
and ecx, 15
cmp rcx, rdx
cmova rcx, rdx
xor eax, eax
test rcx, rcx
je .L4
movzx eax, BYTE PTR [rsi]
cmp rcx, 1

Free download pdf