Reverse Engineering for Beginners

(avery) #1

CHAPTER 25. SIMD


mov eax, 13
jmp .L4

25.2 SIMD strlen() implementation


It has to be noted that SIMD instructions can be inserted into C/C++ code via special macros^7. For MSVC, some of them are
located in the intrin.h file.


It is possible to implement the strlen() function^8 using SIMD instructions so that it works 2-2.5 times faster than the common
implementation. This function loads 16 characters into an XMM register and checks each of them against zero^9.


/*
 * strlen_sse2 - length of a NUL-terminated string, scanned 16 bytes at a time.
 *
 * str      pointer to a NUL-terminated string.
 * returns  number of characters before the terminating NUL.
 *
 * The SSE2 path is taken only when str is 16-byte aligned; any other
 * pointer is handed to the standard strlen().
 */
size_t strlen_sse2(const char *str)
{
    size_t len = 0;
    const char *s = str;

    /* _mm_load_si128 requires a 16-byte aligned address, i.e. the low four
       address bits must be zero. */
    bool str_is_aligned = (((size_t)str) & 0xF) == 0;

    if (str_is_aligned == false)
        return strlen(str);

    __m128i xmm0 = _mm_setzero_si128();   /* 16 zero bytes to compare against */
    __m128i xmm1;
    int mask = 0;

    for (;;)
    {
        /* Load one aligned 16-byte block. It may extend past the terminator,
           but an aligned block never straddles a page boundary. */
        xmm1 = _mm_load_si128((const __m128i *)s);
        /* Bytes equal to zero become 0xFF, all others 0x00. */
        xmm1 = _mm_cmpeq_epi8(xmm1, xmm0);
        /* Collect the top bit of every byte: bit i is set iff s[i] == 0. */
        if ((mask = _mm_movemask_epi8(xmm1)) != 0)
        {
            /* The lowest set bit is the offset of the first NUL in the block. */
            unsigned long pos;
#if defined(_MSC_VER)
            _BitScanForward(&pos, mask);
#else
            pos = (unsigned long)__builtin_ctz((unsigned int)mask);
#endif
            len += (size_t)pos;

            break;
        }
        s += sizeof(__m128i);
        len += sizeof(__m128i);
    }

    return len;
}


Let’s compile it in MSVC 2010 with the /Ox option:


Listing 25.2: Optimizing MSVC 2010

_pos$75552 = -4 ; size = 4
_str$ = 8 ; size = 4
?strlen_sse2@@YAIPBD@Z PROC ; strlen_sse2


push ebp
mov ebp, esp
and esp, -16 ; fffffff0H
mov eax, DWORD PTR _str$[ebp]
sub esp, 12 ; 0000000cH
push esi
mov esi, eax
and esi, -16 ; fffffff0H
xor edx, edx
mov ecx, eax
cmp esi, eax
je SHORT $LN4@strlen_sse
lea edx, DWORD PTR [eax+1]

(^7) MSDN: MMX, SSE, and SSE2 Intrinsics
(^8) strlen() — standard C library function for calculating string length
(^9) The example is based on source code from: http://go.yurichev.com/17330.

Free download pdf