Assembly Language for Beginners

(nextflipdebug2) #1

1.29. SIMD


jmp .L4
.L18:
mov eax, 4
jmp .L4
.L19:
mov eax, 5
jmp .L4
.L20:
mov eax, 6
jmp .L4
.L21:
mov eax, 7
jmp .L4
.L22:
mov eax, 8
jmp .L4
.L23:
mov eax, 9
jmp .L4
.L24:
mov eax, 10
jmp .L4
.L25:
mov eax, 11
jmp .L4
.L26:
mov eax, 12
jmp .L4
.L27:
mov eax, 13
jmp .L4


1.29.2 SIMD strlen() implementation


It has to be noted that the SIMD instructions can be inserted in C/C++ code via special macros^183. For
MSVC, some of them are located in the intrin.h file.


It is possible to implement the strlen() function^184 using SIMD instructions so that it works 2-2.5 times faster
than the common implementation. This function loads 16 characters into an XMM register and checks each
against zero^185.


size_t strlen_sse2(const char *str)
{
register size_t len = 0;
const char *s=str;
bool str_is_aligned=(((unsigned int)str)&0xFFFFFFF0) == (unsigned int)str;


if (str_is_aligned==false)
return strlen (str);

__m128i xmm0 = _mm_setzero_si128();
__m128i xmm1;
int mask = 0;

for (;;)
{
xmm1 = _mm_load_si128((__m128i *)s);
xmm1 = _mm_cmpeq_epi8(xmm1, xmm0);
if ((mask = _mm_movemask_epi8(xmm1)) != 0)
{
unsigned long pos;
_BitScanForward(&pos, mask);
len += (size_t)pos;

(^183) MSDN: MMX, SSE, and SSE2 Intrinsics
(^184) strlen() — standard C library function for calculating string length
(^185) The example is based on source code from: http://go.yurichev.com/17330.

Free download pdf