CHAPTER 14. LOOPS
; store byte at RDI+i:
mov BYTE PTR [rdi+rax], cl
inc rax ; i++
jmp .L2
.L5:
ret
Listing 14.11: GCC 4.9 ARM64 optimized for size (-Os)
my_memcpy:
; Byte-by-byte copy loop: copies X2 bytes from the address in X1 to the address in X0.
; X0 = destination address
; X1 = source address
; X2 = size of block
; X3 = loop counter (i); W4 = scratch register holding the byte being copied
; initialize counter (i) at 0
mov x3, 0
.L2:
; all bytes copied? exit then:
; (checked before each copy, so a size of 0 copies nothing)
cmp x3, x2
beq .L5
; load byte at X1+i:
ldrb w4, [x1,x3]
; store byte at X0+i:
strb w4, [x0,x3]
add x3, x3, 1 ; i++
b .L2
.L5:
ret
Listing 14.12: Optimizing Keil 6/2013 (Thumb mode)
my_memcpy PROC
; Byte-by-byte copy loop: copies R2 bytes from the address in R1 to the address in R0.
; R0 = destination address
; R1 = source address
; R2 = size of block
; R3 = loop counter (i); R4 = scratch register holding the byte being copied
; save R4 (overwritten in the loop body) and LR on the stack:
PUSH {r4,lr}
; initialize counter (i) at 0
MOVS r3,#0
; condition checked at the end of function, so jump there:
B |L0.12|
|L0.6|
; load byte at R1+i:
LDRB r4,[r1,r3]
; store byte at R0+i:
STRB r4,[r0,r3]
; i++
ADDS r3,r3,#1
|L0.12|
; i<size? (BCC branches on an unsigned "lower" result)
CMP r3,r2
; jump to the loop begin if it is so:
BCC |L0.6|
; restore R4 and return by popping the saved LR value into PC:
POP {r4,pc}
ENDP
14.2.2 ARM in ARM mode
Keil in ARM mode takes full advantage of conditional suffixes:
Listing 14.13: Optimizing Keil 6/2013 (ARM mode)
my_memcpy PROC
; R0 = destination address
; R1 = source address
; R2 = size of block
; initialize counter (i) at 0