Skip to content

Commit

Permalink
Improved memset
Browse files Browse the repository at this point in the history
  • Loading branch information
tswilliamson authored and tari committed Jul 31, 2020
1 parent d1d30ce commit 631b3eb
Show file tree
Hide file tree
Showing 2 changed files with 70 additions and 4 deletions.
70 changes: 70 additions & 0 deletions libfxcg/misc/memset.S
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
/*
 * void *memset(void *dst, int c, size_t n)
 *
 * Fills the n bytes starting at dst with (unsigned char)c and returns dst.
 *
 * Target: SuperH, GNU as syntax (source operand first).
 * In:     r4 = dst, r5 = c, r6 = n
 * Out:    r0 = dst
 * Uses:   r0, r2, r3, r4, r5, r6 and the T bit; no stack, no calls.
 *
 * The buffer is filled backwards, from dst + n down to dst:
 *   1. single bytes until the write pointer is 4-aligned,
 *   2. unrolled blocks of eight longwords,
 *   3. single longwords,
 *   4. single bytes for what is left.
 * Requests of 64 bytes or fewer skip straight to step 4.
 *
 * Register roles after the prologue:
 *   r3 = original dst (return value)
 *   r4 = write pointer, always one past the next byte to store
 *   r5 = fill value (low byte on the short path, replicated x4 on the
 *        long path; either way its low byte is the fill byte)
 *   r6 = bytes still to be written
 */
	.global	_memset
	.text

_memset:
	tst	r6, r6
	bt	.Lzero_length

	/* Fill from the end: keep dst in r3, point r4 at dst + n */
	mov	r4, r3
	add	r6, r4

	/*
	 * 64 bytes or fewer: use the plain byte loop. The compare is signed,
	 * so a length with bit 31 set also takes the byte loop, which still
	 * writes the right number of bytes.
	 */
	mov	#64, r0
	cmp/ge	r6, r0			/* T = (64 >= n) */
	bt	.Lbyte_loop

	/*
	 * Make a 4-byte filler in r5. Only the low byte of c may be used, so
	 * mask it first. r0 is the scratch register here so that the low byte
	 * of r5 stays valid for the byte stores below.
	 */
	extu.b	r5, r5			/* r5 = 000000cc */
	mov	r5, r0
	shll8	r0
	or	r0, r5			/* r5 = 0000cccc */
	mov	r5, r0
	shll16	r0
	or	r0, r5			/* r5 = cccccccc */

	mov	#3, r2

.Lalign_end:
	/* 4-align the write pointer; stores between 1 and 4 bytes */
	mov.b	r5, @-r4
	tst	r2, r4			/* T = (r4 is 4-aligned) */
	bf/s	.Lalign_end		/* branch decided before the delay slot */
	dt	r6			/* delay slot: one byte accounted for */

	mov	#40, r2

.Lfill_32:
	/* 32 bytes per pass; r6 is at least 61 on entry */
	add	#-32, r4
	add	#-32, r6
	mov.l	r5, @(28,r4)
	mov.l	r5, @(24,r4)
	mov.l	r5, @(20,r4)
	mov.l	r5, @(16,r4)
	mov.l	r5, @(12,r4)
	mov.l	r5, @(8,r4)
	mov.l	r5, @(4,r4)
	cmp/ge	r6, r2			/* T = (40 >= remaining) */
	bf/s	.Lfill_32		/* loop while more than 40 bytes remain */
	mov.l	r5, @r4			/* delay slot: eighth longword */

	/* 9..40 bytes remain here */
	mov	#8, r2

.Lfill_4:
	/* One longword per pass, stopping with 1..4 bytes left */
	mov.l	r5, @-r4
	cmp/ge	r6, r2			/* T = (8 >= remaining before this store) */
	bf/s	.Lfill_4
	add	#-4, r6			/* delay slot: account for the longword */

.Lbyte_loop:
	/*
	 * Tight loop setting one byte per pass. r6 is never zero on entry
	 * (n != 0 on the short path, 1..4 on the long path), so dt cannot
	 * wrap around.
	 */
	dt	r6
	bf/s	.Lbyte_loop
	mov.b	r5, @-r4		/* delay slot: runs on the last pass too */

	rts
	mov	r3, r0			/* delay slot: return the original dst */

.Lzero_length:
	/* n == 0: nothing to write, r4 still holds dst */
	rts
	mov	r4, r0
4 changes: 0 additions & 4 deletions libfxcg/syscalls/memset.S

This file was deleted.

0 comments on commit 631b3eb

Please sign in to comment.