diff --git a/Makefile b/Makefile index e1f38a3..fed2f26 100644 --- a/Makefile +++ b/Makefile @@ -1,10 +1,10 @@ -CFLAGS=-O2 -Wall -g -ftree-vectorize +CFLAGS=-O2 -Wall -g -static NAME=mbw TARFILE=${NAME}.tar.gz mbw: mbw.c - $(CC) -c -o mbw.o mbw.c - $(CC) -o mbw mbw.o -lpthread + $(CC) $(CFLAGS) -c -o mbw.o mbw.c + $(CC) $(CFLAGS) -o mbw mbw.o -lpthread clean: rm -f mbw mbw.o diff --git a/mbw.c b/mbw.c index 8a24ed6..b06a1f6 100644 --- a/mbw.c +++ b/mbw.c @@ -80,6 +80,7 @@ void run_test(int tid); __attribute__((noinline)) void *arch_memcpy(void *dest, const void *src, size_t n) { + register void *ret asm ("rax") = dest; asm volatile ("movq %rdi, %rax\n\t" "movq %rdx, %rcx\n\t" "shrq $3, %rcx\n\t" @@ -88,7 +89,7 @@ __attribute__((noinline)) void *arch_memcpy(void *dest, const void *src, size_t "movl %edx, %ecx\n\t" "rep movsb\n\t" ); - return; + return ret; } #define CONFIG_MT_MAX_THREADS 4 @@ -342,6 +343,9 @@ void printout(double te, double mt, int type) case TEST_MCBLOCK: printf("Method: MCBLOCK\t"); break; + case TEST_ARCH_MEMCPY: + printf("Method: AMEMCPY\t"); + break; } printf("Elapsed: %.5f\t", te); printf("MiB: %.5f\t", mt); @@ -380,9 +384,7 @@ void run_test(int tid) int main(int argc, char **argv) { unsigned int long_size=0; - double te, te_sum; /* time elapsed */ int i; - long *a, *b; /* the two arrays to be copied from/to */ int o; /* getopt options */ unsigned long testno;