|
Message-Id: <1423761423-30050-1-git-send-email-vda.linux@googlemail.com>
Date: Thu, 12 Feb 2015 18:17:02 +0100
From: Denys Vlasenko <vda.linux@...glemail.com>
To: musl@...ts.openwall.com, Rich Felker <dalias@...c.org>
Cc: Denys Vlasenko <vda.linux@...glemail.com>
Subject: [PATCH 1/2] x86_64/memset: avoid multiply insn if possible

memset is very, very often called with fill=0,
and 64-bit imul is expensive on many CPUs.
Avoid it if fill=0.

Also avoid multiply on "short memset" codepath if possible,
and when we do need it, use 32-bit one, which is cheaper on many CPUs.

Signed-off-by: Denys Vlasenko <vda.linux@...glemail.com>
---
 src/string/x86_64/memset.s | 34 +++++++++++++++++++++-------------
 1 file changed, 21 insertions(+), 13 deletions(-)

diff --git a/src/string/x86_64/memset.s b/src/string/x86_64/memset.s
index 3cc8fcf..523caa0 100644
--- a/src/string/x86_64/memset.s
+++ b/src/string/x86_64/memset.s
@@ -1,13 +1,12 @@
 .global memset
 .type memset,@function
 memset:
-	movzbl %sil,%esi
-	mov $0x101010101010101,%rax
-	# 64-bit imul has 3-7 cycles latency, launch early
-	imul %rsi,%rax
-
+	movzbq %sil,%rax
 	cmp $16,%rdx
-	jb 1f
+	jb .Less_than_16
+	test %esi,%esi
+	jnz .L_widen_rax  # unlikely
+.L_widened:
 
 	lea -1(%rdx),%rcx
 	mov %rdi,%r8
@@ -18,26 +17,35 @@ memset:
 	mov %r8,%rax
 	ret
 
-1:	test %edx,%edx
-	jz 1f
+.L_widen_rax:
+	# 64-bit imul has 3-7 cycles latency
+	mov $0x101010101010101,%rsi
+	imul %rsi,%rax
+	jmp .L_widened
+
+.Less_than_16:
+	test %edx,%edx
+	jz .L_ret
 
 	mov %al,(%rdi)
 	mov %al,-1(%rdi,%rdx)
 	cmp $2,%edx
-	jbe 1f
+	jbe .L_ret
 
 	mov %al,1(%rdi)
 	mov %al,-2(%rdi,%rdx)
+	# 32-bit imul has 3-4 cycles latency
+	imul $0x1010101,%eax
 	cmp $4,%edx
-	jbe 1f
+	jbe .L_ret
 
 	mov %eax,(%rdi)
 	mov %eax,-4(%rdi,%rdx)
 	cmp $8,%edx
-	jbe 1f
+	jbe .L_ret
 
 	mov %eax,4(%rdi)
 	mov %eax,-8(%rdi,%rdx)
-
-1:	mov %rdi,%rax
+.L_ret:
+	mov %rdi,%rax
 	ret
-- 
1.8.1.4
Powered by blists - more mailing lists
Confused about mailing lists and their use? Read about mailing lists on Wikipedia and check out these guidelines on proper formatting of your messages.