Follow @Openwall on Twitter for new release announcements and other news
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <DFEFCEFB42FD4CBB9CD45CD321B57F1A@H270>
Date: Wed, 4 Aug 2021 12:02:58 +0200
From: "Stefan Kanthak" <stefan.kanthak@...go.de>
To: "Szabolcs Nagy" <nsz@...t70.net>
Cc: <musl@...ts.openwall.com>
Subject: Re: [Patch] src/math/i386/remquo.s: remove conditional branch, shorter bit twiddling

"Szabolcs Nagy" <nsz@...t70.net> wrote:


>* Stefan Kanthak <stefan.kanthak@...go.de> [2021-08-01 17:59:52 +0200]:
>> Halve the number of instructions (from 12 to 6) to fetch the
>> (3-bit partial) quotient from the FPU flags C0:C3:C1, and
>> perform its negation without conditional branch.
> 
> i haven't tested it but it looks good.

This is basically well-tested code I wrote about 20 years ago
for my own NOMSVCRT.LIB: I always found the bit twiddling of
J.T. Conklin's code rather awful.

> i think we should not tweak x87 asm code too much though.
> it can introduce bugs and there are not many users of it.
> i think only the size saving can justify keeping any i386
> math code at all.

From your own FAQ <http://www.musl-libc.org/faq.html>:

| When will it be finished?
| When there's nothing left to remove.

The change just follows that principle by removing 6 lines of code (instructions). ;-)

> but i'm not against committing this.
> thanks for the patch.

regards
Stefan

>> --- -/math/i386/remquo.s
>> +++ +/math/i386/remquo.s
>> @@ -2,49 +2,44 @@
>>  .type remquof,@function
>>  remquof:
>>         mov 12(%esp),%ecx
>> +       mov 8(%esp),%eax
>> +       xor 4(%esp),%eax
>>         flds 8(%esp)
>>         flds 4(%esp)
>> -       mov 11(%esp),%dh
>> -       xor 7(%esp),%dh
>> -       jmp 1f
>> +       jmp 0f
>> 
>>  .global remquol
>>  .type remquol,@function
>>  remquol:
>>         mov 28(%esp),%ecx
>> +       mov 24(%esp),%eax
>> +       xor 12(%esp),%eax
>> +       cwtl
>>         fldt 16(%esp)
>>         fldt 4(%esp)
>> -       mov 25(%esp),%dh
>> -       xor 13(%esp),%dh
>> -       jmp 1f
>> +       jmp 0f
>> 
>>  .global remquo
>>  .type remquo,@function
>>  remquo:
>>         mov 20(%esp),%ecx
>> +       mov 16(%esp),%eax
>> +       xor 8(%esp),%eax
>>         fldl 12(%esp)
>>         fldl 4(%esp)
>> -       mov 19(%esp),%dh
>> -       xor 11(%esp),%dh
>> +0:     cltd
>>  1:     fprem1
>>         fnstsw %ax
>>         sahf
>>         jp 1b
>>         fstp %st(1)
>> -       mov %ah,%dl
>> -       shr %dl
>> -       and $1,%dl
>> -       mov %ah,%al
>> -       shr $5,%al
>> -       and $2,%al
>> -       or %al,%dl
>> -       mov %ah,%al
>> -       shl $2,%al
>> -       and $4,%al
>> -       or %al,%dl
>> -       test %dh,%dh
>> -       jns 1f
>> -       neg %dl
>> -1:     movsbl %dl,%edx
>> -       mov %edx,(%ecx)
>> +       adc %al,%al
>> +       shl $2,%ah
>> +       adc %al,%al
>> +       shl $5,%ah
>> +       adc %al,%al
>> +       and $7,%eax
>> +       xor %edx,%eax
>> +       sub %edx,%eax
>> +       mov %eax,(%ecx)
>>         ret

Powered by blists - more mailing lists

Confused about mailing lists and their use? Read about mailing lists on Wikipedia and check out these guidelines on proper formatting of your messages.