Follow @Openwall on Twitter for new release announcements and other news
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20160615195345.1af40b40@ncopa-desktop.alpinelinux.org>
Date: Wed, 15 Jun 2016 19:53:45 +0200
From: Natanael Copa <ncopa@...inelinux.org>
To: Rich Felker <dalias@...c.org>
Cc: musl@...ts.openwall.com
Subject: Re: [PATCH 2/2] refactor name_from_dns

On Fri, 27 May 2016 00:40:57 -0400
Rich Felker <dalias@...c.org> wrote:

> On Wed, May 25, 2016 at 11:22:14AM +0200, Natanael Copa wrote:
> > loop over an address family / resource record mapping to avoid
> > repetitive code.
> > ---
> >  src/network/lookup_name.c | 27 +++++++++++++--------------
> >  1 file changed, 13 insertions(+), 14 deletions(-)
> > 
> > diff --git a/src/network/lookup_name.c b/src/network/lookup_name.c
> > index d3d97b4..99364ab 100644
> > --- a/src/network/lookup_name.c
> > +++ b/src/network/lookup_name.c
> > @@ -141,20 +141,19 @@ static int name_from_dns(struct address buf[static MAXADDRS], char canon[static
> >  	int qlens[2], alens[2];
> >  	int i, nq = 0;
> >  	struct dpc_ctx ctx = { .addrs = buf, .canon = canon };
> > -
> > -	if (family != AF_INET6) {
> > -		qlens[nq] = __res_mkquery(0, name, 1, RR_A, 0, 0, 0,
> > -			qbuf[nq], sizeof *qbuf);
> > -		if (qlens[nq] == -1)
> > -			return EAI_NONAME;
> > -		nq++;
> > -	}
> > -	if (family != AF_INET) {
> > -		qlens[nq] = __res_mkquery(0, name, 1, RR_AAAA, 0, 0, 0,
> > -			qbuf[nq], sizeof *qbuf);
> > -		if (qlens[nq] == -1)
> > -			return EAI_NONAME;
> > -		nq++;
> > +	struct { int af; int rr; } afrr[2] = {
> > +		{ .af = AF_INET6, .rr = RR_A },
> > +		{ .af = AF_INET, .rr = RR_AAAA },
> > +	};
> > +
> > +	for (i=0; i<2; i++) {
> > +		if (family != afrr[i].af) {
> > +			qlens[nq] = __res_mkquery(0, name, 1, afrr[i].rr,
> > +				0, 0, 0, qbuf[nq], sizeof *qbuf);
> > +			if (qlens[nq] == -1)
> > +				return EAI_NONAME;
> > +			nq++;
> > +		}
> 
> Can you compare the codegen for this as written vs making the table
> static-const? I wonder which gcc does better with. My guess would be
> static-const is better if it actually emits the table and loops over
> it, but auto storage like you did might be better if that helps it
> just unroll it plugging in the values.

as shown above (without static const) gcc will make it loop (there is
only one call to __res_mkquery):

name_from_dns:
        pushq   %r15
        pushq   %r14
        movq    %rdi, %r9
        pushq   %r13
        pushq   %r12
        movl    %ecx, %r11d
        pushq   %rbp
        pushq   %rbx
        movl    $6, %ecx
        movq    %r8, %r10
        xorl    %r12d, %r12d
        xorl    %ebp, %ebp
        subq    $1720, %rsp
        leaq    128(%rsp), %r13
        leaq    688(%rsp), %rbx
        leaq    104(%rsp), %rdi
        leaq    88(%rsp), %r14
        movq    %rdx, (%rsp)
        movl    $10, 88(%rsp)
        leaq    280(%r13), %rax
        movq    %r13, 56(%rsp)
        movq    %rbx, 72(%rsp)
        movl    $1, 92(%rsp)
        movl    $2, 96(%rsp)
        movq    %rax, 64(%rsp)
        leaq    512(%rbx), %rax
        movl    $28, 100(%rsp)
        movq    %rax, 80(%rsp)
        xorl    %eax, %eax
        rep stosl
        leaq    104(%rsp), %rax
        movq    %r9, 104(%rsp)
        movq    %rsi, 112(%rsp)
        movq    %rax, 8(%rsp)
.L17:
        cmpl    %r11d, (%r12,%r14)
        je      .L14
        movslq  %ebp, %r15
        movq    %r10, 24(%rsp)
        movl    %r11d, 20(%rsp)
        imulq   $280, %r15, %rax
        pushq   %rsi
        pushq   $280
        movl    4(%r14,%r12), %ecx
        xorl    %r9d, %r9d
        xorl    %r8d, %r8d
        xorl    %edi, %edi
        movl    $1, %edx
        addq    %r13, %rax
        pushq   %rax
        pushq   $0
        movq    32(%rsp), %rsi
        call    __res_mkquery
        movl    %eax, 72(%rsp,%r15,4)
        addq    $32, %rsp
        incl    %eax
        movl    20(%rsp), %r11d
        movq    24(%rsp), %r10
        jne     .L15
.L20:
        movl    $-2, %edx
        jmp     .L16
.L15:
        incl    %ebp
.L14:
        addq    $8, %r12
        cmpq    $16, %r12
        jne     .L17
        leaq    48(%rsp), %r12
        leaq    72(%rsp), %rcx
        leaq    40(%rsp), %rdx
        leaq    56(%rsp), %rsi
        pushq   %rax
        pushq   %r10
        movl    $512, %r9d
        movq    %r12, %r8
        movl    %ebp, %edi
        call    __res_msend_rc
        xorl    %r13d, %r13d
        testl   %eax, %eax
        popq    %rdx
        movl    $-11, %edx
        popq    %rcx
        js      .L16
.L18:
        cmpl    %r13d, %ebp
        jle     .L31
        movl    (%r12,%r13,4), %esi
        movq    8(%rsp), %rcx
        leaq    dns_parse_callback(%rip), %rdx
        movq    %rbx, %rdi
        incq    %r13
        addq    $512, %rbx
        call    __dns_parse
        jmp     .L18
.L31:
        movl    120(%rsp), %edx
        testl   %edx, %edx
        jne     .L16
        cmpl    $3, 48(%rsp)
        jle     .L23
        movb    691(%rsp), %al
        andl    $15, %eax
        cmpb    $2, %al
        je      .L23
        testb   %al, %al
        je      .L20
        cmpb    $3, %al
        movl    $-4, %eax
        cmovne  %eax, %edx
        jmp     .L16
.L23:
...



with "static const struct { int af; int rr; } afrr[2] = {" gcc will
actually unroll it (there are 2 calls to __res_mkquery):

name_from_dns:
        pushq   %r15
        pushq   %r14
        movq    %rdi, %r9
        pushq   %r13
        pushq   %r12
        movl    %ecx, %r15d
        pushq   %rbp
        pushq   %rbx
        movl    $6, %ecx
        movq    %rdx, %r13
        movq    %r8, %r14
        subq    $1688, %rsp
        leaq    96(%rsp), %r12
        leaq    656(%rsp), %rbx
        leaq    72(%rsp), %rdi
        leaq    280(%r12), %rax
        movq    %r12, 40(%rsp)
        movq    %rbx, 56(%rsp)
        movq    %rax, 48(%rsp)
        leaq    512(%rbx), %rax
        movq    %rax, 64(%rsp)
        xorl    %eax, %eax
        cmpl    $10, %r15d
        rep stosl
        leaq    72(%rsp), %rax
        movq    %r9, 72(%rsp)
        movq    %rsi, 80(%rsp)
        movq    %rax, 8(%rsp)
        je      .L21
        pushq   %rdi
        pushq   $280
        xorl    %r9d, %r9d
        pushq   %r12
        pushq   $0
        xorl    %r8d, %r8d
        xorl    %edi, %edi
        movl    $1, %ecx
        movl    $1, %edx
        movq    %r13, %rsi
        call    __res_mkquery
        movl    %eax, 56(%rsp)
        addq    $32, %rsp
        incl    %eax
        je      .L15
        cmpl    $2, %r15d
        movl    $1, %ebp
        jne     .L36
        jmp     .L16
.L21:
        xorl    %ebp, %ebp
.L36:
        movslq  %ebp, %r15
        pushq   %rsi
        pushq   $280
        imulq   $280, %r15, %rax
        xorl    %r9d, %r9d
        xorl    %r8d, %r8d
        xorl    %edi, %edi
        movl    $28, %ecx
        movl    $1, %edx
        movq    %r13, %rsi
        incl    %ebp
        addq    %rax, %r12
        pushq   %r12
        pushq   $0
        call    __res_mkquery
        movl    %eax, 56(%rsp,%r15,4)
        addq    $32, %rsp
        incl    %eax
        jne     .L16
.L15:
        movl    $-2, %edx
        jmp     .L18
.L16:
        leaq    32(%rsp), %r12
        leaq    56(%rsp), %rcx
        leaq    24(%rsp), %rdx
        leaq    40(%rsp), %rsi
        pushq   %rax
        pushq   %r14
        movl    $512, %r9d
        movq    %r12, %r8
        movl    %ebp, %edi
        call    __res_msend_rc
        xorl    %r14d, %r14d
        testl   %eax, %eax
        popq    %rdx
        movl    $-11, %edx
        popq    %rcx
        js      .L18
.L19:
        cmpl    %r14d, %ebp
        jle     .L38
        movl    (%r12,%r14,4), %esi
        movq    8(%rsp), %rcx
        leaq    dns_parse_callback(%rip), %rdx
        movq    %rbx, %rdi
        incq    %r14
        addq    $512, %rbx
        call    __dns_parse
        jmp     .L19
.L38:
        movl    88(%rsp), %edx
        testl   %edx, %edx
        jne     .L18
        cmpl    $3, 32(%rsp)
        jle     .L24
        movb    659(%rsp), %al
        andl    $15, %eax
        cmpb    $2, %al
        je      .L24
        testb   %al, %al
        je      .L15
        cmpb    $3, %al
        movl    $-4, %eax
        cmovne  %eax, %edx
        jmp     .L18
.L24:


> 
> Rich

Powered by blists - more mailing lists

Confused about mailing lists and their use? Read about mailing lists on Wikipedia and check out these guidelines on proper formatting of your messages.