|
Message-ID: <DS7PR12MB5765FAA341DFA3E7D08BC9C0CB742@DS7PR12MB5765.namprd12.prod.outlook.com> Date: Tue, 23 Jan 2024 00:52:01 -0800 From: Fangrui Song <i@...kray.me> To: Rich Felker <dalias@...c.org> Cc: musl@...ts.openwall.com, Tatsuyuki Ishi <ishitatsuyuki@...il.com> Subject: Re: Draft riscv64 TLSDESC implementation On Sun, Jan 21, 2024 at 7:41 PM Tatsuyuki Ishi <ishitatsuyuki@...il.com> wrote: > > > On Jan 22, 2024, at 9:03, Rich Felker <dalias@...c.org> wrote: > > > > On Sun, Jan 21, 2024 at 03:48:55PM -0800, Fangrui Song wrote: > >> On Sun, Jan 21, 2024 at 2:28 PM Rich Felker <dalias@...c.org> wrote: > >>> > >>> On Tue, Aug 22, 2023 at 01:38:21PM -0400, Rich Felker wrote: > >>>> The psABI work is not finalized, but based on the current status of > >>>> https://github.com/riscv-non-isa/riscv-elf-psabi-doc/pull/373, I think > >>>> the attached is a valid (but untested) implementation of TLSDESC for > >>>> riscv64. Actually activating it requires also adding the relocation > >>>> type macro to riscv64/reloc.h. > >>>> > >>>> If any rv folks could look it over and make sure I haven't made any > >>>> stupid asm errors or missed any obvious optimizations, that would help > >>>> to quickly get this merged when the psABI is finalized. 
> >>>> > >>>> Rich > >>> > >>>> .text > >>>> .global __tlsdesc_static > >>>> .hidden __tlsdesc_static > >>>> .type __tlsdesc_static,%function > >>>> __tlsdesc_static: > >>>> ld a0,8(a0) > >>>> jr t0 > >>>> > >>>> .global __tlsdesc_dynamic > >>>> .hidden __tlsdesc_dynamic > >>>> .type __tlsdesc_dynamic,%function > >>>> __tlsdesc_dynamic: > >>>> add sp,sp,-8 > >>>> sd t1,(sp) > >>>> sd t2,8(sp) > >>>> > >>>> ld t2,-8(tp) # t2=dtv > >>>> > >>>> ld a0,8(a0) # a0=&{modidx,off} > >>>> ld t1,8(a0) # t1=off > >>>> ld a0,(a0) # a0=modidx > >>>> sll a0,a0,3 # a0=8*modidx > >>>> > >>>> add a0,a0,t2 # a0=dtv+8*modidx > >>>> ld a0,(a0) # a0=dtv[modidx] > >>>> add a0,a0,t1 # a0=dtv[modidx]+off > >>>> sub a0,a0,tp # a0=dtv[modidx]+off-tp > >>>> > >>>> ld t1,(sp) > >>>> ld t2,8(sp) > >>>> add sp,sp,8 > >>>> jr t0 > >>> > >>> Any feedback on this? Offhand, it looks like adjusting sp by 8 is > >>> wrong and that should be 16. Anything else? Does anyone have recent > >>> enough tooling to test this? > >> > >> Tatsuyuki, do you have links to the latest version of > >> gcc/binutils/glibc patches? > >> Downloading patches from these mailing lists is probably a large > >> hurdle for many users, so having the relevant repositories online may > >> help. > >> > >> mold has implemented RISC-V TLSDESC. > >> > >> On the LLVM side, I have reviewed > >> https://github.com/llvm/llvm-project/pull/66915 and am waiting for it > >> to land, before I can check the lld status. > > > > To test this, drop it in src/ldso/riscv64/tlsdesc.s, and add to > > arch/riscv64/reloc.h: > > > > #define REL_TLSDESC R_RISCV_TLSDESC > > > > or whatever the reloc name is (I don't think it's in elf.h yet so you > > probably need to either add it there too or just hard-code the number > > for testing). > > > > Updated version with the sp bugfix attached. > > The assembly looks fine to me. (It’s nice that musl doesn’t need to bother with save/restore at all since DTVs are initialized eagerly.)
> For the patches mentioned in the other thread, most tests were done with glibc’s portable testsuite (tst-elf-*). > If musl has a similar one, you should be able to run it with my GCC / binutils fork (with either --with-tls=desc at configure time or -mtls-dialect=desc at compile time). > > Tatsuyuki. > > > Rich > > <tlsdesc.s> > I have verified that the patch works using a runtime test under qemu-user. I use Paul Kirth's pending LLVM codegen/assembly patch and my pending lld patch:) https://gist.github.com/MaskRay/7ad19393fdb0834540db6e7b6b02fa56 . Copied instructions below: Patch musl ```diff --- c/arch/riscv64/reloc.h +++ i/arch/riscv64/reloc.h @@ -17,6 +17,7 @@ #define REL_DTPMOD R_RISCV_TLS_DTPMOD64 #define REL_DTPOFF R_RISCV_TLS_DTPREL64 #define REL_TPOFF R_RISCV_TLS_TPREL64 +#define REL_TLSDESC 12 #define CRTJMP(pc,sp) __asm__ __volatile__( \ "mv sp, %1 ; jr %0" : : "r"(pc), "r"(sp) : "memory" ) diff --git c/src/ldso/riscv64/tlsdesc.s i/src/ldso/riscv64/tlsdesc.s new file mode 100644 index 00000000..56d1ce89 --- /dev/null +++ i/src/ldso/riscv64/tlsdesc.s @@ -0,0 +1,33 @@ +.text +.global __tlsdesc_static +.hidden __tlsdesc_static +.type __tlsdesc_static,%function +__tlsdesc_static: + ld a0,8(a0) + jr t0 + +.global __tlsdesc_dynamic +.hidden __tlsdesc_dynamic +.type __tlsdesc_dynamic,%function +__tlsdesc_dynamic: + add sp,sp,-16 + sd t1,(sp) + sd t2,8(sp) + + ld t2,-8(tp) # t2=dtv + + ld a0,8(a0) # a0=&{modidx,off} + ld t1,8(a0) # t1=off + ld a0,(a0) # a0=modidx + sll a0,a0,3 # a0=8*modidx + + add a0,a0,t2 # a0=dtv+8*modidx + ld a0,(a0) # a0=dtv[modidx] + add a0,a0,t1 # a0=dtv[modidx]+off + sub a0,a0,tp # a0=dtv[modidx]+off-tp + + ld t1,(sp) + ld t2,8(sp) + add sp,sp,16 + jr t0 + ``` ```sh (mkdir -p out/rv64 && cd out/rv64 && ../../configure --target=riscv64-linux-gnu && make -j 50) ``` Adjust `~/musl/out/rv64/lib/musl-gcc.specs` and update `~/musl/out/rv64/obj/musl-gcc` ```sh cat > ~/musl/out/rv64/obj/musl-gcc <<eof #!/bin/sh exec
"${REALGCC:-riscv64-linux-gnu-gcc}" "$@" -specs ~/musl/out/rv64/lib/musl-gcc.specs eof ``` Use the test at the end of https://maskray.me/blog/2021-02-14-all-about-thread-local-storage ```sh cat > ./a.c <<eof #include <assert.h> int foo(); int bar(); int main() { assert(foo() == 2); assert(foo() == 4); assert(bar() == 2); assert(bar() == 4); } eof cat > ./b.c <<eof #include <stdio.h> __thread int tls0; extern __thread int tls1; int foo() { return ++tls0 + ++tls1; } static __thread int tls2, tls3; int bar() { return ++tls2 + ++tls3; } eof echo '__thread int tls1;' > ./c.c sed 's/ /\t/' > ./Makefile <<'eof' .MAKE.MODE = meta curDirOk=true CC := ~/musl/out/rv64/obj/musl-gcc -O1 -g -fpic -Bbin -fuse-ld=lld LDFLAGS := -Wl,-rpath=. all: a0 a1 a2 run: all ./a0 && ./a1 && ./a2 b.o: bb.s /tmp/Rel/bin/clang --target=riscv64-linux -c bb.s -o $@ c.so: c.o; ${LINK.c} -shared $> -o $@ bc.so: b.o c.o; ${LINK.c} -shared $> -o $@ b.so: b.o c.so; ${LINK.c} -shared $> -o $@ a0: a.o b.o c.o; ${LINK.c} $> -o $@ a1: a.o b.so; ${LINK.c} $> -o $@ a2: a.o bc.so; ${LINK.c} $> -o $@ eof ``` Compile b.c to bb.s. Replace general dynamic code sequences (e.g. `la.tls.gd a0,tls0; call __tls_get_addr@...`) with TLSDESC, e.g. ``` .Ltlsdesc_hi0: auipc a0, %tlsdesc_hi(tls0) ld a1, %tlsdesc_load_lo(.Ltlsdesc_hi0)(a0) addi a0, a0, %tlsdesc_add_lo(.Ltlsdesc_hi0) jalr t0, 0(a1), %tlsdesc_call(.Ltlsdesc_hi0) add a0, a0, tp ``` Apply LLVM CodeGen/assembly and lld patch (https://github.com/llvm/llvm-project/pull/79099) to llvm-project. Build clang and lld. Create an alias `bin/ld.lld` to be used with `-Bbin -fuse-ld=lld`. `bmake run` => succeeded! % bmake run ~/musl/out/rv64/obj/musl-gcc -O1 -g -fpic -Bbin -fuse-ld=lld -g -c a.c /tmp/Rel/bin/clang --target=riscv64-linux -c bb.s -o b.o ~/musl/out/rv64/obj/musl-gcc -O1 -g -fpic -Bbin -fuse-ld=lld -g -c c.c ~/musl/out/rv64/obj/musl-gcc -O1 -g -fpic -Bbin -fuse-ld=lld -g -Wl,-rpath=. 
a.o b.o c.o -o a0 ~/musl/out/rv64/obj/musl-gcc -O1 -g -fpic -Bbin -fuse-ld=lld -g -Wl,-rpath=. -shared c.o -o c.so ~/musl/out/rv64/obj/musl-gcc -O1 -g -fpic -Bbin -fuse-ld=lld -g -Wl,-rpath=. -shared b.o c.so -o b.so ~/musl/out/rv64/obj/musl-gcc -O1 -g -fpic -Bbin -fuse-ld=lld -g -Wl,-rpath=. a.o b.so -o a1 ~/musl/out/rv64/obj/musl-gcc -O1 -g -fpic -Bbin -fuse-ld=lld -g -Wl,-rpath=. -shared b.o c.o -o bc.so ~/musl/out/rv64/obj/musl-gcc -O1 -g -fpic -Bbin -fuse-ld=lld -g -Wl,-rpath=. a.o bc.so -o a2 ./a0 && ./a1 && ./a2
Powered by blists - more mailing lists
Confused about mailing lists and their use? Read about mailing lists on Wikipedia and check out these guidelines on proper formatting of your messages.