Follow @Openwall on Twitter for new release announcements and other news
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <a052a74d-65a6-647f-3c82-af5876745e47@xen0n.name>
Date: Wed, 12 Oct 2022 16:22:29 +0800
From: WANG Xuerui <i@...0n.name>
To: 王洪亮 <wanghongliang@...ngson.cn>
Cc: musl@...ts.openwall.com
Subject: Re: add loongarch64 port v5

Hi,

Sorry for the delay in reviewing this. I haven't test-run the changes 
yet but I've gone through most of it and left some comments below.

Besides, MaskRay has hinted in a related LLVM patch [1] that the musl
maintainers may want to review the current LoongArch spec, where target
triples (including the musl ones) are more-or-less tentatively defined
([2], see Table 12). Maintainers, opinions?

[1]: https://reviews.llvm.org/D135751
[2]: 
https://loongson.github.io/LoongArch-Documentation/LoongArch-toolchain-conventions-EN.html

On 2022/8/3 16:15, 王洪亮 wrote:
> From 7c08ecde9237a0e5de182829a947f6b02cb52cf5 Mon Sep 17 00:00:00 2001
> From: wanghongliang <wanghongliang@...ngson.cn>
> Date: Wed, 3 Aug 2022 02:01:23 +0800
> Subject: [PATCH] add loongarch64 port v5.

Patch revision should be inside the PATCH label, because it's not supposed to be
a part of the commit message when merged.

You should generate the patch with something like `git format-patch -v5 master`,
and ideally not send it as an attachment, because that makes inline replies
cumbersome (e.g. I can't review and compose the mail on my phone, which I often do).

> 
> Author: xiaojuanZhai <zhaixiaojuan@...ngson.cn>
> Author: meidanLi <limeidan@...ngson.cn>
> Author: guoqiChen <chenguoqi@...ngson.cn>
> Author: xiaolinZhao <zhaoxiaolin@...ngson.cn>
> Author: Fanpeng <fanpeng@...ngson.cn>
> Author: jiantaoShan <shanjiantao@...ngson.cn>
> Author: xuhuiQiang <qiangxuhui@...ngson.cn>
> Author: jingyunHua <huajingyun@...ngson.cn>
> Author: Liuxue <liuxue@...ngson.cn>
> Author: HongliangWang <wanghongliang@...ngson.cn>
> 
> Signed-off-by: wanghongliang <wanghongliang@...ngson.cn>

As I have pointed out earlier, Chinese names are NOT camelCase; please use the
native form of "Lastname Firstname" (or the two reversed), but never the current
form.

> ---
>  arch/loongarch64/atomic_arch.h             |  53 ++++
>  arch/loongarch64/bits/alltypes.h.in        |  18 ++
>  arch/loongarch64/bits/fenv.h               |  20 ++
>  arch/loongarch64/bits/float.h              |  16 ++
>  arch/loongarch64/bits/posix.h              |   2 +
>  arch/loongarch64/bits/ptrace.h             |   4 +
>  arch/loongarch64/bits/reg.h                |   2 +
>  arch/loongarch64/bits/setjmp.h             |   1 +
>  arch/loongarch64/bits/signal.h             |  80 ++++++
>  arch/loongarch64/bits/stat.h               |  18 ++
>  arch/loongarch64/bits/stdint.h             |  20 ++
>  arch/loongarch64/bits/syscall.h.in         | 303 +++++++++++++++++++++
>  arch/loongarch64/bits/user.h               |   5 +
>  arch/loongarch64/crt_arch.h                |  13 +
>  arch/loongarch64/pthread_arch.h            |  13 +
>  arch/loongarch64/reloc.h                   |  27 ++
>  arch/loongarch64/syscall_arch.h            | 137 ++++++++++
>  configure                                  |   1 +
>  crt/loongarch64/crti.s                     |  15 +
>  crt/loongarch64/crtn.s                     |  12 +
>  include/elf.h                              |  65 +++++
>  src/fenv/loongarch64/fenv.S                |  72 +++++
>  src/ldso/loongarch64/dlsym.s               |   7 +
>  src/setjmp/loongarch64/longjmp.S           |  37 +++
>  src/setjmp/loongarch64/setjmp.S            |  34 +++
>  src/signal/loongarch64/restore.s           |  10 +
>  src/signal/loongarch64/sigsetjmp.s         |  29 ++
>  src/thread/loongarch64/__set_thread_area.s |   7 +
>  src/thread/loongarch64/__unmapself.s       |   7 +
>  src/thread/loongarch64/clone.s             |  47 ++++
>  src/thread/loongarch64/syscall_cp.s        |  29 ++
>  31 files changed, 1104 insertions(+)
>  create mode 100644 arch/loongarch64/atomic_arch.h
>  create mode 100644 arch/loongarch64/bits/alltypes.h.in
>  create mode 100644 arch/loongarch64/bits/fenv.h
>  create mode 100644 arch/loongarch64/bits/float.h
>  create mode 100644 arch/loongarch64/bits/posix.h
>  create mode 100644 arch/loongarch64/bits/ptrace.h
>  create mode 100644 arch/loongarch64/bits/reg.h
>  create mode 100644 arch/loongarch64/bits/setjmp.h
>  create mode 100644 arch/loongarch64/bits/signal.h
>  create mode 100644 arch/loongarch64/bits/stat.h
>  create mode 100644 arch/loongarch64/bits/stdint.h
>  create mode 100644 arch/loongarch64/bits/syscall.h.in
>  create mode 100644 arch/loongarch64/bits/user.h
>  create mode 100644 arch/loongarch64/crt_arch.h
>  create mode 100644 arch/loongarch64/pthread_arch.h
>  create mode 100644 arch/loongarch64/reloc.h
>  create mode 100644 arch/loongarch64/syscall_arch.h
>  create mode 100644 crt/loongarch64/crti.s
>  create mode 100644 crt/loongarch64/crtn.s
>  create mode 100644 src/fenv/loongarch64/fenv.S
>  create mode 100644 src/ldso/loongarch64/dlsym.s
>  create mode 100644 src/setjmp/loongarch64/longjmp.S
>  create mode 100644 src/setjmp/loongarch64/setjmp.S
>  create mode 100644 src/signal/loongarch64/restore.s
>  create mode 100644 src/signal/loongarch64/sigsetjmp.s
>  create mode 100644 src/thread/loongarch64/__set_thread_area.s
>  create mode 100644 src/thread/loongarch64/__unmapself.s
>  create mode 100644 src/thread/loongarch64/clone.s
>  create mode 100644 src/thread/loongarch64/syscall_cp.s
> 
> diff --git a/arch/loongarch64/atomic_arch.h b/arch/loongarch64/atomic_arch.h
> new file mode 100644
> index 00000000..bf4805c9
> --- /dev/null
> +++ b/arch/loongarch64/atomic_arch.h
> @@ -0,0 +1,53 @@
> +#define a_ll a_ll
> +static inline int a_ll(volatile int *p)
> +{
> +	int v;
> +	__asm__ __volatile__ (
> +		"ll.w %0, %1"
> +		: "=r"(v)
> +		: "ZC"(*p));
> +	return v;
> +}
> +
> +#define a_sc a_sc
> +static inline int a_sc(volatile int *p, int v)
> +{
> +	int r;
> +	__asm__ __volatile__ (
> +		"sc.w %0, %1"
> +		: "=r"(r), "=ZC"(*p)
> +		: "0"(v) : "memory");
> +	return r;
> +}
> +
> +#define a_ll_p a_ll_p
> +static inline void *a_ll_p(volatile void *p)
> +{
> +	void *v;
> +	__asm__ __volatile__ (
> +		"ll.d %0, %1"
> +		: "=r"(v)
> +		: "ZC"(*(void *volatile *)p));
> +	return v;
> +}
> +
> +#define a_sc_p a_sc_p
> +static inline int a_sc_p(volatile void *p, void *v)
> +{
> +	long r;
> +	__asm__ __volatile__ (
> +		"sc.d %0, %1"
> +		: "=r"(r), "=ZC"(*(void *volatile *)p)
> +		: "0"(v)
> +		: "memory");
> +	return r;
> +}
> +
> +#define a_barrier a_barrier
> +static inline void a_barrier()
> +{
> +	__asm__ __volatile__ ("dbar 0" : : : "memory");
> +}
> +
> +#define a_pre_llsc a_barrier
> +#define a_post_llsc a_barrier
> diff --git a/arch/loongarch64/bits/alltypes.h.in b/arch/loongarch64/bits/alltypes.h.in
> new file mode 100644
> index 00000000..d1807aca
> --- /dev/null
> +++ b/arch/loongarch64/bits/alltypes.h.in
> @@ -0,0 +1,18 @@
> +#define _Addr long
> +#define _Int64 long
> +#define _Reg long
> +
> +#define __BYTE_ORDER 1234

`#define __BYTE_ORDER __LITTLE_ENDIAN` could be more consistent with other
arches.

> +#define __LONG_MAX 0x7fffffffffffffffL
> +
> +#ifndef __cplusplus
> +TYPEDEF int wchar_t;
> +#endif
> +
> +TYPEDEF float float_t;
> +TYPEDEF double double_t;
> +
> +TYPEDEF struct { long long __ll; long double __ld; } max_align_t;
> +
> +TYPEDEF unsigned nlink_t;

`unsigned int`? Same for other bare `unsigned` usages.

> +TYPEDEF int blksize_t;
> diff --git a/arch/loongarch64/bits/fenv.h b/arch/loongarch64/bits/fenv.h
> new file mode 100644
> index 00000000..264cafb5
> --- /dev/null
> +++ b/arch/loongarch64/bits/fenv.h
> @@ -0,0 +1,20 @@
> +#define FE_INEXACT    0x010000
> +#define FE_UNDERFLOW  0x020000
> +#define FE_OVERFLOW   0x040000
> +#define FE_DIVBYZERO  0x080000
> +#define FE_INVALID    0x100000
> +
> +#define FE_ALL_EXCEPT 0x1F0000
> +
> +#define FE_TONEAREST  0x000
> +#define FE_TOWARDZERO 0x100
> +#define FE_UPWARD     0x200
> +#define FE_DOWNWARD   0x300
> +
> +typedef unsigned fexcept_t;
> +
> +typedef struct {
> +	unsigned __cw;
> +} fenv_t;
> +
> +#define FE_DFL_ENV ((const fenv_t *) -1)
> diff --git a/arch/loongarch64/bits/float.h b/arch/loongarch64/bits/float.h
> new file mode 100644
> index 00000000..719c7908
> --- /dev/null
> +++ b/arch/loongarch64/bits/float.h
> @@ -0,0 +1,16 @@
> +#define FLT_EVAL_METHOD 0
> +
> +#define LDBL_TRUE_MIN 6.47517511943802511092443895822764655e-4966L
> +#define LDBL_MIN 3.36210314311209350626267781732175260e-4932L
> +#define LDBL_MAX 1.18973149535723176508575932662800702e+4932L
> +#define LDBL_EPSILON 1.92592994438723585305597794258492732e-34L
> +
> +#define LDBL_MANT_DIG 113
> +#define LDBL_MIN_EXP (-16381)
> +#define LDBL_MAX_EXP 16384
> +
> +#define LDBL_DIG 33
> +#define LDBL_MIN_10_EXP (-4931)
> +#define LDBL_MAX_10_EXP 4932
> +
> +#define DECIMAL_DIG 36
> diff --git a/arch/loongarch64/bits/posix.h b/arch/loongarch64/bits/posix.h
> new file mode 100644
> index 00000000..8068ce98
> --- /dev/null
> +++ b/arch/loongarch64/bits/posix.h
> @@ -0,0 +1,2 @@
> +#define _POSIX_V6_LP64_OFF64 1
> +#define _POSIX_V7_LP64_OFF64 1
> diff --git a/arch/loongarch64/bits/ptrace.h b/arch/loongarch64/bits/ptrace.h
> new file mode 100644
> index 00000000..741fc668
> --- /dev/null
> +++ b/arch/loongarch64/bits/ptrace.h
> @@ -0,0 +1,4 @@
> +#define PTRACE_GET_THREAD_AREA	25
> +#define PTRACE_SET_THREAD_AREA	26
> +#define PTRACE_GET_WATCH_REGS	0xd0
> +#define PTRACE_SET_WATCH_REGS	0xd1

This file doesn't seem necessary.

> diff --git a/arch/loongarch64/bits/reg.h b/arch/loongarch64/bits/reg.h
> new file mode 100644
> index 00000000..2633f39d
> --- /dev/null
> +++ b/arch/loongarch64/bits/reg.h
> @@ -0,0 +1,2 @@
> +#undef __WORDSIZE
> +#define __WORDSIZE 64
> diff --git a/arch/loongarch64/bits/setjmp.h b/arch/loongarch64/bits/setjmp.h
> new file mode 100644
> index 00000000..f4a7f8a3
> --- /dev/null
> +++ b/arch/loongarch64/bits/setjmp.h
> @@ -0,0 +1 @@
> +typedef unsigned long long __jmp_buf[22];

`unsigned long` would suffice?

> diff --git a/arch/loongarch64/bits/signal.h b/arch/loongarch64/bits/signal.h
> new file mode 100644
> index 00000000..16f56f21
> --- /dev/null
> +++ b/arch/loongarch64/bits/signal.h
> @@ -0,0 +1,80 @@
> +#if defined(_POSIX_SOURCE) || defined(_POSIX_C_SOURCE) \
> + || defined(_XOPEN_SOURCE) || defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
> +
> +#if defined(_XOPEN_SOURCE) || defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
> +#define MINSIGSTKSZ 4096
> +#define SIGSTKSZ 16384
> +#endif
> +
> +typedef unsigned long greg_t, gregset_t[32];
> +
> +typedef struct sigcontext {
> +	unsigned long pc;
> +	gregset_t gregs;
> +	unsigned int flags;
> +	unsigned long extcontext[];
> +} mcontext_t;

This is different from the glibc definition:

```c
// Linux arch/loongarch/include/uapi/asm/sigcontext.h
struct sigcontext {
         __u64   sc_pc;
         __u64   sc_regs[32];
         __u32   sc_flags;
         __u64   sc_extcontext[0] __attribute__((__aligned__(16)));
};

#define CONTEXT_INFO_ALIGN      16
struct sctx_info {
         __u32   magic;
         __u32   size;
         __u64   padding;        /* padding to 16 bytes */
};

/* FPU context */
#define FPU_CTX_MAGIC           0x46505501
#define FPU_CTX_ALIGN           8
struct fpu_context {
         __u64   regs[32];
         __u64   fcc;
         __u32   fcsr;
};

// glibc sysdeps/unix/sysv/linux/loongarch/sys/ucontext.h
typedef struct mcontext_t
{
   unsigned long long __pc;
   unsigned long long __gregs[32];
   unsigned int __flags;
   unsigned long long __extcontext[0] __attribute__((__aligned__(16)));
} mcontext_t;
```

You may want to keep the musl definition consistent with these.

> +
> +struct sigaltstack {
> +	void *ss_sp;
> +	int ss_flags;
> +	size_t ss_size;
> +};
> +
> +typedef struct __ucontext
> +{
> +	unsigned long uc_flags;
> +	struct __ucontext *uc_link;
> +	stack_t uc_stack;
> +	sigset_t uc_sigmask;
> +	long __uc_pad;
> +	mcontext_t uc_mcontext;
> +} ucontext_t;
> +
> +#define SA_NOCLDSTOP  1
> +#define SA_NOCLDWAIT  2
> +#define SA_SIGINFO    4
> +#define SA_ONSTACK    0x08000000
> +#define SA_RESTART    0x10000000
> +#define SA_NODEFER    0x40000000
> +#define SA_RESETHAND  0x80000000
> +#define SA_RESTORER   0x0
> +
> +#endif
> +
> +#define SIGHUP           1
> +#define SIGINT           2
> +#define SIGQUIT          3
> +#define SIGILL           4
> +#define SIGTRAP          5
> +#define SIGABRT          6
> +#define SIGIOT           SIGABRT
> +#define SIGBUS           7
> +#define SIGFPE           8
> +#define SIGKILL          9
> +#define SIGUSR1         10
> +#define SIGSEGV         11
> +#define SIGUSR2         12
> +#define SIGPIPE         13
> +#define SIGALRM         14
> +#define SIGTERM         15
> +#define SIGSTKFLT       16
> +#define SIGCHLD         17
> +#define SIGCONT         18
> +#define SIGSTOP         19
> +#define SIGTSTP         20
> +#define SIGTTIN         21
> +#define SIGTTOU         22
> +#define SIGURG          23
> +#define SIGXCPU         24
> +#define SIGXFSZ         25
> +#define SIGVTALRM       26
> +#define SIGPROF         27
> +#define SIGWINCH        28
> +#define SIGIO           29
> +#define SIGPOLL         SIGIO
> +#define SIGPWR          30
> +#define SIGSYS          31
> +#define SIGUNUSED       SIGSYS
> +
> +#define _NSIG 65
> diff --git a/arch/loongarch64/bits/stat.h b/arch/loongarch64/bits/stat.h
> new file mode 100644
> index 00000000..b7f4221b
> --- /dev/null
> +++ b/arch/loongarch64/bits/stat.h
> @@ -0,0 +1,18 @@
> +struct stat {
> +	dev_t st_dev;
> +	ino_t st_ino;
> +	mode_t st_mode;
> +	nlink_t st_nlink;
> +	uid_t st_uid;
> +	gid_t st_gid;
> +	dev_t st_rdev;
> +	unsigned long __pad;
> +	off_t st_size;
> +	blksize_t st_blksize;
> +	int __pad2;
> +	blkcnt_t st_blocks;
> +	struct timespec st_atim;
> +	struct timespec st_mtim;
> +	struct timespec st_ctim;
> +	unsigned __unused[2];
> +};
> diff --git a/arch/loongarch64/bits/stdint.h b/arch/loongarch64/bits/stdint.h
> new file mode 100644
> index 00000000..1bb147f2
> --- /dev/null
> +++ b/arch/loongarch64/bits/stdint.h
> @@ -0,0 +1,20 @@
> +typedef int32_t int_fast16_t;
> +typedef int32_t int_fast32_t;
> +typedef uint32_t uint_fast16_t;
> +typedef uint32_t uint_fast32_t;
> +
> +#define INT_FAST16_MIN  INT32_MIN
> +#define INT_FAST32_MIN  INT32_MIN
> +
> +#define INT_FAST16_MAX  INT32_MAX
> +#define INT_FAST32_MAX  INT32_MAX
> +
> +#define UINT_FAST16_MAX UINT32_MAX
> +#define UINT_FAST32_MAX UINT32_MAX
> +
> +#define INTPTR_MIN      INT64_MIN
> +#define INTPTR_MAX      INT64_MAX
> +#define UINTPTR_MAX     UINT64_MAX
> +#define PTRDIFF_MIN     INT64_MIN
> +#define PTRDIFF_MAX     INT64_MAX
> +#define SIZE_MAX        UINT64_MAX
> diff --git a/arch/loongarch64/bits/syscall.h.in b/arch/loongarch64/bits/syscall.h.in
> new file mode 100644
> index 00000000..689ff36b
> --- /dev/null
> +++ b/arch/loongarch64/bits/syscall.h.in
> @@ -0,0 +1,303 @@
> +#define __NR_io_setup                   0
> +#define __NR_io_destroy                 1
> +#define __NR_io_submit                  2
> +#define __NR_io_cancel                  3
> +#define __NR_io_getevents               4
> +#define __NR_setxattr                   5
> +#define __NR_lsetxattr                  6
> +#define __NR_fsetxattr                  7
> +#define __NR_getxattr                   8
> +#define __NR_lgetxattr                  9
> +#define __NR_fgetxattr                  10
> +#define __NR_listxattr                  11
> +#define __NR_llistxattr                 12
> +#define __NR_flistxattr                 13
> +#define __NR_removexattr                14
> +#define __NR_lremovexattr               15
> +#define __NR_fremovexattr               16
> +#define __NR_getcwd                     17
> +#define __NR_lookup_dcookie             18
> +#define __NR_eventfd2                   19
> +#define __NR_epoll_create1              20
> +#define __NR_epoll_ctl                  21
> +#define __NR_epoll_pwait                22
> +#define __NR_dup                        23
> +#define __NR_dup3                       24
> +#define __NR3264_fcntl                  25
> +#define __NR_inotify_init1              26
> +#define __NR_inotify_add_watch          27
> +#define __NR_inotify_rm_watch           28
> +#define __NR_ioctl                      29
> +#define __NR_ioprio_set                 30
> +#define __NR_ioprio_get                 31
> +#define __NR_flock                      32
> +#define __NR_mknodat                    33
> +#define __NR_mkdirat                    34
> +#define __NR_unlinkat                   35
> +#define __NR_symlinkat                  36
> +#define __NR_linkat                     37
> +#define __NR_umount2                    39
> +#define __NR_mount                      40
> +#define __NR_pivot_root                 41
> +#define __NR_nfsservctl                 42
> +#define __NR3264_statfs                 43
> +#define __NR3264_fstatfs                44
> +#define __NR3264_truncate               45
> +#define __NR3264_ftruncate              46
> +#define __NR_fallocate                  47
> +#define __NR_faccessat                  48
> +#define __NR_chdir                      49
> +#define __NR_fchdir                     50
> +#define __NR_chroot                     51
> +#define __NR_fchmod                     52
> +#define __NR_fchmodat                   53
> +#define __NR_fchownat                   54
> +#define __NR_fchown                     55
> +#define __NR_openat                     56
> +#define __NR_close                      57
> +#define __NR_vhangup                    58
> +#define __NR_pipe2                      59
> +#define __NR_quotactl                   60
> +#define __NR_getdents64                 61
> +#define __NR3264_lseek                  62
> +#define __NR_read                       63
> +#define __NR_write                      64
> +#define __NR_readv                      65
> +#define __NR_writev                     66
> +#define __NR_pread64                    67
> +#define __NR_pwrite64                   68
> +#define __NR_preadv                     69
> +#define __NR_pwritev                    70
> +#define __NR3264_sendfile               71
> +#define __NR_pselect6                   72
> +#define __NR_ppoll                      73
> +#define __NR_signalfd4                  74
> +#define __NR_vmsplice                   75
> +#define __NR_splice                     76
> +#define __NR_tee                        77
> +#define __NR_readlinkat                 78
> +#define __NR_sync                       81
> +#define __NR_fsync                      82
> +#define __NR_fdatasync                  83
> +#define __NR_sync_file_range            84
> +#define __NR_timerfd_create             85
> +#define __NR_timerfd_settime            86
> +#define __NR_timerfd_gettime            87
> +#define __NR_utimensat                  88
> +#define __NR_acct                       89
> +#define __NR_capget                     90
> +#define __NR_capset                     91
> +#define __NR_personality                92
> +#define __NR_exit                       93
> +#define __NR_exit_group                 94
> +#define __NR_waitid                     95
> +#define __NR_set_tid_address            96
> +#define __NR_unshare                    97
> +#define __NR_futex                      98
> +#define __NR_set_robust_list            99
> +#define __NR_get_robust_list            100
> +#define __NR_nanosleep                  101
> +#define __NR_getitimer                  102
> +#define __NR_setitimer                  103
> +#define __NR_kexec_load                 104
> +#define __NR_init_module                105
> +#define __NR_delete_module              106
> +#define __NR_timer_create               107
> +#define __NR_timer_gettime              108
> +#define __NR_timer_getoverrun           109
> +#define __NR_timer_settime              110
> +#define __NR_timer_delete               111
> +#define __NR_clock_settime              112
> +#define __NR_clock_gettime              113
> +#define __NR_clock_getres               114
> +#define __NR_clock_nanosleep            115
> +#define __NR_syslog                     116
> +#define __NR_ptrace                     117
> +#define __NR_sched_setparam             118
> +#define __NR_sched_setscheduler         119
> +#define __NR_sched_getscheduler         120
> +#define __NR_sched_getparam             121
> +#define __NR_sched_setaffinity          122
> +#define __NR_sched_getaffinity          123
> +#define __NR_sched_yield                124
> +#define __NR_sched_get_priority_max     125
> +#define __NR_sched_get_priority_min     126
> +#define __NR_sched_rr_get_interval      127
> +#define __NR_restart_syscall            128
> +#define __NR_kill                       129
> +#define __NR_tkill                      130
> +#define __NR_tgkill                     131
> +#define __NR_sigaltstack                132
> +#define __NR_rt_sigsuspend              133
> +#define __NR_rt_sigaction               134
> +#define __NR_rt_sigprocmask             135
> +#define __NR_rt_sigpending              136
> +#define __NR_rt_sigtimedwait            137
> +#define __NR_rt_sigqueueinfo            138
> +#define __NR_rt_sigreturn               139
> +#define __NR_setpriority                140
> +#define __NR_getpriority                141
> +#define __NR_reboot                     142
> +#define __NR_setregid                   143
> +#define __NR_setgid                     144
> +#define __NR_setreuid                   145
> +#define __NR_setuid                     146
> +#define __NR_setresuid                  147
> +#define __NR_getresuid                  148
> +#define __NR_setresgid                  149
> +#define __NR_getresgid                  150
> +#define __NR_setfsuid                   151
> +#define __NR_setfsgid                   152
> +#define __NR_times                      153
> +#define __NR_setpgid                    154
> +#define __NR_getpgid                    155
> +#define __NR_getsid                     156
> +#define __NR_setsid                     157
> +#define __NR_getgroups                  158
> +#define __NR_setgroups                  159
> +#define __NR_uname                      160
> +#define __NR_sethostname                161
> +#define __NR_setdomainname              162
> +#define __NR_getrlimit                  163
> +#define __NR_setrlimit                  164
> +#define __NR_getrusage                  165
> +#define __NR_umask                      166
> +#define __NR_prctl                      167
> +#define __NR_getcpu                     168
> +#define __NR_gettimeofday               169
> +#define __NR_settimeofday               170
> +#define __NR_adjtimex                   171
> +#define __NR_getpid                     172
> +#define __NR_getppid                    173
> +#define __NR_getuid                     174
> +#define __NR_geteuid                    175
> +#define __NR_getgid                     176
> +#define __NR_getegid                    177
> +#define __NR_gettid                     178
> +#define __NR_sysinfo                    179
> +#define __NR_mq_open                    180
> +#define __NR_mq_unlink                  181
> +#define __NR_mq_timedsend               182
> +#define __NR_mq_timedreceive            183
> +#define __NR_mq_notify                  184
> +#define __NR_mq_getsetattr              185
> +#define __NR_msgget                     186
> +#define __NR_msgctl                     187
> +#define __NR_msgrcv                     188
> +#define __NR_msgsnd                     189
> +#define __NR_semget                     190
> +#define __NR_semctl                     191
> +#define __NR_semtimedop                 192
> +#define __NR_semop                      193
> +#define __NR_shmget                     194
> +#define __NR_shmctl                     195
> +#define __NR_shmat                      196
> +#define __NR_shmdt                      197
> +#define __NR_socket                     198
> +#define __NR_socketpair                 199
> +#define __NR_bind                       200
> +#define __NR_listen                     201
> +#define __NR_accept                     202
> +#define __NR_connect                    203
> +#define __NR_getsockname                204
> +#define __NR_getpeername                205
> +#define __NR_sendto                     206
> +#define __NR_recvfrom                   207
> +#define __NR_setsockopt                 208
> +#define __NR_getsockopt                 209
> +#define __NR_shutdown                   210
> +#define __NR_sendmsg                    211
> +#define __NR_recvmsg                    212
> +#define __NR_readahead                  213
> +#define __NR_brk                        214
> +#define __NR_munmap                     215
> +#define __NR_mremap                     216
> +#define __NR_add_key                    217
> +#define __NR_request_key                218
> +#define __NR_keyctl                     219
> +#define __NR_clone                      220
> +#define __NR_execve                     221
> +#define __NR3264_mmap                   222
> +#define __NR3264_fadvise64              223
> +#define __NR_swapon                     224
> +#define __NR_swapoff                    225
> +#define __NR_mprotect                   226
> +#define __NR_msync                      227
> +#define __NR_mlock                      228
> +#define __NR_munlock                    229
> +#define __NR_mlockall                   230
> +#define __NR_munlockall                 231
> +#define __NR_mincore                    232
> +#define __NR_madvise                    233
> +#define __NR_remap_file_pages           234
> +#define __NR_mbind                      235
> +#define __NR_get_mempolicy              236
> +#define __NR_set_mempolicy              237
> +#define __NR_migrate_pages              238
> +#define __NR_move_pages                 239
> +#define __NR_rt_tgsigqueueinfo          240
> +#define __NR_perf_event_open            241
> +#define __NR_accept4                    242
> +#define __NR_recvmmsg                   243
> +#define __NR_arch_specific_syscall      244
> +#define __NR_wait4                      260
> +#define __NR_prlimit64                  261
> +#define __NR_fanotify_init              262
> +#define __NR_fanotify_mark              263
> +#define __NR_name_to_handle_at          264
> +#define __NR_open_by_handle_at          265
> +#define __NR_clock_adjtime              266
> +#define __NR_syncfs                     267
> +#define __NR_setns                      268
> +#define __NR_sendmmsg                   269
> +#define __NR_process_vm_readv           270
> +#define __NR_process_vm_writev          271
> +#define __NR_kcmp                       272
> +#define __NR_finit_module               273
> +#define __NR_sched_setattr              274
> +#define __NR_sched_getattr              275
> +#define __NR_renameat2                  276
> +#define __NR_seccomp                    277
> +#define __NR_getrandom                  278
> +#define __NR_memfd_create               279
> +#define __NR_bpf                        280
> +#define __NR_execveat                   281
> +#define __NR_userfaultfd                282
> +#define __NR_membarrier                 283
> +#define __NR_mlock2                     284
> +#define __NR_copy_file_range            285
> +#define __NR_preadv2                    286
> +#define __NR_pwritev2                   287
> +#define __NR_pkey_mprotect              288
> +#define __NR_pkey_alloc                 289
> +#define __NR_pkey_free                  290
> +#define __NR_statx                      291
> +#define __NR_io_pgetevents              292
> +#define __NR_rseq                       293
> +#define __NR_kexec_file_load            294
> +#define __NR_pidfd_send_signal          424
> +#define __NR_io_uring_setup             425
> +#define __NR_io_uring_enter             426
> +#define __NR_io_uring_register          427
> +#define __NR_open_tree		        428
> +#define __NR_move_mount		        429
> +#define __NR_fsopen		        430
> +#define __NR_fsconfig		        431
> +#define __NR_fsmount		        432
> +#define __NR_fspick		        433
> +#define __NR_pidfd_open		        434
> +#define __NR_clone3		        435
> +#define __NR_close_range	        436
> +#define __NR_openat2		        437
> +#define __NR_pidfd_getfd	        438
> +#define __NR_faccessat2		        439

Mixed tabs and spaces on some lines; please use spaces only, for consistency
with the bulk of the surrounding code.

> +#define __NR_process_madvise	        440
> +#define __NR_fcntl                      __NR3264_fcntl
> +#define __NR_statfs                     __NR3264_statfs
> +#define __NR_fstatfs                    __NR3264_fstatfs
> +#define __NR_truncate                   __NR3264_truncate
> +#define __NR_ftruncate                  __NR3264_ftruncate
> +#define __NR_lseek                      __NR3264_lseek
> +#define __NR_sendfile                   __NR3264_sendfile
> +#define __NR_mmap                       __NR3264_mmap
> +#define __NR_fadvise64                  __NR3264_fadvise64
> diff --git a/arch/loongarch64/bits/user.h b/arch/loongarch64/bits/user.h
> new file mode 100644
> index 00000000..4d4cd534
> --- /dev/null
> +++ b/arch/loongarch64/bits/user.h
> @@ -0,0 +1,5 @@
> +#define ELF_NGREG	45
> +#define ELF_NFPREG	33
> +
> +typedef unsigned long elf_greg_t, elf_gregset_t[ELF_NGREG];
> +typedef double elf_fpreg_t, elf_fpregset_t[ELF_NFPREG];
> diff --git a/arch/loongarch64/crt_arch.h b/arch/loongarch64/crt_arch.h
> new file mode 100644
> index 00000000..e0760d9e
> --- /dev/null
> +++ b/arch/loongarch64/crt_arch.h
> @@ -0,0 +1,13 @@
> +__asm__(
> +".text \n"
> +".global " START "\n"
> +".type   " START ", @function\n"
> +START ":\n"
> +"	move $fp, $zero\n"
> +"	move $a0, $sp\n"
> +".weak _DYNAMIC\n"
> +".hidden _DYNAMIC\n"
> +"	la.local $a1, _DYNAMIC\n"
> +"	bstrins.d $sp, $zero, 3, 0\n"
> +"	b " START "_c\n"
> +);
> diff --git a/arch/loongarch64/pthread_arch.h b/arch/loongarch64/pthread_arch.h
> new file mode 100644
> index 00000000..95ee4c7a
> --- /dev/null
> +++ b/arch/loongarch64/pthread_arch.h
> @@ -0,0 +1,13 @@
> +static inline uintptr_t __get_tp()
> +{
> +	register uintptr_t tp __asm__("tp");
> +	__asm__ ("" : "=r" (tp) );

While the current approach works, it's a bit fragile [1], and the simple and
plain riscv version works too:

uintptr_t tp;
__asm__ ("move %0, $tp" : "=r"(tp));

[1]: https://gcc.gnu.org/onlinedocs/gcc/Local-Register-Variables.html#Local-Register-Variables

> +	return tp;
> +}
> +
> +#define TLS_ABOVE_TP
> +#define GAP_ABOVE_TP 0
> +
> +#define DTP_OFFSET 0
> +
> +#define MC_PC pc
> diff --git a/arch/loongarch64/reloc.h b/arch/loongarch64/reloc.h
> new file mode 100644
> index 00000000..865a648d
> --- /dev/null
> +++ b/arch/loongarch64/reloc.h
> @@ -0,0 +1,27 @@
> +#ifdef __loongarch64_soft_float

Should be `__loongarch_soft_float`.

> +#define FP_SUFFIX "-sf"
> +#else
> +#define FP_SUFFIX ""
> +#endif
> +
> +#define LDSO_ARCH "loongarch64"  FP_SUFFIX
> +
> +#define TPOFF_K (0x0)

a simple "0" would be enough...

> +
> +#define REL_PLT         R_LARCH_JUMP_SLOT
> +#define REL_COPY        R_LARCH_COPY
> +#define REL_DTPMOD      R_LARCH_TLS_DTPMOD64
> +#define REL_DTPOFF      R_LARCH_TLS_DTPREL64
> +#define REL_TPOFF       R_LARCH_TLS_TPREL64
> +#define REL_RELATIVE    R_LARCH_RELATIVE
> +#define REL_SYMBOLIC    R_LARCH_64
> +
> +#define CRTJMP(pc,sp) __asm__ __volatile__( \
> +	"move $sp,%1 ; jr %0" : : "r"(pc), "r"(sp) : "memory" )

One space after "$sp,".

> +
> +#define GETFUNCSYM(fp, sym, got) __asm__ ( \
> +	".hidden " #sym "\n" \
> +	".align 8 \n" \
> +	"	la.local $t1, "#sym" \n" \
> +	"	move %0, $t1 \n" \
> +	: "=r"(*(fp)) : : "memory" )

Does the generic version residing in ldso/dlstart.c not work?

> diff --git a/arch/loongarch64/syscall_arch.h b/arch/loongarch64/syscall_arch.h
> new file mode 100644
> index 00000000..4d5e1885
> --- /dev/null
> +++ b/arch/loongarch64/syscall_arch.h
> @@ -0,0 +1,137 @@
> +#define __SYSCALL_LL_E(x) (x)
> +#define __SYSCALL_LL_O(x) (x)
> +
> +#define SYSCALL_CLOBBERLIST \
> +	"$t0", "$t1", "$t2", "$t3", \
> +	"$t4", "$t5", "$t6", "$t7", "$t8", "memory"
> +
> +static inline long __syscall0(long n)
> +{
> +	register long a7 __asm__("$a7") = n;
> +	register long a0 __asm__("$a0");
> +
> +	__asm__ __volatile__ (
> +		"syscall 0"
> +		: "=r"(a0)
> +		: "r"(a7)
> +		: SYSCALL_CLOBBERLIST);
> +	return a0;
> +}
> +
> +static inline long __syscall1(long n, long a)
> +{
> +	register long a7 __asm__("$a7") = n;
> +	register long a0 __asm__("$a0") = a;
> +
> +	__asm__ __volatile__ (
> +		"syscall 0"
> +		: "+r"(a0)
> +		: "r"(a7)
> +		: SYSCALL_CLOBBERLIST);
> +	return a0;
> +}
> +
> +static inline long __syscall2(long n, long a, long b)
> +{
> +	register long a7 __asm__("$a7") = n;
> +	register long a0 __asm__("$a0") = a;
> +	register long a1 __asm__("$a1") = b;
> +
> +	__asm__ __volatile__ (
> +		"syscall 0"
> +		: "+r"(a0)
> +	        : "r"(a7), "r"(a1)
> +		: SYSCALL_CLOBBERLIST);
> +	return a0;
> +}
> +
> +static inline long __syscall3(long n, long a, long b, long c)
> +{
> +	register long a7 __asm__("$a7") = n;
> +	register long a0 __asm__("$a0") = a;
> +	register long a1 __asm__("$a1") = b;
> +	register long a2 __asm__("$a2") = c;
> +
> +	__asm__ __volatile__ (
> +		"syscall 0"
> +		: "+r"(a0)
> +	        : "r"(a7), "r"(a1), "r"(a2)
> +		: SYSCALL_CLOBBERLIST);
> +	return a0;
> +}
> +
> +static inline long __syscall4(long n, long a, long b, long c, long d)
> +{
> +	register long a7 __asm__("$a7") = n;
> +	register long a0 __asm__("$a0") = a;
> +	register long a1 __asm__("$a1") = b;
> +	register long a2 __asm__("$a2") = c;
> +	register long a3 __asm__("$a3") = d;
> +
> +	__asm__ __volatile__ (
> +		"syscall 0"
> +		: "+r"(a0)
> +	        : "r"(a7), "r"(a1), "r"(a2), "r"(a3)
> +		: SYSCALL_CLOBBERLIST);
> +	return a0;
> +}
> +
> +static inline long __syscall5(long n, long a, long b, long c, long d, long e)
> +{
> +	register long a7 __asm__("$a7") = n;
> +	register long a0 __asm__("$a0") = a;
> +	register long a1 __asm__("$a1") = b;
> +	register long a2 __asm__("$a2") = c;
> +	register long a3 __asm__("$a3") = d;
> +	register long a4 __asm__("$a4") = e;
> +
> +	__asm__ __volatile__ (
> +		"syscall 0"
> +		: "+r"(a0)
> +	        : "r"(a7), "r"(a1), "r"(a2), "r"(a3), "r"(a4)
> +		: SYSCALL_CLOBBERLIST);
> +	return a0;
> +}
> +
> +static inline long __syscall6(long n, long a, long b, long c, long d, long e, long f)
> +{
> +	register long a7 __asm__("$a7") = n;
> +	register long a0 __asm__("$a0") = a;
> +	register long a1 __asm__("$a1") = b;
> +	register long a2 __asm__("$a2") = c;
> +	register long a3 __asm__("$a3") = d;
> +	register long a4 __asm__("$a4") = e;
> +	register long a5 __asm__("$a5") = f;
> +
> +	__asm__ __volatile__ (
> +		"syscall 0"
> +		: "+r"(a0)
> +	        : "r"(a7), "r"(a1), "r"(a2), "r"(a3), "r"(a4), "r"(a5)
> +		: SYSCALL_CLOBBERLIST);
> +	return a0;
> +}
> +
> +static inline long __syscall7(long n, long a, long b, long c, long d, long e, long f, long g)
> +{
> +	register long a7 __asm__("$a7") = n;
> +	register long a0 __asm__("$a0") = a;
> +	register long a1 __asm__("$a1") = b;
> +	register long a2 __asm__("$a2") = c;
> +	register long a3 __asm__("$a3") = d;
> +	register long a4 __asm__("$a4") = e;
> +	register long a5 __asm__("$a5") = f;
> +	register long a6 __asm__("$a6") = g;
> +
> +	__asm__ __volatile__ (
> +		"syscall 0"
> +		: "+r"(a0)
> +	        : "r"(a7), "r"(a1), "r"(a2), "r"(a3), "r"(a4), "r"(a5), "r"(a6)
> +		: SYSCALL_CLOBBERLIST);
> +	return a0;
> +}

__syscall7 seems useless; you could remove this function altogether. The other syscall helpers look good, though.

> +
> +#define VDSO_USEFUL
> +#define VDSO_CGT_SYM "__vdso_clock_gettime"
> +#define VDSO_CGT_VER "LINUX_5.10"

Correct.

> +
> +#define IPC_64  0
> diff --git a/configure b/configure
> index 6f5453f5..68062071 100755
> --- a/configure
> +++ b/configure
> @@ -328,6 +328,7 @@ i?86*) ARCH=i386 ;;
>  x86_64-x32*|x32*|x86_64*x32) ARCH=x32 ;;
>  x86_64-nt64*) ARCH=nt64 ;;
>  x86_64*) ARCH=x86_64 ;;
> +loongarch64*) ARCH=loongarch64 ;;
>  m68k*) ARCH=m68k ;;
>  mips64*|mipsisa64*) ARCH=mips64 ;;
>  mips*) ARCH=mips ;;

I think you're missing one change w.r.t. soft-float subarch detection, somewhere
below (should be between similar blocks for aarch64 and m68k):

if test "$ARCH" = "loongarch64" ; then
trycppif __loongarch_soft_float "$t" && SUBARCH=${SUBARCH}-sf
fi

> diff --git a/crt/loongarch64/crti.s b/crt/loongarch64/crti.s
> new file mode 100644
> index 00000000..81c43e6e
> --- /dev/null
> +++ b/crt/loongarch64/crti.s
> @@ -0,0 +1,15 @@
> +.section .init
> +.global _init
> +_init:
> +	addi.d $sp,$sp,-16
> +	st.d $fp,$sp,0
> +	st.d $ra,$sp,8
> +	addi.d $fp,$sp,16
> +
> +.section .fini
> +.global _fini
> +_fini:
> +	addi.d $sp,$sp,-16
> +	st.d $fp,$sp,0
> +	st.d $ra,$sp,8
> +	addi.d $fp,$sp,16
> diff --git a/crt/loongarch64/crtn.s b/crt/loongarch64/crtn.s
> new file mode 100644
> index 00000000..ca3fe80e
> --- /dev/null
> +++ b/crt/loongarch64/crtn.s
> @@ -0,0 +1,12 @@
> +.section .init
> +	ld.d $fp,$sp,0
> +	ld.d $ra,$sp,8
> +	addi.d $sp,$sp,16
> +	jr $ra
> +
> +
> +.section .fini
> +	ld.d $fp,$sp,0
> +	ld.d $ra,$sp,8
> +	addi.d $sp,$sp,16
> +	jr $ra

The crt changes shouldn't be necessary at all, should they? I didn't see any
custom asm for riscv, for example.

> diff --git a/include/elf.h b/include/elf.h
> index 86e2f0bb..1b0e9e71 100644
> --- a/include/elf.h
> +++ b/include/elf.h
> @@ -697,6 +697,11 @@ typedef struct {
>  #define NT_MIPS_FP_MODE	0x801
>  #define NT_MIPS_MSA	0x802
>  #define NT_VERSION	1
> +#define NT_LOONGARCH_CPUCFG	0xa00
> +#define NT_LOONGARCH_CSR	0xa01
> +#define NT_LOONGARCH_LSX	0xa02
> +#define NT_LOONGARCH_LASX	0xa03
> +#define NT_LOONGARCH_LBT	0xa04
>  
>  
>  
> @@ -3288,6 +3293,66 @@ enum
>  #define R_RISCV_SET32           56
>  #define R_RISCV_32_PCREL        57
>  
> +/* LoongArch ELF Flags */
> +#define EM_LOONGARCH  258

Please move this definition to the EM_* group above.

> +
> +#define EF_LARCH_ABI             0x07
> +#define EF_LARCH_ABI_LP64D       0x03
> +
> +/* LoongArch specific dynamic relocations. */
> +#define R_LARCH_NONE                        0
> +#define R_LARCH_32                          1
> +#define R_LARCH_64                          2
> +#define R_LARCH_RELATIVE                    3
> +#define R_LARCH_COPY                        4
> +#define R_LARCH_JUMP_SLOT                   5
> +#define R_LARCH_TLS_DTPMOD32                6
> +#define R_LARCH_TLS_DTPMOD64                7
> +#define R_LARCH_TLS_DTPREL32                8
> +#define R_LARCH_TLS_DTPREL64                9
> +#define R_LARCH_TLS_TPREL32                 10
> +#define R_LARCH_TLS_TPREL64                 11
> +#define R_LARCH_IRELATIVE                   12
> +#define R_LARCH_MARK_LA                     20
> +#define R_LARCH_MARK_PCREL                  21
> +#define R_LARCH_SOP_PUSH_PCREL              22
> +#define R_LARCH_SOP_PUSH_ABSOLUTE           23
> +#define R_LARCH_SOP_PUSH_DUP                24
> +#define R_LARCH_SOP_PUSH_GPREL              25
> +#define R_LARCH_SOP_PUSH_TLS_TPREL          26
> +#define R_LARCH_SOP_PUSH_TLS_GOT            27
> +#define R_LARCH_SOP_PUSH_TLS_GD             28
> +#define R_LARCH_SOP_PUSH_PLT_PCREL          29
> +#define R_LARCH_SOP_ASSERT                  30
> +#define R_LARCH_SOP_NOT                     31
> +#define R_LARCH_SOP_SUB                     32
> +#define R_LARCH_SOP_SL                      33
> +#define R_LARCH_SOP_SR                      34
> +#define R_LARCH_SOP_ADD                     35
> +#define R_LARCH_SOP_AND                     36
> +#define R_LARCH_SOP_IF_ELSE                 37
> +#define R_LARCH_SOP_POP_32_S_10_5           38
> +#define R_LARCH_SOP_POP_32_U_10_12          39
> +#define R_LARCH_SOP_POP_32_S_10_12          40
> +#define R_LARCH_SOP_POP_32_S_10_16          41
> +#define R_LARCH_SOP_POP_32_S_10_16_S2       42
> +#define R_LARCH_SOP_POP_32_S_5_20           43
> +#define R_LARCH_SOP_POP_32_S_0_5_10_16_S2   44
> +#define R_LARCH_SOP_POP_32_S_0_10_10_16_S2  45
> +#define R_LARCH_SOP_POP_32_U                46
> +#define R_LARCH_ADD8                        47
> +#define R_LARCH_ADD16                       48
> +#define R_LARCH_ADD24                       49
> +#define R_LARCH_ADD32                       50
> +#define R_LARCH_ADD64                       51
> +#define R_LARCH_SUB8                        52
> +#define R_LARCH_SUB16                       53
> +#define R_LARCH_SUB24                       54
> +#define R_LARCH_SUB32                       55
> +#define R_LARCH_SUB64                       56
> +#define R_LARCH_GNU_VTINHERIT               57
> +#define R_LARCH_GNU_VTENTRY                 58

You may want to update the list here to include new reloc types in the LoongArch
ELF psABI v2.00. Check the latest binutils code for details. The e_flags
constants need updating too.

> +
>  #ifdef __cplusplus
>  }
>  #endif
> diff --git a/src/fenv/loongarch64/fenv.S b/src/fenv/loongarch64/fenv.S
> new file mode 100644
> index 00000000..aa012c97
> --- /dev/null
> +++ b/src/fenv/loongarch64/fenv.S
> @@ -0,0 +1,72 @@
> +#ifndef __loongarch_soft_float
> +
> +.global	feclearexcept
> +.type	feclearexcept,@function

Convention is to use a space after the directive, not a tab (it seems only the
mips port is using tabs here).

> +feclearexcept:
> +	li.w    $a1, 0x1f0000
> +	and     $a0, $a0, $a1
> +	movfcsr2gr $a1, $r0
> +	or	$a1, $a1, $a0
> +	xor	$a1, $a1, $a0
> +	movgr2fcsr $r0, $a1
> +	li.w    $a0, 0
> +	jr      $ra

Mixed spaces and tabs after the mnemonics; there are similar mistakes all
around, so please fix all of them.

Other than that, the implementation isn't optimal; you should be able to use
`andn` to quickly clear the bits specified by the mask. Or you could consider
using the other FCSR selectors to hopefully avoid masking altogether.

> +
> +.global	feraiseexcept
> +.type	feraiseexcept,@function
> +feraiseexcept:
> +	li.w    $a1, 0x1f0000
> +	and     $a0, $a0, $a1
> +	movfcsr2gr $a1, $r0
> +	or	$a1, $a1, $a0
> +	movgr2fcsr $r0, $a1
> +        li.w    $a0, 0
> +	jr      $ra
> +
> +.global	fetestexcept
> +.type	fetestexcept,@function
> +fetestexcept:
> +        li.w    $a1, 0x1f0000
> +        and     $a0, $a0, $a1
> +	movfcsr2gr $a1, $r0
> +        and     $a0, $a1, $a0
> +	jr      $ra
> +
> +.global	fegetround
> +.type	fegetround,@function
> +fegetround:
> +	movfcsr2gr $a0, $r0
> +        andi    $a0, $a0, 0x300  //fcsr0.RM
> +	jr      $ra
> +
> +.global	__fesetround
> +.hidden __fesetround
> +.type	__fesetround,@function
> +__fesetround:
> +	movfcsr2gr $a1, $r0
> +	li.w	$a2, -769  //0xfffffcff
> +	and	$a1, $a1, $a2
> +	or	$a1, $a1, $a0
> +	movgr2fcsr $r0, $a1
> +        li.w    $a0, 0
> +	jr      $ra
> +
> +.global	fegetenv
> +.type	fegetenv,@function
> +fegetenv:
> +	movfcsr2gr $a1, $r0
> +	st.w	$a1, $a0, 0
> +        li.w    $a0, 0
> +	jr      $ra
> +
> +.global	fesetenv
> +.type	fesetenv,@function
> +fesetenv:
> +	addi.d	$a1, $a0, 1
> +	beq	$a1, $r0, 1f
> +	ld.w	$a1, $a0, 0
> +1:	movgr2fcsr $r0, $a1
> +        li.w    $a0, 0
> +	jr      $ra
> +
> +#endif
> diff --git a/src/ldso/loongarch64/dlsym.s b/src/ldso/loongarch64/dlsym.s
> new file mode 100644
> index 00000000..edb8214c
> --- /dev/null
> +++ b/src/ldso/loongarch64/dlsym.s
> @@ -0,0 +1,7 @@
> +.global dlsym
> +.hidden __dlsym
> +.type   dlsym,@function
> +dlsym:
> +	move	$a2, $ra
> +	la.global	$t0, __dlsym
> +	jr	$t0
> diff --git a/src/setjmp/loongarch64/longjmp.S b/src/setjmp/loongarch64/longjmp.S
> new file mode 100644
> index 00000000..4186974f
> --- /dev/null
> +++ b/src/setjmp/loongarch64/longjmp.S
> @@ -0,0 +1,37 @@
> +.global    _longjmp
> +.global    longjmp
> +.type    _longjmp,@function
> +.type    longjmp,@function
> +_longjmp:
> +longjmp:
> +	move   $t5, $a0
> +	move   $a0, $a1
> +
> +	bne     $a0, $zero, 1f

bnez

> +	addi.d  $a0, $a0, 1
> +
> +1:
> +	ld.d    $ra, $t5, 0
> +	ld.d    $sp, $t5, 8
> +	ld.d    $r21,$t5, 16
> +	ld.d    $fp, $t5, 24
> +	ld.d    $s0, $t5, 32
> +	ld.d    $s1, $t5, 40
> +	ld.d    $s2, $t5, 48
> +	ld.d    $s3, $t5, 56
> +	ld.d    $s4, $t5, 64
> +	ld.d    $s5, $t5, 72
> +	ld.d    $s6, $t5, 80
> +	ld.d    $s7, $t5, 88
> +	ld.d    $s8, $t5, 96
> +#ifndef __loongarch64_soft_float

`__loongarch_soft_float`.

> +	fld.d   $fs0, $t5, 104
> +	fld.d   $fs1, $t5, 112
> +	fld.d   $fs2, $t5, 120
> +	fld.d   $fs3, $t5, 128
> +	fld.d   $fs4, $t5, 136
> +	fld.d   $fs5, $t5, 144
> +	fld.d   $fs6, $t5, 152
> +	fld.d   $fs7, $t5, 160
> +#endif
> +	jr      $ra
> diff --git a/src/setjmp/loongarch64/setjmp.S b/src/setjmp/loongarch64/setjmp.S
> new file mode 100644
> index 00000000..f3bb7c70
> --- /dev/null
> +++ b/src/setjmp/loongarch64/setjmp.S
> @@ -0,0 +1,34 @@
> +.global    __setjmp
> +.global    _setjmp
> +.global    setjmp
> +.type    __setjmp,@function
> +.type    _setjmp,@function
> +.type    setjmp,@function
> +__setjmp:
> +_setjmp:
> +setjmp:
> +	st.d    $ra, $a0, 0
> +	st.d    $sp, $a0, 8
> +	st.d    $r21,$a0, 16
> +	st.d    $fp, $a0, 24
> +	st.d    $s0, $a0, 32
> +	st.d    $s1, $a0, 40
> +	st.d    $s2, $a0, 48
> +	st.d    $s3, $a0, 56
> +	st.d    $s4, $a0, 64
> +	st.d    $s5, $a0, 72
> +	st.d    $s6, $a0, 80
> +	st.d    $s7, $a0, 88
> +	st.d    $s8, $a0, 96
> +#ifndef __loongarch64_soft_float

`__loongarch_soft_float`.

> +	fst.d   $fs0, $a0, 104
> +	fst.d   $fs1, $a0, 112
> +	fst.d   $fs2, $a0, 120
> +	fst.d   $fs3, $a0, 128
> +	fst.d   $fs4, $a0, 136
> +	fst.d   $fs5, $a0, 144
> +	fst.d   $fs6, $a0, 152
> +	fst.d   $fs7, $a0, 160
> +#endif
> +	xor     $a0, $a0, $a0

move $a0, $zero

> +	jr      $ra
> diff --git a/src/signal/loongarch64/restore.s b/src/signal/loongarch64/restore.s
> new file mode 100644
> index 00000000..bca17eb7
> --- /dev/null
> +++ b/src/signal/loongarch64/restore.s
> @@ -0,0 +1,10 @@
> +.global	__restore_rt
> +.global	__restore
> +.hidden __restore_rt
> +.hidden __restore
> +.type	__restore_rt,@function
> +.type	__restore,@function
> +__restore_rt:
> +__restore:
> +	li.w	$a7, 139
> +	syscall	0
> diff --git a/src/signal/loongarch64/sigsetjmp.s b/src/signal/loongarch64/sigsetjmp.s
> new file mode 100644
> index 00000000..abd96c62
> --- /dev/null
> +++ b/src/signal/loongarch64/sigsetjmp.s
> @@ -0,0 +1,29 @@
> +.global sigsetjmp
> +.global __sigsetjmp
> +.type   sigsetjmp,@function
> +.type   __sigsetjmp,@function
> +sigsetjmp:
> +__sigsetjmp:
> +        move $t5, $a0
> +        move $t6, $a1
> +
> +        # comparing save mask with 0, if equals to 0 then
> +        # sigsetjmp is equal to setjmp.
> +        beq     $t6, $zero, 1f
> +        st.d    $ra, $t5, 168
> +
> +        # save base of got so that we can use it later
> +        # once we return from 'longjmp'
> +        la.global  $t8, setjmp
> +        jirl    $ra, $t8, 0
> +
> +        move    $a1, $a0        # Return from 'setjmp' or 'longjmp'
> +        ld.d    $ra, $t5, 168   # Restore ra of sigsetjmp
> +        move    $a0, $t5
> +
> +.hidden __sigsetjmp_tail
> +        la.global  $t8, __sigsetjmp_tail
> +        jr         $t8
> +1:
> +        la.global  $t8, setjmp
> +        jr         $t8

This is crazy complicated compared to the riscv port; why is the juggling between a0/a1 and t5/t6 necessary?

> diff --git a/src/thread/loongarch64/__set_thread_area.s b/src/thread/loongarch64/__set_thread_area.s
> new file mode 100644
> index 00000000..6fd09a92
> --- /dev/null
> +++ b/src/thread/loongarch64/__set_thread_area.s
> @@ -0,0 +1,7 @@
> +.global	__set_thread_area
> +.hidden __set_thread_area
> +.type	__set_thread_area,@function
> +__set_thread_area:
> +	move	$tp, $a0
> +	ori	$a0, $zero, 0

`move $a0, $zero` is enough.

> +	jr	$ra
> diff --git a/src/thread/loongarch64/__unmapself.s b/src/thread/loongarch64/__unmapself.s
> new file mode 100644
> index 00000000..3a44b850
> --- /dev/null
> +++ b/src/thread/loongarch64/__unmapself.s
> @@ -0,0 +1,7 @@
> +.global	__unmapself
> +.type	__unmapself, @function
> +__unmapself:
> +	li.d	$a7, 215                        # call munmap
> +	syscall	0
> +	li.d	$a7, 93                         # call exit
> +	syscall	0
> diff --git a/src/thread/loongarch64/clone.s b/src/thread/loongarch64/clone.s
> new file mode 100644
> index 00000000..86e69cfa
> --- /dev/null
> +++ b/src/thread/loongarch64/clone.s
> @@ -0,0 +1,47 @@
> +#__clone(func, stack, flags, arg, ptid, tls, ctid)
> +#         a0,    a1,   a2,    a3,  a4,  a5,   a6
> +# sys_clone3(struct clone_args *cl_args, size_t size)
> +#                                 a0             a1
> +
> +.global	__clone
> +.hidden __clone
> +.type	__clone,@function
> +__clone:
> +	# Save function pointer and argument pointer on new thread stack
> +	addi.d	$a1, $a1, -16
> +	st.d	$a0, $a1, 0	# save function pointer
> +	st.d	$a3, $a1, 8	# save argument pointer
> +
> +	li.d	$t0, ~0x004000ff  # mask CSIGNAL and CLONE_DETACHED
> +	and	$t1, $a2, $t0     # cl_args.flags
> +	li.d	$t0, 0x000000ff   # CSIGNAL
> +	and	$t2, $a2, $t0     # cl_args.exit_signal
> +
> +	bstrins.d $sp, $zero, 3, 0  # align stack to 16 bytes
> +	addi.d	$sp, $sp, -88   # struct clone_args
> +	st.d	$t1, $sp, 0     # flags
> +	st.d	$a4, $sp, 8     # pidfd
> +	st.d	$a6, $sp, 16    # child_tid
> +	st.d	$a4, $sp, 24    # parent_tid
> +	st.d	$t2, $sp, 32    # exit_signal
> +	st.d	$a1, $sp, 40    # stack
> +	st.d	$zero, $sp, 48  # stack_size
> +	st.d	$a5, $sp, 56    # tls
> +	st.d	$zero, $sp, 64  # set_tid
> +	st.d	$zero, $sp, 72  # set_tid_size
> +	st.d	$zero, $sp, 80  # cgroup
> +
> +	move	$a0, $sp
> +	li.d	$a1, 88
> +	li.d	$a7, 435	# __NR_clone3
> +	syscall 0		# call clone3
> +
> +	beqz	$a0, 1f		# whether child process
> +	addi.d	$sp, $sp, 88
> +	jr	$ra	        # parent process return
> +1:
> +	ld.d	$t8, $sp, 0     # function pointer
> +	ld.d	$a0, $sp, 8     # argument pointer
> +	jirl	$ra, $t8, 0     # call the user's function
> +	li.d	$a7, 93
> +	syscall	0		# child process exit

Do we still want to implement clone functionality entirely with clone3, given the
previous conclusion of keeping clone(2) for existing sandboxes to continue to work?

> diff --git a/src/thread/loongarch64/syscall_cp.s b/src/thread/loongarch64/syscall_cp.s
> new file mode 100644
> index 00000000..9f57d254
> --- /dev/null
> +++ b/src/thread/loongarch64/syscall_cp.s
> @@ -0,0 +1,29 @@
> +.global	__cp_begin
> +.hidden	__cp_begin
> +.global	__cp_end
> +.hidden	__cp_end
> +.global	__cp_cancel
> +.hidden	__cp_cancel
> +.hidden	__cancel
> +.global	__syscall_cp_asm
> +.hidden	__syscall_cp_asm
> +.type	__syscall_cp_asm,@function
> +
> +__syscall_cp_asm:
> +__cp_begin:
> +	ld.w	$a0, $a0, 0
> +	bnez	$a0, __cp_cancel
> +	move	$t8, $a1     # reserve system call number
> +	move	$a0, $a2
> +	move	$a1, $a3
> +	move	$a2, $a4
> +	move	$a3, $a5
> +	move	$a4, $a6
> +	move	$a5, $a7
> +	move	$a7, $t8
> +	syscall	0
> +__cp_end:
> +	jr	$ra
> +__cp_cancel:
> +	la.local $t8, __cancel
> +	jr	$t8
> -- 
> 2.36.0
> 
> 

Powered by blists - more mailing lists

Confused about mailing lists and their use? Read about mailing lists on Wikipedia and check out these guidelines on proper formatting of your messages.