Follow @Openwall on Twitter for new release announcements and other news
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <96BC8712-13B0-41C6-852B-ABF791B9A20A@denkimushi.com>
Date: Sun, 28 Aug 2022 00:11:30 +0100
From: Dunk <dunk@...kimushi.com>
To: musl@...ts.openwall.com
Cc: info@...ordhuis.nl
Subject: Re: [PATCH 1/1] resubmitting old statx patch with changes


> On 27 Aug 2022, at 19:10, Rich Felker <dalias@...c.org> wrote:
> 
> On Sat, Aug 27, 2022 at 03:57:52PM +0100, Duncan Bellamy wrote:
>> ---
>> include/sys/stat.h | 49 ++++++++++++++++++++++++++++++++++++++++++++++
>> src/stat/fstatat.c | 27 +------------------------
>> src/stat/statx.c   | 35 +++++++++++++++++++++++++++++++++
>> 3 files changed, 85 insertions(+), 26 deletions(-)
>> create mode 100644 src/stat/statx.c
>> 
>> diff --git a/include/sys/stat.h b/include/sys/stat.h
>> index 10d446c4..81424462 100644
>> --- a/include/sys/stat.h
>> +++ b/include/sys/stat.h
>> @@ -5,6 +5,7 @@ extern "C" {
>> #endif
>> 
>> #include <features.h>
>> +#include <stdint.h>
> 
> This can't be done unconditionally. Either the #include directive
> needs to be within the preprocessor conditional where statx is, or the
> individual __NEED_uint32_t etc need to be defined conditional on the
> same condition before bits/alltypes.h is included. The latter is
> probably better here.

I didn’t understand the later so did the former.

>> #define __NEED_dev_t
>> #define __NEED_ino_t
>> @@ -70,6 +71,54 @@ extern "C" {
>> #define UTIME_NOW  0x3fffffff
>> #define UTIME_OMIT 0x3ffffffe
>> 
>> +#if defined(_GNU_SOURCE)
>> +#define STATX_TYPE 1U
>> +#define STATX_MODE 2U
>> +#define STATX_NLINK 4U
>> +#define STATX_UID 8U
>> +#define STATX_GID 0x10U
>> +#define STATX_ATIME 0x20U
>> +#define STATX_MTIME 0x40U
>> +#define STATX_CTIME 0x80U
>> +#define STATX_INO 0x100U
>> +#define STATX_SIZE 0x200U
>> +#define STATX_BLOCKS 0x400U
>> +#define STATX_BASIC_STATS 0x7ffU
>> +#define STATX_BTIME 0x800U
>> +#define STATX_ALL 0xfffU
>> +
>> +struct statx_timestamp {
>> +    int64_t tv_sec;
>> +    uint32_t tv_nsec;
>> +    int32_t __pad;
>> +};
> 
> Minor nit but this could probably just be tv_nsec, __pad (both same
> type). This also eliminates the gratuitous need to expose the signed
> 32-bit type which is not used elsewhere.

changed

> I was looking at whether *all* of the types here could be replaced
> with equivalent ones that don't require exposing extra types, but the
> ones documented as uint64_t probably can't. All the uint32_t in
> principle could just be unsigned, and tv_sec time_t, but...
> 
>> +
>> +struct statx {
>> +    uint32_t stx_mask;
>> +    uint32_t stx_blksize;
>> +    uint64_t stx_attributes;
>> +    uint32_t stx_nlink;
>> +    uint32_t stx_uid;
>> +    uint32_t stx_gid;
>> +    uint16_t stx_mode;
>> +    uint16_t __pad0[1];
>> +    uint64_t stx_ino;
>> +    uint64_t stx_size;
>> +    uint64_t stx_blocks;
>> +    uint64_t stx_attributes_mask;
>> +    struct statx_timestamp stx_atime;
>> +    struct statx_timestamp stx_btime;
>> +    struct statx_timestamp stx_ctime;
>> +    struct statx_timestamp stx_mtime;
>> +    uint32_t stx_rdev_major;
>> +    uint32_t stx_rdev_minor;
>> +    uint32_t stx_dev_major;
>> +    uint32_t stx_dev_minor;
>> +    uint64_t __pad1[14];
>> +};
> 
> stx_ino etc. should not be assuming ino_t etc. are defined the same as
> uint64_t rather than something like unsigned long long. So we probably
> just go with writing the types as documented...
> 
>> +
>> +int statx(int, const char *__restrict, int, unsigned, struct statx *__restrict);
>> +#endif
>> int stat(const char *__restrict, struct stat *__restrict);
>> int fstat(int, struct stat *);
>> int lstat(const char *__restrict, struct stat *__restrict);
>> diff --git a/src/stat/fstatat.c b/src/stat/fstatat.c
>> index 74c51cf5..5b2248a9 100644
>> --- a/src/stat/fstatat.c
>> +++ b/src/stat/fstatat.c
>> @@ -7,36 +7,11 @@
>> #include <sys/sysmacros.h>
>> #include "syscall.h"
>> 
>> -struct statx {
>> -    uint32_t stx_mask;
>> -    uint32_t stx_blksize;
>> -    uint64_t stx_attributes;
>> -    uint32_t stx_nlink;
>> -    uint32_t stx_uid;
>> -    uint32_t stx_gid;
>> -    uint16_t stx_mode;
>> -    uint16_t pad1;
>> -    uint64_t stx_ino;
>> -    uint64_t stx_size;
>> -    uint64_t stx_blocks;
>> -    uint64_t stx_attributes_mask;
>> -    struct {
>> -        int64_t tv_sec;
>> -        uint32_t tv_nsec;
>> -        int32_t pad;
>> -    } stx_atime, stx_btime, stx_ctime, stx_mtime;
>> -    uint32_t stx_rdev_major;
>> -    uint32_t stx_rdev_minor;
>> -    uint32_t stx_dev_major;
>> -    uint32_t stx_dev_minor;
>> -    uint64_t spare[14];
>> -};
>> -
>> static int fstatat_statx(int fd, const char *restrict path, struct stat *restrict st, int flag)
>> {
>>    struct statx stx;
>> 
>> -    int ret = __syscall(SYS_statx, fd, path, flag, 0x7ff, &stx);
>> +    int ret = __syscall(SYS_statx, fd, path, flag, STATX_BASIC_STATS, &stx);
>>    if (ret) return ret;
>> 
>>    *st = (struct stat){
> 
> This can be a separate change from adding statx, but it's easy to
> separate when merging.

moved to separate commit

>> diff --git a/src/stat/statx.c b/src/stat/statx.c
>> new file mode 100644
>> index 00000000..ff49841b
>> --- /dev/null
>> +++ b/src/stat/statx.c
>> @@ -0,0 +1,35 @@
>> +#define _GNU_SOURCE
>> +#include <sys/stat.h>
>> +#include <syscall.h>
>> +#include <sys/sysmacros.h>
>> +#include <errno.h>
>> +
>> +int statx(int dirfd, const char *restrict path, int flags, unsigned mask, struct statx *restrict stx)
>> +{
>> +    int ret = syscall(SYS_statx, dirfd, path, flags, mask, stx);
>> +    if (ret == ENOSYS) {
>> +        struct stat st;
>> +        fstatat(dir_fd, path, &st, flags);
>> +        stx.stx_dev_major = major(st.st_dev);
>> +        stx.stx_dev_minor = minor(st.st_dev);
>> +        stx.stx_ino = st.st_ino;
>> +        stx.stx_mode = st.st_mode;
>> +        stx.stx_nlink = st.st_nlink;
>> +        stx.stx_uid = st.st_uid;
>> +        stx.stx_gid = st.st_gid;
>> +        stx.stx_size = st.st_size;
>> +        stx.stx_blksize = st.st_blksize;
>> +        stx.stx_blocks = st.st_blocks;
>> +        stx.stx_atime.tv_sec = st.st_atim.tv_sec;
>> +        stx.stx_atime.tv_nsec = st.st_atim.tv_nsec;
>> +        stx.stx_mtime.tv_sec = st.st_mtim.tv_sec;
>> +        stx.stx_mtime.tv_nsec = st.st_mtim.tv_nsec;
>> +        stx.stx_ctime.tv_sec = st.st_ctim.tv_sec;
>> +        stx.stx_ctime.tv_nsec = st.st_ctim.tv_nsec;
>> +        stx.stx_btime = 0;
>> +        stx.stx_mask = STATX_BASIC_STATS;
>> +        ret = EINVAL;
>> +    }
>> +
>> +    return ret;
>> +}
>> -- 
>> 2.34.1
> 
> I think this wasn't tested because it won't even compile (stx. instead
> of stx->). It's also wrongly assuming syscall() returned a positive
> error code rather than -1 on error and setting errno, but you should
> be using the __syscall form that returns a negated error code, then
> __syscall_ret at the end. I would write it as:
> 
>    int ret = __syscall(SYS_statx, dirfd, path, flags, mask, stx);
>    if (ret != -ENOSYS) return __syscall_ret(ret);
> 
> then the fallback case outside a conditional. The fallback can't
> assume fstatat succeeded like you're doing either. It needs to return
> -1 immediately if fstatat fails.

Yes it wasn’t compiled, I copied the old patch and code from `fstatat_statx` for copying the data.  Compiled it locally now.

> 
> All of this code needs to be conditional on SYS_fstatat existing, as
> new archs don't have it and only have SYS_statx.

Changed to this

> One annoying thing about this but I don't know a good fix; maybe you
> have an idea: if SYS_statx fails with ENOSYS, the call to fstatat will
> immediately perform SYS_statx again, only to have it fail, then
> finally fall back to SYS_fstatat or other syscalls after two failures.
> I'm not sure if it makes sense to expose __fstatat_kstat
> libc-internally (hidden) to use here or do something else; that's
> kinda getting into more complexity around this than I'd like for the
> sake of optimizing old systems.

I don’t really understand the fallback mechanism, or why statx would fail and fstatat work on some systems.  Maybe use defines at compile time so either one is used or the other? 

> Rich

Duncan

Powered by blists - more mailing lists

Confused about mailing lists and their use? Read about mailing lists on Wikipedia and check out these guidelines on proper formatting of your messages.