|
Message-ID: <20190906214209.GK9017@brightrain.aerifal.cx> Date: Fri, 6 Sep 2019 17:42:09 -0400 From: Rich Felker <dalias@...c.org> To: musl@...ts.openwall.com Subject: Re: [PATCH v4 2/2] glob: implement GLOB_ALTDIRFUNC et al On Fri, Sep 06, 2019 at 09:40:22PM +0200, Ismael Luceno wrote: > diff --git a/include/glob.h b/include/glob.h > index 0ff70bdfeef2..26ea65cd1097 100644 > --- a/include/glob.h > +++ b/include/glob.h > @@ -16,7 +16,16 @@ typedef struct { > char **gl_pathv; > size_t gl_offs; > int __dummy1; > + > +#if defined(_BSD_SOURCE) || defined(_GNU_SOURCE) > + void (*gl_closedir)(void *); > + struct dirent *(*gl_readdir)(void *); > + void *(*gl_opendir)(const char *); > + int (*gl_lstat)(const char *__restrict, struct stat *__restrict); > + int (*gl_stat)(const char *__restrict, struct stat *__restrict); > +#else > void *__dummy2[5]; > +#endif > } glob_t; I think you need to forward-declare at least struct stat, and possibly also struct dirent, so that they refer to the same types as the declarations elsewhere. At least the argument list is a different scope and declares a *different* struct stat shadowing the file-scope one if the file-scope one is not already declared. With the above change and: > diff --git a/src/regex/glob.c b/src/regex/glob.c > index 7780e21ee113..4f329f053fe0 100644 > --- a/src/regex/glob.c > +++ b/src/regex/glob.c > @@ -1,7 +1,7 @@ > #define _BSD_SOURCE ^^^^^^^^^^^ combined, every use of g->[something] in glob() is now undefined behavior if the caller was using a standard profile where the __dummy2-containing version of glob_t above is the one that's used. That's because it's accessing one struct type via an lvalue of a different struct type. > +#include <sys/stat.h> > #include <glob.h> > #include <fnmatch.h> > -#include <sys/stat.h> Unrelated: this change is gratuitous. > @@ -224,6 +232,10 @@ static int expand_tilde(char **pat, char *buf, size_t *pos) > return 0; > } > > +static void wrap_closedir(void *p) { __closedir(p); } > +static struct dirent *wrap_readdir(void *d) { return __readdir(d); } > +static void *wrap_opendir(const char *path) { return __opendir(path); } > + > int glob(const char *restrict pat, int flags, int (*errfunc)(const char *path, int err), glob_t *restrict g) > { > struct match head = { .next = NULL }, *tail = &head; > @@ -231,9 +243,24 @@ int glob(const char *restrict pat, int flags, int (*errfunc)(const char *path, i > size_t offs = (flags & GLOB_DOOFFS) ? g->gl_offs : 0; ^^^^^^^^^^ These are the accesses that become UB. To make them well-defined, they'd need to be something like G(g,gl_offs), where #define G(g,m) (*(1?(void*)((char*)(g)+offsetof(glob_t,m)):&((glob_t){0}).m) is the cleanest abstraction I can think of for the monstrosity needed to access "something layout-compatible with glob_t, but not glob_t, at address g". Imposing this ugliness to avoid UB is part of why I'm hesitant about this functionality. I think there should be good motivation for it if we want to go forward with it. > int error = 0; > char buf[PATH_MAX]; > + struct dirfn dirfn; > > if (!errfunc) errfunc = ignore_err; > > + if (flags & GLOB_ALTDIRFUNC) { > + dirfn.closedir = g->gl_closedir; > + dirfn.readdir = g->gl_readdir; > + dirfn.opendir = g->gl_opendir; > + dirfn.lstat = g->gl_lstat; > + dirfn.stat = g->gl_stat; > + } else { > + dirfn.closedir = wrap_closedir; > + dirfn.readdir = wrap_readdir; > + dirfn.opendir = wrap_opendir; > + dirfn.lstat = lstat; > + dirfn.stat = stat; > + } > + > if (!(flags & GLOB_APPEND)) { > g->gl_offs = offs; > g->gl_pathc = 0; > @@ -249,7 +276,7 @@ int glob(const char *restrict pat, int flags, int (*errfunc)(const char *path, i > if ((flags & (GLOB_TILDE | GLOB_TILDE_CHECK)) && *p == '~') > error = expand_tilde(&s, buf, &pos); > if (!error) > - error = do_glob(buf, pos, 0, s, flags, errfunc, &tail); > + error = do_glob(buf, pos, 0, s, flags, errfunc, &dirfn, &tail); > free(p); > } Not necessary for correctness, but if we're adding a context structure that gets passed down to each level of do_glob, we might as well put the errfunc in it too, and possibly also flags and &tail in it, so that we reduce the amount of state at each recursion frame rather than increasing it. It's likely this would make more sense to do as a separate patch though, to keep the diffs clear. Rich
Powered by blists - more mailing lists
Confused about mailing lists and their use? Read about mailing lists on Wikipedia and check out these guidelines on proper formatting of your messages.