Message-ID: <20180411201057.GO3094@brightrain.aerifal.cx>
Date: Wed, 11 Apr 2018 16:10:57 -0400
From: Rich Felker <dalias@...c.org>
To: musl@...ts.openwall.com
Subject: Re: [RFC PATCH] reduce severity of ldso reclaim_gaps hack

On Wed, Jun 28, 2017 at 07:52:43PM +0300, Alexander Monakov wrote:
> ---
> In part inspired by an earlier patch by Timo Teräs.
>
> Without --gc-sections, __malloc_donate is dead weight in static
> linking. This can be solved by moving it to a separate translation
> unit (but for that matter, realloc is much heavier and can be
> factored out too).
>
> Extra jumps from 'free' to 'bin_chunk' and their non-contiguous code
> layout are admittedly a problem, but I believe the impact is
> minuscule. As a minor consolation, entries to bin_chunk from malloc
> now require fewer branches, and hopefully the new code is easier to
> follow.

Sorry this lingered so long without review. I think it's good aside
from a few minor details, but I do have a few questions to make sure I
understand it, and that will hopefully reveal any problems if I'm
wrong.

>  ldso/dynlink.c      | 15 +++-------
>  src/malloc/malloc.c | 71 ++++++++++++++++++++++++++++++++++++-----------------
>  2 files changed, 52 insertions(+), 34 deletions(-)
>
> diff --git a/ldso/dynlink.c b/ldso/dynlink.c
> index d20dbd87..985592ce 100644
> --- a/ldso/dynlink.c
> +++ b/ldso/dynlink.c
> @@ -475,23 +475,14 @@ static void redo_lazy_relocs()
>  /* A huge hack: to make up for the wastefulness of shared libraries
>   * needing at least a page of dirty memory even if they have no global
>   * data, we reclaim the gaps at the beginning and end of writable maps
> - * and "donate" them to the heap by setting up minimal malloc
> - * structures and then freeing them. */
> + * and "donate" them to the heap. */
>
>  static void reclaim(struct dso *dso, size_t start, size_t end)
>  {
> -	size_t *a, *z;
> +	void __malloc_donate(char *, char *);
>  	if (start >= dso->relro_start && start < dso->relro_end) start = dso->relro_end;
>  	if (end >= dso->relro_start && end < dso->relro_end) end = dso->relro_start;
> -	start = start + 6*sizeof(size_t)-1 & -4*sizeof(size_t);
> -	end = (end & -4*sizeof(size_t)) - 2*sizeof(size_t);
> -	if (start>end || end-start < 4*sizeof(size_t)) return;
> -	a = laddr(dso, start);
> -	z = laddr(dso, end);
> -	a[-2] = 1;
> -	a[-1] = z[0] = end-start + 2*sizeof(size_t) | 1;
> -	z[1] = 1;
> -	free(a);
> +	__malloc_donate(laddr(dso, start), laddr(dso, end));
>  }

Looks ok.

> static void reclaim_gaps(struct dso *dso)
> diff --git a/src/malloc/malloc.c b/src/malloc/malloc.c
> index ef4c7368..b56fdaa2 100644
> --- a/src/malloc/malloc.c
> +++ b/src/malloc/malloc.c
> @@ -299,6 +299,8 @@ static int pretrim(struct chunk *self, size_t n, int i, int j)
>  	return 1;
>  }
>
> +static void bin_chunk(struct chunk *);
> +
>  static void trim(struct chunk *self, size_t n)
>  {
>  	size_t n1 = CHUNK_SIZE(self);
> @@ -314,7 +316,7 @@ static void trim(struct chunk *self, size_t n)
>  	next->psize = n1-n | C_INUSE;
>  	self->csize = n | C_INUSE;
>
> -	free(CHUNK_TO_MEM(split));
> +	bin_chunk(split);
>  }

Looks ok.
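(For readers outside the musl tree: the header fields the old
reclaim() hack forged come straight from malloc.c's chunk layout.
Paraphrasing the relevant definitions from musl at the time, as a
reading aid only, not part of the patch:

	struct chunk {
		size_t psize, csize;       /* prev chunk's size, own size */
		struct chunk *next, *prev; /* used only while free, in a bin */
	};

	#define OVERHEAD        (2*sizeof(size_t))  /* psize/csize header */
	#define SIZE_ALIGN      (4*sizeof(size_t))  /* size/alignment granule */
	#define C_INUSE         ((size_t)1)         /* in-use flag, low bit */
	#define CHUNK_SIZE(c)   ((c)->csize & -2)
	#define MEM_TO_CHUNK(p) ((struct chunk *)((char *)(p) - OVERHEAD))
	#define CHUNK_TO_MEM(c) ((void *)((char *)(c) + OVERHEAD))

With that layout, a[-2]/a[-1] in the removed code were the psize/csize
of a fake in-use chunk and z[0]/z[1] the header of a fake in-use
successor, z[0] doubling as the freed chunk's footer, so free(a)
handed the gap to the allocator.)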
> void *malloc(size_t n)
> @@ -410,10 +412,9 @@ void *realloc(void *p, size_t n)
>  		size_t newlen = n + extra;
>  		/* Crash on realloc of freed chunk */
>  		if (extra & 1) a_crash();
> -		if (newlen < PAGE_SIZE && (new = malloc(n))) {
> -			memcpy(new, p, n-OVERHEAD);
> -			free(p);
> -			return new;
> +		if (newlen < PAGE_SIZE && (new = malloc(n-OVERHEAD))) {
> +			n0 = n;
> +			goto copy_free_ret;
>  		}
>  		newlen = (newlen + PAGE_SIZE-1) & -PAGE_SIZE;
>  		if (oldlen == newlen) return p;
> @@ -456,34 +457,20 @@ copy_realloc:
>  	/* As a last resort, allocate a new chunk and copy to it. */
>  	new = malloc(n-OVERHEAD);
>  	if (!new) return 0;
> +copy_free_ret:
>  	memcpy(new, p, n0-OVERHEAD);
>  	free(CHUNK_TO_MEM(self));
>  	return new;
>  }

This looks like an independent change that fixes a separate
slight-overallocation bug. Is it related?

> -void free(void *p)
> +static void bin_chunk(struct chunk *self)
>  {
> -	struct chunk *self, *next;
> +	struct chunk *next = NEXT_CHUNK(self);
>  	size_t final_size, new_size, size;
>  	int reclaim=0;
>  	int i;
>
> -	if (!p) return;
> -
> -	self = MEM_TO_CHUNK(p);
> -
> -	if (IS_MMAPPED(self)) {
> -		size_t extra = self->psize;
> -		char *base = (char *)self - extra;
> -		size_t len = CHUNK_SIZE(self) + extra;
> -		/* Crash on double free */
> -		if (extra & 1) a_crash();
> -		__munmap(base, len);
> -		return;
> -	}
> -
>  	final_size = new_size = CHUNK_SIZE(self);
> -	next = NEXT_CHUNK(self);
>
>  	/* Crash on corrupted footer (likely from buffer overflow) */
>  	if (next->psize != self->csize) a_crash();
> @@ -544,3 +531,43 @@ void free(void *p)
>
>  	unlock_bin(i);
>  }
> +
> +#if defined(__GNUC__)
> +__attribute__((cold))
> +#endif

This can't be used as-is. It would need a configure check (the
attribute is gcc-version dependent) and the __cold__ spelling if we
want it, but unless there's a strong reason to include it I'd just
omit it.

> +static void unmap_chunk(struct chunk *self)
> +{
> +	size_t extra = self->psize;
> +	char *base = (char *)self - extra;
> +	size_t len = CHUNK_SIZE(self) + extra;
> +	/* Crash on double free */
> +	if (extra & 1) a_crash();
> +	__munmap(base, len);
> +}
> +
> +void free(void *p)
> +{
> +	if (!p) return;
> +
> +	struct chunk *self = MEM_TO_CHUNK(p);
> +
> +	if (IS_MMAPPED(self))
> +		unmap_chunk(self);
> +	else
> +		bin_chunk(self);
> +}
> +
> +void __malloc_donate(char *start, char *end)
> +{

It's confusing to see that this is equivalent to what's being removed
from dynlink.c, but I think it may be correct.

> +	if (end - start < 2*OVERHEAD + SIZE_ALIGN) return;

This does not seem like a sufficient bound to ensure the block is
usable, but the next check after alignment may cover it.

> +	start += OVERHEAD + SIZE_ALIGN - 1;
> +	start -= (uintptr_t)start & (SIZE_ALIGN - 1);

This looks correct.

> +	end -= (uintptr_t)end & (SIZE_ALIGN - 1);

This does not subtract the OVERHEAD, but I think it's just a
notational difference; my "end" pointed to the end of the chunk to be
freed, and your "end" points to the beginning of the next
(nonexistent) chunk. The MEM_TO_CHUNK below should compensate.

> +	if (end - start < OVERHEAD + SIZE_ALIGN) return;

At this point, start and end both point just past a chunk header,
meaning they have to differ by a multiple of SIZE_ALIGN. I don't see
why OVERHEAD is needed here too. The check should be against
SIZE_ALIGN I think (although by alignment they're equivalent).

> +	if (end - start >= MMAP_THRESHOLD) return;

This does not seem necessary. Free chunks in the last bin can be
larger than MMAP_THRESHOLD; they're just broken up to satisfy
allocations. Of course it's unlikely to happen anyway.
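(To make the rounding concrete before the final hunk: a standalone
sketch that mirrors the patch's donate-window arithmetic on plain
integers, with OVERHEAD/SIZE_ALIGN as defined earlier; the addresses
are made-up examples and the MMAP_THRESHOLD early-return is left out:

	#include <stdint.h>
	#include <stdio.h>

	#define OVERHEAD   (2*sizeof(size_t))
	#define SIZE_ALIGN (4*sizeof(size_t))

	/* Same arithmetic as __malloc_donate, on integers, so the
	 * invariants can be checked in isolation. */
	static void donate_window(uintptr_t start, uintptr_t end)
	{
		if (end - start < 2*OVERHEAD + SIZE_ALIGN) return;
		start += OVERHEAD + SIZE_ALIGN - 1;
		start -= start & (SIZE_ALIGN - 1); /* round "mem" pointer up */
		end -= end & (SIZE_ALIGN - 1);     /* round end down */
		if (end - start < OVERHEAD + SIZE_ALIGN) return;
		/* start and end are now SIZE_ALIGN-aligned mem pointers, so
		 * end-start is a multiple of SIZE_ALIGN and the chunk header
		 * at start-OVERHEAD lies inside the original window. */
		printf("chunk mem=%#zx size=%zu\n",
		       (size_t)start, (size_t)(end - start));
	}

	int main(void)
	{
		/* 64-bit example (OVERHEAD=16, SIZE_ALIGN=32): 0x1001 rounds
		 * up to 0x1020, 0x10f7 rounds down to 0x10e0, size 192. */
		donate_window(0x1001, 0x10f7);
		return 0;
	}

Running it prints "chunk mem=0x1020 size=192", and dropping either
guard shows which degenerate windows each one rejects.)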
> +	struct chunk *c = MEM_TO_CHUNK(start), *n = MEM_TO_CHUNK(end);
> +	c->psize = n->csize = C_INUSE;
> +	c->csize = n->psize = (end - start) | C_INUSE;
> +	bin_chunk(c);
> +}

This looks ok, and the "end-start" here vs my
"end-start+2*sizeof(size_t)" matches the notational difference
described above.

Rich