Follow @Openwall on Twitter for new release announcements and other news
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20160319164624.GW21636@brightrain.aerifal.cx>
Date: Sat, 19 Mar 2016 12:46:24 -0400
From: Rich Felker <dalias@...c.org>
To: musl@...ts.openwall.com
Subject: Re: [PATCH 1/3] overhaul environment functions

Finally some review; sorry for the delay:

On Sun, Mar 13, 2016 at 09:53:48PM +0300, Alexander Monakov wrote:
> Rewrite environment access functions to slim down code, fix bugs and
> avoid invoking undefined behavior.
> 
> * avoid using int-typed iterators where size_t would be correct;

Very good.

> * use strncmp instead of memcmp consistently;

Agreed; removing assumptions about memcmp's implementation is a good
idea.

> * tighten prologues by invoking __strchrnul;

Generally I prefer to avoid impl-internal functions when a public one
with do without significant additional cost, but these functions
already have a fair amount of impl-internal stuff themselves so I'm
rather indifferent here.

BTW strcspn would be nice and idiomaic when the result you want is a
count anyway, rather than a pointer, but it would add a little bit to
the code size these functions depend on.

> * handle NULL environ.

OK.

> putenv:
> * handle "=value" input via unsetenv too (will return -1/EINVAL);

What happens now? It adds an env var with zero-length name?

> * rewrite and simplify __putenv; fix the leak caused by failure to
>   deallocate entry added by preceding setenv when called from putenv.

Sounds good.

> setenv:
> * move management of libc-allocated entries to this translation unit,
>   and use no-op weak symbols in putenv/unsetenv;

:)

> * no longer check for NULL first argument (per resolution to
>   Austin Group issue #185);

I suspect we'll get complaints about this change, but I'm not
necessarily against it. However since the old standard specified this
case we should probably not break software written to the old
standard.

> unsetenv:
> * rewrite; this fixes UB caused by testing a free'd pointer against
>   NULL on entry to subsequent loops.

Good.

> Not changed:
> Failure to extend allocation tracking array (previously __env_map, now
> env_alloced) is ignored rather than causing to report -1/ENOMEM to the
> caller; the worst-case consequence is leaking this allocation when it
> is removed or replaced in a subsequent environment access.

I'd like to fix this too but it can wait. I think it should be trivial
to do as a small patch on top.

> Initially UB in unsetenv was reported by Alexander Cherepanov.
> Using a weak alias to avoid pulling in malloc via unsetenv was
> suggested by Rich Felker.
> ---
>  src/env/getenv.c   |  15 ++++----
>  src/env/putenv.c   | 105 ++++++++++++++++++++++++-----------------------------
>  src/env/setenv.c   |  40 +++++++++++++-------
>  src/env/unsetenv.c |  58 ++++++++++++++---------------
>  4 files changed, 108 insertions(+), 110 deletions(-)
>  rewrite src/env/putenv.c (88%)
>  rewrite src/env/unsetenv.c (77%)
> 
> diff --git a/src/env/getenv.c b/src/env/getenv.c
> index 00c1bce..cf34672 100644
> --- a/src/env/getenv.c
> +++ b/src/env/getenv.c
> @@ -2,13 +2,14 @@
>  #include <string.h>
>  #include "libc.h"
>  
> +char *__strchrnul(const char *, int);
> +
>  char *getenv(const char *name)
>  {
> -	int i;
> -	size_t l = strlen(name);
> -	if (!__environ || !*name || strchr(name, '=')) return NULL;
> -	for (i=0; __environ[i] && (strncmp(name, __environ[i], l)
> -		|| __environ[i][l] != '='); i++);
> -	if (__environ[i]) return __environ[i] + l+1;
> -	return NULL;
> +	size_t l = __strchrnul(name, '=') - name;
> +	if (l && !name[l] && __environ)
> +		for (char **e = __environ; *e; e++)
> +			if (!strncmp(name, *e, l) && l[*e] == '=')

l[*e], nice. :-P

> +				return *e + l+1;
> +	return 0;
>  }
> diff --git a/src/env/putenv.c b/src/env/putenv.c
> dissimilarity index 88%
> index 7153042..39a71be 100644
> --- a/src/env/putenv.c
> +++ b/src/env/putenv.c
> @@ -1,58 +1,47 @@
> -#include <stdlib.h>
> -#include <string.h>
> -
> -extern char **__environ;
> -char **__env_map;
> -
> -int __putenv(char *s, int a)
> -{
> -	int i=0, j=0;
> -	char *z = strchr(s, '=');
> -	char **newenv = 0;
> -	char **newmap = 0;
> -	static char **oldenv;
> -
> -	if (!z) return unsetenv(s);
> -	if (z==s) return -1;
> -	for (; __environ[i] && memcmp(s, __environ[i], z-s+1); i++);
> -	if (a) {
> -		if (!__env_map) {
> -			__env_map = calloc(2, sizeof(char *));
> -			if (__env_map) __env_map[0] = s;
> -		} else {
> -			for (; __env_map[j] && __env_map[j] != __environ[i]; j++);
> -			if (!__env_map[j]) {
> -				newmap = realloc(__env_map, sizeof(char *)*(j+2));
> -				if (newmap) {
> -					__env_map = newmap;
> -					__env_map[j] = s;
> -					__env_map[j+1] = NULL;
> -				}
> -			} else {
> -				free(__env_map[j]);
> -				__env_map[j] = s;
> -			}
> -		}
> -	}
> -	if (!__environ[i]) {
> -		newenv = malloc(sizeof(char *)*(i+2));
> -		if (!newenv) {
> -			if (a && __env_map) __env_map[j] = 0;
> -			return -1;
> -		}
> -		memcpy(newenv, __environ, sizeof(char *)*i);
> -		newenv[i] = s;
> -		newenv[i+1] = 0;
> -		__environ = newenv;
> -		free(oldenv);
> -		oldenv = __environ;
> -	}
> -
> -	__environ[i] = s;
> -	return 0;
> -}
> -
> -int putenv(char *s)
> -{
> -	return __putenv(s, 0);
> -}
> +#include <stdlib.h>
> +#include <string.h>
> +#include "libc.h"
> +
> +char *__strchrnul(const char *, int);
> +
> +static void dummy(char *p, char *r) {}
> +weak_alias(dummy, __env_change);
> +
> +int __putenv(char *s, size_t l, char *r)
> +{
> +	size_t i=0;
> +	if (__environ)
> +		for (; __environ[i]; i++)
> +			if (!strncmp(__environ[i], s, l+1)) {
> +				char *tmp = __environ[i];
> +				__environ[i] = s;
> +				__env_change(tmp, r);
> +				return 0;
> +			}

As far as I can tell, this leaves multiple definitions in place. Am I
missing something? Maybe it's only unset and not replacement that's
safe against multiple-definition madness?

> +	static char **oldenv;
> +	char **newenv;
> +	if (__environ == oldenv) {
> +		newenv = realloc(oldenv, sizeof *newenv * (i+2));
> +		if (!newenv) goto oom;
> +	} else {
> +		newenv = malloc(sizeof *newenv * (i+2));
> +		if (!newenv) goto oom;
> +		if (i) memcpy(newenv, __environ, sizeof *newenv * i);
> +		free(oldenv);
> +	}

Rather than using malloc when __environ != oldenv, I think we should
use realloc on oldenv, so that we don't leak internally-allocated
environ arrays if the program repeatedly does environ=0 or calls your
new clearenv. Perhaps we should also store the allocated size and grow
it exponentially rather than assuming it's only the filled size and
calling realloc every time, but maybe it's actually better to resize
up/down. Do you have an opinion?

> diff --git a/src/env/setenv.c b/src/env/setenv.c
> index 76e8ee1..6984237 100644
> --- a/src/env/setenv.c
> +++ b/src/env/setenv.c
> @@ -2,29 +2,41 @@
>  #include <string.h>
>  #include <errno.h>
>  
> -int __putenv(char *s, int a);
> +char *__strchrnul(const char *, int);
> +int __putenv(char *, size_t, char *);
> +
> +void __env_change(char *p, char *r)
> +{
> +	static char **env_alloced;
> +	static size_t env_alloced_n;
> +	for (size_t i=0; i < env_alloced_n; i++)
> +		if (env_alloced[i] == p) {
> +			env_alloced[i] = r;
> +			free(p);
> +			return;
> +		}
> +	if (!r) return;
> +	char **new_ea = realloc(env_alloced, sizeof *new_ea * (env_alloced_n+1));
> +	if (!new_ea) return;
> +	new_ea[env_alloced_n++] = r;
> +	env_alloced = new_ea;
> +}

This could also be a candidate for exponential realloc strategy.

>  int setenv(const char *var, const char *value, int overwrite)
>  {
>  	char *s;
> -	int l1, l2;
> -
> -	if (!var || !*var || strchr(var, '=')) {
> +	size_t l1 = __strchrnul(var, '=') - var, l2;
> +	if (!l1 || var[l1]) {
>  		errno = EINVAL;
>  		return -1;
>  	}

As mentioned above we should probably keep the POSIX-old behavior of
accepting a null pointer and treating it as a diagnosed error.

>  	if (!overwrite && getenv(var)) return 0;
>  
> -	l1 = strlen(var);
>  	l2 = strlen(value);
>  	s = malloc(l1+l2+2);
> -	if (s) {
> -		memcpy(s, var, l1);
> -		s[l1] = '=';
> -		memcpy(s+l1+1, value, l2);
> -		s[l1+l2+1] = 0;
> -		if (!__putenv(s, 1)) return 0;
> -	}
> -	free(s);
> -	return -1;
> +	if (!s) return -1;
> +	memcpy(s, var, l1);
> +	s[l1] = '=';
> +	memcpy(s+l1+1, value, l2+1);
> +	return __putenv(s, l1, s);
>  }

This leaks when __putenv fails. It's no worse than before but should
probably be fixed.

> +#include <stdlib.h>
> +#include <string.h>
> +#include <errno.h>
> +#include "libc.h"
> +
> +char *__strchrnul(const char *, int);
> +
> +static void dummy(char *p, char *r) {}
> +weak_alias(dummy, __env_change);
> +
> +int unsetenv(const char *name)
> +{
> +	size_t l = __strchrnul(name, '=') - name;
> +	if (!l || name[l]) {
> +		errno = EINVAL;
> +		return -1;
> +	}
> +	if (!__environ) return 0;
> +	for (char **e = __environ; *e; e++)
> +		while (*e && !strncmp(name, *e, l) && l[*e] == '=') {
> +			char **ee = e, *tmp = *e;
> +			do *ee = *(ee+1);
> +			while (*++ee);
> +			__env_change(tmp, 0);
> +		}
> +	return 0;
> +}

I think this looks ok.

Rich

Powered by blists - more mailing lists

Confused about mailing lists and their use? Read about mailing lists on Wikipedia and check out these guidelines on proper formatting of your messages.