musl - Re: [PATCH] mntent: deal with escaped whitespace in mtab and fstab

Follow @Openwall on Twitter for new release announcements and other news
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20230329170418.GD3630668@port70.net>
Date: Wed, 29 Mar 2023 19:04:18 +0200
From: Szabolcs Nagy <nsz@...t70.net>
To: Matthias Goergens <matthias.goergens@...il.com>
Cc: musl@...ts.openwall.com
Subject: Re: [PATCH] mntent: deal with escaped whitespace in mtab and
 fstab

* Matthias Goergens <matthias.goergens@...il.com> [2023-03-29 23:17:51 +0800]:

> >From glibc's documentation:
> 
> > Since fields in the mtab and fstab files are separated by whitespace,
> > octal  escapes  are  used  to  represent  the characters space (\040),
> > tab (\011), newline (\012), and backslash (\\) in those files when they
> > occur  in  one  of  the  four strings  in  a  mntent  structure.  The
> > routines addmntent() and getmntent() will convert from string
> > representation to escaped representation and back.  When converting
> > from  escaped  representation,  the sequence \134 is also converted to a
> > backslash.
> 
> This fixes the issue reported in https://www.openwall.com/lists/musl/2021/12/14/1
> 
> Please pardon the previous broken patch that I tried to send directly
> via gmail.
> 
> This is my first time contributing to musl.  Please point out any ways
> to improve.  I probably got a few things wrong?
> 
> Thanks!
> 
> Addendum: this is a new version.  The first one did not copy the final
> null-byte.
> ---
>  src/misc/mntent.c | 75 ++++++++++++++++++++++++++++++++++++++++++-----
>  1 file changed, 68 insertions(+), 7 deletions(-)
> 
> diff --git a/src/misc/mntent.c b/src/misc/mntent.c
> index d404fbe3..49f4e386 100644
> --- a/src/misc/mntent.c
> +++ b/src/misc/mntent.c
> @@ -1,3 +1,4 @@
> +#include <assert.h>
>  #include <stdio.h>
>  #include <string.h>
>  #include <mntent.h>
> @@ -20,6 +21,37 @@ int endmntent(FILE *f)
>  	return 1;
>  }
>  
> +static inline int decode1(char** in_buf, char** out_buf,  const char* from, const char to) {
> +	if(strncmp(from, *in_buf, strlen(from)) == 0) {
> +		*(*out_buf)++ = to;
> +		*in_buf += strlen(from);
> +		return 1;
> +	}
> +	return 0;
> +}
> +
> +static inline char* decode(char* buf) {
> +	assert(buf != NULL);
> +	char* read_cursor = buf;
> +	char* write_cursor = buf;
> +	while(*read_cursor) {
> +		// space
> +		decode1(&read_cursor, &write_cursor, "\\040", '\040')
> +		// tab
> +		|| decode1(&read_cursor, &write_cursor, "\\011", '\011')
> +		// newline
> +		|| decode1(&read_cursor, &write_cursor, "\\012", '\012')
> +		// backslash
> +		|| decode1(&read_cursor, &write_cursor, "\\134", '\134')
> +		|| decode1(&read_cursor, &write_cursor, "\\\\", '\\')
> +		// default: copy char as is.
> +		|| (*write_cursor++ = *read_cursor++);
> +	}

this will try matching at every position, but
we know that each escape starts with \ so this
can be optimized.

i expect inlining decode1 will increase code size
unnecessarily. you can avoid that e.g. like

for (;;) {
	...
	const char *replace =
		"\040" "040" "\0"
		"\011" "011" "\0"
		...
		"\\" "\\" "\0"
		"\\" "";
	for (;;) {
		char c = *replace++;
		size_t n = strlen(replace);
		if (strncmp(pr, replace, n) == 0) {
			*pw++ = c;
			pr += n;
			break;
		}
		replace += n+1;
	}
	...
	char *next = __strchrnul(pr, '\\');
	memmove(pw, pr, next-pr);
	...
}

> +	*write_cursor = *read_cursor;
> +
> +	return buf;
> +}
> +
>  struct mntent *getmntent_r(FILE *f, struct mntent *mnt, char *linebuf, int buflen)
>  {
>  	int n[8], use_internal = (linebuf == SENTINEL);
> @@ -55,10 +87,10 @@ struct mntent *getmntent_r(FILE *f, struct mntent *mnt, char *linebuf, int bufle
>  	linebuf[n[5]] = 0;
>  	linebuf[n[7]] = 0;
>  
> -	mnt->mnt_fsname = linebuf+n[0];
> -	mnt->mnt_dir = linebuf+n[2];
> -	mnt->mnt_type = linebuf+n[4];
> -	mnt->mnt_opts = linebuf+n[6];
> +	mnt->mnt_fsname = decode(linebuf+n[0]);
> +	mnt->mnt_dir = decode(linebuf+n[2]);
> +	mnt->mnt_type = decode(linebuf+n[4]);
> +	mnt->mnt_opts = decode(linebuf+n[6]);
>  
>  	return mnt;
>  }
> @@ -69,12 +101,41 @@ struct mntent *getmntent(FILE *f)
>  	return getmntent_r(f, &mnt, SENTINEL, 0);
>  }
>  
> +static inline int write_string(FILE *f, const char* str)
> +{
> +	char c;
> +	int error_occured = 0;
> +	while(str && !error_occured && (c = *str++) != 0) {
> +		if(c == '\040') // space
> +			error_occured = fprintf(f, "%s", "\\040") < 0;
> +		else if (c == '\011') // tab
> +			error_occured = fprintf(f, "%s", "\\011") < 0;
> +		else if (c == '\012') // newline
> +			error_occured = fprintf(f, "%s", "\\012") < 0;
> +		else if (c == '\\')
> +			error_occured = fprintf(f, "%s", "\\\\") < 0;
> +		else
> +			error_occured = fprintf(f, "%c", c) < 0;
> +	}
> +	return error_occured;
> +}
> +
>  int addmntent(FILE *f, const struct mntent *mnt)
>  {
>  	if (fseek(f, 0, SEEK_END)) return 1;
> -	return fprintf(f, "%s\t%s\t%s\t%s\t%d\t%d\n",
> -		mnt->mnt_fsname, mnt->mnt_dir, mnt->mnt_type, mnt->mnt_opts,
> -		mnt->mnt_freq, mnt->mnt_passno) < 0;

the original code has a bug here: fprintf returns number of
chars printed but addmntent returns 0 to indicate success.

i think this should be fixed independently of the new feature.
(separate patch).

> +	flockfile(f);
> +	int result =
> +		write_string(f, mnt->mnt_fsname)
> +		|| (fprintf(f, "\t") < 0)
> +		|| write_string(f, mnt->mnt_dir)
> +		|| (fprintf(f, "\t") < 0)
> +		|| write_string(f, mnt->mnt_type)
> +		|| (fprintf(f, "\t") < 0)
> +		|| write_string(f, mnt->mnt_opts)
> +		|| (fprintf(f, "\t%d\t%d\n",
> +			mnt->mnt_freq, mnt->mnt_passno) < 0);
> +	funlockfile(f);

this looks a bit ugly (recursive locks and char-by-char
printf) but i don't see anything wrong (again i would not
use inline).

> +	return result;
>  }
>  
>  char *hasmntopt(const struct mntent *mnt, const char *opt)
> -- 
> 2.40.0
Confused about mailing lists and their use? Read about mailing lists on Wikipedia and check out these guidelines on proper formatting of your messages.