|
Message-ID: <20170812003107.GN1627@brightrain.aerifal.cx>
Date: Fri, 11 Aug 2017 20:31:07 -0400
From: Rich Felker <dalias@...c.org>
To: musl@...ts.openwall.com
Subject: Re: Re: Issues in mbsnrtowcs and wcsnrtombs

On Wed, Aug 09, 2017 at 08:57:27PM +0300, Mikhail Kremnyov wrote:
> --- ./src/regression/mbsnrtowcs-overread.c 1970-01-01 03:00:00.000000000 +0300
> +++ ./src/regression/mbsnrtowcs-overread.c 2017-08-09 20:20:29.472003066 +0300
> @@ -0,0 +1,45 @@
> +// mbsnrtowcs issue, reported in www.openwall.com/lists/musl/2017/07/18/3
> +#include <locale.h>
> +#include <stdio.h>
> +#include <string.h>
> +#include <wchar.h>
> +#include "test.h"
> +
> +int main(void)
> +{
> +    const char *const chr = "\u044B";

This should probably use \x to write out the UTF-8 rather than
assuming the compiler charset is UTF-8.

> +    const int chr_size = strlen(chr);
> +    // The passed length of the source string in bytes should be bigger than
> +    // 32*4 to force mbsnrtowcs to use the optimization based on mbsrtowcs.
> +    const int chr_count_to_convert = 1000;
> +    // Make sure that the source string has more characters after the passed
> +    // length.
> +    const int chr_count = chr_count_to_convert + 10;
> +
> +    char src[chr_count * chr_size + 1];
> +    // dest should also have extra space
> +    wchar_t dest[chr_count + 1];
> +    size_t r;
> +    const char *str_ptr = src;
> +    mbstate_t mbs;
> +
> +    for (int i = 0; i < chr_count; ++i)
> +    {
> +        memcpy(src + i * chr_size, chr, chr_size);
> +    }
> +    src[chr_count * chr_size] = 0;
> +
> +    setlocale(LC_CTYPE, "en_US.UTF-8");

I think this should use t_setutf8(), added in commit
defcb8d354e052f2d6ba230e7e2983546429a583, so that the logic for
finding a UTF-8 locale is centralized and not dependent on en_US.
> +
> +    memset(&mbs, 0, sizeof(mbs));
> +    r = mbsnrtowcs(dest, &str_ptr, chr_count_to_convert * chr_size,
> +                   sizeof(dest)/sizeof(dest[0]), &mbs);
> +
> +    if (r != chr_count_to_convert)
> +    {
> +        t_error("Expected to convert %d characters, but converted %d\n",
> +                chr_count_to_convert, r);
> +    }
> +
> +    return t_status;
> +}
> --- ./src/regression/wcsnrtombs_underread.c 1970-01-01 03:00:00.000000000 +0300
> +++ ./src/regression/wcsnrtombs_underread.c 2017-08-09 20:24:57.575995227 +0300
> @@ -0,0 +1,46 @@
> +// wcsnrtombs issue, reported in www.openwall.com/lists/musl/2017/07/18/3
> +#include <locale.h>
> +#include <stdio.h>
> +#include <string.h>
> +#include <wchar.h>
> +#include "test.h"
> +
> +#define TEST_CHR "\u044B"
> +
> +#define CAT_IMPL(x, y) x##y
> +#define CAT(x, y) CAT_IMPL(x, y)
> +
> +int main(void)
> +{
> +    const wchar_t *const chr = CAT(L, TEST_CHR);
> +    const int chr_len_in_utf_8 = strlen(TEST_CHR);
> +    const int chr_size = wcslen(chr);
> +    // The number of characters should be greater than 32 to force wcsnrtombs
> +    // to use the optimization based on wcsrtombs.
> +    const int chr_count = 1000;
> +    wchar_t src[chr_count];
> +    char dest[chr_count * 4];
> +    size_t r;
> +    const wchar_t *str_ptr = src;
> +    mbstate_t mbs;
> +
> +    for (int i = 0; i < chr_count; ++i)
> +    {
> +        memcpy(src + i, chr, sizeof(*chr));
> +    }
> +    src[chr_count] = 0;
> +
> +    setlocale(LC_CTYPE, "en_US.UTF-8");

Likewise.

> --- ./src/multibyte/mbsnrtowcs.c 2017-08-08 16:19:29.311584832 +0300
> +++ ./src/multibyte/mbsnrtowcs.c 2017-08-09 20:33:27.515980317 +0300

I haven't reviewed this part yet but it's on my radar. Thanks.

Rich
Powered by blists - more mailing lists
Confused about mailing lists and their use? Read about mailing lists on Wikipedia and check out these guidelines on proper formatting of your messages.