|
Message-ID: <20170212023422.GE1520@brightrain.aerifal.cx> Date: Sat, 11 Feb 2017 21:34:22 -0500 From: Rich Felker <dalias@...c.org> To: musl@...ts.openwall.com Subject: Re: Re: a bug in bindtextdomain() and strip '.UTF-8' On Sat, Feb 11, 2017 at 02:00:56PM +0800, He X wrote: > --- a/src/locale/dcngettext.c 2017-02-06 14:39:17.860482624 +0000 > +++ b/src/locale/dcngettext.c 2017-02-06 14:39:17.860482624 +0000 > @@ -100,7 +100,8 @@ > size_t map_size; > void *volatile plural_rule; > volatile int nplurals; > - char name[]; > + struct binding *binding; > + struct __locale_map *lm; > }; As stated in the reply to message body, I think you need the category in the keying too, because there can be different .mo files loaded depending on which category was requested. > static char *dummy_gettextdomain() > @@ -120,58 +122,87 @@ > struct msgcat *p; > struct __locale_struct *loc = CURRENT_LOCALE; > const struct __locale_map *lm; > - const char *dirname, *locname, *catname; > - size_t dirlen, loclen, catlen, domlen; > + size_t domlen; > + struct binding *q; > > if ((unsigned)category >= LC_ALL) goto notrans; > > if (!domainname) domainname = __gettextdomain(); > > domlen = strnlen(domainname, NAME_MAX+1); > if (domlen > NAME_MAX) goto notrans; > > - dirname = gettextdir(domainname, &dirlen); > - if (!dirname) goto notrans; > + for (q=bindings; q; q=q->next) > + if (!strcmp(q->domainname, domainname) && q->active) > + break; > + if (!q) goto notrans; Looks ok. I had said this should be a function but it really doesn't need to be; it's plenty simple inline. > lm = loc->cat[category]; > if (!lm) { > notrans: > return (char *) ((n == 1) ? msgid1 : msgid2); > } > - locname = lm->name; > - > - catname = catnames[category]; > - catlen = catlens[category]; > - loclen = strlen(locname); > - > - size_t namelen = dirlen+1 + loclen+1 + catlen+1 + domlen+3; > - char name[namelen+1], *s = name; > - > - memcpy(s, dirname, dirlen); > - s[dirlen] = '/'; > - s += dirlen + 1; > - memcpy(s, locname, loclen); > - s[loclen] = '/'; > - s += loclen + 1; > - memcpy(s, catname, catlen); > - s[catlen] = '/'; > - s += catlen + 1; > - memcpy(s, domainname, domlen); > - s[domlen] = '.'; > - s[domlen+1] = 'm'; > - s[domlen+2] = 'o'; > - s[domlen+3] = 0; > > for (p=cats; p; p=p->next) > - if (!strcmp(p->name, name)) > + if (p->binding == q && p->lm == lm) > break; && p->cat == category > if (!p) { > + const char *dirname, *locname, *catname; > + size_t dirlen, loclen, catlen; > void *old_cats; > size_t map_size; > + > + dirname = q->dirname; > + locname = lm->name; > + catname = catnames[category]; > + > + dirlen = q->dirlen; > + loclen = strlen(locname); > + catlen = catlens[category]; Now that these are only computed once rather than per-call, optimizing out strlen is probably not worthwhile anymore, but it doesn't really hurt either. Not something you need to change, just a comment. > + > + size_t namelen = dirlen+1 + loclen+1 + catlen+1 + domlen+3; > + char name[namelen+1], *s = name; > + char *str = name; > + > + memcpy(s, dirname, dirlen); > + s[dirlen] = '/'; > + s += dirlen + 1; > + memcpy(s, locname, loclen); > + s[loclen] = '/'; > + s += loclen + 1; > +skip_loc: > + memcpy(s, catname, catlen); > + s[catlen] = '/'; > + s += catlen + 1; > + memcpy(s, domainname, domlen); > + s[domlen] = '.'; > + s[domlen+1] = 'm'; > + s[domlen+2] = 'o'; > + s[domlen+3] = 0; Actually, now that this code is not a hot path, it should just be using snprintf to construct the pathname, I think. It would be a lot simpler and easier to ensure correctness. > + > const void *map = __map_file(name, &map_size); > - if (!map) goto notrans; > + if (!map) { > + if (s = strchr(name+dirlen+1, '@')) { > + *s++ = '/'; > + goto skip_loc;; > + } > + if ( str && (s = strchr(name+dirlen+1, '_')) && (s < strchr(name+dirlen+1, '/')) ) { > + if (str = strchr(locname, '@')) { > + loclen += locname - str; > + memcpy(s, str, loclen); > + s[loclen] = '/'; > + s += loclen + 1; > + str = 0; > + goto skip_loc; > + } else { > + *s++ = '/'; > + goto skip_loc; > + } > + } > + goto notrans; > + } Using snprintf should also make it easy to get rid of the goto/retry logic here, perhaps even with a 4-iteration loop and array of which format modifications happen on each iteration. > p = calloc(sizeof *p + namelen + 1, 1); > if (!p) { > __munmap((void *)map, map_size); > goto notrans; > @@ -209,7 +209,6 @@ > } > p->map = map; > p->map_size = map_size; > - memcpy(p->name, name, namelen+1); > do { > old_cats = cats; > p->next = old_cats; > --- a/src/locale/locale_map.c 2017-02-06 14:39:17.797148750 +0000 > +++ b/src/locale/locale_map.c 2017-02-06 14:39:17.797148750 +0000 > @@ -49,8 +49,8 @@ > } > > /* Limit name length and forbid leading dot or any slashes. */ > - for (n=0; n<LOCALE_NAME_MAX && val[n] && val[n]!='/'; n++); > - if (val[0]=='.' || val[n]) val = "C.UTF-8"; > + for (n=0; n<LOCALE_NAME_MAX && val[n] && val[n]!='/' && val[n]!='.'; n++); > + if (val[0]=='.' || (val[n] && val[n]!='.')) val = "C.UTF-8"; > int builtin = (val[0]=='C' && !val[1]) > || !strcmp(val, "C.UTF-8") > || !strcmp(val, "POSIX"); This looks ok but might still need some tweaks. Should an input like "zh_CN.GBK" get treated as "zh_CN" (thus outputting UTF-8 that might appear as junk on the user's terminal) or as "C" (no localization) with only ASCII characters (safe for the user's terminal), or even cause setlocale to fail and return an error so that the application can decide what to do? These are not technical comments on your patch but policy matters the community should weigh in on. Rich
Powered by blists - more mailing lists
Confused about mailing lists and their use? Read about mailing lists on Wikipedia and check out these guidelines on proper formatting of your messages.