/*
 * Base64 lookup table, indexed by character code 0..127.
 *
 * Each entry is the 6-bit value of that character in the base64 alphabet
 * ('A'-'Z' -> 0..25, 'a'-'z' -> 26..51, '0'-'9' -> 52..61, '+' -> 62,
 * '/' -> 63), or -1 if the character is not part of the alphabet.
 *
 * This is the content the patch places in src/locale/base64.h and pulls
 * into iconv.c with #include "base64.h". The table has exactly 128
 * entries, so the only valid indices are 0 through 127.
 */
static const signed char base64[] = {
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,62,-1,-1,-1,63,
52,53,54,55,56,57,58,59,60,61,-1,-1,-1,-1,-1,-1,
-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,
15,16,17,18,19,20,21,22,23,24,25,-1,-1,-1,-1,-1,
-1,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,
41,42,43,44,45,46,47,48,49,50,51,-1,-1,-1,-1,-1,
};

/*
 * Decode n consecutive base64 characters starting at s.
 *
 * The characters are folded most-significant first: each one shifts the
 * accumulator left by 6 bits and ORs in its table value.
 *
 * Returns the accumulated value, or -1U as soon as a byte is found that
 * is not in the base64 alphabet (including any byte >= 128). With n == 0
 * nothing is read and 0 is returned.
 */
static unsigned base64d(const char *s, unsigned n)
{
	const unsigned char *p = (const unsigned char *)s;
	unsigned c, r = 0;
	while (n--) {
		c = *p++;
		/* The table covers 0..127 only; byte 0x80 must be rejected
		 * here rather than used as an index one past the end. */
		if (c >= 128 || base64[c] < 0) return -1U;
		r = r<<6|base64[c];
	}
	return r;
}
-319,6 +341,56 @@ size_t iconv(iconv_t cd, char **restrict in, size_t *restrict inb, char **restri } type = scd->state; continue; + case UTF_7: + l = 1; + if (c >= 128) goto ilseq; + if (!scd->state) { + if (!(utf7[c/32]&(1<state = 0; + if (c != '-') l = 0; + continue; + } + if (*inb < 2) goto starved; + d = *((unsigned char *)*in+1); + if (d >= 128) goto ilseq; + if (base64[d] < 0) { + k = scd->state>>1; + if (k == 0 || (base64[c]&(1<<6-k)-1)) goto ilseq; + scd->state = 0; + if (d == '-') l = 2; + continue; + } + l = 3+(6*3-(scd->state>>1)<16); + if (*inb < l+!scd->state) goto starved; + c = base64d(*in+!scd->state, l); + if (c == -1U) goto ilseq; + k = 6*l-(scd->state>>1)-16; + c = c>>k&0xffffu; + if (k) l--; else k = 6; + if ((unsigned)(c-0xdc00) < 0x400) goto ilseq; + if ((unsigned)(c-0xd800) < 0x400) { + k = 3+(6*3-(6-k)<16); + if (*inb < !scd->state+l+k) goto starved; + d = base64d(*in+!scd->state+l, k); + if (d == -1U) goto ilseq; + l += k; + k = 6*l-(scd->state>>1)-32; + d = d>>k&0xffffu; + if (k) l--; else k = 6; + if ((unsigned)(d-0xdc00) >= 0x400) goto ilseq; + c = ((c-0xd7c0)<<10) + (d-0xdc00); + } + l += !scd->state; + scd->state = (6-k)<<1|1; + break; case SHIFT_JIS: if (c < 128) break; if (c-0xa1 <= 0xdf-0xa1) { diff --git a/src/locale/utf7.h b/src/locale/utf7.h new file mode 100644 index 00000000..b04a7490 --- /dev/null +++ b/src/locale/utf7.h @@ -0,0 +1 @@ +9728,4294967295,4026531839,1073741823