|
Message-ID: <121cd121406c58342e2c22d839099668aff64b33.1684077278.git.nabijaczleweli@nabijaczleweli.xyz>
Date: Sun, 14 May 2023 17:17:36 +0200
From: наб <nabijaczleweli@...ijaczleweli.xyz>
Cc: musl@...ts.openwall.com
Subject: [PATCH v3 2/2] regex: increase TRE_CHAR_MAX and use it for NUL with
REG_STARTEND
The new TRE_CHAR_MAX (0x10ffff + 1) is one past the highest Unicode code
point, so this character cannot be named normally, much like the NUL it's
standing in for, but it can still be matched with catch-alls like . and [^].
This brings us to feature parity with NetBSD:
$ ./a.out '^a[^w]c$' # matching "a\0c"
0
1, 4; -1, -1
$ ./a.out '^a.c$'
0
1, 4; -1, -1
$ ./a.out '.c$'
0
2, 4; -1, -1
$ ./a.out '.*'
0
1, 4; -1, -1
$ sed -i 's/cdef/adef/' a.c
$ ./a.out '^\(a\).\1$' # matching "a\0a"
0
1, 4; 1, 2
---
src/regex/regexec.c | 2 +-
src/regex/tre.h | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/regex/regexec.c b/src/regex/regexec.c
index 763dde58..f493a703 100644
--- a/src/regex/regexec.c
+++ b/src/regex/regexec.c
@@ -60,7 +60,7 @@ tre_fill_pmatch(size_t nmatch, regmatch_t pmatch[], int cflags,
if (!max_len) { next_c = L'\0'; pos_add_next = 1; } \
else if ((pos_add_next = mbtowc(&next_c, str_byte, max_len)) <= 0) { \
if (pos_add_next < 0) { ret = REG_NOMATCH; goto error_exit; } \
- else { pos_add_next++; if (startend) next_c = -1; }; \
+ else { pos_add_next++; if (startend) next_c = TRE_CHAR_MAX; }; \
} \
str_byte += pos_add_next; \
} while (0)
diff --git a/src/regex/tre.h b/src/regex/tre.h
index 9aae851f..e913899a 100644
--- a/src/regex/tre.h
+++ b/src/regex/tre.h
@@ -50,7 +50,7 @@ typedef wchar_t tre_char_t;
/* Wide characters. */
typedef wint_t tre_cint_t;
-#define TRE_CHAR_MAX 0x10ffff
+#define TRE_CHAR_MAX (0x10ffff + 1)
#define tre_isalnum iswalnum
#define tre_isalpha iswalpha
--
2.30.2
Download attachment "signature.asc" of type "application/pgp-signature" (834 bytes)
Powered by blists - more mailing lists
Confused about mailing lists and their use? Read about mailing lists on Wikipedia and check out these guidelines on proper formatting of your messages.