Follow @Openwall on Twitter for new release announcements and other news
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <871973dd402e10af32e48118663af9bfe3fa23b9.1682024413.git.nabijaczleweli@nabijaczleweli.xyz>
Date: Thu, 20 Apr 2023 23:04:03 +0200
From: наб <nabijaczleweli@...ijaczleweli.xyz>
Cc: musl@...ts.openwall.com
Subject: [PATCH 2/2] regex: increase TRE_CHAR_MAX and use it for NUL with
 REG_STARTEND

The NUL character cannot be named normally, but it can be matched with
catch-alls like . and [^].

This brings us to feature parity with NetBSD:
	$ ./a.out '^a[^w]c$'  # matching "a\0c"
	0
	1, 4; -1, -1
	$ ./a.out '^a.c$'
	0
	1, 4; -1, -1
	$ ./a.out '.c$'
	0
	2, 4; -1, -1
	$ ./a.out '.*'
	0
	1, 4; -1, -1

	$ sed -i 's/cdef/adef/' a.c
	$ ./a.out '^\(a\).\1$'  # matching "a\0a"
	0
	1, 4; 1, 2
---
Please keep me in CC, as I'm not subscribed.

I haven't encountered any issues with this. TRE_CHAR_MAX seems to describe the
"domain of characters from GET_NEXT_WCHAR()" rather than the
"real characters in the current locale's encoding",
so expanding the domain with a special character for NUL seems fine.

 src/regex/regexec.c | 2 +-
 src/regex/tre.h     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/regex/regexec.c b/src/regex/regexec.c
index 2a2bded5..f09fdae1 100644
--- a/src/regex/regexec.c
+++ b/src/regex/regexec.c
@@ -60,7 +60,7 @@ tre_fill_pmatch(size_t nmatch, regmatch_t pmatch[], int cflags,
     if(!max_len) { next_c = '\0'; pos_add_next = 1; }                         \
     else if ((pos_add_next = mbtowc(&next_c, str_byte, max_len)) <= 0) {      \
         if (pos_add_next < 0) { ret = REG_NOMATCH; goto error_exit; }         \
-        else { pos_add_next++; if (startend) next_c = -1; };                  \
+        else { pos_add_next++; if (startend) next_c = TRE_CHAR_MAX; };        \
     }                                                                         \
     str_byte += pos_add_next;                                                 \
   } while (0)
diff --git a/src/regex/tre.h b/src/regex/tre.h
index 9aae851f..e913899a 100644
--- a/src/regex/tre.h
+++ b/src/regex/tre.h
@@ -50,7 +50,7 @@ typedef wchar_t tre_char_t;
 
 /* Wide characters. */
 typedef wint_t tre_cint_t;
-#define TRE_CHAR_MAX 0x10ffff
+#define TRE_CHAR_MAX (0x10ffff + 1)
 
 #define tre_isalnum iswalnum
 #define tre_isalpha iswalpha
-- 
2.30.2

Download attachment "signature.asc" of type "application/pgp-signature" (834 bytes)

Powered by blists - more mailing lists

Confused about mailing lists and their use? Read about mailing lists on Wikipedia and check out these guidelines on proper formatting of your messages.