Follow @Openwall on Twitter for new release announcements and other news
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20170715195541.3136-2-nwmcsween@gmail.com>
Date: Sat, 15 Jul 2017 19:55:37 +0000
From: Nathan McSween <nwmcsween@...il.com>
To: musl@...ts.openwall.com
Cc: Nathan McSween <nwmcsween@...il.com>
Subject: [RFC PATCH 1/5] string: vectorize various functions

Text sizes with gcc version 6.3.0
Before | After
 49  |   153  memcmp.lo
 38  |   294  memrchr.lo
120  |   376  strcasecmp.lo
 96  |   305  strncmp.lo
155  |   454  strncasecmp.lo
 96  |   305  strncmp.lo
861  |  1887  (TOTALS)

The size increase is mainly due to vectorizing but also to both the
__may_alias__ attribute and the conditional before the alignment loop for
functions that take a size argument.

The str[n]casecmp macros are of particular interest and work via tagging the
high bit for any character in the range of A-Z and rhs said high bit two to give
a-z.
---
 src/string/memcmp.c      | 28 ++++++++++++++++++++++++----
 src/string/memrchr.c     | 29 +++++++++++++++++++++++------
 src/string/strcasecmp.c  | 34 ++++++++++++++++++++++++++++++----
 src/string/strcmp.c      | 27 ++++++++++++++++++++++++---
 src/string/strncasecmp.c | 37 +++++++++++++++++++++++++++++++++----
 src/string/strncmp.c     | 28 ++++++++++++++++++++++++++--
 6 files changed, 160 insertions(+), 23 deletions(-)

diff --git a/src/string/memcmp.c b/src/string/memcmp.c
index bdbce9f0..7d2e8077 100644
--- a/src/string/memcmp.c
+++ b/src/string/memcmp.c
@@ -1,8 +1,28 @@
 #include <string.h>
+#include <stdint.h>
 
-int memcmp(const void *vl, const void *vr, size_t n)
+#define aliases __attribute__((__may_alias__))
+
+int memcmp(const void *_l, const void *_r, size_t n)
 {
-	const unsigned char *l=vl, *r=vr;
-	for (; n && *l == *r; n--, l++, r++);
-	return n ? *l-*r : 0;
+	const unsigned char *l = _l, *r = _r;
+	const size_t aliases *wl, aliases *wr;
+
+	if (n < sizeof(size_t) * 3 || ((uintptr_t)l | (uintptr_t)r)
+	    & sizeof(size_t) - 1) goto bytewise;
+
+	for (; (uintptr_t)l & sizeof(size_t) - 1 && *l == *r; l++, r++, n--);
+	if ((uintptr_t)l & sizeof(size_t) -1) return *l - *r;
+
+	wl = (const void *)l;
+	wr = (const void *)r;
+	for (; n >= sizeof(size_t) && *wl == *wr
+	     ; wl++, wr++, n -= sizeof(size_t));
+	l = (const void *)wl;
+	r = (const void *)wr;
+
+bytewise:
+	for (; n && *l == *r; l++, r++, n--);
+
+	return n ? *l - *r : 0;
 }
diff --git a/src/string/memrchr.c b/src/string/memrchr.c
index a78e9d6c..165c422b 100644
--- a/src/string/memrchr.c
+++ b/src/string/memrchr.c
@@ -1,12 +1,29 @@
 #include <string.h>
-#include "libc.h"
+#include <stdint.h>
 
-void *__memrchr(const void *m, int c, size_t n)
+#define byte_repeat(x) ((size_t)~0 / 0xff * (x))
+#define word_has_zero(x) (((x) - byte_repeat(0x01)) & ~(x) & byte_repeat(0x80))
+#define weak_alias(o, n) extern __typeof__(o) n __attribute__((weak, alias(#o)))
+
+void *__memrchr(const void *_s, int i, size_t n)
 {
-	const unsigned char *s = m;
-	c = (unsigned char)c;
-	while (n--) if (s[n]==c) return (void *)(s+n);
-	return 0;
+	i = (unsigned char )i;
+	const unsigned char *s = _s;
+	const size_t wi = byte_repeat(i);
+
+	if (n < sizeof(size_t) * 3) goto bytewise;
+
+	for (; (uintptr_t)(s + n) & sizeof(size_t) - 1 && s[n - 1] != i; n--);
+	if ((uintptr_t)(s + n) & sizeof(size_t) - 1) return (void *)(s + n - 1);
+
+	for (; n >= sizeof(size_t) &&
+	       !word_has_zero(*(size_t *)(s + n - sizeof(size_t)) ^ wi)
+	     ; n -= sizeof(size_t));
+
+bytewise:
+	for (; n && s[n - 1] != i; n--);
+
+	return n ? (void *)(s + n - 1) : 0;
 }
 
 weak_alias(__memrchr, memrchr);
diff --git a/src/string/strcasecmp.c b/src/string/strcasecmp.c
index 3cd5f2d0..07d56c5d 100644
--- a/src/string/strcasecmp.c
+++ b/src/string/strcasecmp.c
@@ -1,15 +1,41 @@
 #include <strings.h>
 #include <ctype.h>
-#include "libc.h"
+#include <stdint.h>
+
+#define aliases __attribute__((__may_alias__))
+#define byte_repeat(x) ((size_t)~0 / 0xff * (x))
+#define word_has_zero(x) (((x) - byte_repeat(0x01)) & ~(x) & byte_repeat(0x80))
+#define word_tag_range(x, t, s, e) ((((x) | byte_repeat(t)) \
+- byte_repeat(s) & ~((x) | byte_repeat(t)) - \
+byte_repeat((e) + 1)) & (~(x) & byte_repeat(t)))
+#define word_to_lower(x) ((x) - (word_tag_range((x), 'a', 'z', 0x80)) >> 2)
+#define weak_alias(o, n) extern __typeof__(o) n __attribute__((weak, alias(#o)))
 
 int strcasecmp(const char *_l, const char *_r)
 {
-	const unsigned char *l=(void *)_l, *r=(void *)_r;
-	for (; *l && *r && (*l == *r || tolower(*l) == tolower(*r)); l++, r++);
+	const unsigned char *l = (const void *)_l, *r = (const void *)_r;
+	const size_t aliases *wl, aliases *wr;
+
+	if (((uintptr_t)l | (uintptr_t)r) & sizeof(size_t) - 1) goto bytewise;
+
+	for (;(uintptr_t)l & sizeof(size_t) - 1 && *l
+	     && tolower(*l) == tolower(*r); l++, r++);
+	if ((uintptr_t)l & sizeof(size_t) - 1) return tolower(*l) - tolower(*r);
+
+	wl = (const void *)l;
+	wr = (const void *)r;
+	for (; !word_has_zero(*wl) && word_to_lower(*wl) == word_to_lower(*wr)
+	     ; wl++, wr++);
+	l = (const void *)wl;
+	r = (const void *)wr;
+
+bytewise:
+	for (; *l && tolower(*l) == tolower(*r); l++, r++);
+
 	return tolower(*l) - tolower(*r);
 }
 
-int __strcasecmp_l(const char *l, const char *r, locale_t loc)
+int __strcasecmp_l(const char *l, const char *r, locale_t unused)
 {
 	return strcasecmp(l, r);
 }
diff --git a/src/string/strcmp.c b/src/string/strcmp.c
index 808bd837..a2f358a9 100644
--- a/src/string/strcmp.c
+++ b/src/string/strcmp.c
@@ -1,7 +1,28 @@
 #include <string.h>
+#include <stdint.h>
 
-int strcmp(const char *l, const char *r)
+#define aliases __attribute__((__may_alias__))
+#define byte_repeat(x) ((size_t)~0 / 0xff * (x))
+#define word_has_zero(x) (((x) - byte_repeat(0x01)) & ~(x) & byte_repeat(0x80))
+
+int strcmp(const char *_l, const char *_r)
 {
-	for (; *l==*r && *l; l++, r++);
-	return *(unsigned char *)l - *(unsigned char *)r;
+	const unsigned char *l = (const void *)_l, *r = (const void *)_r;
+	const size_t aliases *wl, aliases *wr;
+
+	if (((uintptr_t)l | (uintptr_t)r) & sizeof(size_t) - 1) goto bytewise;
+
+	for (; (uintptr_t)l & sizeof(size_t) - 1 && *l && *l == *r; l++, r++);
+	if ((uintptr_t)l & sizeof(size_t) - 1) return *l - *r;
+
+	wl = (const void *)l;
+	wr = (const void *)r;
+	for (; !word_has_zero(*wl) && *wl == *wr; wl++, wr++);
+	l = (const void *)wl;
+	r = (const void *)wr;
+
+bytewise:
+	for (; *l && *l == *r; l++, r++);
+
+	return *l - *r;
 }
diff --git a/src/string/strncasecmp.c b/src/string/strncasecmp.c
index 3af53008..e1a9454a 100644
--- a/src/string/strncasecmp.c
+++ b/src/string/strncasecmp.c
@@ -1,16 +1,45 @@
 #include <strings.h>
 #include <ctype.h>
-#include "libc.h"
+#include <stdint.h>
+
+#define aliases __attribute__((__may_alias__))
+#define byte_repeat(x) ((size_t)~0 / 0xff * (x))
+#define word_has_zero(x) (((x) - byte_repeat(0x01)) & ~(x) & byte_repeat(0x80))
+#define word_tag_range(x, t, s, e) ((((x) | byte_repeat(t)) \
+- byte_repeat(s) & ~((x) | byte_repeat(t)) - \
+byte_repeat((e) + 1)) & (~(x) & byte_repeat(t)))
+#define word_to_tolower(x) ((x) - (word_tag_range((x), 'a', 'z', 0x80)) >> 2)
+#define weak_alias(o, n) extern __typeof__(o) n __attribute__((weak, alias(#o)))
 
 int strncasecmp(const char *_l, const char *_r, size_t n)
 {
-	const unsigned char *l=(void *)_l, *r=(void *)_r;
+	const unsigned char *l = (const void *)_l, *r = (const void *)_r;
+	const size_t aliases *wl, aliases *wr;
+
 	if (!n--) return 0;
-	for (; *l && *r && n && (*l == *r || tolower(*l) == tolower(*r)); l++, r++, n--);
+
+	if (n < sizeof(size_t) * 3 || ((uintptr_t)l | (uintptr_t)r)
+	    & sizeof(size_t) - 1) goto bytewise;
+
+	for (;(uintptr_t)l & sizeof(size_t) - 1 && *l
+	     && tolower(*l) == tolower(*r); l++, r++, n--);
+	if ((uintptr_t)l & sizeof(size_t) - 1) return tolower(*l) - tolower(*r);
+
+	wl = (const void *)l;
+	wr = (const void *)r;
+	for (; n >= sizeof(size_t) && !word_has_zero(*wl)
+	       && word_to_tolower(*wl) == word_to_tolower(*wr)
+	     ; wl++, wr++, n -= sizeof(size_t));
+	l = (const void *)wl;
+	r = (const void *)wr;
+
+bytewise:
+	for (; n && *l && tolower(*l) == tolower(*r); l++, r++, n--);
+
 	return tolower(*l) - tolower(*r);
 }
 
-int __strncasecmp_l(const char *l, const char *r, size_t n, locale_t loc)
+int __strncasecmp_l(const char *l, const char *r, size_t n, locale_t unused)
 {
 	return strncasecmp(l, r, n);
 }
diff --git a/src/string/strncmp.c b/src/string/strncmp.c
index e228843f..667498e1 100644
--- a/src/string/strncmp.c
+++ b/src/string/strncmp.c
@@ -1,9 +1,33 @@
 #include <string.h>
+#include <stdint.h>
+
+#define aliases __attribute__((__may_alias__))
+#define byte_repeat(x) ((size_t)~0 / 0xff * (x))
+#define word_has_zero(x) (((x) - byte_repeat(0x01)) & ~(x) & byte_repeat(0x80))
 
 int strncmp(const char *_l, const char *_r, size_t n)
 {
-	const unsigned char *l=(void *)_l, *r=(void *)_r;
+	const unsigned char *l = (const void *)_l, *r = (const void *)_r;
+	const size_t aliases *wl, aliases *wr;
+
 	if (!n--) return 0;
-	for (; *l && *r && n && *l == *r ; l++, r++, n--);
+
+	if (n < sizeof(size_t) * 3 || ((uintptr_t)l | (uintptr_t)r)
+	    & sizeof(size_t) - 1) goto bytewise;
+
+	for (; (uintptr_t)l & sizeof(size_t) - 1 && *l && *l == *r
+	     ; l++, r++, n--);
+	if ((uintptr_t)l & sizeof(size_t) - 1) return *l - *r;
+
+	wl = (const void *)l;
+	wr = (const void *)r;
+	for (; n >= sizeof(size_t) && !word_has_zero(*wl) && *wl == *wr
+	     ; wl++, wr++, n -= sizeof(size_t));
+	l = (const void *)wl;
+	r = (const void *)wr;
+
+bytewise:
+	for (; n && *l && *l == *r; l++, r++, n--);
+
 	return *l - *r;
 }
-- 
2.13.2

Powered by blists - more mailing lists

Confused about mailing lists and their use? Read about mailing lists on Wikipedia and check out these guidelines on proper formatting of your messages.