|
|
Message-ID: <20260522022955.2999203-1-jtunney@gmail.com>
Date: Thu, 21 May 2026 19:29:36 -0700
From: Justine Tunney <jtunney@...il.com>
To: musl@...ts.openwall.com
Cc: Justine Tunney <jtunney@...il.com>
Subject: [PATCH] Make qsort 50% faster
This change makes musl qsort go 50% faster on common sizes by avoiding
memcpy calls. On x86-64 with cc -Os it can be as much as 2x faster. No
changes have been made to the smoothsort algorithm itself. This simple
patch brings musl much closer to other C libraries in terms of sorting
perf, while maintaining smoothsort's highly desirable properties, e.g.
small size, stack safety, signal safety, no malloc dependency, etc.
The cost is +670 bytes of .text (on x86-64) to specialize cycle() for
widths 4, 8, 12, 16, 20, 24, 28, and 32 (gcc, without stack protector).
But if you want to be like OpenBSD's qsort and only specialize 4 and 8,
then you're looking at a +140 byte increase. I chose the upper bound of
what's practical; after 32, gcc stops inlining memcpy cleanly.
Further analysis and benchmark numbers are available in my repo:
https://github.com/jart/musl-qsort-speedup
---
src/stdlib/qsort.c | 83 ++++++++++++++++++++++++++++++++++++++--------
1 file changed, 70 insertions(+), 13 deletions(-)
diff --git a/src/stdlib/qsort.c b/src/stdlib/qsort.c
index 28607450..8af8b09d 100644
--- a/src/stdlib/qsort.c
+++ b/src/stdlib/qsort.c
@@ -20,6 +20,7 @@
*/
/* Minor changes by Rich Felker for integration in musl, 2011-04-27. */
+/* Some optimizations added by Justine Tunney for musl, 2026-05-21. */
/* Smoothsort, an adaptive variant of Heapsort. Memory usage: O(1).
Run time: Worst case O(n log n), close to O(n) in the mostly-sorted case. */
@@ -33,6 +34,7 @@
#define ntz(x) a_ctz_l((x))
typedef int (*cmpfun)(const void *, const void *, void *);
+typedef void (*cyclefun)(size_t, unsigned char **, long);
/* returns index of first bit set, excluding the low bit assumed to always
* be set, starting from low bit of p[0] up through high bit of p[1] */
@@ -42,11 +44,34 @@ static inline int pntz(size_t p[2]) {
return 0;
}
-static void cycle(size_t width, unsigned char* ar[], int n)
+/* specializes cycle() for constant width */
+#define DECLARE_CYCLE(NAME, WIDTH) \
+ static void NAME(size_t width, unsigned char* ar[], long n) \
+ { \
+ long i; \
+ char t[WIDTH]; \
+ if(n < 2) \
+ return; \
+ memcpy(t, ar[0], WIDTH); \
+ for (i = 0; i < n - 1; i++) \
+ memcpy(ar[i], ar[i + 1], WIDTH); \
+ memcpy(ar[n - 1], t, WIDTH); \
+ }
+
+DECLARE_CYCLE(cycle4, 4)
+DECLARE_CYCLE(cycle8, 8)
+DECLARE_CYCLE(cycle12, 12)
+DECLARE_CYCLE(cycle16, 16)
+DECLARE_CYCLE(cycle20, 20)
+DECLARE_CYCLE(cycle24, 24)
+DECLARE_CYCLE(cycle28, 28)
+DECLARE_CYCLE(cycle32, 32)
+
+static void cycle(size_t width, unsigned char* ar[], long n)
{
unsigned char tmp[256];
size_t l;
- int i;
+ long i;
if(n < 2) {
return;
@@ -97,7 +122,7 @@ static inline void shr(size_t p[2], int n)
#define AR_LEN (16 * sizeof(size_t))
#define AR_MASK (AR_LEN - 1)
-static void sift(unsigned char *head, size_t width, cmpfun cmp, void *arg, int pshift, size_t lp[])
+static void sift(unsigned char *head, size_t width, cmpfun cmp, void *arg, int pshift, size_t lp[], cyclefun cycler)
{
unsigned char *rt, *lf;
unsigned char *ar[AR_LEN];
@@ -121,10 +146,10 @@ static void sift(unsigned char *head, size_t width, cmpfun cmp, void *arg, int p
pshift -= 2;
}
}
- cycle(width, ar, i & AR_MASK);
+ cycler(width, ar, i & AR_MASK);
}
-static void trinkle(unsigned char *head, size_t width, cmpfun cmp, void *arg, size_t pp[2], int pshift, int trusty, size_t lp[])
+static void trinkle(unsigned char *head, size_t width, cmpfun cmp, void *arg, size_t pp[2], int pshift, int trusty, size_t lp[], cyclefun cycler)
{
unsigned char *stepson,
*rt, *lf;
@@ -158,8 +183,8 @@ static void trinkle(unsigned char *head, size_t width, cmpfun cmp, void *arg, si
trusty = 0;
}
if(!trusty) {
- cycle(width, ar, i & AR_MASK);
- sift(head, width, cmp, arg, pshift, lp);
+ cycler(width, ar, i & AR_MASK);
+ sift(head, width, cmp, arg, pshift, lp, cycler);
}
}
@@ -169,11 +194,43 @@ void __qsort_r(void *base, size_t nel, size_t width, cmpfun cmp, void *arg)
size_t i, size = width * nel;
unsigned char *head, *high;
size_t p[2] = {1, 0};
+ cyclefun cycler;
int pshift = 1;
int trail;
if (!size) return;
+ /* choose fastest cycle() function implementation */
+ switch(width) {
+ case 4:
+ cycler = cycle4;
+ break;
+ case 8:
+ cycler = cycle8;
+ break;
+ case 12:
+ cycler = cycle12;
+ break;
+ case 16:
+ cycler = cycle16;
+ break;
+ case 20:
+ cycler = cycle20;
+ break;
+ case 24:
+ cycler = cycle24;
+ break;
+ case 28:
+ cycler = cycle28;
+ break;
+ case 32:
+ cycler = cycle32;
+ break;
+ default:
+ cycler = cycle;
+ break;
+ }
+
head = base;
high = head + size - width;
@@ -182,14 +239,14 @@ void __qsort_r(void *base, size_t nel, size_t width, cmpfun cmp, void *arg)
while(head < high) {
if((p[0] & 3) == 3) {
- sift(head, width, cmp, arg, pshift, lp);
+ sift(head, width, cmp, arg, pshift, lp, cycler);
shr(p, 2);
pshift += 2;
} else {
if(lp[pshift - 1] >= high - head) {
- trinkle(head, width, cmp, arg, p, pshift, 0, lp);
+ trinkle(head, width, cmp, arg, p, pshift, 0, lp, cycler);
} else {
- sift(head, width, cmp, arg, pshift, lp);
+ sift(head, width, cmp, arg, pshift, lp, cycler);
}
if(pshift == 1) {
@@ -205,7 +262,7 @@ void __qsort_r(void *base, size_t nel, size_t width, cmpfun cmp, void *arg)
head += width;
}
- trinkle(head, width, cmp, arg, p, pshift, 0, lp);
+ trinkle(head, width, cmp, arg, p, pshift, 0, lp, cycler);
while(pshift != 1 || p[0] != 1 || p[1] != 0) {
if(pshift <= 1) {
@@ -217,10 +274,10 @@ void __qsort_r(void *base, size_t nel, size_t width, cmpfun cmp, void *arg)
pshift -= 2;
p[0] ^= 7;
shr(p, 1);
- trinkle(head - lp[pshift] - width, width, cmp, arg, p, pshift + 1, 1, lp);
+ trinkle(head - lp[pshift] - width, width, cmp, arg, p, pshift + 1, 1, lp, cycler);
shl(p, 1);
p[0] |= 1;
- trinkle(head - width, width, cmp, arg, p, pshift, 1, lp);
+ trinkle(head - width, width, cmp, arg, p, pshift, 1, lp, cycler);
}
head -= width;
}
--
2.43.0
Powered by blists - more mailing lists
Confused about mailing lists and their use? Read about mailing lists on Wikipedia and check out these guidelines on proper formatting of your messages.