Follow @Openwall on Twitter for new release announcements and other news
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <7dCgaZ0wlRdG7ojsD2O4fYAzpxj44J6HgrgGNH0rCiKabnoTUGoblGS7qOUiWnVFFtJYjoD8R4DaWIYW5UsZLqT9n4OeHqNcdHPA1kw9odo=@proton.me>
Date: Tue, 14 Apr 2026 03:24:32 +0000
From: David Sparks <sparks05@...ton.me>
To: "musl@...ts.openwall.com" <musl@...ts.openwall.com>
Cc: "dalias@...ifal.cx" <dalias@...ifal.cx>, "mailto.luca.kellermann@...il.com" <mailto.luca.kellermann@...il.com>, David Sparks <sparks05@...ton.me>
Subject: Some additional qsort patches

The recent qsort patches caused conflicts with some local changes
I've had sitting around for a long time.  Since the code is fresh
in people's minds, I was motivated to split them up into a sensible
patch series and submit it here. Here it is, all in one message.
(They're in chronological, "git log -p --reverse" order.)

The first several are cosmetic, but the two big wins are the merger
of the pntz() and shr() helper functions, and the saving one
one compare (from 3 to 2) per level in sift().  (I could try to
do the same for trinkle(), but it's messier.)

Tested, but I have almost certainly failed to create acceptable
patches on my first attempt.  Still, Cunningham's law suggests
that it's better to post an incorrect patch than to ask a lot
of questions.

TODO: Clean out redundant header files.  Neither stdint.h nor stdlib.h
are actually needed.  (Noticed as I was composing this mail.)

commit f9342ddc97259a01e48baf90f7ac5bacd0720f6a
Author: David Sparks <sparks05@...ton.me>
Date:   Sat Apr 11 16:36:15 2026 -0400

    src/stdlib/qsort.c: Canonicalize spacing
    
    - Include a space after the keyword in "if (...)", "while (...)"
      and "for (...)".
    - Omit braces around single-statement bodies, e.g.
      "if (cond) break;" rather than "if (cond) { break; }".
    
    This seems to be consistent with the rest of musl (and the Linux
    kernel coding style section 3), and is a no-op cleanup before
    other work.
    
diff --git a/src/stdlib/qsort.c b/src/stdlib/qsort.c
index 28607450..d0a19159 100644
--- a/src/stdlib/qsort.c
+++ b/src/stdlib/qsort.c
@@ -48,15 +48,14 @@ static void cycle(size_t width, unsigned char* ar[], int n)
 	size_t l;
 	int i;
 
-	if(n < 2) {
+	if (n < 2)
 		return;
-	}
 
 	ar[n] = tmp;
-	while(width) {
+	while (width) {
 		l = sizeof(tmp) < width ? sizeof(tmp) : width;
 		memcpy(ar[n], ar[0], l);
-		for(i = 0; i < n; i++) {
+		for (i = 0; i < n; i++) {
 			memcpy(ar[i], ar[i + 1], l);
 			ar[i] += l;
 		}
@@ -67,7 +66,7 @@ static void cycle(size_t width, unsigned char* ar[], int n)
 /* shl() and shr() need n > 0 */
 static inline void shl(size_t p[2], int n)
 {
-	if(n >= 8 * sizeof(size_t)) {
+	if (n >= 8 * sizeof(size_t)) {
 		n -= 8 * sizeof(size_t);
 		p[1] = p[0];
 		p[0] = 0;
@@ -80,7 +79,7 @@ static inline void shl(size_t p[2], int n)
 
 static inline void shr(size_t p[2], int n)
 {
-	if(n >= 8 * sizeof(size_t)) {
+	if (n >= 8 * sizeof(size_t)) {
 		n -= 8 * sizeof(size_t);
 		p[0] = p[1];
 		p[1] = 0;
@@ -104,14 +103,13 @@ static void sift(unsigned char *head, size_t width, cmpfun cmp, void *arg, int p
 	int i = 1;
 
 	ar[0] = head;
-	while(pshift > 1) {
+	while (pshift > 1) {
 		rt = head - width;
 		lf = head - width - lp[pshift - 2];
 
-		if(cmp(ar[0], lf, arg) >= 0 && cmp(ar[0], rt, arg) >= 0) {
+		if (cmp(ar[0], lf, arg) >= 0 && cmp(ar[0], rt, arg) >= 0)
 			break;
-		}
-		if(cmp(lf, rt, arg) >= 0) {
+		if (cmp(lf, rt, arg) >= 0) {
 			ar[i++ & AR_MASK] = lf;
 			head = lf;
 			pshift -= 1;
@@ -137,17 +135,15 @@ static void trinkle(unsigned char *head, size_t width, cmpfun cmp, void *arg, si
 	p[1] = pp[1];
 
 	ar[0] = head;
-	while(p[0] != 1 || p[1] != 0) {
+	while (p[0] != 1 || p[1] != 0) {
 		stepson = head - lp[pshift];
-		if(cmp(stepson, ar[0], arg) <= 0) {
+		if (cmp(stepson, ar[0], arg) <= 0)
 			break;
-		}
-		if(!trusty && pshift > 1) {
+		if (!trusty && pshift > 1) {
 			rt = head - width;
 			lf = head - width - lp[pshift - 2];
-			if(cmp(rt, stepson, arg) >= 0 || cmp(lf, stepson, arg) >= 0) {
+			if (cmp(rt, stepson, arg) >= 0 || cmp(lf, stepson, arg) >= 0)
 				break;
-			}
 		}
 
 		ar[i++ & AR_MASK] = stepson;
@@ -157,7 +153,7 @@ static void trinkle(unsigned char *head, size_t width, cmpfun cmp, void *arg, si
 		pshift += trail;
 		trusty = 0;
 	}
-	if(!trusty) {
+	if (!trusty) {
 		cycle(width, ar, i & AR_MASK);
 		sift(head, width, cmp, arg, pshift, lp);
 	}
@@ -178,21 +174,20 @@ void __qsort_r(void *base, size_t nel, size_t width, cmpfun cmp, void *arg)
 	high = head + size - width;
 
 	/* Precompute Leonardo numbers, scaled by element width */
-	for(lp[0]=lp[1]=width, i=2; (lp[i]=lp[i-2]+lp[i-1]+width) < size; i++);
+	for (lp[0]=lp[1]=width, i=2; (lp[i]=lp[i-2]+lp[i-1]+width) < size; i++);
 
-	while(head < high) {
-		if((p[0] & 3) == 3) {
+	while (head < high) {
+		if ((p[0] & 3) == 3) {
 			sift(head, width, cmp, arg, pshift, lp);
 			shr(p, 2);
 			pshift += 2;
 		} else {
-			if(lp[pshift - 1] >= high - head) {
+			if (lp[pshift - 1] >= high - head)
 				trinkle(head, width, cmp, arg, p, pshift, 0, lp);
-			} else {
+			else
 				sift(head, width, cmp, arg, pshift, lp);
-			}
 
-			if(pshift == 1) {
+			if (pshift == 1) {
 				shl(p, 1);
 				pshift = 0;
 			} else {
@@ -207,8 +202,8 @@ void __qsort_r(void *base, size_t nel, size_t width, cmpfun cmp, void *arg)
 
 	trinkle(head, width, cmp, arg, p, pshift, 0, lp);
 
-	while(pshift != 1 || p[0] != 1 || p[1] != 0) {
-		if(pshift <= 1) {
+	while (pshift != 1 || p[0] != 1 || p[1] != 0) {
+		if (pshift <= 1) {
 			trail = pntz(p);
 			shr(p, trail);
 			pshift += trail;

commit 082d1e9b3cc5d302d5ac88a82a3a80020a49a1a6
Author: David Sparks <sparks05@...ton.me>
Date:   Sat Apr 11 17:02:07 2026 -0400

    src/stdlib/qsort.c: Use plain "char" for opaque data
    
    "unsigned char" is more typing and inconsistent with the libc
    interface conventions.  It's all internal, anyway.

diff --git a/src/stdlib/qsort.c b/src/stdlib/qsort.c
index d0a19159..73be4961 100644
--- a/src/stdlib/qsort.c
+++ b/src/stdlib/qsort.c
@@ -42,9 +42,9 @@ static inline int pntz(size_t p[2]) {
 	return 0;
 }
 
-static void cycle(size_t width, unsigned char* ar[], int n)
+static void cycle(size_t width, char* ar[], int n)
 {
-	unsigned char tmp[256];
+	char tmp[256];
 	size_t l;
 	int i;
 
@@ -96,11 +96,13 @@ static inline void shr(size_t p[2], int n)
 #define AR_LEN  (16 * sizeof(size_t))
 #define AR_MASK (AR_LEN - 1)
 
-static void sift(unsigned char *head, size_t width, cmpfun cmp, void *arg, int pshift, size_t lp[])
+/* Sift head down in a single Leonardo tree */
+static void sift(char *head, cmpfun cmp, void *arg, int pshift, size_t lp[])
 {
-	unsigned char *rt, *lf;
-	unsigned char *ar[AR_LEN];
+	char *rt, *lf;
+	char *ar[AR_LEN];
 	int i = 1;
+	size_t width = lp[0];
 
 	ar[0] = head;
 	while (pshift > 1) {
@@ -122,12 +124,12 @@ static void sift(unsigned char *head, size_t width, cmpfun cmp, void *arg, int p
 	cycle(width, ar, i & AR_MASK);
 }
 
-static void trinkle(unsigned char *head, size_t width, cmpfun cmp, void *arg, size_t pp[2], int pshift, int trusty, size_t lp[])
+static void trinkle(char *head, cmpfun cmp, void *arg, size_t pp[2], int pshift, int trusty, size_t lp[])
 {
-	unsigned char *stepson,
-	              *rt, *lf;
+	char *stepson, *rt, *lf;
 	size_t p[2];
-	unsigned char *ar[AR_LEN];
+	size_t width = lp[0];
+	char *ar[AR_LEN];
 	int i = 1;
 	int trail;
 
@@ -155,7 +157,7 @@ static void trinkle(unsigned char *head, size_t width, cmpfun cmp, void *arg, si
 	}
 	if (!trusty) {
 		cycle(width, ar, i & AR_MASK);
-		sift(head, width, cmp, arg, pshift, lp);
+		sift(head, cmp, arg, pshift, lp);
 	}
 }
 
@@ -163,7 +165,7 @@ void __qsort_r(void *base, size_t nel, size_t width, cmpfun cmp, void *arg)
 {
 	size_t lp[12*sizeof(size_t)];
 	size_t i, size = width * nel;
-	unsigned char *head, *high;
+	char *head, *high;
 	size_t p[2] = {1, 0};
 	int pshift = 1;
 	int trail;
@@ -178,14 +180,14 @@ void __qsort_r(void *base, size_t nel, size_t width, cmpfun cmp, void *arg)
 
 	while (head < high) {
 		if ((p[0] & 3) == 3) {
-			sift(head, width, cmp, arg, pshift, lp);
+			sift(head, cmp, arg, pshift, lp);
 			shr(p, 2);
 			pshift += 2;
 		} else {
 			if (lp[pshift - 1] >= high - head)
-				trinkle(head, width, cmp, arg, p, pshift, 0, lp);
+				trinkle(head, cmp, arg, p, pshift, 0, lp);
 			else
-				sift(head, width, cmp, arg, pshift, lp);
+				sift(head, cmp, arg, pshift, lp);
 
 			if (pshift == 1) {
 				shl(p, 1);
@@ -200,7 +202,7 @@ void __qsort_r(void *base, size_t nel, size_t width, cmpfun cmp, void *arg)
 		head += width;
 	}
 
-	trinkle(head, width, cmp, arg, p, pshift, 0, lp);
+	trinkle(head, cmp, arg, p, pshift, 0, lp);
 
 	while (pshift != 1 || p[0] != 1 || p[1] != 0) {
 		if (pshift <= 1) {
@@ -212,10 +214,10 @@ void __qsort_r(void *base, size_t nel, size_t width, cmpfun cmp, void *arg)
 			pshift -= 2;
 			p[0] ^= 7;
 			shr(p, 1);
-			trinkle(head - lp[pshift] - width, width, cmp, arg, p, pshift + 1, 1, lp);
+			trinkle(head - lp[pshift] - width, cmp, arg, p, pshift + 1, 1, lp);
 			shl(p, 1);
 			p[0] |= 1;
-			trinkle(head - width, width, cmp, arg, p, pshift, 1, lp);
+			trinkle(head - width, cmp, arg, p, pshift, 1, lp);
 		}
 		head -= width;
 	}

commit 369484f5f1a0466f48f412ae6382aeeee3608d35
Author: David Sparks <sparks05@...ton.me>
Date:   Sat Apr 11 18:20:07 2026 -0400

    src/stdlib/qsort.c: Use bool for "trusty" flag
    
    More self-documenting.

diff --git a/src/stdlib/qsort.c b/src/stdlib/qsort.c
index 73be4961..73b4ffa9 100644
--- a/src/stdlib/qsort.c
+++ b/src/stdlib/qsort.c
@@ -26,6 +26,7 @@
 
 #define _BSD_SOURCE
 #include <stdint.h>
+#include <stdbool.h>
 #include <stdlib.h>
 #include <string.h>
 
@@ -124,7 +125,7 @@ static void sift(char *head, cmpfun cmp, void *arg, int pshift, size_t lp[])
 	cycle(width, ar, i & AR_MASK);
 }
 
-static void trinkle(char *head, cmpfun cmp, void *arg, size_t pp[2], int pshift, int trusty, size_t lp[])
+static void trinkle(char *head, cmpfun cmp, void *arg, size_t pp[2], int pshift, bool trusty, size_t lp[])
 {
 	char *stepson, *rt, *lf;
 	size_t p[2];
@@ -153,7 +154,7 @@ static void trinkle(char *head, cmpfun cmp, void *arg, size_t pp[2], int pshift,
 		trail = pntz(p);
 		shr(p, trail);
 		pshift += trail;
-		trusty = 0;
+		trusty = false;
 	}
 	if (!trusty) {
 		cycle(width, ar, i & AR_MASK);
@@ -185,7 +186,7 @@ void __qsort_r(void *base, size_t nel, size_t width, cmpfun cmp, void *arg)
 			pshift += 2;
 		} else {
 			if (lp[pshift - 1] >= high - head)
-				trinkle(head, cmp, arg, p, pshift, 0, lp);
+				trinkle(head, cmp, arg, p, pshift, false, lp);
 			else
 				sift(head, cmp, arg, pshift, lp);
 
@@ -202,7 +203,7 @@ void __qsort_r(void *base, size_t nel, size_t width, cmpfun cmp, void *arg)
 		head += width;
 	}
 
-	trinkle(head, cmp, arg, p, pshift, 0, lp);
+	trinkle(head, cmp, arg, p, pshift, false, lp);
 
 	while (pshift != 1 || p[0] != 1 || p[1] != 0) {
 		if (pshift <= 1) {
@@ -214,10 +215,10 @@ void __qsort_r(void *base, size_t nel, size_t width, cmpfun cmp, void *arg)
 			pshift -= 2;
 			p[0] ^= 7;
 			shr(p, 1);
-			trinkle(head - lp[pshift] - width, cmp, arg, p, pshift + 1, 1, lp);
+			trinkle(head - lp[pshift] - width, cmp, arg, p, pshift + 1, true, lp);
 			shl(p, 1);
 			p[0] |= 1;
-			trinkle(head - width, cmp, arg, p, pshift, 1, lp);
+			trinkle(head - width, cmp, arg, p, pshift, true, lp);
 		}
 		head -= width;
 	}

commit 94653639ac019b16e6a3d2b6af235ccb92357963
Author: David Sparks <sparks05@...ton.me>
Date:   Sun Apr 12 09:54:06 2026 -0400

    src/stdlib/qsort.c: Move declarations into inner scopes
    
    We assume C99, so can even use "for (int i=0; ...)".
    Smaller scopes mean less to keep track of when reading the code.
    
    This includes a slight tweak to the initialization of the lp[]
    array of leonardo number sizes; hopefully the equivalence is clear.
    
diff --git a/src/stdlib/qsort.c b/src/stdlib/qsort.c
index 73b4ffa9..5952dc90 100644
--- a/src/stdlib/qsort.c
+++ b/src/stdlib/qsort.c
@@ -46,17 +46,15 @@ static inline int pntz(size_t p[2]) {
 static void cycle(size_t width, char* ar[], int n)
 {
 	char tmp[256];
-	size_t l;
-	int i;
 
 	if (n < 2)
 		return;
 
 	ar[n] = tmp;
 	while (width) {
-		l = sizeof(tmp) < width ? sizeof(tmp) : width;
+		size_t l = sizeof(tmp) < width ? sizeof(tmp) : width;
 		memcpy(ar[n], ar[0], l);
-		for (i = 0; i < n; i++) {
+		for (int i = 0; i < n; i++) {
 			memcpy(ar[i], ar[i + 1], l);
 			ar[i] += l;
 		}
@@ -100,15 +98,14 @@ static inline void shr(size_t p[2], int n)
 /* Sift head down in a single Leonardo tree */
 static void sift(char *head, cmpfun cmp, void *arg, int pshift, size_t lp[])
 {
-	char *rt, *lf;
 	char *ar[AR_LEN];
 	int i = 1;
 	size_t width = lp[0];
 
 	ar[0] = head;
 	while (pshift > 1) {
-		rt = head - width;
-		lf = head - width - lp[pshift - 2];
+		char *rt = head - width;
+		char *lf = head - width - lp[pshift - 2];
 
 		if (cmp(ar[0], lf, arg) >= 0 && cmp(ar[0], rt, arg) >= 0)
 			break;
@@ -127,31 +124,26 @@ static void sift(char *head, cmpfun cmp, void *arg, int pshift, size_t lp[])
 
 static void trinkle(char *head, cmpfun cmp, void *arg, size_t pp[2], int pshift, bool trusty, size_t lp[])
 {
-	char *stepson, *rt, *lf;
-	size_t p[2];
+	size_t p[2] = { pp[0], pp[1] };
 	size_t width = lp[0];
 	char *ar[AR_LEN];
 	int i = 1;
-	int trail;
-
-	p[0] = pp[0];
-	p[1] = pp[1];
 
 	ar[0] = head;
 	while (p[0] != 1 || p[1] != 0) {
-		stepson = head - lp[pshift];
+		char *stepson = head - lp[pshift];
 		if (cmp(stepson, ar[0], arg) <= 0)
 			break;
 		if (!trusty && pshift > 1) {
-			rt = head - width;
-			lf = head - width - lp[pshift - 2];
+			char *rt = head - width;
+			char *lf = head - width - lp[pshift - 2];
 			if (cmp(rt, stepson, arg) >= 0 || cmp(lf, stepson, arg) >= 0)
 				break;
 		}
 
 		ar[i++ & AR_MASK] = stepson;
 		head = stepson;
-		trail = pntz(p);
+		int trail = pntz(p);
 		shr(p, trail);
 		pshift += trail;
 		trusty = false;
@@ -165,19 +157,18 @@ static void trinkle(char *head, cmpfun cmp, void *arg, size_t pp[2], int pshift,
 void __qsort_r(void *base, size_t nel, size_t width, cmpfun cmp, void *arg)
 {
 	size_t lp[12*sizeof(size_t)];
-	size_t i, size = width * nel;
-	char *head, *high;
+	size_t size = width * nel;
 	size_t p[2] = {1, 0};
 	int pshift = 1;
-	int trail;
 
 	if (!size) return;
 
-	head = base;
-	high = head + size - width;
+	char *head = base;
+	char *high = head + size - width;
 
 	/* Precompute Leonardo numbers, scaled by element width */
-	for (lp[0]=lp[1]=width, i=2; (lp[i]=lp[i-2]+lp[i-1]+width) < size; i++);
+	lp[1] = lp[0] = width;
+	for (size_t i=0, leo=width; (lp[i+2] = leo += lp[i]+width) < size; i++);
 
 	while (head < high) {
 		if ((p[0] & 3) == 3) {
@@ -207,7 +198,7 @@ void __qsort_r(void *base, size_t nel, size_t width, cmpfun cmp, void *arg)
 
 	while (pshift != 1 || p[0] != 1 || p[1] != 0) {
 		if (pshift <= 1) {
-			trail = pntz(p);
+			int trail = pntz(p);
 			shr(p, trail);
 			pshift += trail;
 		} else {

commit 1c404d150004eed9e071639d5277d05aa7f137ed
Author: David Sparks <sparks05@...ton.me>
Date:   Sun Apr 12 09:57:10 2026 -0400

    src/stdlib/qsort.c: Add some comments
    
    Just a few more landmarks when reading the code.

diff --git a/src/stdlib/qsort.c b/src/stdlib/qsort.c
index 5952dc90..8d9a207a 100644
--- a/src/stdlib/qsort.c
+++ b/src/stdlib/qsort.c
@@ -95,7 +95,7 @@ static inline void shr(size_t p[2], int n)
 #define AR_LEN  (16 * sizeof(size_t))
 #define AR_MASK (AR_LEN - 1)
 
-/* Sift head down in a single Leonardo tree */
+/* Sift head down in a single Leonardo tree of size lp[pshift] */
 static void sift(char *head, cmpfun cmp, void *arg, int pshift, size_t lp[])
 {
 	char *ar[AR_LEN];
@@ -122,6 +122,12 @@ static void sift(char *head, cmpfun cmp, void *arg, int pshift, size_t lp[])
 	cycle(width, ar, i & AR_MASK);
 }
 
+/*
+ * Sift head down to its children *or* its stepson.
+ * If trusty is true, then then *head is presumed to already be
+ * sorted relative to its children; only the comparison with
+ * its stepson is required.
+ */
 static void trinkle(char *head, cmpfun cmp, void *arg, size_t pp[2], int pshift, bool trusty, size_t lp[])
 {
 	size_t p[2] = { pp[0], pp[1] };
@@ -170,6 +176,7 @@ void __qsort_r(void *base, size_t nel, size_t width, cmpfun cmp, void *arg)
 	lp[1] = lp[0] = width;
 	for (size_t i=0, leo=width; (lp[i+2] = leo += lp[i]+width) < size; i++);
 
+	/* Build the heap */
 	while (head < high) {
 		if ((p[0] & 3) == 3) {
 			sift(head, cmp, arg, pshift, lp);
@@ -196,6 +203,7 @@ void __qsort_r(void *base, size_t nel, size_t width, cmpfun cmp, void *arg)
 
 	trinkle(head, cmp, arg, p, pshift, false, lp);
 
+	/* Extract the elements */
 	while (pshift != 1 || p[0] != 1 || p[1] != 0) {
 		if (pshift <= 1) {
 			int trail = pntz(p);

commit a9d1ca0dabe2a8fa2038c6890cf5deaeb131f3c5
Author: David Sparks <sparks05@...ton.me>
Date:   Sun Apr 12 10:04:08 2026 -0400

    src/stdlib/qsort.c: Save a comparison in sift()
    
    Calling the cmp() function is expensive, so compare left and
    right first, and then compare the head withthe greater of the
    two.  This eliminates one comparison per level in sift(), per level
    that the root is sifted down.
    
    The only downside is that the comparisons are now forced to be
    sequential, while before cmp(ar[0], lf) and cmp(ar[0], rt) could
    be done in parallel.
    
    (Doing the same in trinkle() is more complex, so holding off for now.)

diff --git a/src/stdlib/qsort.c b/src/stdlib/qsort.c
index 8d9a207a..a6fa5acd 100644
--- a/src/stdlib/qsort.c
+++ b/src/stdlib/qsort.c
@@ -107,17 +107,16 @@ static void sift(char *head, cmpfun cmp, void *arg, int pshift, size_t lp[])
 		char *rt = head - width;
 		char *lf = head - width - lp[pshift - 2];
 
-		if (cmp(ar[0], lf, arg) >= 0 && cmp(ar[0], rt, arg) >= 0)
-			break;
 		if (cmp(lf, rt, arg) >= 0) {
-			ar[i++ & AR_MASK] = lf;
 			head = lf;
 			pshift -= 1;
 		} else {
-			ar[i++ & AR_MASK] = rt;
 			head = rt;
 			pshift -= 2;
 		}
+		if (cmp(ar[0], head, arg) >= 0)
+			break;
+		ar[i++ & AR_MASK] = head;
 	}
 	cycle(width, ar, i & AR_MASK);
 }

commit 407367354dd8a7f86ea80c0db9fbec45a9e828a4
Author: David Sparks <sparks05@...ton.me>
Date:   Sun Apr 12 10:06:30 2026 -0400

    src/stdlib/qsort.c: Share more code when adding a new size-1 tree.
    
    Prefer branch-free code.

diff --git a/src/stdlib/qsort.c b/src/stdlib/qsort.c
index a6fa5acd..18f15fa2 100644
--- a/src/stdlib/qsort.c
+++ b/src/stdlib/qsort.c
@@ -178,22 +178,20 @@ void __qsort_r(void *base, size_t nel, size_t width, cmpfun cmp, void *arg)
 	/* Build the heap */
 	while (head < high) {
 		if ((p[0] & 3) == 3) {
+			/* Merge two consecutive-sized trees */
 			sift(head, cmp, arg, pshift, lp);
 			shr(p, 2);
 			pshift += 2;
 		} else {
+			/* Append a new tree of size 1 */
 			if (lp[pshift - 1] >= high - head)
 				trinkle(head, cmp, arg, p, pshift, false, lp);
 			else
 				sift(head, cmp, arg, pshift, lp);
-
-			if (pshift == 1) {
-				shl(p, 1);
-				pshift = 0;
-			} else {
-				shl(p, pshift - 1);
-				pshift = 1;
-			}
+			/* The new tree is of order 1 unless order-1 already exists */
+			bool newshift = (pshift != 1);
+			shl(p, pshift - newshift);
+			pshift = newshift;
 		}
 
 		p[0] |= 1;

commit 86412c48acdd9063def5c44e807622b915533ed0
Author: David Sparks <sparks05@...ton.me>
Date:   Sun Apr 12 11:38:33 2026 -0400

    src/stdlib/qsort.c: Slightly simplify extraction loop.
    
    shifting p[] left two places, then right 1, is wasteful.
    A bit of rearranging can produce simpler, equivalent code.

diff --git a/src/stdlib/qsort.c b/src/stdlib/qsort.c
index 18f15fa2..eef2bb39 100644
--- a/src/stdlib/qsort.c
+++ b/src/stdlib/qsort.c
@@ -202,21 +202,22 @@ void __qsort_r(void *base, size_t nel, size_t width, cmpfun cmp, void *arg)
 
 	/* Extract the elements */
 	while (pshift != 1 || p[0] != 1 || p[1] != 0) {
+		head -= width;
 		if (pshift <= 1) {
 			int trail = pntz(p);
 			shr(p, trail);
 			pshift += trail;
 		} else {
-			shl(p, 2);
-			pshift -= 2;
-			p[0] ^= 7;
-			shr(p, 1);
-			trinkle(head - lp[pshift] - width, cmp, arg, p, pshift + 1, true, lp);
+			/* Remove the root of a non-trivial tree */
 			shl(p, 1);
+			pshift -= 1;
+			p[0] ^= 3;
+			trinkle(head - lp[pshift-1], cmp, arg, p, pshift, true, lp);
+			shl(p, 1);
+			pshift -= 1;
 			p[0] |= 1;
-			trinkle(head - width, cmp, arg, p, pshift, true, lp);
+			trinkle(head, cmp, arg, p, pshift, true, lp);
 		}
-		head -= width;
 	}
 }
 

commit 100504b5d36eb66e00b403d17d5a7420d2a1cd6d
Author: David Sparks <sparks05@...ton.me>
Date:   Sun Apr 12 11:35:42 2026 -0400

    src/stdlib/qsort.c: Reorganize right shifts.
    
    Right shift of the p[] state are almost always part of the following
    pattern:
            int trail = pntz(p);
            shr(p, trail);
            pshift += trail;
    which nrmalizes p[] by shifting right until its lsbit is again
    set, and adds that amount to the pshift variable.  Wrap this all up
    in a single function which allows:
            pshift += shr(p);
    
    The one issus is naming.  There is one exception to this, which
    arises when building the heap and merging two trees, and we know
    a priori that the shift distance is 2.
    
    More specifically, the operation begins with p[0] having lsbits
    of ...011.  We merge those two trees to make a larger one, changing
    to ...100, then normalize, which causes a shift right of 2.  Using
    the general operation for this would be possible, but a waste.
    
    Using tha name "shr" for the full operation means this one shift must
    be expanded inline, which is a bit ugly, but I can't think of a
    better name for the combined pntz + shr operation.

diff --git a/src/stdlib/qsort.c b/src/stdlib/qsort.c
index eef2bb39..78781f01 100644
--- a/src/stdlib/qsort.c
+++ b/src/stdlib/qsort.c
@@ -35,14 +35,6 @@
 
 typedef int (*cmpfun)(const void *, const void *, void *);
 
-/* returns index of first bit set, excluding the low bit assumed to always
- * be set, starting from low bit of p[0] up through high bit of p[1] */
-static inline int pntz(size_t p[2]) {
-	if (p[0] != 1) return ntz(p[0] - 1);
-	if (p[1]) return 8*sizeof(size_t) + ntz(p[1]);
-	return 0;
-}
-
 static void cycle(size_t width, char* ar[], int n)
 {
 	char tmp[256];
@@ -62,7 +54,8 @@ static void cycle(size_t width, char* ar[], int n)
 	}
 }
 
-/* shl() and shr() need n > 0 */
+/* shl() needs n > 0.  Must always be followed by setting
+   the lsbit of p[] to maintain our normalization invariant. */
 static inline void shl(size_t p[2], int n)
 {
 	if (n >= 8 * sizeof(size_t)) {
@@ -76,17 +69,25 @@ static inline void shl(size_t p[2], int n)
 	p[0] <<= n;
 }
 
-static inline void shr(size_t p[2], int n)
+/* Clear the lsbit, then shift right until the lsbit
+   is again set, returning the number of bits shifted.
+   Assumes there is a new lsbit somewhere. */
+static inline int shr(size_t p[2])
 {
-	if (n >= 8 * sizeof(size_t)) {
-		n -= 8 * sizeof(size_t);
-		p[0] = p[1];
+	int trail;
+	size_t p0 = p[0] - 1;
+
+	if (p0 != 0) {
+		trail = ntz(p0);
+		p[0] = p0 >> trail | p[1] << (8*sizeof(size_t) - trail);
+		p[1] >>= trail;
+	} else {
+		trail = ntz(p[1]);
+		p[0] = p[1] >> trail;
 		p[1] = 0;
-		if (!n) return;
+		trail += 8*sizeof(size_t);
 	}
-	p[0] >>= n;
-	p[0] |= p[1] << (sizeof(size_t) * 8 - n);
-	p[1] >>= n;
+	return trail;
 }
 
 /* power-of-two length for working array so that we can mask indices and
@@ -148,9 +149,7 @@ static void trinkle(char *head, cmpfun cmp, void *arg, size_t pp[2], int pshift,
 
 		ar[i++ & AR_MASK] = stepson;
 		head = stepson;
-		int trail = pntz(p);
-		shr(p, trail);
-		pshift += trail;
+		pshift += shr(p);
 		trusty = false;
 	}
 	if (!trusty) {
@@ -180,7 +179,10 @@ void __qsort_r(void *base, size_t nel, size_t width, cmpfun cmp, void *arg)
 		if ((p[0] & 3) == 3) {
 			/* Merge two consecutive-sized trees */
 			sift(head, cmp, arg, pshift, lp);
-			shr(p, 2);
+			/* Shift p[] right 2 bits; equivalent to
+			   "p[0] ^= 7; pshift += shr(p);" */
+			p[0] = p[0] >> 2 | p[1] << (8*sizeof(size_t) - 2);
+			p[1] >>= 2;
 			pshift += 2;
 		} else {
 			/* Append a new tree of size 1 */
@@ -204,9 +206,8 @@ void __qsort_r(void *base, size_t nel, size_t width, cmpfun cmp, void *arg)
 	while (pshift != 1 || p[0] != 1 || p[1] != 0) {
 		head -= width;
 		if (pshift <= 1) {
-			int trail = pntz(p);
-			shr(p, trail);
-			pshift += trail;
+			/* Remove a tree of size 1 */
+			pshift += shr(p);
 		} else {
 			/* Remove the root of a non-trivial tree */
 			shl(p, 1);

Powered by blists - more mailing lists

Confused about mailing lists and their use? Read about mailing lists on Wikipedia and check out these guidelines on proper formatting of your messages.