diff --git a/src/internal/dynlink.h b/src/internal/dynlink.h
index 53661d6..8621d2d 100644
--- a/src/internal/dynlink.h
+++ b/src/internal/dynlink.h
@@ -51,7 +51,7 @@ enum {
 #define AUX_CNT 32
 #define DYN_CNT 32
 
-typedef void (*stage2_func)(unsigned char *);
+typedef void (*stage2_func)(unsigned char *, size_t *);
 typedef _Noreturn void (*stage3_func)(size_t *);
 
 #endif
diff --git a/src/ldso/dlstart.c b/src/ldso/dlstart.c
index 5f84465..6af5037 100644
--- a/src/ldso/dlstart.c
+++ b/src/ldso/dlstart.c
@@ -56,12 +56,15 @@ void _dlstart_c(size_t *sp, size_t *dynv)
 		for (i=0; i<local_cnt; i++) got[i] += (size_t)base;
 	}
 
-	size_t *rel, rel_size;
+	size_t *rel, rel_size, symbolic_rel_cnt=0;
 
 	rel = (void *)(base+dyn[DT_REL]);
 	rel_size = dyn[DT_RELSZ];
 	for (; rel_size; rel+=2, rel_size-=2*sizeof(size_t)) {
-		if (!IS_RELATIVE(rel[1])) continue;
+		if (!IS_RELATIVE(rel[1])) {
+			symbolic_rel_cnt++;
+			continue;
+		}
 		size_t *rel_addr = (void *)(base + rel[0]);
 		*rel_addr += (size_t)base;
 	}
@@ -74,6 +77,16 @@ void _dlstart_c(size_t *sp, size_t *dynv)
 		*rel_addr = (size_t)base + rel[2];
 	}
 
+	/* Prepare storage for stage 2 to save clobbered REL
+	 * addends so they can be reused in stage 3. There should
+	 * be very few. If something goes wrong and there are a
+	 * huge number, pass a null pointer to trigger stage 2
+	 * to abort instead of risking stack overflow. */
+	int too_many_addends = symbolic_rel_cnt > 4096;
+	size_t naddends = too_many_addends ? 1 : symbolic_rel_cnt;
+	size_t addends[naddends];
+	size_t *paddends = too_many_addends ? 0 : addends;
+
 	const char *strings = (void *)(base + dyn[DT_STRTAB]);
 	const Sym *syms = (void *)(base + dyn[DT_SYMTAB]);
 
@@ -84,7 +97,7 @@ void _dlstart_c(size_t *sp, size_t *dynv)
 		 && s[3]=='l' && s[4]=='s' && s[5]=='2' && !s[6])
 			break;
 	}
-	((stage2_func)(base + syms[i].st_value))(base);
+	((stage2_func)(base + syms[i].st_value))(base, paddends);
 
 	/* Call dynamic linker stage-3, __dls3 */
 	for (i=0; ;i++) {
diff --git a/src/ldso/dynlink.c b/src/ldso/dynlink.c
index 93595a0..bfc1c96 100644
--- a/src/ldso/dynlink.c
+++ b/src/ldso/dynlink.c
@@ -74,7 +74,6 @@ struct dso {
 	volatile int new_dtv_idx, new_tls_idx;
 	struct td_index *td_index;
 	struct dso *fini_next;
-	int rel_early_relative, rel_update_got;
 	char *shortname;
 	char buf[];
 };
@@ -98,6 +97,7 @@ static struct builtin_tls {
 
 static struct dso ldso;
 static struct dso *head, *tail, *fini_head;
+static size_t *saved_addends;
 static char *env_path, *sys_path;
 static unsigned long long gencnt;
 static int runtime;
@@ -256,9 +256,19 @@ static void do_relocs(struct dso *dso, size_t *rel, size_t rel_size, size_t stri
 	size_t sym_val;
 	size_t tls_val;
 	size_t addend;
+	int skip_relative = 0, reuse_addends = 0, save_slot = 0;
+
+	if (dso == &ldso) {
+		size_t dyn[DYN_CNT];
+		decode_vec(ldso.dynv, dyn, DYN_CNT);
+		/* Only ldso's REL table needs addend saving/reuse. */
+		if (rel == (size_t *)(ldso.base+dyn[DT_REL]))
+			reuse_addends = 1;
+		skip_relative = 1;
+	}
 
 	for (; rel_size; rel+=stride, rel_size-=stride*sizeof(size_t)) {
-		if (dso->rel_early_relative && IS_RELATIVE(rel[1])) continue;
+		if (skip_relative && IS_RELATIVE(rel[1])) continue;
 		type = R_TYPE(rel[1]);
 		sym_index = R_SYM(rel[1]);
 		reloc_addr = (void *)(base + rel[0]);
@@ -280,12 +290,20 @@ static void do_relocs(struct dso *dso, size_t *rel, size_t rel_size, size_t stri
 			def.dso = dso;
 		}
 
-		int gotplt = (type == REL_GOT || type == REL_PLT);
-		if (dso->rel_update_got && !gotplt && stride==2) continue;
-
-		addend = stride>2 ? rel[2]
-			: gotplt || type==REL_COPY ? 0
-			: *reloc_addr;
+		if (stride > 2) {
+			addend = rel[2];
+		} else if (type==REL_GOT || type==REL_PLT|| type==REL_COPY) {
+			addend = 0;
+		} else if (reuse_addends) {
+			/* Save original addend in stage 2 where the dso
+			 * chain consists of just ldso; otherwise read back
+			 * saved addend since the inline one was clobbered. */
+			if (head==&ldso)
+				saved_addends[save_slot] = *reloc_addr;
+			addend = saved_addends[save_slot++];
+		} else {
+			addend = *reloc_addr;
+		}
 
 		sym_val = def.sym ? (size_t)def.dso->base+def.sym->st_value : 0;
 		tls_val = def.sym ? def.sym->st_value : 0;
@@ -879,7 +897,7 @@ static void do_mips_relocs(struct dso *p, size_t *got)
 	size_t i, j, rel[2];
 	unsigned char *base = p->base;
 	i=0; search_vec(p->dynv, &i, DT_MIPS_LOCAL_GOTNO);
-	if (p->rel_early_relative) {
+	if (p==&ldso) {
 		got += i;
 	} else {
 		while (i--) *got++ += (size_t)base;
@@ -1116,16 +1134,16 @@ static void update_tls_size()
  * linker itself, but some of the relocations performed may need to be
  * replaced later due to copy relocations in the main program. */
 
-void __dls2(unsigned char *base)
+void __dls2(unsigned char *base, size_t *addends)
 {
 	Ehdr *ehdr = (void *)base;
+	if (!(saved_addends = addends)) a_crash();
 	ldso.base = base;
 	ldso.name = ldso.shortname = "libc.so";
 	ldso.global = 1;
 	ldso.phnum = ehdr->e_phnum;
 	ldso.phdr = (void *)(base + ehdr->e_phoff);
 	ldso.phentsize = ehdr->e_phentsize;
-	ldso.rel_early_relative = 1;
 	kernel_mapped_dso(&ldso);
 	decode_dyn(&ldso);
 
@@ -1133,7 +1151,6 @@ void __dls2(unsigned char *base)
 	reloc_all(&ldso);
 
 	ldso.relocated = 0;
-	ldso.rel_update_got = 1;
 }
 
 /* Stage 3 of the dynamic linker is called with the dynamic linker/libc