Patch summary
-------------
Touches john-1.7.6/src/x86-64.S and john-1.7.6/src/x86-sse.S.

Each constant multiplication inside an operand displacement expression
(for example "(512*base)+(x*32)") is replaced by a preprocessor macro
MUL_<n>(x) that expands to repeated addition of x, following the in-patch
comment that Sun's assembler cannot multiply.  The factor is the same on
both sides of every replacement, so the computed displacements do not change.

Paths are relative to the directory that contains john-1.7.6/, with one
leading component ("aa/") on the old side.

NOTE(review): line breaks and indentation below were reconstructed from a
whitespace-collapsed copy of this patch.  If context lines do not match
the target files, apply while ignoring whitespace (patch -l) - confirm
against the pristine sources.

diff -urpN aa/john-1.7.6/src/x86-64.S john-1.7.6/src/x86-64.S
--- aa/john-1.7.6/src/x86-64.S	2010-06-15 01:27:35.000000000 +0000
+++ john-1.7.6/src/x86-64.S	2010-09-02 00:20:06.595802029 +0000
@@ -1071,6 +1071,14 @@ DES_bs_crypt_LM_loop:
  * work on Solaris (unless the use of GNU assembler is forced).
  */
 
+/* Sun's assembler can't multiply, but at least it can add... */
+#define MUL_16(x) (x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x)
+#define MUL_32(x) (MUL_16(x) + MUL_16(x))
+#define MUL_64(x) (MUL_32(x) + MUL_32(x))
+#define MUL_128(x) (MUL_64(x) + MUL_64(x))
+#define MUL_256(x) (MUL_128(x) + MUL_128(x))
+#define MUL_512(x) (MUL_256(x) + MUL_256(x))
+
 #ifdef UNDERSCORES
 #define nt_crypt_all_x86_64 _nt_crypt_all_x86_64
 #define nt_buffer8x _nt_buffer8x
@@ -1135,8 +1143,8 @@ const_stage3:
 #define t23 %xmm13
 
 #define STEP1(aa, bb, cc, dd, aa3, bb3, cc3, dd3, x, s, base) \
-	paddd (512*base)+(x*32)+nt_buffer8x(%rip), aa; \
-	paddd (512*base)+(x*32)+16+nt_buffer8x(%rip), aa3; \
+	paddd MUL_512(base)+MUL_32(x)+nt_buffer8x(%rip), aa; \
+	paddd MUL_512(base)+MUL_32(x)+16+nt_buffer8x(%rip), aa3; \
 	movdqa cc, t1; \
 	movdqa cc3, t13; \
 	pxor dd, t1; \
@@ -1157,8 +1165,8 @@ const_stage3:
 	por t23, aa3;
 
 #define STEP2(aa, bb, cc, dd, aa3, bb3, cc3, dd3, x, s, base) \
-	paddd (512*base)+(x*32)+nt_buffer8x(%rip), aa; \
-	paddd (512*base)+(x*32)+16+nt_buffer8x(%rip), aa3; \
+	paddd MUL_512(base)+MUL_32(x)+nt_buffer8x(%rip), aa; \
+	paddd MUL_512(base)+MUL_32(x)+16+nt_buffer8x(%rip), aa3; \
 	movdqa cc, t1; \
 	movdqa cc3, t13; \
 	movdqa cc, t2; \
@@ -1185,8 +1193,8 @@ const_stage3:
 	por t13, aa3;
 
 #define STEP3(aa, bb, cc, dd, aa3, bb3, cc3, dd3, x, s, base) \
-	paddd (512*base)+(x*32)+nt_buffer8x(%rip), aa; \
-	paddd (512*base)+(x*32)+16+nt_buffer8x(%rip), aa3; \
+	paddd MUL_512(base)+MUL_32(x)+nt_buffer8x(%rip), aa; \
+	paddd MUL_512(base)+MUL_32(x)+16+nt_buffer8x(%rip), aa3; \
 	movdqa dd, t1; \
 	movdqa dd3, t13; \
 	pxor cc, t1; \
@@ -1216,8 +1224,8 @@ const_stage3:
 	movdqa const_init_d(%rip), d; \
 	movdqa const_init_d(%rip), d3; \
 	\
-	paddd (512*base)+nt_buffer8x(%rip), a; \
-	paddd (512*base)+16+nt_buffer8x(%rip), a3; \
+	paddd MUL_512(base)+nt_buffer8x(%rip), a; \
+	paddd MUL_512(base)+16+nt_buffer8x(%rip), a3; \
 	pslld $3, a; \
 	pslld $3, a3; \
 	\
@@ -1267,8 +1275,8 @@ const_stage3:
 	STEP3(c, d, a, b, c3, d3, a3, b3, 5 , 11, base) \
 	movdqa a, t1; \
 	movdqa a3, t13; \
-	paddd (512*base)+416+nt_buffer8x(%rip), b; \
-	paddd (512*base)+416+16+nt_buffer8x(%rip), b3; \
+	paddd MUL_512(base)+416+nt_buffer8x(%rip), b; \
+	paddd MUL_512(base)+416+16+nt_buffer8x(%rip), b3; \
 	pxor d, t1; \
 	pxor d3,t13; \
 	pxor c, t1; \
@@ -1276,14 +1284,14 @@ const_stage3:
 	paddd t1, b; \
 	paddd t13,b3; \
 	\
-	movdqa a, (128*base)+output8x(%rip); \
-	movdqa a3, (128*base)+16+output8x(%rip); \
-	movdqa b, (128*base)+32+output8x(%rip); \
-	movdqa b3, (128*base)+32+16+output8x(%rip); \
-	movdqa c, (128*base)+64+output8x(%rip); \
-	movdqa c3, (128*base)+64+16+output8x(%rip); \
-	movdqa d, (128*base)+96+output8x(%rip); \
-	movdqa d3, (128*base)+96+16+output8x(%rip);
+	movdqa a, MUL_128(base)+output8x(%rip); \
+	movdqa a3, MUL_128(base)+16+output8x(%rip); \
+	movdqa b, MUL_128(base)+32+output8x(%rip); \
+	movdqa b3, MUL_128(base)+32+16+output8x(%rip); \
+	movdqa c, MUL_128(base)+64+output8x(%rip); \
+	movdqa c3, MUL_128(base)+64+16+output8x(%rip); \
+	movdqa d, MUL_128(base)+96+output8x(%rip); \
+	movdqa d3, MUL_128(base)+96+16+output8x(%rip);
 
 .text
 
diff -urpN aa/john-1.7.6/src/x86-sse.S john-1.7.6/src/x86-sse.S
--- aa/john-1.7.6/src/x86-sse.S	2010-06-15 01:29:35.000000000 +0000
+++ john-1.7.6/src/x86-sse.S	2010-09-02 00:24:47.063922107 +0000
@@ -1319,6 +1319,15 @@ DES_bs_crypt_LM_loop:
  * work on Solaris (unless the use of GNU assembler is forced).
  */
 
+/* Sun's assembler can't multiply, but at least it can add... */
+#define MUL_4(x) (x+x+x+x)
+#define MUL_16(x) (x+x+x+x+x+x+x+x+x+x+x+x+x+x+x+x)
+#define MUL_32(x) (MUL_16(x) + MUL_16(x))
+#define MUL_64(x) (MUL_32(x) + MUL_32(x))
+#define MUL_128(x) (MUL_64(x) + MUL_64(x))
+#define MUL_256(x) (MUL_128(x) + MUL_128(x))
+#define MUL_512(x) (MUL_256(x) + MUL_256(x))
+
 #ifdef UNDERSCORES
 #define nt_crypt_all_sse2 _nt_crypt_all_sse2
 #define nt_buffer1x _nt_buffer1x
@@ -1387,8 +1396,8 @@ const_stage3:
 #define Q3 $0x6ed9eba1
 
 #define STEP1(aa, bb, cc, dd, aa3, bb3, cc3, dd3, x, s, base) \
-	paddd (256*base)+(x*16)+nt_buffer4x, aa; \
-	addl (64*base)+(x*4)+nt_buffer1x, aa3; \
+	paddd MUL_256(base)+MUL_16(x)+nt_buffer4x, aa; \
+	addl MUL_64(base)+MUL_4(x)+nt_buffer1x, aa3; \
 	movdqa cc, t1; \
 	movl cc3, t13; \
 	pxor dd, t1; \
@@ -1406,8 +1415,8 @@ const_stage3:
 	por t2, aa;
 
 #define STEP2(aa, bb, cc, dd, aa3, bb3, cc3, dd3, x, s, base) \
-	paddd (256*base)+(x*16)+nt_buffer4x, aa; \
-	addl (64*base)+(x*4)+nt_buffer1x, aa3; \
+	paddd MUL_256(base)+MUL_16(x)+nt_buffer4x, aa; \
+	addl MUL_64(base)+MUL_4(x)+nt_buffer1x, aa3; \
 	movdqa cc, t1; \
 	movl cc3, t13; \
 	movdqa cc, t2; \
@@ -1431,8 +1440,8 @@ const_stage3:
 	por t1, aa;
 
 #define STEP3(aa, bb, cc, dd, aa3, bb3, cc3, dd3, x, s, base) \
-	paddd (256*base)+(x*16)+nt_buffer4x, aa; \
-	addl (64*base)+(x*4)+nt_buffer1x, aa3; \
+	paddd MUL_256(base)+MUL_16(x)+nt_buffer4x, aa; \
+	addl MUL_64(base)+MUL_4(x)+nt_buffer1x, aa3; \
 	movdqa dd, t1; \
 	movl dd3, t13; \
 	pxor cc, t1; \
@@ -1459,8 +1468,8 @@ const_stage3:
 	movdqa const_init_d, d; \
 	movl const_init_d, d3; \
 	\
-	paddd (256*base)+nt_buffer4x, a; \
-	addl (64*base)+nt_buffer1x, a3; \
+	paddd MUL_256(base)+nt_buffer4x, a; \
+	addl MUL_64(base)+nt_buffer1x, a3; \
 	pslld $3, a; \
 	roll $3, a3; \
 	\
@@ -1510,8 +1519,8 @@ const_stage3:
 	STEP3(c, d, a, b, c3, d3, a3, b3, 5 , 11, base) \
 	movdqa a, t1; \
 	movl a3, t13; \
-	paddd (256*base)+208+nt_buffer4x, b; \
-	addl (64*base)+52+nt_buffer1x, b3; \
+	paddd MUL_256(base)+208+nt_buffer4x, b; \
+	addl MUL_64(base)+52+nt_buffer1x, b3; \
 	pxor d, t1; \
 	xorl d3,t13; \
 	pxor c, t1; \
@@ -1519,14 +1528,14 @@ const_stage3:
 	paddd t1, b; \
 	addl t13,b3; \
 	\
-	movdqa a, (64*base)+output4x; \
-	movl a3, (16*base)+output1x; \
-	movdqa b, (64*base)+16+output4x; \
-	movl b3, (16*base)+4+output1x; \
-	movdqa c, (64*base)+32+output4x; \
-	movl c3, (16*base)+8+output1x; \
-	movdqa d, (64*base)+48+output4x; \
-	movl d3, (16*base)+12+output1x;
+	movdqa a, MUL_64(base)+output4x; \
+	movl a3, MUL_16(base)+output1x; \
+	movdqa b, MUL_64(base)+16+output4x; \
+	movl b3, MUL_16(base)+4+output1x; \
+	movdqa c, MUL_64(base)+32+output4x; \
+	movl c3, MUL_16(base)+8+output1x; \
+	movdqa d, MUL_64(base)+48+output4x; \
+	movl d3, MUL_16(base)+12+output1x;
 
 .text
 