|
Message-ID: <CABob6ionCJV3YneH+mozys-xc87SUddXERiT9jxgCeqaSC8zTA@mail.gmail.com> Date: Thu, 3 Sep 2015 23:36:28 +0200 From: Lukas Odzioba <lukas.odzioba@...il.com> To: john-dev@...ts.openwall.com Subject: Re: md5crypt-opencl 2015-09-02 19:32 GMT+02:00 Lukas Odzioba <lukas.odzioba@...il.com>: > 2015-09-02 19:04 GMT+02:00 Solar Designer <solar@...nwall.com>: >> Oh, is it possibly because the array is two-dimensional? Like some >> heuristic: "put all arrays with greater than one dimension in global >> memory". It is probably worth trying to turn the array into >> single-dimensional and see. > > Who knows, I'll be happy to give it a try. Performace is the same with 1 dimensional array, so I suppose that's not the way to go, but the code is not slightly simpler. >From what I recall there was no way to fit all ctx's with decent LWS value. Since some ctx's are more often used than the others my idea was to move those hot to the local memory and keep the rest in global. Another loose idea was to try to "preload" next ctx to the local memory and do writeback after that, but I have no idea whether it makes sense at all with not so long computations as we have in md5. Here is my patch, but I suppose it will be easier to modify current code and we should keep it. I am affraid that gmail web interface will break white characters... >From 5e33e933d1a3226d8edf52c50390a2905ae64639 Mon Sep 17 00:00:00 2001 From: ukasz <lukas.odzioba@...il.com> Date: Thu, 3 Sep 2015 23:27:44 +0200 Subject: [PATCH] Changed md5_ctx[8] to 1-dim table --- src/opencl/cryptmd5_kernel.cl | 124 ++++++++++++++++++++---------------------- 1 file changed, 60 insertions(+), 64 deletions(-) diff --git a/src/opencl/cryptmd5_kernel.cl b/src/opencl/cryptmd5_kernel.cl index fc2b014..f387ede 100644 --- a/src/opencl/cryptmd5_kernel.cl +++ b/src/opencl/cryptmd5_kernel.cl @@ -106,10 +106,6 @@ typedef struct { uint v[4]; /** 128 bits **/ } crypt_md5_hash; -typedef struct { - uint buffer[16]; -} md5_ctx; - __constant uchar cl_md5_salt_prefix[] = "$1$"; __constant uchar cl_apr1_salt_prefix[] = "$apr1$"; __constant uchar g[] = @@ -205,52 +201,50 @@ inline void buf_update(uint * buf, uint a, uint b, uint c, uint d, uint offset) } #endif -inline void ctx_update(md5_ctx * ctx, uchar * string, uint len, +inline void ctx_update(uint *ctx_buffer, uchar * string, uint len, uint * ctx_buflen) { uint i; for (i = 0; i < len; i++) - PUTCHAR(ctx->buffer, *ctx_buflen + i, string[i]); + PUTCHAR(ctx_buffer, *ctx_buflen + i, string[i]); *ctx_buflen += len; } -inline void ctx_update_prefix(md5_ctx * ctx, uchar prefix, uint * ctx_buflen) +inline void ctx_update_prefix(uint *ctx_buffer, uchar prefix, uint * ctx_buflen) { uint i; if (prefix == '1') { for (i = 0; i < 3; i++) - PUTCHAR(ctx->buffer, *ctx_buflen + i, + PUTCHAR(ctx_buffer, *ctx_buflen + i, cl_md5_salt_prefix[i]); *ctx_buflen += 3; } else if (prefix == 'a') { for (i = 0; i < 6; i++) - PUTCHAR(ctx->buffer, *ctx_buflen + i, + PUTCHAR(ctx_buffer, *ctx_buflen + i, cl_apr1_salt_prefix[i]); *ctx_buflen += 6; } // else if (prefix == '\0') do nothing. for {smd5} } -inline void init_ctx(md5_ctx * ctx, uint * ctx_buflen) +inline void init_ctx(uint *ctx_buffer, uint * ctx_buflen) { uint i; - uint *buf = (uint *) ctx->buffer; #ifdef NVIDIA #pragma unroll 4 #endif - for (i = 0; i < sizeof(ctx->buffer) / 4; i++) - *buf++ = 0; + for (i = 0; i < 16; i++) + *ctx_buffer++ = 0; *ctx_buflen = 0; } -inline void md5_digest(md5_ctx * ctx, uint * result, uint len, +inline void md5_digest(uint *x, uint * result, uint len, uint res_offset) { - uint *x = ctx->buffer; uint a; uint b = 0xefcdab89; uint c = 0x98badcfe; @@ -351,7 +345,7 @@ __kernel void cryptmd5(__global const crypt_md5_password * inbuffer, uint pass_len = inbuffer[idx].length; uint salt_len = hsalt->saltlen; uint alt_result[4]; - md5_ctx ctx[8]; + uint ctx_buffers[8*16];//8 buffers 16 uints each uint ctx_buflen[8]; union { uint w[4]; @@ -371,28 +365,30 @@ __kernel void cryptmd5(__global const crypt_md5_password * inbuffer, salt.w[0] = ((__global uint *) & hsalt->salt)[0]; salt.w[1] = ((__global uint *) & hsalt->salt)[1]; +#define CTX(i) &ctx_buffers[i*16] + init_ctx(CTX(1), &ctx_buflen[1]); + ctx_update(CTX(1), pass.c, pass_len, &ctx_buflen[1]); + ctx_update(CTX(1), salt.c, salt_len, &ctx_buflen[1]); + ctx_update(CTX(1), pass.c, pass_len, &ctx_buflen[1]); + + PUTCHAR(CTX(1), ctx_buflen[1], 0x80); + + md5_digest(CTX(1), alt_result, ctx_buflen[1] << 3, 0); - init_ctx(&ctx[1], &ctx_buflen[1]); - ctx_update(&ctx[1], pass.c, pass_len, &ctx_buflen[1]); - ctx_update(&ctx[1], salt.c, salt_len, &ctx_buflen[1]); - ctx_update(&ctx[1], pass.c, pass_len, &ctx_buflen[1]); - PUTCHAR(ctx[1].buffer, ctx_buflen[1], 0x80); - md5_digest(&ctx[1], alt_result, ctx_buflen[1] << 3, 0); - - init_ctx(&ctx[1], &ctx_buflen[1]); - ctx_update(&ctx[1], pass.c, pass_len, &ctx_buflen[1]); - ctx_update_prefix(&ctx[1], hsalt->prefix, &ctx_buflen[1]); - ctx_update(&ctx[1], salt.c, salt_len, &ctx_buflen[1]); + init_ctx(CTX(1), &ctx_buflen[1]); + ctx_update(CTX(1), pass.c, pass_len, &ctx_buflen[1]); + ctx_update_prefix(CTX(1), hsalt->prefix, &ctx_buflen[1]); + ctx_update(CTX(1), salt.c, salt_len, &ctx_buflen[1]); #if PLAINTEXT_LENGTH >= 16 for (i = pass_len; i > 16; i -= 16) - ctx_update(&ctx[1], (uchar *) alt_result, 16, &ctx_buflen[1]); - ctx_update(&ctx[1], (uchar *) alt_result, i, &ctx_buflen[1]); + ctx_update(CTX(1), (uchar *) alt_result, 16, &ctx_buflen[1]); + ctx_update(CTX(1), (uchar *) alt_result, i, &ctx_buflen[1]); #else - ctx_update(&ctx[1], (uchar *) alt_result, pass_len, &ctx_buflen[1]); + ctx_update(CTX(1), (uchar *) alt_result, pass_len, &ctx_buflen[1]); #endif for (i = pass_len; i > 0; i >>= 1) { uchar c = (i & 1) ? 0 : pass.c[0]; - PUTCHAR(ctx[1].buffer, ctx_buflen[1], c); + PUTCHAR(CTX(1), ctx_buflen[1], c); ctx_buflen[1]++; } @@ -412,53 +408,53 @@ __kernel void cryptmd5(__global const crypt_md5_password * inbuffer, altpos[3] = altpos[1] + salt_len; //prepare pattern buffers - init_ctx(&ctx[0], &ctx_buflen[0]); - PUTCHAR(ctx[1].buffer, ctx_buflen[1], 0x80); + init_ctx(CTX(0), &ctx_buflen[0]); + PUTCHAR(CTX(1), ctx_buflen[1], 0x80); //alt pass - md5_digest(&ctx[1], ctx[0].buffer, ctx_buflen[1] << 3, 0); //add results from init + md5_digest(CTX(1), CTX(0), ctx_buflen[1] << 3, 0); //add results from init ctx_buflen[0] = 16; for (i = 1; i < 8; i++) //1 not 0 - init_ctx(&ctx[i], &ctx_buflen[i]); + init_ctx(CTX(i), &ctx_buflen[i]); - ctx_update(&ctx[0], pass.c, pass_len, &ctx_buflen[0]); - PUTCHAR(ctx[0].buffer, ctx_buflen[0], 0x80); + ctx_update(CTX(0), pass.c, pass_len, &ctx_buflen[0]); + PUTCHAR(CTX(0), ctx_buflen[0], 0x80); //alt pass pass ctx_buflen[1] = 16; - ctx_update(&ctx[1], pass.c, pass_len, &ctx_buflen[1]); - ctx_update(&ctx[1], pass.c, pass_len, &ctx_buflen[1]); - PUTCHAR(ctx[1].buffer, ctx_buflen[1], 0x80); + ctx_update(CTX(1), pass.c, pass_len, &ctx_buflen[1]); + ctx_update(CTX(1), pass.c, pass_len, &ctx_buflen[1]); + PUTCHAR(CTX(1), ctx_buflen[1], 0x80); //alt salt pass ctx_buflen[2] = 16; - ctx_update(&ctx[2], salt.c, salt_len, &ctx_buflen[2]); - ctx_update(&ctx[2], pass.c, pass_len, &ctx_buflen[2]); - PUTCHAR(ctx[2].buffer, ctx_buflen[2], 0x80); + ctx_update(CTX(2), salt.c, salt_len, &ctx_buflen[2]); + ctx_update(CTX(2), pass.c, pass_len, &ctx_buflen[2]); + PUTCHAR(CTX(2), ctx_buflen[2], 0x80); //alt salt pass pass ctx_buflen[3] = 16; - ctx_update(&ctx[3], salt.c, salt_len, &ctx_buflen[3]); - ctx_update(&ctx[3], pass.c, pass_len, &ctx_buflen[3]); - ctx_update(&ctx[3], pass.c, pass_len, &ctx_buflen[3]); - PUTCHAR(ctx[3].buffer, ctx_buflen[3], 0x80); + ctx_update(CTX(3), salt.c, salt_len, &ctx_buflen[3]); + ctx_update(CTX(3), pass.c, pass_len, &ctx_buflen[3]); + ctx_update(CTX(3), pass.c, pass_len, &ctx_buflen[3]); + PUTCHAR(CTX(3), ctx_buflen[3], 0x80); //pass alt - ctx_update(&ctx[4], pass.c, pass_len, &ctx_buflen[4]); + ctx_update(CTX(4), pass.c, pass_len, &ctx_buflen[4]); ctx_buflen[4] += 16; - PUTCHAR(ctx[4].buffer, ctx_buflen[4], 0x80); + PUTCHAR(CTX(4), ctx_buflen[4], 0x80); //pass pass alt - ctx_update(&ctx[5], pass.c, pass_len, &ctx_buflen[5]); - ctx_update(&ctx[5], pass.c, pass_len, &ctx_buflen[5]); + ctx_update(CTX(5), pass.c, pass_len, &ctx_buflen[5]); + ctx_update(CTX(5), pass.c, pass_len, &ctx_buflen[5]); ctx_buflen[5] += 16; - PUTCHAR(ctx[5].buffer, ctx_buflen[5], 0x80); + PUTCHAR(CTX(5), ctx_buflen[5], 0x80); //pass salt alt - ctx_update(&ctx[6], pass.c, pass_len, &ctx_buflen[6]); - ctx_update(&ctx[6], salt.c, salt_len, &ctx_buflen[6]); + ctx_update(CTX(6), pass.c, pass_len, &ctx_buflen[6]); + ctx_update(CTX(6), salt.c, salt_len, &ctx_buflen[6]); ctx_buflen[6] += 16; - PUTCHAR(ctx[6].buffer, ctx_buflen[6], 0x80); + PUTCHAR(CTX(6), ctx_buflen[6], 0x80); //pass salt pass alt - ctx_update(&ctx[7], pass.c, pass_len, &ctx_buflen[7]); - ctx_update(&ctx[7], salt.c, salt_len, &ctx_buflen[7]); - ctx_update(&ctx[7], pass.c, pass_len, &ctx_buflen[7]); + ctx_update(CTX(7), pass.c, pass_len, &ctx_buflen[7]); + ctx_update(CTX(7), salt.c, salt_len, &ctx_buflen[7]); + ctx_update(CTX(7), pass.c, pass_len, &ctx_buflen[7]); ctx_buflen[7] += 16; - PUTCHAR(ctx[7].buffer, ctx_buflen[7], 0x80); + PUTCHAR(CTX(7), ctx_buflen[7], 0x80); #ifdef NVIDIA #pragma unroll 8 @@ -475,22 +471,22 @@ __kernel void cryptmd5(__global const crypt_md5_password * inbuffer, for (i = 0; i < 500; i++) { #endif id2 = g[j]; - md5_digest(&ctx[id1], ctx[id2].buffer, ctx_buflen[id1], + md5_digest(CTX(id1), CTX(id2), ctx_buflen[id1], altpos[id2 - 4]); if (j == 41) j = (uint)-1; id1 = g[j + 1]; - md5_digest(&ctx[id2], ctx[id1].buffer, ctx_buflen[id2], 0); + md5_digest(CTX(id2), CTX(id1), ctx_buflen[id2], 0); #ifdef NVIDIA id2 = g[j + 2]; - md5_digest(&ctx[id1], ctx[id2].buffer, ctx_buflen[id1], + md5_digest(CTX(id1), CTX(id2), ctx_buflen[id1], altpos[id2 - 4]); if (j == 39) j = (uint)-3; id1 = g[j + 3]; j += 4; - md5_digest(&ctx[id2], ctx[id1].buffer, ctx_buflen[id2], 0); + md5_digest(CTX(id2), CTX(id1), ctx_buflen[id2], 0); #else j += 2; #endif @@ -500,5 +496,5 @@ __kernel void cryptmd5(__global const crypt_md5_password * inbuffer, #pragma unroll 4 #endif for (i = 0; i < 4; i++) - outbuffer[idx].v[i] = ctx[3].buffer[i]; + outbuffer[idx].v[i] = ctx_buffers[3*16+i]; } -- 1.9.1 Thanks, Lukas
Powered by blists - more mailing lists
Confused about mailing lists and their use? Read about mailing lists on Wikipedia and check out these guidelines on proper formatting of your messages.