|
Message-ID: <CAKGDhHU97_1cn3BBLvrN5reV5AdjHnNYYUntK0oCg=-pTaMk0g@mail.gmail.com> Date: Sat, 22 Aug 2015 10:42:42 +0200 From: Agnieszka Bielec <bielecagnieszka8@...il.com> To: john-dev@...ts.openwall.com Subject: Re: PHC: Argon2 on GPU 2015-08-22 3:48 GMT+02:00 Solar Designer <solar@...nwall.com>: > On Fri, Aug 21, 2015 at 05:40:42PM +0200, Agnieszka Bielec wrote: >> 2015-08-20 22:34 GMT+02:00 Solar Designer <solar@...nwall.com>: >> > You could start by experimenting with a much simpler than Argon2 yet in >> > some ways similar kernel: implement some trivial operation like XOR on >> > different vector widths and see whether/how this changes the assembly. >> > Then make it slightly less trivial (just enough to prevent the compiler >> > from optimizing things out) and add uses of private or local memory, >> > and see if you can make it run faster by using wider vectors per the >> > same private or local memory usage. >> >> I tested (only 960m) >> -copying memory from __private to __private >> - from __global to __private >> -xoring private tables with __prrivate tables >> >> using ulong, ulong2, ulong4, ulong8 (I was getting empty kernel using ulong16) > > Great. Where is the OpenCL code for these? I'd like to take a look at > what exactly you were testing. xoring: #define MOD 4 #define type ulong4 void func(type * table) { //prohibits optimizing code } __kernel void pomelo_crypt_kernel(__global const uchar * in, __global const uint * index, __global char *out, __global struct pomelo_salt *salt, __global type *S) { int i,j; uint gid; gid = get_global_id(0); S+=gid*1024/MOD; type copy1[1024/MOD]; type copy2[1024/MOD]; for(j=0;j<1024/MOD;j++) copy1[j]=S[1024/MOD-j]; for(j=0;j<1024/MOD;j++) copy2[j]=S[j]; for(i=0;i<1000;i++) { for(j=0;j<1024/MOD;j++) copy1[j]^=copy2[1024/MOD-j]; func(copy1); for(j=0;j<1024/MOD;j++) copy1[j]^=copy2[j]; func(copy1); } out[gid]=((ulong*)copy1)[0]; } copying from global to private: __kernel void pomelo_crypt_kernel(__global const uchar * in, __global const uint * index, __global char *out, __global struct pomelo_salt *salt, __global type *S) { int i,j; uint gid; gid = get_global_id(0); S+=gid*1024/MOD; type copy1[1024/MOD]; type copy2[1024/MOD]; for(i=0;i<1000;i++) { for(j=0;j<1024/MOD;j++) copy1[j]=S[1024/MOD-j]; func(copy1); for(j=0;j<1024/MOD;j++) copy1[j]=S[j]; func(copy1); } out[gid]=((ulong*)copy1)[0]; } copying from private to private. I didn't have defines yet: __kernel void pomelo_crypt_kernel(__global const uchar * in, __global const uint * index, __global char *out, __global struct pomelo_salt *salt, __global ulong2 *S) { int i,j; uint gid; gid = get_global_id(0); S+=gid*1024/2; ulong2 copy1[1024/2]; ulong2 copy2[1024/2]; for(i=0;i<1024/2;i++) { copy1[i]=S[i+0]; } for(i=0;i<1000;i++) { for(j=0;j<1024/2;j++) copy2[j]=copy1[1024/2-j]; func(copy2); for(j=0;j<1024/2;j++) copy1[j]=copy2[j]; func(copy1); } }
Powered by blists - more mailing lists
Confused about mailing lists and their use? Read about mailing lists on Wikipedia and check out these guidelines on proper formatting of your messages.