|
|
Message-ID: <CAKGDhHU97_1cn3BBLvrN5reV5AdjHnNYYUntK0oCg=-pTaMk0g@mail.gmail.com>
Date: Sat, 22 Aug 2015 10:42:42 +0200
From: Agnieszka Bielec <bielecagnieszka8@...il.com>
To: john-dev@...ts.openwall.com
Subject: Re: PHC: Argon2 on GPU
2015-08-22 3:48 GMT+02:00 Solar Designer <solar@...nwall.com>:
> On Fri, Aug 21, 2015 at 05:40:42PM +0200, Agnieszka Bielec wrote:
>> 2015-08-20 22:34 GMT+02:00 Solar Designer <solar@...nwall.com>:
>> > You could start by experimenting with a much simpler than Argon2 yet in
>> > some ways similar kernel: implement some trivial operation like XOR on
>> > different vector widths and see whether/how this changes the assembly.
>> > Then make it slightly less trivial (just enough to prevent the compiler
>> > from optimizing things out) and add uses of private or local memory,
>> > and see if you can make it run faster by using wider vectors per the
>> > same private or local memory usage.
>>
>> I tested (only 960m)
>> -copying memory from __private to __private
>> - from __global to __private
>> -xoring private tables with __private tables
>>
>> using ulong, ulong2, ulong4, ulong8 (I was getting empty kernel using ulong16)
>
> Great. Where is the OpenCL code for these? I'd like to take a look at
> what exactly you were testing.
xoring:
/*
 * Benchmark configuration.
 * MOD  - divisor applied to 1024 to get the per-work-item element count
 *        (buffers below are declared as type[1024/MOD]).
 * type - element type of the buffers. With ulong4 and MOD 4 each buffer
 *        holds 1024 ulong values in total; change both together when
 *        testing another vector width.
 */
#define MOD 4
#define type ulong4
/*
 * Intentionally empty helper that the kernels call after each pass over
 * a buffer, passing that buffer as `table`.
 */
void func(type * table)
{ // Intended to stop the compiler from optimizing the surrounding loops away.
  // NOTE(review): an empty function visible to the compiler may be inlined
  // and removed entirely - confirm against the generated assembly.
}
/*
 * Micro-benchmark: XOR one private buffer with another.
 *
 * Each work-item takes its own slice of 1024/MOD elements of S, loads it
 * into two private arrays (copy1 reversed, copy2 in order) and then runs
 * 1000 rounds of two XOR passes over them. The first 64-bit word of
 * copy1 is finally stored (truncated to char) in out[gid] so that the
 * work has an observable result.
 *
 * in, index and salt are unused; they are kept so the host code that
 * sets the kernel arguments does not need to change.
 *
 * Fix: the reversed accesses used index 1024/MOD-j, which for j == 0 is
 * one element past the end of the slice / private array and never
 * touches element 0. They now use 1024/MOD-1-j.
 */
__kernel void pomelo_crypt_kernel(__global const uchar * in,
    __global const uint * index,
    __global char *out,
    __global struct pomelo_salt *salt, __global type *S)
{
    int i, j;
    uint gid;

    gid = get_global_id(0);
    /* Advance to this work-item's slice of S. */
    S += gid*1024/MOD;

    type copy1[1024/MOD];
    type copy2[1024/MOD];

    /* copy1 <- slice in reverse order (last valid index is 1024/MOD-1). */
    for (j = 0; j < 1024/MOD; j++) {
        copy1[j] = S[1024/MOD - 1 - j];
    }
    /* copy2 <- slice in forward order. */
    for (j = 0; j < 1024/MOD; j++) {
        copy2[j] = S[j];
    }

    for (i = 0; i < 1000; i++) {
        /* Pass 1: XOR with copy2 traversed backwards. */
        for (j = 0; j < 1024/MOD; j++) {
            copy1[j] ^= copy2[1024/MOD - 1 - j];
        }
        func(copy1);

        /* Pass 2: XOR with copy2 traversed forwards. */
        for (j = 0; j < 1024/MOD; j++) {
            copy1[j] ^= copy2[j];
        }
        func(copy1);
    }

    /* Publish one word so the loops above are not dead code. */
    out[gid] = ((ulong*)copy1)[0];
}
copying from global to private:
/*
 * Micro-benchmark: copy from __global memory into a private buffer.
 *
 * Each work-item takes its own slice of 1024/MOD elements of S and, for
 * 1000 rounds, copies it into the private array copy1 twice: once in
 * reverse order and once in forward order. The first 64-bit word of
 * copy1 is finally stored (truncated to char) in out[gid] so that the
 * work has an observable result.
 *
 * in, index and salt are unused; they are kept so the host code that
 * sets the kernel arguments does not need to change.
 *
 * Fixes:
 *  - the reversed read used S[1024/MOD-j], which for j == 0 is one
 *    element past the end of this work-item's slice and never reads
 *    element 0; it now uses 1024/MOD-1-j.
 *  - the second private array was declared but never used and has been
 *    removed.
 */
__kernel void pomelo_crypt_kernel(__global const uchar * in,
    __global const uint * index,
    __global char *out,
    __global struct pomelo_salt *salt, __global type *S)
{
    int i, j;
    uint gid;

    gid = get_global_id(0);
    /* Advance to this work-item's slice of S. */
    S += gid*1024/MOD;

    type copy1[1024/MOD];

    for (i = 0; i < 1000; i++) {
        /* Reverse-order load (last valid index is 1024/MOD-1). */
        for (j = 0; j < 1024/MOD; j++) {
            copy1[j] = S[1024/MOD - 1 - j];
        }
        func(copy1);

        /* Forward-order load. */
        for (j = 0; j < 1024/MOD; j++) {
            copy1[j] = S[j];
        }
        func(copy1);
    }

    /* Publish one word so the loops above are not dead code. */
    out[gid] = ((ulong*)copy1)[0];
}
copying from private to private. I didn't have defines yet:
/*
 * Micro-benchmark: copy between two private buffers (ulong2 elements).
 *
 * Each work-item loads its own slice of 1024/2 ulong2 elements of S into
 * the private array copy1, then for 1000 rounds copies copy1 into copy2
 * in reverse order and copy2 back into copy1 in forward order.
 *
 * in, index and salt are unused; they are kept so the host code that
 * sets the kernel arguments does not need to change.
 *
 * Fixes:
 *  - the reversed read used copy1[1024/2-j], which for j == 0 is one
 *    element past the end of the private array and never reads
 *    element 0; it now uses 1024/2-1-j.
 *  - nothing was ever written to global memory, so the compiler was free
 *    to drop the whole body. The first 64-bit word of copy1 is now
 *    stored (truncated to char) in out[gid], as the sibling benchmark
 *    kernels do.
 */
__kernel void pomelo_crypt_kernel(__global const uchar * in,
    __global const uint * index,
    __global char *out,
    __global struct pomelo_salt *salt, __global ulong2 *S)
{
    int i, j;
    uint gid;

    gid = get_global_id(0);
    /* Advance to this work-item's slice of S. */
    S += gid*1024/2;

    ulong2 copy1[1024/2];
    ulong2 copy2[1024/2];

    /* Initial load from global memory. */
    for (i = 0; i < 1024/2; i++) {
        copy1[i] = S[i];
    }

    for (i = 0; i < 1000; i++) {
        /* copy2 <- copy1 reversed (last valid index is 1024/2-1). */
        for (j = 0; j < 1024/2; j++) {
            copy2[j] = copy1[1024/2 - 1 - j];
        }
        func(copy2);

        /* copy1 <- copy2 in order. */
        for (j = 0; j < 1024/2; j++) {
            copy1[j] = copy2[j];
        }
        func(copy1);
    }

    /* Publish one word so the loops above are not dead code. */
    out[gid] = ((ulong*)copy1)[0];
}
Powered by blists - more mailing lists
Confused about mailing lists and their use? Read about mailing lists on Wikipedia and check out these guidelines on proper formatting of your messages.