diff -urpN magnum-jumbo/doc/README-CUDA magnum-jumbo_sha256speedpatch//doc/README-CUDA --- magnum-jumbo/doc/README-CUDA 2012-07-09 03:34:59.113181538 +0000 +++ magnum-jumbo_sha256speedpatch//doc/README-CUDA 2012-07-10 02:57:40.395181700 +0000 @@ -22,8 +22,12 @@ Performance issues: For XSHA512[2]: CARD NAME BLOCKS THREADS SM RESULT GTX570 1600 256 ?? 67385K c/s + For RAWSHA256[1]: + CARD NAME BLOCKS THREADS SM RESULT + GTX570 7680 128 10 27561K c/s You can contact us at [1] lukas[dot]odzioba[at]gmail[dot]com [2] qqlddg[at]gmail[dot]com or john-dev mailing list +or irc #openwall@freenode diff -urpN magnum-jumbo/src/cuda/cuda_common.cu magnum-jumbo_sha256speedpatch//src/cuda/cuda_common.cu --- magnum-jumbo/src/cuda/cuda_common.cu 2012-07-09 03:34:59.133181927 +0000 +++ magnum-jumbo_sha256speedpatch//src/cuda/cuda_common.cu 2012-07-10 00:30:21.097181982 +0000 @@ -1,5 +1,5 @@ /* -* This software is Copyright (c) 2012 Lukas Odzioba +* This software is Copyright (c) 2011-2012 Lukas Odzioba * and it is hereby released to the general public under the following terms: * Redistribution and use in source and binary forms, with or without modification, are permitted. */ @@ -32,7 +32,7 @@ static char *human_format(size_t size) } assert(prefid <= 3); static char ret[32]; - sprintf(ret, "%zd.%zd %cB", size, (size%1024)/100, pref[prefid]); + sprintf(ret, "%zd.%zd %cB", size, (size % 1024) / 100, pref[prefid]); return ret; } @@ -76,9 +76,9 @@ void cuda_device_list() printf("\tNumber of multiprocessors: %d\n", devProp.multiProcessorCount); printf("\tClock rate: %d Mhz\n", - devProp.clockRate/1024); + devProp.clockRate / 1024); printf("\tTotal global memory: %s%s\n", - human_format(devProp.totalGlobalMem+200000000), + human_format(devProp.totalGlobalMem + 200000000), devProp.ECCEnabled ? " (ECC)" : ""); printf("\tTotal shared memory per block: %s\n", human_format(devProp.sharedMemPerBlock)); @@ -94,4 +94,28 @@ void cuda_device_list() } } +extern "C" +void *cuda_pageLockedMalloc(void *w, unsigned int size) +{ + HANDLE_ERROR(cudaHostAlloc((void **) &w, size, cudaHostAllocDefault)); + return w; +} + +extern "C" +void cuda_pageLockedFree(void *w) +{ + HANDLE_ERROR(cudaFreeHost(w)); +} + +/* cuda init must be called first to set device */ +extern "C" +int cuda_getAsyncEngineCount() +{ + cudaDeviceProp prop; + int dev; + cudaGetDevice(&dev); + cudaGetDeviceProperties(&prop,dev); + return prop.asyncEngineCount; + //if CUDA<4.0 we should use prop.overlapSupported +} #endif diff -urpN magnum-jumbo/src/cuda/cuda_common.cuh magnum-jumbo_sha256speedpatch//src/cuda/cuda_common.cuh --- magnum-jumbo/src/cuda/cuda_common.cuh 2012-07-09 03:34:59.133181927 +0000 +++ magnum-jumbo_sha256speedpatch//src/cuda/cuda_common.cuh 2012-07-10 00:11:15.493181411 +0000 @@ -1,5 +1,5 @@ /* -* This software is Copyright (c) 2011 Lukas Odzioba +* This software is Copyright (c) 2011-2012 Lukas Odzioba * and it is hereby released to the general public under the following terms: * Redistribution and use in source and binary forms, with or without modification, are permitted. */ @@ -15,4 +15,12 @@ extern "C" void cuda_init(unsigned int gpu_id); extern "C" void cuda_device_list(); -#endif \ No newline at end of file +extern "C" +void *cuda_pageLockedMalloc(void *w,unsigned int size); +extern "C" +void cuda_pageLockedFree(void *w); +extern "C" +int cuda_getAsyncEngineCount(); + +#endif + diff -urpN magnum-jumbo/src/cuda/rawsha256.cu magnum-jumbo_sha256speedpatch//src/cuda/rawsha256.cu --- magnum-jumbo/src/cuda/rawsha256.cu 2012-07-09 03:34:59.133181927 +0000 +++ magnum-jumbo_sha256speedpatch//src/cuda/rawsha256.cu 2012-07-10 02:51:24.527181841 +0000 @@ -1,85 +1,229 @@ -/** -This file is shared by cuda-rawsha224 and cuda-rawsha256 formats, -SHA256 definition is used to distinguish between them. +/* +* This software is Copyright (c) 2011-2012 Lukas Odzioba +* and it is hereby released to the general public under the following terms: +* Redistribution and use in source and binary forms, with or without modification, are permitted. +* This file is shared by raw-sha224-cuda and raw-sha256-cuda formats, +* SHA256 definition is used to distinguish between them. */ #include #include #include #include +#include "cuda_common.cuh" #include "../cuda_rawsha256.h" -static void cuda_rawsha256(sha256_password *,void *); +static void cuda_rawsha256(sha256_password *, void *, int); #ifdef SHA256 - #define SHA_HASH sha256_hash - __constant__ const uint32_t H[]={ - 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19}; - extern "C" void gpu_rawsha256(sha256_password *i,SHA_HASH*o){cuda_rawsha256(i,o);} +#define SHA_HASH sha256_hash +__constant__ const uint32_t H[] = { + 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, + 0x1f83d9ab, 0x5be0cd19 +}; + +extern "C" void gpu_rawsha256(sha256_password * i, SHA_HASH * o, int lap) +{ + cuda_rawsha256(i, o, lap); +} #endif #ifdef SHA224 - #define SHA_HASH sha224_hash - __constant__ const uint32_t H[]={ - 0xc1059ed8, 0x367cd507, 0x3070dd17, 0xf70e5939, 0xffc00b31, 0x68581511, 0x64f98fa7, 0xbefa4fa4}; - extern "C" void gpu_rawsha224(sha256_password *i,SHA_HASH*o){cuda_rawsha256(i,o);} +#define SHA_HASH sha224_hash +__constant__ const uint32_t H[] = { + 0xc1059ed8, 0x367cd507, 0x3070dd17, 0xf70e5939, 0xffc00b31, 0x68581511, + 0x64f98fa7, 0xbefa4fa4 +}; + +extern "C" void gpu_rawsha224(sha256_password * i, SHA_HASH * o, int lap) +{ + cuda_rawsha256(i, o, lap); +} #endif -const uint32_t DATA_IN_SIZE=KEYS_PER_CRYPT*sizeof(sha256_password); -const uint32_t DATA_OUT_SIZE=KEYS_PER_CRYPT*sizeof(SHA_HASH); +const uint32_t DATA_IN_SIZE = KEYS_PER_CRYPT * sizeof(sha256_password); +const uint32_t DATA_OUT_SIZE = KEYS_PER_CRYPT * sizeof(SHA_HASH); + +static sha256_password *cuda_data = NULL; ///candidates +static SHA_HASH *cuda_data_out = NULL; ///sha256(candidate) or sha224(candidate) + +static cudaStream_t stream0, stream1, stream2; ///streams for async cuda calls -static sha256_password *cuda_data=NULL; ///candidates -static SHA_HASH *cuda_data_out=NULL; ///sha256(candidate) or sha224(candidate) +static sha256_password *cuda_data0 = NULL; ///candidates +static sha256_password *cuda_data1 = NULL; ///candidates +static sha256_password *cuda_data2 = NULL; ///candidates -__global__ void kernel_sha256(sha256_password *data,SHA_HASH *data_out); -static void cuda_rawsha256(sha256_password *host_in,void *out) +static SHA_HASH *cuda_data_out0 = NULL; ///sha256(candidates) +static SHA_HASH *cuda_data_out1 = NULL; ///sha256(candidates) +static SHA_HASH *cuda_data_out2 = NULL; ///sha256(candidates) + +__global__ void kernel_sha256(sha256_password * data, SHA_HASH * data_out); +static void cuda_rawsha256(sha256_password * host_in, void *out, int overlap) { - SHA_HASH* host_out = (SHA_HASH *)out; - ///Aloc memory and copy data to gpu - cudaMalloc(&cuda_data,DATA_IN_SIZE); - cudaMalloc(&cuda_data_out,DATA_OUT_SIZE); - cudaMemcpy(cuda_data,host_in,DATA_IN_SIZE,cudaMemcpyHostToDevice); - ///Run kernel and wait for execution end - kernel_sha256<<>>(cuda_data,cuda_data_out); - cudaThreadSynchronize(); - ///Free memory and copy results back - cudaMemcpy(host_out,cuda_data_out,DATA_OUT_SIZE,cudaMemcpyDeviceToHost); - cudaFree(cuda_data); - cudaFree(cuda_data_out); + if (overlap) { + HANDLE_ERROR(cudaMalloc(&cuda_data0, DATA_IN_SIZE / 3)); + HANDLE_ERROR(cudaMalloc(&cuda_data1, DATA_IN_SIZE / 3)); + HANDLE_ERROR(cudaMalloc(&cuda_data2, DATA_IN_SIZE / 3)); + HANDLE_ERROR(cudaMalloc(&cuda_data_out0, DATA_OUT_SIZE / 3)); + HANDLE_ERROR(cudaMalloc(&cuda_data_out1, DATA_OUT_SIZE / 3)); + HANDLE_ERROR(cudaMalloc(&cuda_data_out2, DATA_OUT_SIZE / 3)); + + HANDLE_ERROR(cudaStreamCreate(&stream0)); + HANDLE_ERROR(cudaStreamCreate(&stream1)); + HANDLE_ERROR(cudaStreamCreate(&stream2)); + + dim3 dimGrid(BLOCKS / 3); + dim3 dimBlock(THREADS); + + HANDLE_ERROR(cudaMemcpyAsync(cuda_data0, host_in, + DATA_IN_SIZE / 3, cudaMemcpyHostToDevice, stream0)); + kernel_sha256 <<< dimGrid, dimBlock, 0, + stream0 >>> (cuda_data0, cuda_data_out0); + + HANDLE_ERROR(cudaMemcpyAsync(cuda_data1, + host_in + KEYS_PER_CRYPT / 3, DATA_IN_SIZE / 3, + cudaMemcpyHostToDevice, stream1)); + kernel_sha256 <<< dimGrid, dimBlock, 0, + stream1 >>> (cuda_data1, cuda_data_out1); + + cudaMemcpyAsync(cuda_data2, host_in + 2 * KEYS_PER_CRYPT / 3, + DATA_IN_SIZE / 3, cudaMemcpyHostToDevice, stream2); + kernel_sha256 <<< dimGrid, dimBlock, 0, + stream2 >>> (cuda_data2, cuda_data_out2); + + HANDLE_ERROR(cudaMemcpyAsync((SHA_HASH *) out, cuda_data_out0, + DATA_OUT_SIZE / 3, cudaMemcpyDeviceToHost, stream0)); + HANDLE_ERROR(cudaMemcpyAsync((SHA_HASH *) out + + KEYS_PER_CRYPT / 3, cuda_data_out1, DATA_OUT_SIZE / 3, + cudaMemcpyDeviceToHost, stream1)); + HANDLE_ERROR(cudaMemcpyAsync((SHA_HASH *) out + + 2 * KEYS_PER_CRYPT / 3, cuda_data_out2, + DATA_OUT_SIZE / 3, cudaMemcpyDeviceToHost, stream2)); + + HANDLE_ERROR(cudaStreamSynchronize(stream0)); + HANDLE_ERROR(cudaStreamSynchronize(stream1)); + HANDLE_ERROR(cudaStreamSynchronize(stream2)); + + cudaStreamDestroy(stream0); + cudaStreamDestroy(stream1); + cudaStreamDestroy(stream2); + cudaFree(cuda_data0); + cudaFree(cuda_data1); + cudaFree(cuda_data2); + cudaFree(cuda_data_out0); + cudaFree(cuda_data_out1); + cudaFree(cuda_data_out2); + + } else { + SHA_HASH *host_out = (SHA_HASH *) out; + cudaMalloc(&cuda_data, DATA_IN_SIZE); + cudaMalloc(&cuda_data_out, DATA_OUT_SIZE); + cudaMemcpy(cuda_data, host_in, DATA_IN_SIZE, + cudaMemcpyHostToDevice); + + kernel_sha256 <<< BLOCKS, THREADS >>> (cuda_data, + cuda_data_out); + cudaThreadSynchronize(); + + cudaMemcpy(host_out, cuda_data_out, DATA_OUT_SIZE, + cudaMemcpyDeviceToHost); + cudaFree(cuda_data); + cudaFree(cuda_data_out); + } } - -__global__ void kernel_sha256(sha256_password *data,SHA_HASH* data_out){ /// todo - use shared memory - uint32_t idx = blockIdx.x*blockDim.x + threadIdx.x; - const uint32_t k[]={ - 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, - 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, - 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, - 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, - 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, - 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, - 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, - 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2}; - uint32_t w[64]={0}; - SHA_HASH* out=&data_out[idx]; - - #pragma unroll 64 - for(uint32_t j=0;j<64;j++){ - if(j<16) w[j]=data[idx].v[j]; - else w[j]=sigma1(w[j-2])+w[j-7]+sigma0(w[j-15])+w[j-16]; - } - - uint32_t a=H[0];uint32_t b=H[1];uint32_t c=H[2];uint32_t d=H[3]; - uint32_t e=H[4];uint32_t f=H[5];uint32_t g=H[6];uint32_t h=H[7]; - #pragma unroll 64 - for(uint32_t j=0;j<64;j++){ - uint32_t t1=h+Sigma1(e)+Ch(e,f,g)+k[j]+w[j]; - uint32_t t2=Sigma0(a)+Maj(a,b,c); - h=g;g=f;f=e;e=d+t1;d=c;c=b;b=a;a=t1+t2; - } - out->v[0]=a+H[0];out->v[1]=b+H[1]; - out->v[2]=c+H[2];out->v[3]=d+H[3]; - out->v[4]=e+H[4];out->v[5]=f+H[5]; - out->v[6]=g+H[6]; - #ifdef SHA256 - out->v[7]=h+H[7]; - #endif -} \ No newline at end of file +__constant__ uint32_t k[] = { + 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, + 0x923f82a4, 0xab1c5ed5, + 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, + 0x9bdc06a7, 0xc19bf174, + 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, + 0x5cb0a9dc, 0x76f988da, + 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, + 0x06ca6351, 0x14292967, + 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, + 0x81c2c92e, 0x92722c85, + 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, + 0xf40e3585, 0x106aa070, + 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, + 0x5b9cca4f, 0x682e6ff3, + 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, + 0xbef9a3f7, 0xc67178f2 +}; + + /* highly unoptimal kernel */ +__global__ void kernel_sha256(sha256_password * data, SHA_HASH * data_out) +{ + uint32_t idx = blockIdx.x * blockDim.x + threadIdx.x; + + uint32_t w[64];//this should be limited do 16 uints + SHA_HASH *out = &data_out[idx]; + sha256_password *in = &data[idx]; + char dl = in->length; + unsigned char *key = in->v; + int j; + for (j = 0; j < 15; j++) + w[j] = 0; + for (j = 0; j < dl; j++) { + uint32_t tmp = 0; + tmp |= (((uint32_t) key[j]) << ((3 - (j & 0x3)) << 3)); + w[j / 4] |= tmp; + } + w[dl / 4] |= (((uint32_t) 0x80) << ((3 - (dl & 0x3)) << 3)); + w[15] = 0x00000000 | (dl * 8); + + + w[16] = sigma0(w[1]) + w[0]; + w[17] = sigma1(w[15]) + sigma0(w[2]) + w[1]; + w[18] = sigma1(w[16]) + sigma0(w[3]) + w[2]; + w[19] = sigma1(w[17]) + sigma0(w[4]) + w[3]; + w[20] = sigma1(w[18]) + sigma0(w[5]) + w[4]; + w[21] = sigma1(w[19]) + w[5]; + w[22] = sigma1(w[20]) + w[15]; + w[23] = sigma1(w[21]) + w[16]; + w[24] = sigma1(w[22]) + w[17]; + w[25] = sigma1(w[23]) + w[18]; + w[26] = sigma1(w[24]) + w[19]; + w[27] = sigma1(w[25]) + w[20]; + w[28] = sigma1(w[26]) + w[21]; + w[29] = sigma1(w[27]) + w[22]; + w[30] = sigma1(w[28]) + w[23] + sigma0(w[15]); + w[31] = sigma1(w[29]) + w[24] + sigma0(w[16]) + w[15]; + +#pragma unroll 32 + for (uint32_t j = 32; j < 64; j++) { + w[j] = + sigma1(w[j - 2]) + w[j - 7] + sigma0(w[j - 15]) + w[j - + 16]; + } + + uint32_t a = H[0]; + uint32_t b = H[1]; + uint32_t c = H[2]; + uint32_t d = H[3]; + uint32_t e = H[4]; + uint32_t f = H[5]; + uint32_t g = H[6]; + uint32_t h = H[7]; +#pragma unroll 64 + for (uint32_t j = 0; j < 64; j++) { + uint32_t t1 = h + Sigma1(e) + Ch(e, f, g) + k[j] + w[j]; + uint32_t t2 = Sigma0(a) + Maj(a, b, c); + h = g; + g = f; + f = e; + e = d + t1; + d = c; + c = b; + b = a; + a = t1 + t2; + } + out->v[0] = a + H[0]; + out->v[1] = b + H[1]; + out->v[2] = c + H[2]; + out->v[3] = d + H[3]; + out->v[4] = e + H[4]; + out->v[5] = f + H[5]; + out->v[6] = g + H[6]; +#ifdef SHA256 + out->v[7] = h + H[7]; +#endif +} diff -urpN magnum-jumbo/src/cuda_rawsha256.h magnum-jumbo_sha256speedpatch//src/cuda_rawsha256.h --- magnum-jumbo/src/cuda_rawsha256.h 2012-07-09 03:34:59.135181841 +0000 +++ magnum-jumbo_sha256speedpatch//src/cuda_rawsha256.h 2012-07-10 02:25:52.917240709 +0000 @@ -1,5 +1,8 @@ -/** -This file is shared by cuda-rawsha224 and cuda-rawsha256 formats +/* +* This software is Copyright (c) 2011-2012 Lukas Odzioba +* and it is hereby released to the general public under the following terms: +* Redistribution and use in source and binary forms, with or without modification, are permitted. +* This file is shared by raw-sha224-cuda and raw-sha256-cuda formats */ #ifndef _SHA256_H #define _SHA256_H @@ -10,18 +13,19 @@ This file is shared by cuda-rawsha224 an #define rol(x,n) ((x << n) | (x >> (32-n))) #define ror(x,n) ((x >> n) | (x << (32-n))) -#define Ch(x,y,z) ((x & y) ^ ( (~x) & z)) -#define Maj(x,y,z) ((x & y) ^ (x & z) ^ (y & z)) +#define Ch(x,y,z) ( z ^ (x & ( y ^ z)) ) +#define Maj(x,y,z) ( (x & y) | (z & (x | y)) ) #define Sigma0(x) ((ror(x,2)) ^ (ror(x,13)) ^ (ror(x,22))) #define Sigma1(x) ((ror(x,6)) ^ (ror(x,11)) ^ (ror(x,25))) #define sigma0(x) ((ror(x,7)) ^ (ror(x,18)) ^(x>>3)) #define sigma1(x) ((ror(x,17)) ^ (ror(x,19)) ^(x>>10)) #define THREADS 128 -#define BLOCKS 256 +#define BLOCKS 256*30 /* it must be something divisible by 3 */ #define KEYS_PER_CRYPT THREADS*BLOCKS typedef struct{ - uint32_t v[16]; ///512bits + unsigned char v[19]; + unsigned char length; }sha256_password; typedef struct{ diff -urpN magnum-jumbo/src/cuda_rawsha256_fmt.c magnum-jumbo_sha256speedpatch//src/cuda_rawsha256_fmt.c --- magnum-jumbo/src/cuda_rawsha256_fmt.c 2012-07-09 03:34:59.135181841 +0000 +++ magnum-jumbo_sha256speedpatch//src/cuda_rawsha256_fmt.c 2012-07-10 03:41:52.929557759 +0000 @@ -1,8 +1,8 @@ /* -* This software is Copyright (c) 2011 Lukas Odzioba +* This software is Copyright (c) 2011-2012 Lukas Odzioba * and it is hereby released to the general public under the following terms: * Redistribution and use in source and binary forms, with or without modification, are permitted. -* This file is shared by cuda-rawsha224 and cuda-rawsha256 formats, +* This file is shared by raw-sha224-cuda and raw-sha256-cuda formats, * SHA256 definition is used to distinguish between them. */ #include @@ -14,246 +14,295 @@ #include "cuda_rawsha256.h" #define BENCHMARK_COMMENT "" -#define BENCHMARK_LENGTH -1 /// Raw benchmark -#define PLAINTEXT_LENGTH 54 +#define BENCHMARK_LENGTH -1 /// Raw benchmark +#define PLAINTEXT_LENGTH 19 #define SALT_SIZE 0 #define MIN_KEYS_PER_CRYPT KEYS_PER_CRYPT #define MAX_KEYS_PER_CRYPT KEYS_PER_CRYPT #ifdef SHA256 - #define FORMAT_LABEL "raw-sha256-cuda" - #define FORMAT_NAME "Raw SHA-256" - #define CIPHERTEXT_LENGTH 64 ///256bit - #define BINARY_SIZE 32 - #define SHA_HASH sha256_hash - #define TESTS sha256_tests - #define FMT_MAIN fmt_cuda_rawsha256 - static struct fmt_tests sha256_tests[]={ - {"a49c2c9d0c006c8cb55a9a7a38822b83e0cd442614cb416af952fa50156761dc","openwall"}, - {NULL} - }; +#define FORMAT_LABEL "raw-sha256-cuda" +#define FORMAT_NAME "Raw SHA-256" +#define CIPHERTEXT_LENGTH 64 ///256bit +#define BINARY_SIZE 32 +#define SHA_HASH sha256_hash +#define TESTS sha256_tests +#define FMT_MAIN fmt_cuda_rawsha256 +static struct fmt_tests sha256_tests[] = { + {"a49c2c9d0c006c8cb55a9a7a38822b83e0cd442614cb416af952fa50156761dc", + "openwall"}, + {NULL} +}; #endif #ifdef SHA224 - #define FORMAT_LABEL "raw-sha224-cuda" - #define FORMAT_NAME "Raw SHA-224" - #define CIPHERTEXT_LENGTH 56 ///224bit - #define BINARY_SIZE 32 - #define SHA_HASH sha224_hash - #define TESTS sha224_tests - #define FMT_MAIN fmt_cuda_rawsha224 - static struct fmt_tests sha224_tests[]={ - {"d6d8ff02342ea04cf65f8ab446b22c4064984c29fe86f858360d0319","openwall"}, - {NULL} - }; +#define FORMAT_LABEL "raw-sha224-cuda" +#define FORMAT_NAME "Raw SHA-224" +#define CIPHERTEXT_LENGTH 56 ///224bit +#define BINARY_SIZE 32 +#define SHA_HASH sha224_hash +#define TESTS sha224_tests +#define FMT_MAIN fmt_cuda_rawsha224 +static struct fmt_tests sha224_tests[] = { + {"d6d8ff02342ea04cf65f8ab446b22c4064984c29fe86f858360d0319", + "openwall"}, + {NULL} +}; #endif -#define ALGORITHM_NAME "CUDA, unreliable, may miss guesses" +#define ALGORITHM_NAME "CUDA" -extern void gpu_rawsha256(sha256_password *,SHA_HASH*); -extern void gpu_rawsha224(sha256_password *,SHA_HASH*); -static char saved_keys[MAX_KEYS_PER_CRYPT][PLAINTEXT_LENGTH+1]; /** plaintext ciphertexts **/ -static sha256_password *inbuffer; /** binary ciphertexts **/ -static SHA_HASH *outbuffer; /** calculated hashes **/ - -static void preproc(char *key, int index){ /// todo - move to gpu - uint32_t dl=strlen(key),j; - uint32_t *blocks = inbuffer[index].v; - memset(inbuffer[index].v,0,sizeof(sha256_password)); - for(j=0;j 0) { + overlap = 1; + inbuffer = + cuda_pageLockedMalloc(inbuffer, + sizeof(sha256_password) * MAX_KEYS_PER_CRYPT); + outbuffer = + cuda_pageLockedMalloc(outbuffer, + sizeof(SHA_HASH) * MAX_KEYS_PER_CRYPT); + } else { + overlap = 0; + //device does not support overlaping memcpy and kernel execution + inbuffer = + (sha256_password *) malloc(sizeof(sha256_password) * + MAX_KEYS_PER_CRYPT); + outbuffer = + (SHA_HASH *) malloc(sizeof(SHA_HASH) * MAX_KEYS_PER_CRYPT); + } + check_mem_allocation(inbuffer, outbuffer); + atexit(cleanup); } -static void init(struct fmt_main *pFmt){ - //Alocate memory for hashes and passwords - inbuffer=(sha256_password*)malloc(sizeof(sha256_password)*MAX_KEYS_PER_CRYPT); - outbuffer=(SHA_HASH*)malloc(sizeof(SHA_HASH)*MAX_KEYS_PER_CRYPT); - check_mem_allocation(inbuffer,outbuffer); - atexit(cleanup); - //Initialize CUDA - cuda_init(gpu_id); -} - -static int valid(char * ciphertext,struct fmt_main *pFmt){ - int i; - if(strlen(ciphertext)!=CIPHERTEXT_LENGTH) return 0; - for(i=0;i='0' && ciphertext[i]<='9')|| - (ciphertext[i]>='a' && ciphertext[i]<='f')|| - (ciphertext[i]>='A' && ciphertext[i]<='Z'))) - return 0; - } - return 1; +static int valid(char *ciphertext, struct fmt_main *pFmt) +{ + int i; + if (strlen(ciphertext) != CIPHERTEXT_LENGTH) + return 0; + for (i = 0; i < CIPHERTEXT_LENGTH; i++) { + if (!((ciphertext[i] >= '0' && ciphertext[i] <= '9') || + (ciphertext[i] >= 'a' && ciphertext[i] <= 'f') || + (ciphertext[i] >= 'A' && ciphertext[i] <= 'Z'))) + return 0; + } + return 1; }; -static void *binary(char *ciphertext){ - static char realcipher[BINARY_SIZE]; - int i; - memset(realcipher,0,BINARY_SIZE); - for(i=0;i