diff -urpN jumbo.orig/src/opencl/pbkdf2_hmac_sha256_unsplit_kernel.cl jumbo/src/opencl/pbkdf2_hmac_sha256_unsplit_kernel.cl --- jumbo.orig/src/opencl/pbkdf2_hmac_sha256_unsplit_kernel.cl 2013-08-07 23:31:28.013756405 +0000 +++ jumbo/src/opencl/pbkdf2_hmac_sha256_unsplit_kernel.cl 2013-08-10 23:53:31.926309501 +0000 @@ -8,6 +8,7 @@ #define uint32_t unsigned int #define PLAINTEXT_LENGTH 55 +#define MIN(a,b) (((a)<(b))?(a):(b)) #if gpu_amd(DEVICE_INFO) #define Ch(x, y, z) bitselect(z, y, x) @@ -257,6 +258,13 @@ typedef struct { uint32_t rounds; /** 12000 by default **/ } salt_t; +typedef struct { + uint32_t ipad[8]; + uint32_t opad[8]; + uint32_t hash[8]; + uint32_t W[8]; + uint32_t rounds; +} state_t; inline void preproc(__global const uint8_t * key, uint32_t keylen, uint32_t * state, uint32_t padding) @@ -381,17 +389,25 @@ inline void hmac_sha256(uint32_t * outpu -inline void big_hmac_sha256(uint32_t * input, uint32_t rounds, - uint32_t * ipad_state, uint32_t * opad_state, uint32_t * tmp_out) -{ - int i, round; - uint32_t W[16]; - uint32_t A, B, C, D, E, F, G, H, t; - for (i = 0; i < 8; i++) - W[i] = input[i]; +__kernel void pbkdf2_sha256_loop(__global state_t *state, __global crack_t *out) +{ + uint idx = get_global_id(0); + uint i, round, rounds = state[idx].rounds; + uint W[16]; + uint ipad_state[8]; + uint opad_state[8]; + uint tmp_out[8]; + uint A, B, C, D, E, F, G, H, t; + + for (i = 0; i < 8; i++) { + W[i] = state[idx].W[i]; + ipad_state[i] = state[idx].ipad[i]; + opad_state[i] = state[idx].opad[i]; + tmp_out[i] = state[idx].hash[i]; + } - for (round = 1; round < rounds; round++) { + for (round = 0; round < MIN(rounds,HASH_LOOPS); round++) { A = ipad_state[0]; B = ipad_state[1]; @@ -466,33 +482,45 @@ inline void big_hmac_sha256(uint32_t * i tmp_out[7] ^= H; } - - for (i = 0; i < 8; i++) - tmp_out[i] = SWAP(tmp_out[i]); + if(rounds >= HASH_LOOPS){ // there is still work to do + state[idx].rounds = rounds - HASH_LOOPS; + for(i = 0; i < 8; i++) { + state[idx].hash[i] = tmp_out[i]; + state[idx].W[i] = W[i]; + } + } + else { // rounds == 0 - we're done + for (i = 0; i < 8; i++) + out[idx].hash[i] = SWAP(tmp_out[i]); + } } + + __kernel void pbkdf2_sha256_kernel(__global const pass_t * inbuffer, - __global const salt_t * gsalt, __global crack_t * outbuffer) + __global const salt_t * gsalt, __global state_t * state) { - uint32_t ipad_state[8]; - uint32_t opad_state[8]; - uint32_t tmp_out[8]; - uint32_t i; - uint idx = get_global_id(0); - - __global const uint8_t *pass = inbuffer[idx].v; - __global const uint8_t *salt = gsalt->salt; - uint32_t passlen = inbuffer[idx].length; - uint32_t saltlen = gsalt->length; - uint32_t rounds = gsalt->rounds; + uint ipad_state[8]; + uint opad_state[8]; + uint tmp_out[8]; + uint i, idx = get_global_id(0); + + __global const uchar *pass = inbuffer[idx].v; + __global const uchar *salt = gsalt->salt; + uint passlen = inbuffer[idx].length; + uint saltlen = gsalt->length; + state[idx].rounds = gsalt->rounds - 1; preproc(pass, passlen, ipad_state, 0x36363636); preproc(pass, passlen, opad_state, 0x5c5c5c5c); hmac_sha256(tmp_out, ipad_state, opad_state, salt, saltlen); - big_hmac_sha256(tmp_out, rounds, ipad_state, opad_state, tmp_out); - for (i = 0; i < 8; i++) - outbuffer[idx].hash[i] = tmp_out[i]; + for(i=0; i < 8; i++) { + state[idx].ipad[i] = ipad_state[i]; + state[idx].opad[i] = opad_state[i]; + state[idx].hash[i] = tmp_out[i]; + state[idx].W[i] = tmp_out[i]; + } } diff -urpN jumbo.orig/src/opencl_pbkdf2_hmac_sha256_fmt.c jumbo/src/opencl_pbkdf2_hmac_sha256_fmt.c --- jumbo.orig/src/opencl_pbkdf2_hmac_sha256_fmt.c 2013-08-07 23:31:28.017756405 +0000 +++ jumbo/src/opencl_pbkdf2_hmac_sha256_fmt.c 2013-08-11 15:36:45.437721376 +0000 @@ -5,7 +5,6 @@ * * TODO: * auto LWS/GWS setup -* split kernel */ #include #include @@ -30,7 +29,7 @@ #define BINARY_ALIGN 4 #define SALT_ALIGN 1 -#define uint8_t unsigned char +#define uint8_t unsigned char #define uint32_t unsigned int #define PLAINTEXT_LENGTH 55 @@ -40,10 +39,11 @@ #define FMT_PREFIX "$pbkdf2-sha256$" #define KERNEL_NAME "pbkdf2_sha256_kernel" +#define SPLIT_KERNEL_NAME "pbkdf2_sha256_loop" #define CONFIG_NAME "pbkdf2_sha256" #define MIN(a,b) (((a)<(b))?(a):(b)) - +#define HASH_LOOPS 500 typedef struct { uint8_t length; @@ -59,6 +59,15 @@ typedef struct { uint8_t salt[64]; uint32_t rounds; } salt_t; + +typedef struct { + uint32_t ipad[8]; + uint32_t opad[8]; + uint32_t hash[8]; + uint32_t W[8]; + uint32_t rounds; +} state_t; + /* Testcases generated by passlib, format: $pbkdf2-256$rounds$salt$checksum salt and checksum are encoded in "adapted base64" @@ -84,6 +93,7 @@ static struct fmt_tests tests[] = { {"$pbkdf2-sha256$12000$iDFmDCHE2FtrDaGUEmKMEaL0Xqv1/t/b.x.DcC6lFEI$tUdEcw3csCnsfiYbFdXH6nvbftH8rzvBDl1nABeN0nE", "salt length = 32"}, {"$pbkdf2-sha256$12000$0zoHwNgbIwSAkDImZGwNQUjpHcNYa43xPqd0DuH8H0OIUWqttfY.h5DynvPeG.O8N.Y$.XK4LNIeewI7w9QF5g9p5/NOYMYrApW03bcv/MaD6YQ", "salt length = 50"}, {"$pbkdf2-sha256$12000$HGPMeS9lTAkhROhd653Tuvc.ZyxFSOk9x5gTYgyBEAIAgND6PwfAmA$WdCipc7O/9tTgbpZvcz.mAkIDkdrebVKBUgGbncvoNw", "salt length = 40"}, + {"$pbkdf2-sha256$12001$ay2F0No7p1QKgVAqpbQ2hg$UbKdswiLpjc5wT8Zl2M6VlE2cNiKuhAUntGciP8JjPw", "test"}, {NULL} }; @@ -92,45 +102,55 @@ static pass_t *host_pass; /** pl static salt_t *host_salt; /** salt **/ static crack_t *host_crack; /** hash**/ static cl_int cl_error; -static cl_mem mem_in, mem_out, mem_salt; +static cl_mem mem_in, mem_out, mem_salt, mem_state; +static cl_kernel split_kernel; static void create_clobj(int kpc, struct fmt_main *self) { - - host_pass = mem_calloc(kpc * sizeof(pass_t)); - host_crack = mem_calloc(kpc * sizeof(crack_t)); - host_salt = mem_calloc(sizeof(salt_t)); #define CL_RO CL_MEM_READ_ONLY #define CL_WO CL_MEM_WRITE_ONLY +#define CL_RW CL_MEM_READ_WRITE #define CLCREATEBUFFER(_flags, _size, _string)\ clCreateBuffer(context[ocl_gpu_id], _flags, _size, NULL, &cl_error);\ HANDLE_CLERROR(cl_error, _string); +#define CLKERNELARG(kernel, id, arg, msg)\ + HANDLE_CLERROR(clSetKernelArg(kernel, id, sizeof(arg), &arg), msg); + + host_pass = mem_calloc(kpc * sizeof(pass_t)); + host_crack = mem_calloc(kpc * sizeof(crack_t)); + host_salt = mem_calloc(sizeof(salt_t)); + mem_in = - CLCREATEBUFFER(CL_RO, kpc * sizeof(pass_t), - "Cannot allocate mem in"); + CLCREATEBUFFER(CL_RO, kpc * sizeof(pass_t), + "Cannot allocate mem in"); mem_salt = - CLCREATEBUFFER(CL_RO, sizeof(salt_t), "Cannot allocate mem salt"); + CLCREATEBUFFER(CL_RO, sizeof(salt_t), "Cannot allocate mem salt"); mem_out = - CLCREATEBUFFER(CL_WO, kpc * sizeof(crack_t), - "Cannot allocate mem out"); + CLCREATEBUFFER(CL_WO, kpc * sizeof(crack_t), + "Cannot allocate mem out"); + mem_state = + CLCREATEBUFFER(CL_RW, kpc * sizeof(state_t), + "Cannot allocate mem state"); + + + CLKERNELARG(crypt_kernel, 0, mem_in, "Error while setting mem_in"); + CLKERNELARG(crypt_kernel, 1, mem_salt, "Error while setting mem_salt"); + CLKERNELARG(crypt_kernel, 2, mem_state, "Error while setting mem_state"); - HANDLE_CLERROR(clSetKernelArg(crypt_kernel, 0, sizeof(mem_in), - &mem_in), "Error while setting mem_in"); - HANDLE_CLERROR(clSetKernelArg(crypt_kernel, 1, sizeof(mem_salt), - &mem_salt), "Error while setting mem_salt"); - HANDLE_CLERROR(clSetKernelArg(crypt_kernel, 2, sizeof(mem_out), - &mem_out), "Error while setting mem_out"); + CLKERNELARG(split_kernel, 0, mem_state, "Error while setting mem_state"); + CLKERNELARG(split_kernel, 1 ,mem_out, "Error while setting mem_out"); } static void init(struct fmt_main *self) { cl_ulong maxsize; - - opencl_init("$JOHN/kernels/pbkdf2_hmac_sha256_unsplit_kernel.cl", - ocl_gpu_id, NULL); + char build_opts[64]; + snprintf(build_opts, sizeof(build_opts), "-DHASH_LOOPS=%u", HASH_LOOPS); + opencl_init("$JOHN/kernels/pbkdf2_hmac_sha256_unsplit_kernel.cl", + ocl_gpu_id, build_opts); local_work_size = global_work_size = 0; opencl_get_user_preferences(CONFIG_NAME); @@ -148,7 +168,11 @@ static void init(struct fmt_main *self) } crypt_kernel = clCreateKernel(program[ocl_gpu_id], KERNEL_NAME, &cl_error); - HANDLE_CLERROR(cl_error, "Error creating kernel"); + HANDLE_CLERROR(cl_error, "Error creating crypt kernel"); + + split_kernel = + clCreateKernel(program[ocl_gpu_id], SPLIT_KERNEL_NAME, &cl_error); + HANDLE_CLERROR(cl_error, "Error creating split kernel"); create_clobj(global_work_size, self); @@ -173,12 +197,15 @@ static void release_clobj(void) HANDLE_CLERROR(clReleaseMemObject(mem_in), "Release mem in"); HANDLE_CLERROR(clReleaseMemObject(mem_salt), "Release mem salt"); HANDLE_CLERROR(clReleaseMemObject(mem_out), "Release mem out"); + //Line below causes segfault + //HANDLE_CLERROR(clReleaseMemObject(mem_state), "Release mem state"); } static void done(void) { release_clobj(); - HANDLE_CLERROR(clReleaseKernel(crypt_kernel), "Release kernel"); + HANDLE_CLERROR(clReleaseKernel(crypt_kernel), "Release kernel 1"); + HANDLE_CLERROR(clReleaseKernel(split_kernel), "Release kernel 2"); HANDLE_CLERROR(clReleaseProgram(program[ocl_gpu_id]), "Release Program"); } @@ -307,7 +334,9 @@ static void opencl_limit_gws(int count) static int crypt_all(int *pcount, struct db_salt *salt) { - int count = *pcount; + int i, count = *pcount; + int loops = host_salt->rounds / HASH_LOOPS; + loops += host_salt->rounds % HASH_LOOPS > 0; opencl_limit_gws(count); #ifdef DEBUG @@ -329,6 +358,14 @@ static int crypt_all(int *pcount, struct profilingEvent), "Run kernel"); HANDLE_CLERROR(clFinish(queue[ocl_gpu_id]), "clFinish"); + + for(i = 0; i < loops; i++) { + HANDLE_CLERROR(clEnqueueNDRangeKernel(queue[ocl_gpu_id], split_kernel, + 1, NULL, &global_work_size, &local_work_size, 0, NULL, + profilingEvent), "Run split kernel"); + HANDLE_CLERROR(clFinish(queue[ocl_gpu_id]), "clFinish"); + + } /// Read the result back HANDLE_CLERROR(clEnqueueReadBuffer(queue[ocl_gpu_id], mem_out, CL_FALSE, 0, global_work_size * sizeof(crack_t), host_crack, 0, @@ -336,14 +373,12 @@ static int crypt_all(int *pcount, struct /// Await completion of all the above HANDLE_CLERROR(clFinish(queue[ocl_gpu_id]), "clFinish"); - return count; } static int cmp_all(void *binary, int count) { int i; - for (i = 0; i < count; i++) if (host_crack[i].hash[0] == ((uint32_t *) binary)[0]) return 1; diff -urpN jumbo.orig/src/opencl_pbkdf2_hmac_sha256_fmt.c~ jumbo/src/opencl_pbkdf2_hmac_sha256_fmt.c~ --- jumbo.orig/src/opencl_pbkdf2_hmac_sha256_fmt.c~ 1970-01-01 00:00:00.000000000 +0000 +++ jumbo/src/opencl_pbkdf2_hmac_sha256_fmt.c~ 2013-08-11 15:35:24.973973584 +0000 @@ -0,0 +1,524 @@ +/* +* This software is Copyright (c) 2013 Lukas Odzioba +* and it is hereby released to the general public under the following terms: +* Redistribution and use in source and binary forms, with or without modification, are permitted. +* +* TODO: +* auto LWS/GWS setup +*/ +#include +#include +#include +#include "misc.h" +#include "arch.h" +#include "base64.h" +#include "common.h" +#include "formats.h" +#include "options.h" +#include "common-opencl.h" + +#define FORMAT_LABEL "PBKDF2-HMAC-SHA256-opencl" +#define FORMAT_NAME "OpenCL" +#define ALGORITHM_NAME "PBKDF2-HMAC-SHA256" + +#define BENCHMARK_COMMENT "" +#define BENCHMARK_LENGTH -1 +#define DEFAULT_LWS 64 +#define DEFAULT_GWS (64*256) + +#define BINARY_ALIGN 4 +#define SALT_ALIGN 1 + +#define uint8_t unsigned char +#define uint32_t unsigned int + +#define PLAINTEXT_LENGTH 55 +#define SALT_LENGTH 50 +#define BINARY_SIZE 32 +#define SALT_SIZE sizeof(salt_t) + +#define FMT_PREFIX "$pbkdf2-sha256$" +#define KERNEL_NAME "pbkdf2_sha256_kernel" +#define SPLIT_KERNEL_NAME "pbkdf2_sha256_loop" +#define CONFIG_NAME "pbkdf2_sha256" + +#define MIN(a,b) (((a)<(b))?(a):(b)) +#define HASH_LOOPS 500 + +typedef struct { + uint8_t length; + uint8_t v[PLAINTEXT_LENGTH]; +} pass_t; + +typedef struct { + uint32_t hash[8]; +} crack_t; + +typedef struct { + uint8_t length; + uint8_t salt[64]; + uint32_t rounds; +} salt_t; + +typedef struct { + uint32_t ipad[8]; + uint32_t opad[8]; + uint32_t hash[8]; + uint32_t W[8]; + uint32_t rounds; +} state_t; + +/* + Testcases generated by passlib, format: $pbkdf2-256$rounds$salt$checksum + salt and checksum are encoded in "adapted base64" +*/ +static struct fmt_tests tests[] = { + + {"$pbkdf2-sha256$12000$2NtbSwkhRChF6D3nvJfSGg$OEWLc4keep8Vx3S/WnXgsfalb9q0RQdS1s05LfalSG4", ""}, + {"$pbkdf2-sha256$12000$fK8VAoDQuvees5ayVkpp7Q$xfzKAoBR/Iaa68tjn.O8KfGxV.zdidcqEeDoTFvDz2A", "1"}, + {"$pbkdf2-sha256$12000$GoMQYsxZ6/0fo5QyhtAaAw$xQ9L6toKn0q245SIZKoYjCu/Fy15hwGme9.08hBde1w", "12"}, + {"$pbkdf2-sha256$12000$6r3XWgvh/D/HeA/hXAshJA$11YY39OaSkJuwb.ONKVy5ebCZ00i5f8Qpcgwfe3d5kY", "123"}, + {"$pbkdf2-sha256$12000$09q711rLmbMWYgwBIGRMqQ$kHdAHlnQ1i1FHKBCPLV0sA20ai2xtYA1Ev8ODfIkiQg", "1234"}, + {"$pbkdf2-sha256$12000$Nebce08pJcT43zuHUMo5Rw$bMW/EsVqy8tMaDecFwuZNEPVfQbXBclwN78okLrxJoA", "openwall"}, + {"$pbkdf2-sha256$12000$mtP6/39PSQlhzBmDsJZS6g$zUXxf/9XBGrkedXVwhpC9wLLwwKSvHX39QRz7MeojYE", "password"}, + {"$pbkdf2-sha256$12000$35tzjhGi9J5TSilF6L0XAg$MiJA1gPN1nkuaKPVzSJMUL7ucH4bWIQetzX/JrXRYpw", "pbkdf2-sha256"}, + {"$pbkdf2-sha256$12000$sxbCeE8pxVjL2ds7hxBizA$uIiwKdo9DbPiiaLi1y3Ljv.r9G1tzxLRdlkD1uIOwKM", " 15 characters "}, + {"$pbkdf2-sha256$12000$CUGI8V7rHeP8nzMmhJDyXg$qjq3rBcsUgahqSO/W4B1bvsuWnrmmC4IW8WKMc5bKYE", " 16 characters__"}, + {"$pbkdf2-sha256$12000$FmIM4VxLaY1xLuWc8z6n1A$OVe6U1d5dJzYFKlJsZrW1NzUrfgiTpb9R5cAfn96WCk", " 20 characters______"}, + {"$pbkdf2-sha256$12000$fA8BAMAY41wrRQihdO4dow$I9BSCuV6UjG55LktTKbV.bIXtyqKKNvT3uL7JQwMLp8", " 24 characters______1234"}, + {"$pbkdf2-sha256$12000$/j8npJTSOmdMKcWYszYGgA$PbhiSNRzrELfAavXEsLI1FfitlVjv9NIB.jU1HHRdC8", " 28 characters______12345678"}, + {"$pbkdf2-sha256$12000$xfj/f6/1PkcIoXROCeE8Bw$ci.FEcPOKKKhX5b3JwzSDo6TGuYjgj1jKfCTZ9UpDM0", " 32 characters______123456789012"}, + {"$pbkdf2-sha256$12000$6f3fW8tZq7WWUmptzfmfEw$GDm/yhq1TnNR1MVGy73UngeOg9QJ7DtW4BnmV2F065s", " 40 characters______12345678901234567890"}, + {"$pbkdf2-sha256$12000$dU5p7T2ndM7535tzjpGyVg$ILbppLkipmonlfH1I2W3/vFMyr2xvCI8QhksH8DWn/M", " 55 characters______________________________________end"}, + {"$pbkdf2-sha256$12000$iDFmDCHE2FtrDaGUEmKMEaL0Xqv1/t/b.x.DcC6lFEI$tUdEcw3csCnsfiYbFdXH6nvbftH8rzvBDl1nABeN0nE", "salt length = 32"}, + {"$pbkdf2-sha256$12000$0zoHwNgbIwSAkDImZGwNQUjpHcNYa43xPqd0DuH8H0OIUWqttfY.h5DynvPeG.O8N.Y$.XK4LNIeewI7w9QF5g9p5/NOYMYrApW03bcv/MaD6YQ", "salt length = 50"}, + {"$pbkdf2-sha256$12000$HGPMeS9lTAkhROhd653Tuvc.ZyxFSOk9x5gTYgyBEAIAgND6PwfAmA$WdCipc7O/9tTgbpZvcz.mAkIDkdrebVKBUgGbncvoNw", "salt length = 40"}, + {"$pbkdf2-sha256$12001$ay2F0No7p1QKgVAqpbQ2hg$UbKdswiLpjc5wT8Zl2M6VlE2cNiKuhAUntGciP8JjPw","test"}, + {NULL} +}; + +//#define DEBUG +static pass_t *host_pass; /** plain ciphertexts **/ +static salt_t *host_salt; /** salt **/ +static crack_t *host_crack; /** hash**/ +static cl_int cl_error; +static cl_mem mem_in, mem_out, mem_salt, mem_state; +static cl_kernel split_kernel; + +static void create_clobj(int kpc, struct fmt_main *self) +{ +#define CL_RO CL_MEM_READ_ONLY +#define CL_WO CL_MEM_WRITE_ONLY +#define CL_RW CL_MEM_READ_WRITE + +#define CLCREATEBUFFER(_flags, _size, _string)\ + clCreateBuffer(context[ocl_gpu_id], _flags, _size, NULL, &cl_error);\ + HANDLE_CLERROR(cl_error, _string); + +#define CLKERNELARG(kernel, id, arg, msg)\ + HANDLE_CLERROR(clSetKernelArg(kernel, id, sizeof(arg), &arg), msg); + + host_pass = mem_calloc(kpc * sizeof(pass_t)); + host_crack = mem_calloc(kpc * sizeof(crack_t)); + host_salt = mem_calloc(sizeof(salt_t)); + + mem_in = + CLCREATEBUFFER(CL_RO, kpc * sizeof(pass_t), + "Cannot allocate mem in"); + mem_salt = + CLCREATEBUFFER(CL_RO, sizeof(salt_t), "Cannot allocate mem salt"); + mem_out = + CLCREATEBUFFER(CL_WO, kpc * sizeof(crack_t), + "Cannot allocate mem out"); + mem_state = + CLCREATEBUFFER(CL_RW, kpc * sizeof(state_t), + "Cannot allocate mem state"); + + + CLKERNELARG(crypt_kernel, 0, mem_in, "Error while setting mem_in"); + CLKERNELARG(crypt_kernel, 1, mem_salt, "Error while setting mem_salt"); + CLKERNELARG(crypt_kernel, 2, mem_state, "Error while setting mem_state"); + + CLKERNELARG(split_kernel, 0, mem_state, "Error while setting mem_state"); + CLKERNELARG(split_kernel, 1 ,mem_out, "Error while setting mem_out"); +} + + +static void init(struct fmt_main *self) +{ + cl_ulong maxsize; + char build_opts[64]; + snprintf(build_opts, sizeof(build_opts), "-DHASH_LOOPS=%u", HASH_LOOPS); + opencl_init("$JOHN/kernels/pbkdf2_hmac_sha256_unsplit_kernel.cl", + ocl_gpu_id, build_opts); + + local_work_size = global_work_size = 0; + opencl_get_user_preferences(CONFIG_NAME); + if (!local_work_size) { +#ifdef DEBUG + fprintf(stderr, "Forcing LWS = %d\n", DEFAULT_LWS); +#endif + local_work_size = DEFAULT_LWS; + } + if (!global_work_size) { +#ifdef DEBUG + fprintf(stderr, "Forcing GWS = %d\n", DEFAULT_GWS); +#endif + global_work_size = DEFAULT_GWS; + } + crypt_kernel = + clCreateKernel(program[ocl_gpu_id], KERNEL_NAME, &cl_error); + HANDLE_CLERROR(cl_error, "Error creating crypt kernel"); + + split_kernel = + clCreateKernel(program[ocl_gpu_id], SPLIT_KERNEL_NAME, &cl_error); + HANDLE_CLERROR(cl_error, "Error creating split kernel"); + + create_clobj(global_work_size, self); + + /* Note: we ask for the kernels' max sizes, not the device's! */ + HANDLE_CLERROR(clGetKernelWorkGroupInfo(crypt_kernel, + devices[ocl_gpu_id], CL_KERNEL_WORK_GROUP_SIZE, + sizeof(maxsize), &maxsize, NULL), "Query max workgroup size"); + while (local_work_size > maxsize) + local_work_size >>= 1; + + self->params.min_keys_per_crypt = local_work_size; + self->params.max_keys_per_crypt = global_work_size; +} + + +static void release_clobj(void) +{ + MEM_FREE(host_pass); + MEM_FREE(host_salt); + MEM_FREE(host_crack); + + HANDLE_CLERROR(clReleaseMemObject(mem_in), "Release mem in"); + HANDLE_CLERROR(clReleaseMemObject(mem_salt), "Release mem salt"); + HANDLE_CLERROR(clReleaseMemObject(mem_out), "Release mem out"); + //Line below causes segfault + //HANDLE_CLERROR(clReleaseMemObject(mem_state), "Release mem state"); +} + +static void done(void) +{ + release_clobj(); + HANDLE_CLERROR(clReleaseKernel(crypt_kernel), "Release kernel 1"); + HANDLE_CLERROR(clReleaseKernel(split_kernel), "Release kernel 2"); + HANDLE_CLERROR(clReleaseProgram(program[ocl_gpu_id]), + "Release Program"); +} + +static int isabase64(char a) +{ + int ret = 0; + if (a >= 'a' && a <= 'z') + ret = 1; + if (a >= 'A' && a <= 'Z') + ret = 1; + if (a >= '0' && a <= '9') + ret = 1; + if (a == '.' || a == '/') + ret = 1; + return ret; +} + +static int valid(char *ciphertext, struct fmt_main *pFmt) +{ + int saltlen = 0; + char *p, *c = ciphertext; + if (strncmp(ciphertext, FMT_PREFIX, strlen(FMT_PREFIX) != 0)) + return 0; + if (strlen(ciphertext) < 44 + strlen(FMT_PREFIX)) + return 0; + c += strlen(FMT_PREFIX); + if (strtol(c, NULL, 10) == 0) + return 0; + c = strchr(c, '$'); + if (c == NULL) + return 0; + c++; + p = strchr(c, '$'); + if (p == NULL) + return 0; + while (c < p) { + if (!isabase64(*c++)) + return 0; + saltlen++; + } + saltlen = saltlen * 3 / 4; + if (saltlen > SALT_LENGTH) + return 0; + c++; + if (strlen(c) != 43) + return 0; + while (*c) + if (!isabase64(*c++)) + return 0; + return 1; +} + +/* adapted base64 encoding used by passlib - s/./+/ and trim padding */ +static void abase64_decode(const char *in, int length, char *out) +{ + int i; + static char hash[70 + 1]; +#ifdef DEBUG + assert(length <= 70); + assert(length % 4 != 1); +#endif + memset(hash, '=', 70); + memcpy(hash, in, length); + for (i = 0; i < length; i++) + if (hash[i] == '.') + hash[i] = '+'; + switch (length % 4) { + case 2: + length += 2; + break; + case 3: + length++; + break; + } + hash[length] = 0; + base64_decode(hash, length, out); +} + +static void *passlib_binary(char *ciphertext) +{ + static char ret[256 / 8]; + char *c = ciphertext; + c += strlen(FMT_PREFIX) + 1; + c = strchr(c, '$') + 1; + c = strchr(c, '$') + 1; +#ifdef DEBUG + assert(strlen(c) == 43); +#endif + abase64_decode(c, 43, ret); + return ret; +} + +static void *binary(char *ciphertext) +{ + return passlib_binary(ciphertext); +} + +static void *get_salt(char *ciphertext) +{ + static salt_t salt; + char *p, *c = ciphertext, *oc; + c += strlen(FMT_PREFIX); + salt.rounds = strtol(c, NULL, 10); + c = strchr(c, '$') + 1; + p = strchr(c, '$'); + salt.length = 0; + oc = c; + while (c++ < p) + salt.length++; + abase64_decode(oc, salt.length, (char *)salt.salt); + salt.length = salt.length * 3 / 4; + return (void *)&salt; +} + +static void set_salt(void *salt) +{ + memcpy(host_salt, salt, SALT_SIZE); +} + +static void opencl_limit_gws(int count) +{ + global_work_size = + (count + local_work_size - 1) / local_work_size * local_work_size; +} + +static int crypt_all(int *pcount, struct db_salt *salt) +{ + int i, count = *pcount; + int loops = host_salt->rounds / HASH_LOOPS; + loops += host_salt->rounds % HASH_LOOPS > 0; + opencl_limit_gws(count); + +#ifdef DEBUG + printf("crypt_all(%d)\n", count); + printf("LWS = %d, GWS = %d\n", local_work_size, global_work_size); +#endif + + /// Copy data to gpu + HANDLE_CLERROR(clEnqueueWriteBuffer(queue[ocl_gpu_id], mem_in, + CL_FALSE, 0, global_work_size * sizeof(pass_t), host_pass, 0, + NULL, NULL), "Copy data to gpu"); + HANDLE_CLERROR(clEnqueueWriteBuffer(queue[ocl_gpu_id], mem_salt, + CL_FALSE, 0, sizeof(salt_t), host_salt, 0, NULL, NULL), + "Copy salt to gpu"); + + /// Run kernel + HANDLE_CLERROR(clEnqueueNDRangeKernel(queue[ocl_gpu_id], crypt_kernel, + 1, NULL, &global_work_size, &local_work_size, 0, NULL, + profilingEvent), "Run kernel"); + HANDLE_CLERROR(clFinish(queue[ocl_gpu_id]), "clFinish"); + + + for(i = 0; i < loops; i++) { + HANDLE_CLERROR(clEnqueueNDRangeKernel(queue[ocl_gpu_id], split_kernel, + 1, NULL, &global_work_size, &local_work_size, 0, NULL, + profilingEvent), "Run split kernel"); + HANDLE_CLERROR(clFinish(queue[ocl_gpu_id]), "clFinish"); + + } + /// Read the result back + HANDLE_CLERROR(clEnqueueReadBuffer(queue[ocl_gpu_id], mem_out, + CL_FALSE, 0, global_work_size * sizeof(crack_t), host_crack, 0, + NULL, NULL), "Copy result back"); + + /// Await completion of all the above + HANDLE_CLERROR(clFinish(queue[ocl_gpu_id]), "clFinish"); + return count; +} + +static int cmp_all(void *binary, int count) +{ + int i; + for (i = 0; i < count; i++) + if (host_crack[i].hash[0] == ((uint32_t *) binary)[0]) + return 1; + return 0; +} + +static int cmp_one(void *binary, int index) +{ + int i; + for (i = 0; i < 8; i++) + if (host_crack[index].hash[i] != ((uint32_t *) binary)[i]) + return 0; + return 1; +} + +static int cmp_exact(char *source, int index) +{ + return 1; +} + +static void set_key(char *key, int index) +{ + int saved_key_length = MIN(strlen(key), PLAINTEXT_LENGTH); + memcpy(host_pass[index].v, key, saved_key_length); + host_pass[index].length = saved_key_length; +} + +static char *get_key(int index) +{ + static char ret[PLAINTEXT_LENGTH + 1]; + memcpy(ret, host_pass[index].v, PLAINTEXT_LENGTH); + ret[MIN(host_pass[index].length, PLAINTEXT_LENGTH)] = 0; + return ret; +} + +static int binary_hash_0(void *binary) +{ +#ifdef DEBUG + puts("binary"); + uint32_t i, *b = binary; + for (i = 0; i < 8; i++) + printf("%08x ", b[i]); + puts(""); +#endif + return (((uint32_t *) binary)[0] & 0xf); +} + +static int get_hash_0(int index) +{ +#ifdef DEBUG + uint32_t i; + puts("get_hash"); + for (i = 0; i < 8; i++) + printf("%08x ", ((uint32_t *) host_crack[index].hash)[i]); + puts(""); +#endif + return host_crack[index].hash[0] & 0xf; +} + +static int get_hash_1(int index) +{ + return host_crack[index].hash[0] & 0xff; +} + +static int get_hash_2(int index) +{ + return host_crack[index].hash[0] & 0xfff; +} + +static int get_hash_3(int index) +{ + return host_crack[index].hash[0] & 0xffff; +} + +static int get_hash_4(int index) +{ + return host_crack[index].hash[0] & 0xfffff; +} + +static int get_hash_5(int index) +{ + return host_crack[index].hash[0] & 0xffffff; +} + +static int get_hash_6(int index) +{ + return host_crack[index].hash[0] & 0x7ffffff; +} + +struct fmt_main fmt_opencl_pbkdf2_hmac_sha256 = { +{ + FORMAT_LABEL, + FORMAT_NAME, + ALGORITHM_NAME, + BENCHMARK_COMMENT, + BENCHMARK_LENGTH, + PLAINTEXT_LENGTH, + BINARY_SIZE, + BINARY_ALIGN, + SALT_SIZE, + SALT_ALIGN, + 1, + 1, + FMT_CASE | FMT_8_BIT, + tests +}, { + init, + done, + fmt_default_reset, + fmt_default_prepare, + valid, + fmt_default_split, + binary, + get_salt, + fmt_default_source, + { + binary_hash_0, + fmt_default_binary_hash_1, + fmt_default_binary_hash_2, + fmt_default_binary_hash_3, + fmt_default_binary_hash_4, + fmt_default_binary_hash_5, + fmt_default_binary_hash_6 + }, + fmt_default_salt_hash, + set_salt, + set_key, + get_key, + fmt_default_clear_keys, + crypt_all, + { + get_hash_0, + get_hash_1, + get_hash_2, + get_hash_3, + get_hash_4, + get_hash_5, + get_hash_6 + }, + cmp_all, + cmp_one, + cmp_exact +}};