diff -urpN magnum-jumbo/src/cuda/pwsafe.cu magnum-jumbo_pwsafe2//src/cuda/pwsafe.cu --- magnum-jumbo/src/cuda/pwsafe.cu 2012-08-10 21:39:56.183783114 +0000 +++ magnum-jumbo_pwsafe2//src/cuda/pwsafe.cu 2012-08-11 18:58:37.769658633 +0000 @@ -1,5 +1,5 @@ /* -* This software is Copyright (c) 2012 Lukas Odzioba +* This software is Copyright (c) 2012 Lukas Odzioba * and it is hereby released to the general public under the following terms: * Redistribution and use in source and binary forms, with or without modification, are permitted. */ @@ -13,161 +13,155 @@ #define PWSAFE_IN_SIZE (KEYS_PER_CRYPT * sizeof(pwsafe_pass)) #define PWSAFE_OUT_SIZE (KEYS_PER_CRYPT * sizeof(pwsafe_hash)) #define PWSAFE_SALT_SIZE (sizeof(pwsafe_salt)) +__constant__ pwsafe_salt cuda_salt[1]; + +__constant__ uint32_t k[] = { + 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, + 0x59f111f1, 0x923f82a4, 0xab1c5ed5, + 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, + 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, + 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, + 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, + 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, + 0xd5a79147, 0x06ca6351, 0x14292967, + 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, + 0x766a0abb, 0x81c2c92e, 0x92722c85, + 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, + 0xd6990624, 0xf40e3585, 0x106aa070, + 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, + 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, + 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, + 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 +}; + +__constant__ uint32_t H[] = { + 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, + 0x9b05688c, 0x1f83d9ab, 0x5be0cd19 +}; -__global__ void kernel_pwsafe(pwsafe_pass * in, pwsafe_salt * salt, - pwsafe_hash * out) -{ - uint32_t idx = blockIdx.x * blockDim.x + threadIdx.x; - uint32_t pl = in[idx].length, j, i; - const uint32_t k[] = { - 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, - 0x59f111f1, 0x923f82a4, 0xab1c5ed5, - 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, - 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, - 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, - 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, - 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, - 0xd5a79147, 0x06ca6351, 0x14292967, - 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, - 0x766a0abb, 0x81c2c92e, 0x92722c85, - 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, - 0xd6990624, 0xf40e3585, 0x106aa070, - 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, - 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, - 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, - 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 - }; - - const uint32_t H[] = { - 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, - 0x9b05688c, 0x1f83d9ab, 0x5be0cd19 - }; - - uint32_t w[64]; - for (i = 0; i < 14; i++) - w[i] = 0; - for (j = 0; j < pl; j++) { - uint32_t tmp = 0; - tmp |= (((uint32_t) in[idx].v[j]) << ((3 - (j & 0x3)) << 3)); - w[j / 4] |= tmp; - } - for (; j < 32 + pl; j++) { - uint32_t tmp = 0; - tmp |= - (((uint32_t) salt->salt[j - pl]) << ((3 - - (j & 0x3)) << 3)); - w[j / 4] |= tmp; - } - w[j / 4] |= (((uint32_t) 0x80) << ((3 - (j & 0x3)) << 3)); - w[15] = 0x00000000 | (j * 8); - - for (j = 16; j < 64; j++) { - w[j] = - sigma1(w[j - 2]) + w[j - 7] + sigma0(w[j - 15]) + w[j - - 16]; - } - - uint32_t a = H[0]; - uint32_t b = H[1]; - uint32_t c = H[2]; - uint32_t d = H[3]; - uint32_t e = H[4]; - uint32_t f = H[5]; - uint32_t g = H[6]; - uint32_t h = H[7]; - for (uint32_t j = 0; j < 64; j++) { - uint32_t t1 = h + Sigma1(e) + Ch(e, f, g) + k[j] + w[j]; - uint32_t t2 = Sigma0(a) + Maj(a, b, c); - h = g; - g = f; - f = e; - e = d + t1; - d = c; - c = b; - b = a; - a = t1 + t2; - } - w[9] = w[10] = w[11] = w[12] = w[13] = w[14] = 0; - w[8] = 0x80000000; - w[15] = 0x00000100; - for (i = 0; i <= salt->iterations; i++) { - w[0] = a + H[0]; - w[1] = b + H[1]; - w[2] = c + H[2]; - w[3] = d + H[3]; - w[4] = e + H[4]; - w[5] = f + H[5]; - w[6] = g + H[6]; - w[7] = h + H[7]; - a = H[0]; - b = H[1]; - c = H[2]; - d = H[3]; - e = H[4]; - f = H[5]; - g = H[6]; - h = H[7]; + +__global__ void kernel_pwsafe(pwsafe_pass * in, pwsafe_hash * out) +{ + uint32_t idx = blockIdx.x * blockDim.x + threadIdx.x; + uint32_t pl = in[idx].length, j, i; + uint32_t w[64]; + for (i = 0; i <= 14; i++) + w[i] = 0; + for (j = 0; j < pl; j++) { + uint32_t tmp = (((uint32_t) in[idx].v[j]) << ((3 - (j & 0x3)) << 3)); + w[j / 4] |= tmp; + } + for (; j < 32 + pl; j++) { + uint32_t tmp = (((uint32_t) cuda_salt[0].salt[j - pl]) << ((3 - + (j & 0x3)) << 3)); + w[j / 4] |= tmp; + } + w[j / 4] |= (((uint32_t) 0x80) << ((3 - (j & 0x3)) << 3)); + w[15] = 0x00000000 | (j * 8); + + for (j = 16; j < 64; j++) { + w[j] = + sigma1(w[j - 2]) + w[j - 7] + sigma0(w[j - 15]) + w[j - + 16]; + } + memcpy(out[idx].hash, w, 32); + + uint32_t a = H[0]; + uint32_t b = H[1]; + uint32_t c = H[2]; + uint32_t d = H[3]; + uint32_t e = H[4]; + uint32_t f = H[5]; + uint32_t g = H[6]; + uint32_t h = H[7]; + for (uint32_t j = 0; j < 64; j++) { + uint32_t t1 = h + Sigma1(e) + Ch(e, f, g) + k[j] + w[j]; + uint32_t t2 = Sigma0(a) + Maj(a, b, c); + h = g; + g = f; + f = e; + e = d + t1; + d = c; + c = b; + b = a; + a = t1 + t2; + } + w[9] = w[10] = w[11] = w[12] = w[13] = w[14] = 0; + w[8] = 0x80000000; + w[15] = 0x00000100; + + for (i = 0; i <= cuda_salt[0].iterations; i++) { + w[0] = a + H[0]; + w[1] = b + H[1]; + w[2] = c + H[2]; + w[3] = d + H[3]; + w[4] = e + H[4]; + w[5] = f + H[5]; + w[6] = g + H[6]; + w[7] = h + H[7]; + a = H[0]; + b = H[1]; + c = H[2]; + d = H[3]; + e = H[4]; + f = H[5]; + g = H[6]; + h = H[7]; #pragma unroll 48 - for (j = 16; j < 64; j++) - w[j] = - sigma1(w[j - 2]) + w[j - 7] + sigma0(w[j - 15]) + - w[j - 16]; + for (j = 16; j < 64; j++) + w[j] = + sigma1(w[j - 2]) + w[j - 7] + sigma0(w[j - 15]) + + w[j - 16]; #pragma unroll 64 - for (uint32_t j = 0; j < 64; j++) { - uint32_t t1 = - h + Sigma1(e) + Ch(e, f, g) + k[j] + w[j]; - uint32_t t2 = Sigma0(a) + Maj(a, b, c); - h = g; - g = f; - f = e; - e = d + t1; - d = c; - c = b; - b = a; - a = t1 + t2; - } - } - uint32_t cmp = 1; - uint32_t *v = (uint32_t *) salt->hash; - cmp &= (*v++ == a + H[0]); - cmp &= (*v++ == b + H[1]); - cmp &= (*v++ == c + H[2]); - cmp &= (*v++ == d + H[3]); - cmp &= (*v++ == e + H[4]); - cmp &= (*v++ == f + H[5]); - cmp &= (*v++ == g + H[6]); - cmp &= (*v++ == h + H[7]); - - out[idx].cracked = cmp; + for (uint32_t j = 0; j < 64; j++) { + uint32_t t1 = + h + Sigma1(e) + Ch(e, f, g) + k[j] + w[j]; + uint32_t t2 = Sigma0(a) + Maj(a, b, c); + h = g; + g = f; + f = e; + e = d + t1; + d = c; + c = b; + b = a; + a = t1 + t2; + } + } + out[idx].hash[0] = SWAP32(a + H[0]); + out[idx].hash[1] = SWAP32(b + H[1]); + out[idx].hash[2] = SWAP32(c + H[2]); + out[idx].hash[3] = SWAP32(d + H[3]); + out[idx].hash[4] = SWAP32(e + H[4]); + out[idx].hash[5] = SWAP32(f + H[5]); + out[idx].hash[6] = SWAP32(g + H[6]); + out[idx].hash[7] = SWAP32(h + H[7]); } -extern "C" void gpu_pwpass(pwsafe_pass * host_in, pwsafe_salt * host_salt, +extern "C" void gpu_pwsafe(pwsafe_pass * host_in, pwsafe_salt * host_salt, pwsafe_hash * host_out) { - pwsafe_pass *cuda_pass = NULL; ///passwords - pwsafe_salt *cuda_salt = NULL; ///salt - pwsafe_hash *cuda_hash = NULL; ///hashes - - ///Aloc memory and copy data to gpu - cudaMalloc(&cuda_pass, PWSAFE_IN_SIZE); - cudaMalloc(&cuda_salt, PWSAFE_SALT_SIZE); - cudaMalloc(&cuda_hash, PWSAFE_OUT_SIZE); - cudaMemcpy(cuda_pass, host_in, PWSAFE_IN_SIZE, cudaMemcpyHostToDevice); - cudaMemcpy(cuda_salt, host_salt, PWSAFE_SALT_SIZE, - cudaMemcpyHostToDevice); - - ///Run kernel and wait for execution end - kernel_pwsafe <<< BLOCKS, THREADS >>> (cuda_pass, cuda_salt, - cuda_hash); - cudaThreadSynchronize(); - - ///Free memory and copy results back - cudaMemcpy(host_out, cuda_hash, PWSAFE_OUT_SIZE, - cudaMemcpyDeviceToHost); - cudaFree(cuda_pass); - cudaFree(cuda_salt); - cudaFree(cuda_hash); -} + pwsafe_pass *cuda_pass; + pwsafe_hash *cuda_hash; + HANDLE_ERROR(cudaMemcpyToSymbol(cuda_salt, host_salt, + PWSAFE_SALT_SIZE)); + + ///Aloc memory and copy data to gpu + HANDLE_ERROR(cudaMalloc(&cuda_pass, PWSAFE_IN_SIZE)); + HANDLE_ERROR(cudaMalloc(&cuda_hash, PWSAFE_OUT_SIZE)); + ///Somehow this memset, which is not required, speeds things up a bit + HANDLE_ERROR(cudaMemset(cuda_hash, -1, PWSAFE_OUT_SIZE)); + HANDLE_ERROR(cudaMemcpy(cuda_pass, host_in, PWSAFE_IN_SIZE, + cudaMemcpyHostToDevice)); + + ///Run kernel and wait for execution end + kernel_pwsafe <<< BLOCKS, THREADS >>> (cuda_pass, cuda_hash); + + ///Free memory and copy results back + HANDLE_ERROR(cudaMemcpy(host_out, cuda_hash, PWSAFE_OUT_SIZE, + cudaMemcpyDeviceToHost)); + HANDLE_ERROR(cudaFree(cuda_pass)); + HANDLE_ERROR(cudaFree(cuda_hash)); +} diff -urpN magnum-jumbo/src/cuda_pwsafe.h magnum-jumbo_pwsafe2//src/cuda_pwsafe.h --- magnum-jumbo/src/cuda_pwsafe.h 2012-08-10 21:39:56.184782712 +0000 +++ magnum-jumbo_pwsafe2//src/cuda_pwsafe.h 2012-08-11 19:11:47.954408209 +0000 @@ -1,5 +1,5 @@ /* -* This software is Copyright (c) 2012 Lukas Odzioba +* This software is Copyright (c) 2012 Lukas Odzioba * and it is hereby released to the general public under the following terms: * Redistribution and use in source and binary forms, with or without modification, are permitted. */ @@ -9,8 +9,8 @@ #define uint8_t unsigned char #define uint32_t unsigned int -#define THREADS 512 -#define BLOCKS 112//14 // 112 is good for gtx460 +#define THREADS 256 +#define BLOCKS 112// is good for gtx460 #define KEYS_PER_CRYPT THREADS*BLOCKS #define MIN(a,b) (((a)<(b))?(a):(b)) @@ -27,20 +27,18 @@ (((n) << 24) | (((n) & 0xff00) << 8) | (((n) >> 8) & 0xff00) | ((n) >> 24)) typedef struct { - uint8_t v[32]; + uint8_t v[15]; uint8_t length; } pwsafe_pass; typedef struct { - uint32_t cracked; ///cracked or not + uint32_t hash[8]; } pwsafe_hash; typedef struct { int version; uint32_t iterations; - uint8_t hash[32]; - uint8_t length; - uint8_t salt[32]; + uint8_t salt[32]; } pwsafe_salt; #endif diff -urpN magnum-jumbo/src/cuda_pwsafe_fmt.c magnum-jumbo_pwsafe2//src/cuda_pwsafe_fmt.c --- magnum-jumbo/src/cuda_pwsafe_fmt.c 2012-08-10 21:39:56.184782712 +0000 +++ magnum-jumbo_pwsafe2//src/cuda_pwsafe_fmt.c 2012-08-11 18:57:36.207408668 +0000 @@ -8,6 +8,9 @@ * Redistribution and use in source and binary forms, with or without modification, * are permitted. */ +#include "sha2.h" + + #include #include #include @@ -20,154 +23,303 @@ #include "base64.h" #include "memory.h" #include "cuda_pwsafe.h" +#include "cuda_common.h" #define FORMAT_LABEL "pwsafe-cuda" #define FORMAT_NAME "Password Safe SHA-256" #define ALGORITHM_NAME "CUDA" #define BENCHMARK_COMMENT "" #define BENCHMARK_LENGTH -1 -#define PLAINTEXT_LENGTH 32 +#define PLAINTEXT_LENGTH 15 #define BINARY_SIZE 32 #define SALT_SIZE sizeof(pwsafe_salt) #define MIN_KEYS_PER_CRYPT KEYS_PER_CRYPT #define MAX_KEYS_PER_CRYPT KEYS_PER_CRYPT static struct fmt_tests pwsafe_tests[] = { - {"$pwsafe$*3*fefc1172093344c9d5577b25f5b4b6e5d2942c94f9fc24c21733e28ae6527521*2048*88cbaf7d8668c1a98263f5dce7cb39c3304c49a3e0d76a7ea475dc02ab2f97a7", "12345678"}, - {"$pwsafe$*3*581cd1135b9b993ccb0f6b01c1fcfacd799c69960496c96286f94fe1400c1b25*2048*4ab3c2d3af251e94eb2f753fdf30fb9da074bec6bac0fa9d9d152b95fc5795c6", "openwall"}, - {NULL} + {"$pwsafe$*3*fefc1172093344c9d5577b25f5b4b6e5d2942c94f9fc24c21733e28ae6527521*2048*88cbaf7d8668c1a98263f5dce7cb39c3304c49a3e0d76a7ea475dc02ab2f97a7", "12345678"}, + {"$pwsafe$*3*581cd1135b9b993ccb0f6b01c1fcfacd799c69960496c96286f94fe1400c1b25*2048*4ab3c2d3af251e94eb2f753fdf30fb9da074bec6bac0fa9d9d152b95fc5795c6", "openwall"}, + {"$pwsafe$*3*eb5aa91895aad99437b855b2637870c60104acd21960b3b1f07527f7c050d042*2048*86439cc390c67fe7b0d35aaa0a906d3dbbc30d153792f9c7cc9101cc82064c78", "password"}, + {"$pwsafe$*3*581cd1135b9b993ccb0f6b01c1fcfacd799c69960496c96286f94fe1400c1b25*2048*73810e24356ead0c24d2286c4fe2b9b3551343f9a903809824a9a9deb137900e", "a"}, + {"$pwsafe$*3*581cd1135b9b993ccb0f6b01c1fcfacd799c69960496c96286f94fe1400c1b25*2048*c1597c5e59b3eb7e7cb7ce1f74e1afcea1b44adee9508ef933d901598dc7899f", "ab"}, + {"$pwsafe$*3*581cd1135b9b993ccb0f6b01c1fcfacd799c69960496c96286f94fe1400c1b25*2048*293ce88074a83b358f198d56b467332a3f533d9312edd0fde168e125b4c64dcc", "abc"}, + {"$pwsafe$*3*581cd1135b9b993ccb0f6b01c1fcfacd799c69960496c96286f94fe1400c1b25*2048*9f8871796c5ff8f86d847c9fed6a209f54c2e230260eda1dadee339efd9fc3ec", "abcd"}, + {"$pwsafe$*3*581cd1135b9b993ccb0f6b01c1fcfacd799c69960496c96286f94fe1400c1b25*2048*915293e67ae3a107a2ab40c8f14bf6b34c826ddcebf471327e27d958b86c3a6b", "abcde"}, + {"$pwsafe$*3*581cd1135b9b993ccb0f6b01c1fcfacd799c69960496c96286f94fe1400c1b25*2048*218d27214fbcbe981749c509a290d1a714d48ab226ef4da9f081a69b650e1663", "abcdef"}, + {"$pwsafe$*3*581cd1135b9b993ccb0f6b01c1fcfacd799c69960496c96286f94fe1400c1b25*2048*a73867cdc29fc4d265c5331547450ab9155d1c1af52197fcaf573ac425ddf86d", "abcdefg"}, + {"$pwsafe$*3*581cd1135b9b993ccb0f6b01c1fcfacd799c69960496c96286f94fe1400c1b25*2048*3339b41982d100556570bf53b9a632c9f26bba2d05cd7c94212ad6ffa9f5084f", "abcdefgh"}, + {"$pwsafe$*3*581cd1135b9b993ccb0f6b01c1fcfacd799c69960496c96286f94fe1400c1b25*2048*30ce5d023d1d6af748c57492afff03b75e4aa41001a9c1dfb81b0b4bb110581a", "abcdefghi"}, + {"$pwsafe$*3*581cd1135b9b993ccb0f6b01c1fcfacd799c69960496c96286f94fe1400c1b25*2048*466e9529612bbe5cf995ffe74ae34d707a42bae6c9d51a9d55d77428705bb09d", "abcdefghij"}, + {"$pwsafe$*3*581cd1135b9b993ccb0f6b01c1fcfacd799c69960496c96286f94fe1400c1b25*2048*537f6d5471c6f8364fad3f3c81c72d935bf89b8658c613c11f6621789902cd26", "abcdefghijk"}, + {"$pwsafe$*3*581cd1135b9b993ccb0f6b01c1fcfacd799c69960496c96286f94fe1400c1b25*2048*e46cf24227a97886b9640d289ecd5d8234d9793e925918a0df4a8c30c444f88e", "abcdefghijklm"}, + {"$pwsafe$*3*581cd1135b9b993ccb0f6b01c1fcfacd799c69960496c96286f94fe1400c1b25*2048*396523c6737d01986a02e9ddcb4695c3819a3d5f9e51bee6d54ea4fb886e4ed2", "abcdefghijklmn"}, + {"$pwsafe$*3*581cd1135b9b993ccb0f6b01c1fcfacd799c69960496c96286f94fe1400c1b25*2048*8220e6ea9afa22cf52acbeefd61b809fc4413decaac84d853388924420098692", "abcdefghijklmno"}, + {NULL} }; +static pwsafe_pass *host_pass; /** binary ciphertexts **/ +static pwsafe_salt *host_salt; /** salt **/ +static pwsafe_hash *host_hash; /** calculated hashes **/ +extern void gpu_pwsafe(pwsafe_pass *, pwsafe_salt *, pwsafe_hash *); +static void cleanup() +{ + free(host_pass); + free(host_hash); + free(host_salt); +} -static int any_cracked; -static pwsafe_pass *host_pass; /** binary ciphertexts **/ -static pwsafe_salt *host_salt; /** salt **/ -static pwsafe_hash *host_hash; /** calculated hashes **/ -extern void gpu_pwpass(pwsafe_pass *, pwsafe_salt *, pwsafe_hash *); -static void init(struct fmt_main *self) -{ - host_pass = calloc(KEYS_PER_CRYPT, sizeof(pwsafe_pass)); - host_hash = calloc(KEYS_PER_CRYPT, sizeof(pwsafe_hash)); - host_salt = calloc(1, sizeof(pwsafe_salt)); - any_cracked = 1; +static void init(struct fmt_main *pFmt) +{ + host_pass = calloc(KEYS_PER_CRYPT, sizeof(pwsafe_pass)); + host_hash = calloc(KEYS_PER_CRYPT, sizeof(pwsafe_hash)); + host_salt = calloc(1, sizeof(pwsafe_salt)); + check_mem_allocation(host_pass, host_hash); + check_mem_allocation(host_salt, host_salt); + cuda_init(gpu_id); + memset(host_pass, 0, KEYS_PER_CRYPT * sizeof(pwsafe_pass)); + memset(host_hash, 0, KEYS_PER_CRYPT * sizeof(pwsafe_hash)); + memset(host_salt, 0, sizeof(pwsafe_salt)); + atexit(cleanup); } static int valid(char *ciphertext, struct fmt_main *self) { - return !strncmp(ciphertext, "$pwsafe$", 8); + return !strncmp(ciphertext, "$pwsafe$", 8); } static void *get_salt(char *ciphertext) { - char *ctcopy = strdup(ciphertext); - char *keeptr = ctcopy; - char *p; - int i; - pwsafe_salt *salt_struct = - mem_alloc_tiny(sizeof(pwsafe_salt), MEM_ALIGN_WORD); - ctcopy += 9; /* skip over "$pwsafe$*" */ - p = strtok(ctcopy, "*"); - salt_struct->version = atoi(p); - p = strtok(NULL, "*"); - for (i = 0; i < 32; i++) - salt_struct->salt[i] = atoi16[ARCH_INDEX(p[i * 2])] * 16 - + atoi16[ARCH_INDEX(p[i * 2 + 1])]; - p = strtok(NULL, "*"); - salt_struct->iterations = (unsigned int) atoi(p); - p = strtok(NULL, "*"); - for (i = 0; i < 32; i++) - salt_struct->hash[i] = atoi16[ARCH_INDEX(p[i * 2])] * 16 - + atoi16[ARCH_INDEX(p[i * 2 + 1])]; + char *ctcopy = strdup(ciphertext); + char *keeptr = ctcopy; + char *p; + int i; + static pwsafe_salt salt; + ctcopy += 9; /* skip over "$pwsafe$*" */ + p = strtok(ctcopy, "*"); + salt.version = atoi(p); + p = strtok(NULL, "*"); + for (i = 0; i < 32; i++) + salt.salt[i] = atoi16[ARCH_INDEX(p[i * 2])] * 16 + + atoi16[ARCH_INDEX(p[i * 2 + 1])]; + p = strtok(NULL, "*"); + salt.iterations = (unsigned int) atoi(p); + p = strtok(NULL, "*"); - free(keeptr); + free(keeptr); + return (void *) &salt; +} - alter_endianity(salt_struct->hash, 32); - return (void *) salt_struct; +static void set_salt(void *salt) +{ + memcpy(host_salt, salt, SALT_SIZE); +#ifdef _DEBUG + int i; + uint32_t *s = host_salt->salt; + for (i = 0; i < 8; i++) + printf("%08x ", s[i]); + puts(""); +#endif } -static void set_salt(void *salt) +static void *binary(char *ciphertext) { - memcpy(host_salt, salt, SALT_SIZE); - any_cracked = 0; + static union { + unsigned char c[BINARY_SIZE]; + ARCH_WORD dummy; + } buf; + unsigned char *out = buf.c; + char *p; + int i; + p = strrchr(ciphertext, '*') + 1; + for (i = 0; i < BINARY_SIZE; i++) { + out[i] = + (atoi16[ARCH_INDEX(*p)] << 4) | atoi16[ARCH_INDEX(p[1])]; + p += 2; + } + return out; } static void crypt_all(int count) { - int i; - any_cracked = 0; - - gpu_pwpass(host_pass, host_salt, host_hash); - for (i = 0; i < count; i++) { - if (host_hash[i].cracked == 1) - any_cracked = 1; - } +#ifdef _DEBUG + printf("crypt_all(%d)\n", count); +#endif + gpu_pwsafe(host_pass, host_salt, host_hash); } static int cmp_all(void *binary, int count) { - return any_cracked; + uint32_t i; + uint32_t b = ((uint32_t *) binary)[0]; + for (i = 0; i < count; i++) + if (b == host_hash[i].hash[0]) + return 1; + return 0; } static int cmp_one(void *binary, int index) { - return host_hash[index].cracked; + int i; + uint32_t *t = (uint32_t *) binary; + for (i = 0; i < 8; i++) + if (t[i] != host_hash[index].hash[i]) + return 0; + return 1; } -static int cmp_exact(char *source, int index) +static int cmp_exact(char *source, int count) { - return host_hash[index].cracked; + return 1; } -static void pwsafe_set_key(char *key, int index) +static void set_key(char *key, int index) { - int saved_key_length = MIN(strlen(key), PLAINTEXT_LENGTH); - memcpy(host_pass[index].v, key, saved_key_length); - host_pass[index].length = saved_key_length; +#ifdef _DEBUG + printf("set_key(%d)=[%s]\n", index, key); +#endif + int saved_key_length = MIN(strlen(key), PLAINTEXT_LENGTH); + memcpy(host_pass[index].v, key, saved_key_length); + host_pass[index].length = saved_key_length; } static char *get_key(int index) { - static char ret[PLAINTEXT_LENGTH + 1]; - memcpy(ret, host_pass[index].v, PLAINTEXT_LENGTH); - ret[MIN(host_pass[index].length, PLAINTEXT_LENGTH)] = 0; - return ret; + static char ret[PLAINTEXT_LENGTH + 1]; + memcpy(ret, host_pass[index].v, PLAINTEXT_LENGTH); + ret[MIN(host_pass[index].length, PLAINTEXT_LENGTH)] = 0; + return ret; +} + + +static int binary_hash_0(void *binary) +{ +#ifdef _DEBUG + int i; + uint32_t *bin = binary; + puts("binary_hash_0"); + for (i = 0; i < 8; i++) + printf("%08x ", bin[i]); + puts(""); +#endif + return (((ARCH_WORD_32 *) binary)[0] & 0xf); +} + +static int binary_hash_1(void *binary) +{ + return ((ARCH_WORD_32 *) binary)[0] & 0xff; +} + +static int binary_hash_2(void *binary) +{ + return ((ARCH_WORD_32 *) binary)[0] & 0xfff; +} + +static int binary_hash_3(void *binary) +{ + return ((ARCH_WORD_32 *) binary)[0] & 0xffff; +} + +static int binary_hash_4(void *binary) +{ + return ((ARCH_WORD_32 *) binary)[0] & 0xfffff; +} + +static int binary_hash_5(void *binary) +{ + return ((ARCH_WORD_32 *) binary)[0] & 0xffffff; +} + +static int binary_hash_6(void *binary) +{ + return ((ARCH_WORD_32 *) binary)[0] & 0x7ffffff; +} + +static int get_hash_0(int index) +{ +#ifdef _DEBUG + int i; + puts("get_hash_0"); + for (i = 0; i < 8; i++) + printf("%08x ", host_hash[index].hash[i]); + puts(""); +#endif + return ((ARCH_WORD_32 *) host_hash[index].hash)[0] & 0xf; +} + +static int get_hash_1(int index) +{ + return ((ARCH_WORD_32 *) host_hash[index].hash)[0] & 0xff; +} + +static int get_hash_2(int index) +{ + return ((ARCH_WORD_32 *) host_hash[index].hash)[0] & 0xfff; +} + +static int get_hash_3(int index) +{ + return ((ARCH_WORD_32 *) host_hash[index].hash)[0] & 0xffff; +} + +static int get_hash_4(int index) +{ + return ((ARCH_WORD_32 *) host_hash[index].hash)[0] & 0xfffff; +} + +static int get_hash_5(int index) +{ + return ((ARCH_WORD_32 *) host_hash[index].hash)[0] & 0xffffff; +} + +static int get_hash_6(int index) +{ + return ((ARCH_WORD_32 *) host_hash[index].hash)[0] & 0x7ffffff; } struct fmt_main fmt_cuda_pwsafe = { { - FORMAT_LABEL, - FORMAT_NAME, - ALGORITHM_NAME, - BENCHMARK_COMMENT, - BENCHMARK_LENGTH, - PLAINTEXT_LENGTH, - BINARY_SIZE, - SALT_SIZE, - KEYS_PER_CRYPT, - KEYS_PER_CRYPT, - FMT_CASE | FMT_8_BIT, - pwsafe_tests - }, { - init, - fmt_default_prepare, - valid, - fmt_default_split, - fmt_default_binary, - get_salt, - { - fmt_default_binary_hash - }, - fmt_default_salt_hash, - set_salt, - pwsafe_set_key, - get_key, - fmt_default_clear_keys, - crypt_all, - { - fmt_default_get_hash - }, - cmp_all, - cmp_one, - cmp_exact - } + FORMAT_LABEL, + FORMAT_NAME, + ALGORITHM_NAME, + BENCHMARK_COMMENT, + BENCHMARK_LENGTH, + PLAINTEXT_LENGTH, + BINARY_SIZE, + SALT_SIZE, + KEYS_PER_CRYPT, + KEYS_PER_CRYPT, + FMT_CASE | FMT_8_BIT, + pwsafe_tests}, + { + init, + fmt_default_prepare, + valid, + fmt_default_split, + binary, + get_salt, + { + binary_hash_0, + binary_hash_1, + binary_hash_2, + binary_hash_3, + binary_hash_4, + binary_hash_5, + binary_hash_6}, + fmt_default_salt_hash, + set_salt, + set_key, + get_key, + fmt_default_clear_keys, + crypt_all, + { + get_hash_0, + get_hash_1, + get_hash_2, + get_hash_3, + get_hash_4, + get_hash_5, + get_hash_6}, + cmp_all, + cmp_one, + cmp_exact} }; diff -urpN magnum-jumbo/src/opencl/pwsafe_kernel.cl magnum-jumbo_pwsafe2//src/opencl/pwsafe_kernel.cl --- magnum-jumbo/src/opencl/pwsafe_kernel.cl 2012-08-10 21:39:56.197783109 +0000 +++ magnum-jumbo_pwsafe2//src/opencl/pwsafe_kernel.cl 2012-08-11 19:06:42.713408659 +0000 @@ -29,152 +29,151 @@ typedef struct { - uint8_t v[15]; - uint8_t length; + uint8_t v[15]; + uint8_t length; } pwsafe_pass; typedef struct { - uint32_t cracked; ///cracked or not + uint32_t cracked; ///cracked or not } pwsafe_hash; typedef struct { - int version; - uint32_t iterations; - uint8_t hash[32]; - // uint8_t length; - uint8_t salt[32]; + int version; + uint32_t iterations; + uint8_t hash[32]; + uint8_t salt[32]; } pwsafe_salt; __constant uint32_t k[] = { - 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, - 0x59f111f1, 0x923f82a4, 0xab1c5ed5, - 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, - 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, - 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, - 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, - 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, - 0xd5a79147, 0x06ca6351, 0x14292967, - 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, - 0x766a0abb, 0x81c2c92e, 0x92722c85, - 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, - 0xd6990624, 0xf40e3585, 0x106aa070, - 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, - 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, - 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, - 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 - }; - - __constant uint32_t H[] = { - 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, - 0x9b05688c, 0x1f83d9ab, 0x5be0cd19 - }; + 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, + 0x59f111f1, 0x923f82a4, 0xab1c5ed5, + 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, + 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, + 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, + 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, + 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, + 0xd5a79147, 0x06ca6351, 0x14292967, + 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, + 0x766a0abb, 0x81c2c92e, 0x92722c85, + 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, + 0xd6990624, 0xf40e3585, 0x106aa070, + 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, + 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, + 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, + 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 +}; + +__constant uint32_t H[] = { + 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, + 0x9b05688c, 0x1f83d9ab, 0x5be0cd19 +}; __kernel void pwsafe(__global pwsafe_pass * in, __global pwsafe_hash * out, __global pwsafe_salt * salt) { - uint32_t idx = get_global_id(0); - uint32_t pl = in[idx].length, j, i; + uint32_t idx = get_global_id(0); + uint32_t pl = in[idx].length, j, i; - uint32_t w[64]; - for (i = 0; i < 14; i++) - w[i] = 0; - for (j = 0; j < pl; j++) { - uint32_t tmp = 0; - tmp |= (((uint32_t) in[idx].v[j]) << ((3 - (j & 0x3)) << 3)); - w[j / 4] |= tmp; - } - for (; j < 32 + pl; j++) { - uint32_t tmp = 0; - tmp |= - (((uint32_t) salt->salt[j - pl]) << ((3 - - (j & 0x3)) << 3)); - w[j / 4] |= tmp; - } - w[j / 4] |= (((uint32_t) 0x80) << ((3 - (j & 0x3)) << 3)); - w[15] = 0x00000000 | (j * 8); - - for (j = 16; j < 64; j++) { - w[j] = - sigma1(w[j - 2]) + w[j - 7] + sigma0(w[j - 15]) + w[j - - 16]; - } - - uint32_t a = H[0]; - uint32_t b = H[1]; - uint32_t c = H[2]; - uint32_t d = H[3]; - uint32_t e = H[4]; - uint32_t f = H[5]; - uint32_t g = H[6]; - uint32_t h = H[7]; + uint32_t w[64]; + for (i = 0; i <= 14; i++) + w[i] = 0; + for (j = 0; j < pl; j++) { + uint32_t tmp = 0; + tmp |= (((uint32_t) in[idx].v[j]) << ((3 - (j & 0x3)) << 3)); + w[j / 4] |= tmp; + } + for (; j < 32 + pl; j++) { + uint32_t tmp = 0; + tmp |= + (((uint32_t) salt->salt[j - pl]) << ((3 - + (j & 0x3)) << 3)); + w[j / 4] |= tmp; + } + w[j / 4] |= (((uint32_t) 0x80) << ((3 - (j & 0x3)) << 3)); + w[15] = 0x00000000 | (j * 8); + + for (j = 16; j < 64; j++) { + w[j] = + sigma1(w[j - 2]) + w[j - 7] + sigma0(w[j - 15]) + w[j - + 16]; + } + + uint32_t a = H[0]; + uint32_t b = H[1]; + uint32_t c = H[2]; + uint32_t d = H[3]; + uint32_t e = H[4]; + uint32_t f = H[5]; + uint32_t g = H[6]; + uint32_t h = H[7]; #pragma unroll 64 - for (uint32_t j = 0; j < 64; j++) { - uint32_t t1 = h + Sigma1(e) + Ch(e, f, g) + k[j] + w[j]; - uint32_t t2 = Sigma0(a) + Maj(a, b, c); - h = g; - g = f; - f = e; - e = d + t1; - d = c; - c = b; - b = a; - a = t1 + t2; - } - - for (i = 0; i <= salt->iterations; i++) { - w[0] = a + H[0]; - w[1] = b + H[1]; - w[2] = c + H[2]; - w[3] = d + H[3]; - w[4] = e + H[4]; - w[5] = f + H[5]; - w[6] = g + H[6]; - w[7] = h + H[7]; - w[9] = w[10] = w[11] = w[12] = w[13] = w[14] = 0; - w[8] = 0x80000000; - w[15] = 0x00000100; - for (j = 16; j < 64; j++) - w[j] = - sigma1(w[j - 2]) + w[j - 7] + sigma0(w[j - 15]) + - w[j - 16]; - - a = H[0]; - b = H[1]; - c = H[2]; - d = H[3]; - e = H[4]; - f = H[5]; - g = H[6]; - h = H[7]; + for (uint32_t j = 0; j < 64; j++) { + uint32_t t1 = h + Sigma1(e) + Ch(e, f, g) + k[j] + w[j]; + uint32_t t2 = Sigma0(a) + Maj(a, b, c); + h = g; + g = f; + f = e; + e = d + t1; + d = c; + c = b; + b = a; + a = t1 + t2; + } + + for (i = 0; i <= salt->iterations; i++) { + w[0] = a + H[0]; + w[1] = b + H[1]; + w[2] = c + H[2]; + w[3] = d + H[3]; + w[4] = e + H[4]; + w[5] = f + H[5]; + w[6] = g + H[6]; + w[7] = h + H[7]; + w[9] = w[10] = w[11] = w[12] = w[13] = w[14] = 0; + w[8] = 0x80000000; + w[15] = 0x00000100; + for (j = 16; j < 64; j++) + w[j] = + sigma1(w[j - 2]) + w[j - 7] + sigma0(w[j - 15]) + + w[j - 16]; + + a = H[0]; + b = H[1]; + c = H[2]; + d = H[3]; + e = H[4]; + f = H[5]; + g = H[6]; + h = H[7]; #pragma unroll 64 - for (uint32_t j = 0; j < 64; j++) { - uint32_t t1 = - h + Sigma1(e) + Ch(e, f, g) + k[j] + w[j]; - uint32_t t2 = Sigma0(a) + Maj(a, b, c); - h = g; - g = f; - f = e; - e = d + t1; - d = c; - c = b; - b = a; - a = t1 + t2; - } - } - uint32_t cmp = 1; - - __global uint32_t *v = salt->hash; - cmp &= (*v++ == a + H[0]); - cmp &= (*v++ == b + H[1]); - cmp &= (*v++ == c + H[2]); - cmp &= (*v++ == d + H[3]); - cmp &= (*v++ == e + H[4]); - cmp &= (*v++ == f + H[5]); - cmp &= (*v++ == g + H[6]); - cmp &= (*v++ == h + H[7]); - - out[idx].cracked = cmp; + for (uint32_t j = 0; j < 64; j++) { + uint32_t t1 = + h + Sigma1(e) + Ch(e, f, g) + k[j] + w[j]; + uint32_t t2 = Sigma0(a) + Maj(a, b, c); + h = g; + g = f; + f = e; + e = d + t1; + d = c; + c = b; + b = a; + a = t1 + t2; + } + } + uint32_t cmp = 0; + __global uint32_t *v = salt->hash; + if (*v++ == a + H[0]) { + uint32_t diff; + diff = *v++ ^ (b + H[1]); + diff |= *v++ ^ (c + H[2]); + diff |= *v++ ^ (d + H[3]); + diff |= *v++ ^ (e + H[4]); + diff |= *v++ ^ (f + H[5]); + diff |= *v++ ^ (g + H[6]); + diff |= *v++ ^ (h + H[7]); + cmp = !diff; + } + out[idx].cracked = cmp; } - diff -urpN magnum-jumbo/src/opencl_pwsafe_fmt.c magnum-jumbo_pwsafe2//src/opencl_pwsafe_fmt.c --- magnum-jumbo/src/opencl_pwsafe_fmt.c 2012-08-10 21:39:56.199783111 +0000 +++ magnum-jumbo_pwsafe2//src/opencl_pwsafe_fmt.c 2012-08-11 19:07:28.569408512 +0000 @@ -33,7 +33,7 @@ #define PLAINTEXT_LENGTH 15 #define BINARY_SIZE 32 #define KERNEL_NAME "pwsafe" -#define KEYS_PER_CRYPT 1024 +#define KEYS_PER_CRYPT 112*256 #define MIN_KEYS_PER_CRYPT KEYS_PER_CRYPT #define MAX_KEYS_PER_CRYPT KEYS_PER_CRYPT # define SWAP32(n) \