diff -urpN magnumripper-magnum-jumbo-62b2557/doc/README-CUDA magnumripper-magnum-jumbo-62b2557-fast//doc/README-CUDA --- magnumripper-magnum-jumbo-62b2557/doc/README-CUDA 2012-03-28 15:30:17.000000000 +0000 +++ magnumripper-magnum-jumbo-62b2557-fast//doc/README-CUDA 2012-03-31 20:25:03.429817122 +0000 @@ -10,5 +10,8 @@ Performance issues: If you have got Fermi or newer card change "-arch sm_10" to "-arch sm_20" in the NVCC_FLAGS (Makefile). Default THREADS and BLOCKS settings might not be optimal. To get better performance you can experiment with THREADS and BLOCKS macros defined for each format in cuda*.h file. + For MSCash2: + CARD NAME BLOCKS THREADS SM RESULT + GTX460 14 128 20 14194 c/s You can contact me at lukas[dot]odzioba[at]gmail[dot]com or john-dev mailing list diff -urpN magnumripper-magnum-jumbo-62b2557/src/cuda/mscash2.cu magnumripper-magnum-jumbo-62b2557-fast//src/cuda/mscash2.cu --- magnumripper-magnum-jumbo-62b2557/src/cuda/mscash2.cu 2012-03-28 15:30:17.000000000 +0000 +++ magnumripper-magnum-jumbo-62b2557-fast//src/cuda/mscash2.cu 2012-03-31 20:23:30.317067463 +0000 @@ -1,5 +1,5 @@ /* -* This software is Copyright (c) 2011 Lukas Odzioba +* This software is Copyright (c) 2011,2012 Lukas Odzioba * and it is hereby released to the general public under the following terms: * Redistribution and use in source and binary forms, with or without modification, are permitted. * Based on S3nf implementation http://openwall.info/wiki/john/MSCash2 @@ -168,7 +168,7 @@ __device__ __host__ void preproc(const u } __device__ void hmac_sha1(const uint8_t * key, uint32_t keylen, - const uint8_t * input, uint32_t inputlen, uint8_t * output, + const uint8_t * input, uint32_t inputlen, uint32_t * output, uint32_t * ipad_state, uint32_t * opad_state) { int i; @@ -240,28 +240,107 @@ __device__ void hmac_sha1(const uint8_t D += state_D; E += state_E; - PUT_WORD_32_BE(A, output, 0); - PUT_WORD_32_BE(B, output, 4); - PUT_WORD_32_BE(C, output, 8); - PUT_WORD_32_BE(D, output, 12); - PUT_WORD_32_BE(E, output, 16); + output[0]=SWAP(A); + output[1]=SWAP(B); + output[2]=SWAP(C); + output[3]=SWAP(D); + output[4]=SWAP(E); } + +__device__ void big_hmac_sha1( + uint32_t * input, uint32_t inputlen, + uint32_t * ipad_state, uint32_t * opad_state,uint32_t *tmp_out) +{ + int i,lo; + uint32_t temp, W[16]; + uint32_t A, B, C, D, E; +#pragma unroll 5 + for(i=0;i<5;i++) + W[i]=SWAP(input[i]); +#pragma unroll 4 + for(i=0;i<4;i++) + tmp_out[i]=SWAP(tmp_out[i]); + + for(lo=1; lounicode_salt, salt, 64); - - for (idx = 0; idx < KEYS_PER_CRYPT; idx++) { uint8_t *password = inbuffer[idx].v; @@ -353,9 +423,8 @@ __host__ void mscash_cpu(mscash2_passwor md4_crypt(nt_hash, inbuffer[idx].dcc_hash); } - - } + __host__ void mscash2_gpu(mscash2_password * inbuffer, mscash2_hash * outbuffer, mscash2_salt * host_salt) { diff -urpN magnumripper-magnum-jumbo-62b2557/src/cuda_mscash2.h magnumripper-magnum-jumbo-62b2557-fast//src/cuda_mscash2.h --- magnumripper-magnum-jumbo-62b2557/src/cuda_mscash2.h 2012-03-28 15:30:17.000000000 +0000 +++ magnumripper-magnum-jumbo-62b2557-fast//src/cuda_mscash2.h 2012-03-31 20:19:29.744816890 +0000 @@ -11,7 +11,7 @@ #define uint16_t unsigned short #define uint32_t unsigned int -#define THREADS 128//set 256 on fermi +#define THREADS 128//set at least 256 on fermi #define BLOCKS 14 #define KEYS_PER_CRYPT (THREADS)*(BLOCKS)