From 15c7b5a182cf5191ce28329e6d1a560b8dc0cf7d Mon Sep 17 00:00:00 2001
From: magnum <john.magnum@hushmail.com>
Date: Mon, 30 Mar 2015 11:43:08 +0200
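Subject: [PATCH 1/2] Pomelo OpenCL: Auto-tune fixes.

Give every profiled event its own label in warn[] ("xfer salt1",
"xfer salt2", "xfer keys", "xfer idx", "crypt", "xfer") and change the
event argument passed to opencl_init_auto_setup() from 2 to 4 so that it
still matches the position of the ", crypt: " entry.

Remove the locally computed gws_limit from init() and pass 0 to
opencl_init_auto_setup() instead; pass 2 * MEM_SIZE rather than MEM_SIZE.

Raise the last autotune_run() argument from 200 to 1000.

Correct the error strings of the two salt buffer writes in crypt_all(),
which previously both read "Failed transferring keys".
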
---
 src/opencl_pomelo_fmt_plug.c | 18 ++++++------------
 1 file changed, 6 insertions(+), 12 deletions(-)

diff --git a/src/opencl_pomelo_fmt_plug.c b/src/opencl_pomelo_fmt_plug.c
index 6cf72d1..1966bf7 100644
--- a/src/opencl_pomelo_fmt_plug.c
+++ b/src/opencl_pomelo_fmt_plug.c
@@ -53,7 +53,8 @@ john_register_one(&fmt_opencl_pomelo);
 #include "memdbg.h"
 
 static const char *warn[] = {
-	"xfer: ", "xfer: ", ", crypt: ", ", xfer: "
+	"xfer salt1: ", ", xfer salt2: ", ", xfer keys: ", ", xfer idx: ",
+	", crypt: ", ", xfer: "
 };
 
 #define MIN(a, b)		(((a) > (b)) ? (b) : (a))
@@ -308,17 +309,10 @@ static int valid(char *ciphertext, struct fmt_main *self)
 static void init(struct fmt_main *self)
 {
 	char build_opts[64];
-	size_t gws_limit;
 
 
 	opencl_init("$JOHN/kernels/pomelo_kernel.cl", gpu_id, build_opts);
 
-	// Current key_idx can only hold 26 bits of offset so
-	// we can't reliably use a GWS higher than 4M or so.
-	gws_limit = MIN((1 << 26) * 4 / MEM_SIZE,
-	    get_max_mem_alloc_size(gpu_id) / MEM_SIZE);
-
-
 	// create kernel to execute
 	crypt_kernel =
 	    clCreateKernel(program[gpu_id], "pomelo_crypt_kernel", &ret_code);
@@ -327,10 +321,10 @@ static void init(struct fmt_main *self)
 
 	//Initialize openCL tuning (library) for this format.
 	opencl_init_auto_setup(SEED, 0, NULL,
-	    warn, 2, self, create_clobj, release_clobj, MEM_SIZE, gws_limit);
+	    warn, 4, self, create_clobj, release_clobj, 2 * MEM_SIZE, 0);
 
 	//Auto tune execution from shared/included code.
-	autotune_run(self, 1, 100, 200);
+	autotune_run(self, 1, 100, 1000);
 
 }
 
@@ -510,11 +504,11 @@ static int crypt_all(int *pcount, struct db_salt *salt)
 
 	HANDLE_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], cl_saved_rest_salt,
 		CL_FALSE, 0, 4 * sizeof(unsigned short int), saved_rest_salt,
-		0, NULL, multi_profilingEvent[0]), "Failed transferring keys");
+		0, NULL, multi_profilingEvent[0]), "Failed transferring rest salt");
 
 	HANDLE_CLERROR(clEnqueueWriteBuffer(queue[gpu_id], cl_saved_real_salt,
 		CL_FALSE, 0, SALT_SIZE, saved_real_salt, 0, NULL,
-		multi_profilingEvent[1]), "Failed transferring keys");
+		multi_profilingEvent[1]), "Failed transferring real salt");
 
 	if (key_idx > key_offset)
 		HANDLE_CLERROR(clEnqueueWriteBuffer(queue[gpu_id],
-- 
2.3.2