From 7474c08f1a9d7ec78ad8297e990d1962b73418b3 Mon Sep 17 00:00:00 2001
From: Cameron Fairchild <cameron@opentensor.ai>
Date: Wed, 5 Jul 2023 20:50:31 -0400
Subject: [PATCH 1/4] Optimization thanks to @GithubRealFan

---
 kernels/main.cu | 23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/kernels/main.cu b/kernels/main.cu
index da754aa..c5844e8 100644
--- a/kernels/main.cu
+++ b/kernels/main.cu
@@ -1,5 +1,6 @@
 /*
  * The MIT License (MIT)
+ * Copyright (c) 2023 GithubRealFan 
  * Copyright (c) 2022 Cameron Fairchild
  * Copyright (c) 2022 Opentensor Foundation
 
@@ -145,10 +146,13 @@ __device__ void create_seal_hash(BYTE* seal, BYTE* block_hash, uint64 nonce) {
 __device__ bool found = false;
 
-// TODO: Use CUDA streams and events to dispatch new blocks and recieve solutions
+// TODO: Use CUDA streams and events to dispatch new blocks and receive solutions
-__global__ void solve(uint64* solution, uint64 nonce_start, uint64 update_interval, unsigned int n_nonces, uint256 limit, BYTE* block_bytes) {
-    for (uint64 i = blockIdx.x * blockDim.x + threadIdx.x; 
-        i < n_nonces; 
-        i += blockDim.x * gridDim.x) 
+__global__ void solve(uint64* solution, uint64 nonce_start, uint64 update_interval, unsigned int n_nonces, uint256 limit, BYTE* block_bytes)
+{
+    int blockSize = 8;
+    uint64 new_interval = update_interval / blockSize;
+    for (uint64 i = blockIdx.x * blockDim.x + threadIdx.x;
+         i < n_nonces * blockSize;
+         i += blockDim.x * gridDim.x)
     {
         if (found) {
             break;
@@ -160,8 +164,8 @@ __global__ void solve(uint64* solution, uint64 nonce_start, uint64 update_interv
             seal[j] = 0xff;
         }
 
-        uint64 nonce = nonce_start + i * update_interval;
-        for (uint64 j = nonce; j < nonce + update_interval; j++) {
+        uint64 nonce = nonce_start + i * new_interval;
+        for (uint64 j = nonce; j < nonce + new_interval; j++) {
             create_seal_hash(seal, block_bytes, j);
         
             if (seal_meets_difficulty(seal, limit)) {
@@ -212,10 +216,9 @@ __global__ void test_seal_meets_difficulty(BYTE* seal, uint256 limit, bool* resu
     *result = seal_meets_difficulty(seal, limit);
 }
 
-void runSolve(int blockSize, uint64* solution, uint64 nonce_start, uint64 update_interval, uint256 limit, BYTE* block_bytes) {
-	int numBlocks = (blockSize + blockSize - 1) / blockSize;
-
-	solve <<< numBlocks, blockSize >>> (solution, nonce_start, update_interval, blockSize, limit, block_bytes);
+void runSolve(int blockSize, uint64 *solution, uint64 nonce_start, uint64 update_interval, uint256 limit, BYTE *block_bytes)
+{
+    solve<<<8, blockSize>>>(solution, nonce_start, update_interval, blockSize, limit, block_bytes);
 }
 
 bool runTestSealMeetsDifficulty(BYTE* seal, uint256 limit) {

From a9f931f6af09a7ec7e760cf6c47957b27d394cd3 Mon Sep 17 00:00:00 2001
From: Cameron Fairchild <cameron@opentensor.ai>
Date: Wed, 5 Jul 2023 20:51:07 -0400
Subject: [PATCH 2/4] bump to 1.2.2

---
 pyproject.toml | 2 +-
 setup.py       | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 59e75bb..e5af35d 100755
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "cubit"
-version = "1.2.0"
+version = "1.2.2"
 description = "A python package for CUDA registration on bittensor. "
 readme = "README.md"
 authors = [{ name = "Opentensor Foundation", email = "cameron@opentensor.ai" }]
diff --git a/setup.py b/setup.py
index 9b37420..5587f8b 100644
--- a/setup.py
+++ b/setup.py
@@ -159,7 +159,7 @@ def build_extensions(self):
     author = 'Opentensor Foundation',
     author_email = 'cameron@opentensor.ai',
     url = 'https://github.com/opentensor/cubit',
-    version = '1.2.1',
+    version = '1.2.2',
 
     ext_modules = cythonize(ext),
 

From dfed4cf9004b71d796907525c0d7004f772872c8 Mon Sep 17 00:00:00 2001
From: Cameron Fairchild <cameron@opentensor.ai>
Date: Wed, 5 Jul 2023 20:53:39 -0400
Subject: [PATCH 3/4] add ack to readme

---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index d062083..c78fc02 100644
--- a/README.md
+++ b/README.md
@@ -86,3 +86,4 @@ python3 -m unittest test.py
 https://github.com/rmcgibbo/npcuda-example/  
 https://github.com/mochimodev/cuda-hashing-algos/  
 https://github.com/camfairchild/bittensor_register_cuda/
+https://github.com/GithubRealFan/cubit-fast/

From 376d2e218ed5becce11cc0de17251403ab71bcdd Mon Sep 17 00:00:00 2001
From: Cameron Fairchild <cameron@opentensor.ai>
Date: Wed, 5 Jul 2023 21:15:21 -0400
Subject: [PATCH 4/4] fix version in readme

---
 README.md | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/README.md b/README.md
index c78fc02..32b69b2 100644
--- a/README.md
+++ b/README.md
@@ -19,29 +19,29 @@ A python package to register on the bittensor network using a CUDA device.
 Using the wheel for your version of python (3.8, etc.) from [releases](https://github.com/opentensor/cubit/releases/latest)   
 For Python 3.8  
 ```
-pip install https://github.com/opentensor/cubit/releases/download/v1.1.2/cubit-1.1.2-cp38-cp38-linux_x86_64.whl
+pip install https://github.com/opentensor/cubit/releases/download/v1.2.2/cubit-1.2.2-cp38-cp38-linux_x86_64.whl
 ```   
 For Python 3.9  
 ```
-pip install https://github.com/opentensor/cubit/releases/download/v1.1.2/cubit-1.1.2-cp39-cp39-linux_x86_64.whl
+pip install https://github.com/opentensor/cubit/releases/download/v1.2.2/cubit-1.2.2-cp39-cp39-linux_x86_64.whl
 ```   
-For Python 3.9  
+For Python 3.10  
 ```
-pip install https://github.com/opentensor/cubit/releases/download/v1.1.2/cubit-1.1.2-cp310-cp310-linux_x86_64.whl
+pip install https://github.com/opentensor/cubit/releases/download/v1.2.2/cubit-1.2.2-cp310-cp310-linux_x86_64.whl
 ```   
 #### Install testing dependencies
 Install `test` extras from wheel
 For Python 3.8  
 ```
-pip install https://github.com/opentensor/cubit/releases/download/v1.1.2/cubit-1.1.2-cp38-cp38-linux_x86_64.whl[test]
+pip install https://github.com/opentensor/cubit/releases/download/v1.2.2/cubit-1.2.2-cp38-cp38-linux_x86_64.whl[test]
 ```   
 For Python 3.9  
 ```
-pip install https://github.com/opentensor/cubit/releases/download/v1.1.2/cubit-1.1.2-cp39-cp39-linux_x86_64.whl[test]
+pip install https://github.com/opentensor/cubit/releases/download/v1.2.2/cubit-1.2.2-cp39-cp39-linux_x86_64.whl[test]
 ```   
 For Python 3.10  
 ```
-pip install https://github.com/opentensor/cubit/releases/download/v1.1.2/cubit-1.1.2-cp310-cp310-linux_x86_64.whl[test]
+pip install https://github.com/opentensor/cubit/releases/download/v1.2.2/cubit-1.2.2-cp310-cp310-linux_x86_64.whl[test]
 ```   
 ### From source
 #### Requirements