diff --git a/README.md b/README.md
index d062083..32b69b2 100644
--- a/README.md
+++ b/README.md
@@ -19,29 +19,29 @@ A python package to register on the bittensor network using a CUDA device.
 Using the wheel for your version of python (3.8, etc.) from [releases](https://github.com/opentensor/cubit/releases/latest)   
 For Python 3.8  
 ```
-pip install https://github.com/opentensor/cubit/releases/download/v1.1.2/cubit-1.1.2-cp38-cp38-linux_x86_64.whl
+pip install https://github.com/opentensor/cubit/releases/download/v1.2.2/cubit-1.2.2-cp38-cp38-linux_x86_64.whl
 ```   
 For Python 3.9  
 ```
-pip install https://github.com/opentensor/cubit/releases/download/v1.1.2/cubit-1.1.2-cp39-cp39-linux_x86_64.whl
+pip install https://github.com/opentensor/cubit/releases/download/v1.2.2/cubit-1.2.2-cp39-cp39-linux_x86_64.whl
 ```   
-For Python 3.9  
+For Python 3.10  
 ```
-pip install https://github.com/opentensor/cubit/releases/download/v1.1.2/cubit-1.1.2-cp310-cp310-linux_x86_64.whl
+pip install https://github.com/opentensor/cubit/releases/download/v1.2.2/cubit-1.2.2-cp310-cp310-linux_x86_64.whl
 ```   
 #### Install testing dependencies
 Install `test` extras from wheel
 For Python 3.8  
 ```
-pip install https://github.com/opentensor/cubit/releases/download/v1.1.2/cubit-1.1.2-cp38-cp38-linux_x86_64.whl[test]
+pip install https://github.com/opentensor/cubit/releases/download/v1.2.2/cubit-1.2.2-cp38-cp38-linux_x86_64.whl[test]
 ```   
 For Python 3.9  
 ```
-pip install https://github.com/opentensor/cubit/releases/download/v1.1.2/cubit-1.1.2-cp39-cp39-linux_x86_64.whl[test]
+pip install https://github.com/opentensor/cubit/releases/download/v1.2.2/cubit-1.2.2-cp39-cp39-linux_x86_64.whl[test]
 ```   
 For Python 3.10  
 ```
-pip install https://github.com/opentensor/cubit/releases/download/v1.1.2/cubit-1.1.2-cp310-cp310-linux_x86_64.whl[test]
+pip install https://github.com/opentensor/cubit/releases/download/v1.2.2/cubit-1.2.2-cp310-cp310-linux_x86_64.whl[test]
 ```   
 ### From source
 #### Requirements   
@@ -86,3 +86,4 @@ python3 -m unittest test.py
 https://github.com/rmcgibbo/npcuda-example/  
 https://github.com/mochimodev/cuda-hashing-algos/  
 https://github.com/camfairchild/bittensor_register_cuda/
+https://github.com/GithubRealFan/cubit-fast/
diff --git a/kernels/main.cu b/kernels/main.cu
index da754aa..c5844e8 100644
--- a/kernels/main.cu
+++ b/kernels/main.cu
@@ -1,5 +1,6 @@
 /*
  * The MIT License (MIT)
+ * Copyright (c) 2023 GithubRealFan 
  * Copyright (c) 2022 Cameron Fairchild
  * Copyright (c) 2022 Opentensor Foundation
 
@@ -145,10 +146,13 @@ __device__ void create_seal_hash(BYTE* seal, BYTE* block_hash, uint64 nonce) {
 __device__ bool found = false;
 
-// TODO: Use CUDA streams and events to dispatch new blocks and recieve solutions
+// TODO: Use CUDA streams and events to dispatch new blocks and receive solutions
-__global__ void solve(uint64* solution, uint64 nonce_start, uint64 update_interval, unsigned int n_nonces, uint256 limit, BYTE* block_bytes) {
-    for (uint64 i = blockIdx.x * blockDim.x + threadIdx.x; 
-        i < n_nonces; 
-        i += blockDim.x * gridDim.x) 
+__global__ void solve(uint64* solution, uint64 nonce_start, uint64 update_interval, unsigned int n_nonces, uint256 limit, BYTE* block_bytes)
+{
+    int blockSize = 8;
+    uint64 new_interval = update_interval / blockSize;
+    for (uint64 i = blockIdx.x * blockDim.x + threadIdx.x;
+         i < n_nonces * blockSize;
+         i += blockDim.x * gridDim.x)
     {
         if (found) {
             break;
@@ -160,8 +164,8 @@ __global__ void solve(uint64* solution, uint64 nonce_start, uint64 update_interv
             seal[j] = 0xff;
         }
 
-        uint64 nonce = nonce_start + i * update_interval;
-        for (uint64 j = nonce; j < nonce + update_interval; j++) {
+        uint64 nonce = nonce_start + i * new_interval;
+        for (uint64 j = nonce; j < nonce + new_interval; j++) {
             create_seal_hash(seal, block_bytes, j);
         
             if (seal_meets_difficulty(seal, limit)) {
@@ -212,10 +216,9 @@ __global__ void test_seal_meets_difficulty(BYTE* seal, uint256 limit, bool* resu
     *result = seal_meets_difficulty(seal, limit);
 }
 
-void runSolve(int blockSize, uint64* solution, uint64 nonce_start, uint64 update_interval, uint256 limit, BYTE* block_bytes) {
-	int numBlocks = (blockSize + blockSize - 1) / blockSize;
-
-	solve <<< numBlocks, blockSize >>> (solution, nonce_start, update_interval, blockSize, limit, block_bytes);
+void runSolve(int blockSize, uint64 *solution, uint64 nonce_start, uint64 update_interval, uint256 limit, BYTE *block_bytes)
+{ // Host-side launcher for the solve kernel; it only issues the launch and performs no synchronization or launch-error check here.
+    solve<<<8, blockSize>>>(solution, nonce_start, update_interval, blockSize, limit, block_bytes); // grid of 8 blocks must stay in sync with the factor of 8 hard-coded inside solve; blockSize is also passed as the kernel's n_nonces argument
+}
 
 bool runTestSealMeetsDifficulty(BYTE* seal, uint256 limit) {
diff --git a/pyproject.toml b/pyproject.toml
index 59e75bb..e5af35d 100755
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "cubit"
-version = "1.2.0"
+version = "1.2.2"
 description = "A python package for CUDA registration on bittensor. "
 readme = "README.md"
 authors = [{ name = "Opentensor Foundation", email = "cameron@opentensor.ai" }]
diff --git a/setup.py b/setup.py
index 9b37420..5587f8b 100644
--- a/setup.py
+++ b/setup.py
@@ -159,7 +159,7 @@ def build_extensions(self):
     author = 'Opentensor Foundation',
     author_email = 'cameron@opentensor.ai',
     url = 'https://github.com/opentensor/cubit',
-    version = '1.2.1',
+    version = '1.2.2',
 
     ext_modules = cythonize(ext),