Merge pull request #91 from EthicalML/python_shader_extension
Python shader extension
axsaucedo authored Nov 8, 2020
2 parents 0a416f6 + 3b540d0 commit de6ddd6
Showing 8 changed files with 1,277 additions and 43 deletions.
29 changes: 24 additions & 5 deletions README.md
@@ -306,8 +306,18 @@ tensor_out = Tensor([0, 0, 0])

mgr.eval_tensor_create_def([tensor_in_a, tensor_in_b, tensor_out])

-shaderFilePath = "shaders/glsl/opmult.comp"
-mgr.eval_async_algo_file_def([tensor_in_a, tensor_in_b, tensor_out], shaderFilePath)
+# Define the function via PyShader or directly as glsl string or spirv bytes
+@python2shader
+def compute_shader_multiply(index=("input", "GlobalInvocationId", ivec3),
+                            data1=("buffer", 0, Array(f32)),
+                            data2=("buffer", 1, Array(f32)),
+                            data3=("buffer", 2, Array(f32))):
+    i = index.x
+    data3[i] = data1[i] * data2[i]
+
+# Run shader operation synchronously
+mgr.eval_algo_data_def(
+    [tensor_in_a, tensor_in_b, tensor_out], compute_shader_multiply.to_spirv())

# Alternatively can pass raw string/bytes:
# shaderFileData = """ shader code here... """
@@ -332,13 +342,22 @@ tensor_in_a = Tensor([2, 2, 2])
tensor_in_b = Tensor([1, 2, 3])
tensor_out = Tensor([0, 0, 0])

-shaderFilePath = "../../shaders/glsl/opmult.comp"
-
mgr.eval_tensor_create_def([tensor_in_a, tensor_in_b, tensor_out])

seq = mgr.create_sequence("op")

-mgr.eval_async_algo_file_def([tensor_in_a, tensor_in_b, tensor_out], shaderFilePath)
+# Define the function via PyShader or directly as glsl string or spirv bytes
+@python2shader
+def compute_shader_multiply(index=("input", "GlobalInvocationId", ivec3),
+                            data1=("buffer", 0, Array(f32)),
+                            data2=("buffer", 1, Array(f32)),
+                            data3=("buffer", 2, Array(f32))):
+    i = index.x
+    data3[i] = data1[i] * data2[i]
+
+# Run shader operation asynchronously and then await
+mgr.eval_async_algo_data_def(
+    [tensor_in_a, tensor_in_b, tensor_out], compute_shader_multiply.to_spirv())
mgr.eval_await_def()

seq.begin()

200 changes: 197 additions & 3 deletions docs/overview/python-package.rst
@@ -9,7 +9,7 @@ Below is a diagram that provides insights on the relationship between Vulkan Kom
.. image:: ../images/kompute-architecture.jpg
   :width: 70%

-Python Components
+Core Python Components
^^^^^^^^^^^^^^^^^^^^^^

The Python package exposes three main classes:
@@ -30,15 +30,98 @@ More specifically, it can be done through the following functions:
* mgr.eval_async_<opname>_def - Runs operation asynchronously under a new anonymous sequence
* seq.record_<opname> - Records operation in sequence (requires sequence to be in recording mode)

You can see these operations being used in the `Simple Python example <https://kompute.cc/index.html#python-example-simple>`_ and in the `Extended Python Example <https://kompute.cc/index.html#python-example-extended>`_.
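
For instance, a minimal sketch of the synchronous, asynchronous and recorded invocation styles, using the tensor sync operation that appears in the examples below (the async variant name is inferred from the ``mgr.eval_async_<opname>_def`` naming pattern above):

.. code-block:: python

   # Synchronously, under a new anonymous sequence
   mgr.eval_tensor_sync_local_def([tensor_out])

   # Asynchronously, under a new anonymous sequence, awaiting completion explicitly
   mgr.eval_async_tensor_sync_local_def([tensor_out])
   mgr.eval_await_def()

   # Recorded into a named sequence, which can then be evaluated repeatedly
   seq = mgr.create_sequence("op")
   seq.begin()
   seq.record_tensor_sync_local([tensor_out])
   seq.end()
   seq.eval()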

Python Example (Simple)
^^^^^^^^^^^^^^^^^^^^^^^

You can then interact with Kompute from your Python interpreter. Below is the same example as "Your First Kompute (Simple Version)" above, but in Python:

.. code-block:: python
   :linenos:

   mgr = Manager()

   # Can be initialized with List[] or np.Array
   tensor_in_a = Tensor([2, 2, 2])
   tensor_in_b = Tensor([1, 2, 3])
   tensor_out = Tensor([0, 0, 0])

   mgr.eval_tensor_create_def([tensor_in_a, tensor_in_b, tensor_out])

   # Define the function via PyShader or directly as glsl string or spirv bytes
   @python2shader
   def compute_shader_multiply(index=("input", "GlobalInvocationId", ivec3),
                               data1=("buffer", 0, Array(f32)),
                               data2=("buffer", 1, Array(f32)),
                               data3=("buffer", 2, Array(f32))):
       i = index.x
       data3[i] = data1[i] * data2[i]

   # Run shader operation synchronously
   mgr.eval_algo_data_def(
       [tensor_in_a, tensor_in_b, tensor_out], compute_shader_multiply.to_spirv())

   # Alternatively can pass raw string/bytes:
   # shaderFileData = """ shader code here... """
   # mgr.eval_algo_data_def([tensor_in_a, tensor_in_b, tensor_out], list(shaderFileData))

   mgr.eval_await_def()

   mgr.eval_tensor_sync_local_def([tensor_out])

   assert tensor_out.data() == [2.0, 4.0, 6.0]
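
As the comments in the example above note, ``eval_algo_data_def`` also accepts raw shader code instead of a ``@python2shader`` function. A hedged sketch of passing a precompiled SPIR-V binary (the ``.spv`` path is illustrative and assumes the shader was compiled ahead of time):

.. code-block:: python

   # Read a precompiled SPIR-V binary and pass its bytes in place of
   # compute_shader_multiply.to_spirv()
   with open("shaders/glsl/opmult.comp.spv", "rb") as spirv_file:
       spirv_bytes = spirv_file.read()

   mgr.eval_algo_data_def(
       [tensor_in_a, tensor_in_b, tensor_out], list(spirv_bytes))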

Python Example (Extended)
^^^^^^^^^^^^^^^^^^^^^^^^^

Similarly, below is the extended example from above in Python:

.. code-block:: python
   :linenos:

   mgr = Manager(0, [2])

   # Can be initialized with List[] or np.Array
   tensor_in_a = Tensor([2, 2, 2])
   tensor_in_b = Tensor([1, 2, 3])
   tensor_out = Tensor([0, 0, 0])

   mgr.eval_tensor_create_def([tensor_in_a, tensor_in_b, tensor_out])

   seq = mgr.create_sequence("op")

   # Define the function via PyShader or directly as glsl string or spirv bytes
   @python2shader
   def compute_shader_multiply(index=("input", "GlobalInvocationId", ivec3),
                               data1=("buffer", 0, Array(f32)),
                               data2=("buffer", 1, Array(f32)),
                               data3=("buffer", 2, Array(f32))):
       i = index.x
       data3[i] = data1[i] * data2[i]

   # Run shader operation asynchronously and then await
   mgr.eval_async_algo_data_def(
       [tensor_in_a, tensor_in_b, tensor_out], compute_shader_multiply.to_spirv())
   mgr.eval_await_def()

   seq.begin()
   seq.record_tensor_sync_local([tensor_in_a])
   seq.record_tensor_sync_local([tensor_in_b])
   seq.record_tensor_sync_local([tensor_out])
   seq.end()
   seq.eval()

   assert tensor_out.data() == [2.0, 4.0, 6.0]

Kompute Operation Capabilities
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Multiple processing capabilities can be handled by loading compute shaders into separate sequences. The example below shows how this can be done:

.. code-block:: python
   :linenos:

   from kp import Manager

   # We'll assume we have the shader data available
@@ -77,6 +160,117 @@ Multiple processing capabilities can be handled by loading compute shaders into

   print(t1.data(), t2.data(), t3.data())
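
A minimal sketch of the same idea, assuming two shaders already compiled to SPIR-V bytes are available as the hypothetical variables ``shader_a`` and ``shader_b``:

.. code-block:: python

   mgr = Manager()

   t1, t2, t3 = Tensor([1, 1, 1]), Tensor([2, 2, 2]), Tensor([0, 0, 0])
   mgr.eval_tensor_create_def([t1, t2, t3])

   # Each shader is recorded into its own named sequence
   seq_a = mgr.create_sequence("seq_a")
   seq_a.begin()
   seq_a.record_algo_data([t1, t2, t3], shader_a)
   seq_a.end()

   seq_b = mgr.create_sequence("seq_b")
   seq_b.begin()
   seq_b.record_algo_data([t1, t2, t3], shader_b)
   seq_b.end()

   # The sequences can now be evaluated independently and repeatedly
   seq_a.eval()
   seq_b.eval()

   mgr.eval_tensor_sync_local_def([t1, t2, t3])
   print(t1.data(), t2.data(), t3.data())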

Machine Learning Logistic Regression Implementation
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Similar to the logistic regression implementation in the C++ examples section, below you can find the Python implementation of the logistic regression algorithm.

.. code-block:: python
   :linenos:

   @python2shader
   def compute_shader(
           index=("input", "GlobalInvocationId", ivec3),
           x_i=("buffer", 0, Array(f32)),
           x_j=("buffer", 1, Array(f32)),
           y=("buffer", 2, Array(f32)),
           w_in=("buffer", 3, Array(f32)),
           w_out_i=("buffer", 4, Array(f32)),
           w_out_j=("buffer", 5, Array(f32)),
           b_in=("buffer", 6, Array(f32)),
           b_out=("buffer", 7, Array(f32)),
           l_out=("buffer", 8, Array(f32)),
           M=("buffer", 9, Array(f32))):
       i = index.x

       m = M[0]

       w_curr = vec2(w_in[0], w_in[1])
       b_curr = b_in[0]

       x_curr = vec2(x_i[i], x_j[i])
       y_curr = y[i]

       z_dot = w_curr @ x_curr
       z = z_dot + b_curr
       y_hat = 1.0 / (1.0 + exp(-z))

       d_z = y_hat - y_curr
       d_w = (1.0 / m) * x_curr * d_z
       d_b = (1.0 / m) * d_z

       loss = -((y_curr * log(y_hat)) + ((1.0 - y_curr) * log(1.0 - y_hat)))

       w_out_i[i] = d_w.x
       w_out_j[i] = d_w.y
       b_out[i] = d_b
       l_out[i] = loss

   # First we create input and output tensors for the shader
   tensor_x_i = Tensor([0.0, 1.0, 1.0, 1.0, 1.0])
   tensor_x_j = Tensor([0.0, 0.0, 0.0, 1.0, 1.0])
   tensor_y = Tensor([0.0, 0.0, 0.0, 1.0, 1.0])

   tensor_w_in = Tensor([0.001, 0.001])
   tensor_w_out_i = Tensor([0.0, 0.0, 0.0, 0.0, 0.0])
   tensor_w_out_j = Tensor([0.0, 0.0, 0.0, 0.0, 0.0])

   tensor_b_in = Tensor([0.0])
   tensor_b_out = Tensor([0.0, 0.0, 0.0, 0.0, 0.0])

   tensor_l_out = Tensor([0.0, 0.0, 0.0, 0.0, 0.0])

   tensor_m = Tensor([5.0])

   # We store them in an array for easier interaction
   params = [tensor_x_i, tensor_x_j, tensor_y, tensor_w_in, tensor_w_out_i,
             tensor_w_out_j, tensor_b_in, tensor_b_out, tensor_l_out, tensor_m]

   mgr = Manager()
   mgr.eval_tensor_create_def(params)

   # Record commands for efficient evaluation
   sq = mgr.create_sequence()
   sq.begin()
   sq.record_tensor_sync_device([tensor_w_in, tensor_b_in])
   sq.record_algo_data(params, compute_shader.to_spirv())
   sq.record_tensor_sync_local([tensor_w_out_i, tensor_w_out_j, tensor_b_out, tensor_l_out])
   sq.end()

   ITERATIONS = 100
   learning_rate = 0.1

   # Perform machine learning training and inference across all input X and Y
   for i_iter in range(ITERATIONS):
       sq.eval()

       # Calculate the parameters based on the respective derivatives calculated
       w_in_i_val = tensor_w_in.data()[0]
       w_in_j_val = tensor_w_in.data()[1]
       b_in_val = tensor_b_in.data()[0]

       for j_iter in range(tensor_b_out.size()):
           w_in_i_val -= learning_rate * tensor_w_out_i.data()[j_iter]
           w_in_j_val -= learning_rate * tensor_w_out_j.data()[j_iter]
           b_in_val -= learning_rate * tensor_b_out.data()[j_iter]

       # Update the parameters to process inference again
       tensor_w_in.set_data([w_in_i_val, w_in_j_val])
       tensor_b_in.set_data([b_in_val])

   assert tensor_w_in.data()[0] < 0.01
   assert tensor_w_in.data()[0] > 0.0
   assert tensor_w_in.data()[1] > 1.5
   assert tensor_b_in.data()[0] < 0.7

   # Print outputs
   print(tensor_w_in.data())
   print(tensor_b_in.data())
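
For reference, the per-element quantities computed in the shader above correspond to the standard logistic regression gradients, with :math:`\sigma` the sigmoid function and :math:`m` the number of samples:

.. math::

   \hat{y}_i = \sigma(w^\top x_i + b), \qquad
   dz_i = \hat{y}_i - y_i

.. math::

   dw_i = \frac{1}{m} x_i \, dz_i, \qquad
   db_i = \frac{1}{m} dz_i, \qquad
   \mathcal{L}_i = -\left(y_i \log \hat{y}_i + (1 - y_i) \log(1 - \hat{y}_i)\right)

The host loop then applies the gradient descent updates :math:`w \leftarrow w - \alpha \sum_i dw_i` and :math:`b \leftarrow b - \alpha \sum_i db_i` with learning rate :math:`\alpha = 0.1`.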

Package Installation
^^^^^^^^^^^^^^^^^^^^

3 changes: 0 additions & 3 deletions docs/overview/python-reference.rst
@@ -6,9 +6,6 @@ Python Class Documentation & Reference
This section provides a breakdown of the Python classes and what each of their functions provides.
Below is a diagram that provides insights on the relationship between Vulkan Kompute objects and Vulkan resources, which primarily encompass ownership of CPU and/or GPU memory.

-.. image:: ../images/kompute-architecture.jpg
-   :width: 70%

Manager
-------
