Add explicit kernel names to facilitate profiling

nucypher · Jul 19, 2019 · b281330
1 parent 4b483c4
commit b281330
Show file tree

Hide file tree

Showing 7 changed files with 11 additions and 2 deletions.
diff --git a/nufhe/blind_rotate.mako b/nufhe/blind_rotate.mako
@@ -15,7 +15,7 @@
 ## You should have received a copy of the GNU General Public License
 ## along with this program.  If not, see <https://www.gnu.org/licenses/>.
 
-<%def name="BlindRotate(
+<%def name="blind_rotate(
     kernel_declaration, extracted_a, extracted_b, accum_a, gsw, bara, cdata_forward, cdata_inverse)">
 <%
     tpt = transform.threads_per_transform

diff --git a/nufhe/blind_rotate.py b/nufhe/blind_rotate.py
@@ -154,8 +154,9 @@ def _build_plan(self, plan_factory, device_params, lwe_a, lwe_b, accum_a, gsw, b
 
             try:
                 plan.kernel_call(
-                    TEMPLATE.get_def("BlindRotate"),
+                    TEMPLATE.get_def("blind_rotate"),
                     [lwe_a, lwe_b, accum_a, gsw, bara, cdata_forward, cdata_inverse],
+                    kernel_name="blind_rotate",
                     global_size=(
                         helpers.product(batch_shape),
                         local_size),

diff --git a/nufhe/lwe_gpu.py b/nufhe/lwe_gpu.py
@@ -170,6 +170,7 @@ def _build_plan(
         plan.kernel_call(
             TEMPLATE.get_def("lwe_keyswitch"),
             [result_a, result_b, result_cv, ks_a, ks_b, ks_cv, source_a, source_b],
+            kernel_name="lwe_keyswitch",
             global_size=(helpers.product(batch_shape), self._output_size),
             render_kwds=dict(
                 slices=(len(batch_shape), 1),
@@ -306,6 +307,7 @@ def _build_plan(
         plan.kernel_call(
             TEMPLATE.get_def("lwe_linear"),
             [result_a, result_b, result_cv, source_a, source_b, source_cv, coeff],
+            kernel_name="lwe_linear",
             global_size=result_a.shape,
             render_kwds=dict(
                 add_result=self._add_result,
@@ -329,6 +331,7 @@ def _build_plan(self, plan_factory, device_params, result_a, result_b, result_cv
         plan.kernel_call(
             TEMPLATE.get_def("lwe_noiseless_trivial"),
             [result_a, result_b, result_cv, mus],
+            kernel_name="lwe_noiseless_trivial",
             global_size=result_a.shape)
 
         return plan

diff --git a/nufhe/polynomials_gpu.py b/nufhe/polynomials_gpu.py
@@ -70,6 +70,7 @@ def _build_plan(self, plan_factory, device_params, result, source, powers, power
         plan.kernel_call(
             TEMPLATE.get_def("shift_torus_polynomial"),
             [result, source, powers, powers_idx],
+            kernel_name="shift_torus_polynomial",
             global_size=(
                 helpers.product(self._batch_shape),
                 helpers.product(poly_batch_shape),

diff --git a/nufhe/tgsw_gpu.py b/nufhe/tgsw_gpu.py
@@ -194,6 +194,7 @@ def _build_plan(self, plan_factory, device_params, result_a, messages):
         plan.kernel_call(
             TEMPLATE.get_def("tgsw_add_message"),
             [result_a, messages],
+            kernel_name="tgsw_add_message",
             global_size=(batch_len,),
             render_kwds=dict(
                 slices=(len(messages.shape), 1, 1, 1, 1),

diff --git a/nufhe/tlwe_gpu.py b/nufhe/tlwe_gpu.py
@@ -98,6 +98,7 @@ def _build_plan(self, plan_factory, device_params, result_a, result_b, tlwe_a):
         plan.kernel_call(
             TEMPLATE.get_def('tlwe_extract_lwe_samples'),
             [result_a, result_b, tlwe_a],
+            kernel_name="tlwe_extract_lwe_samples",
             global_size=(batch_len, self._mask_size, self._polynomial_degree),
             render_kwds=dict(
                 slices=(len(result_b.shape), 1, 1),
@@ -183,6 +184,7 @@ def _build_plan(
         plan.kernel_call(
             TEMPLATE.get_def("tlwe_encrypt_zero_fill_result"),
             [result_a, result_cv, noises1, noises2, ift_res],
+            kernel_name="tlwe_encrypt_zero_fill_result",
             global_size=(batch_len, self._mask_size + 1, polynomial_degree),
             render_kwds=dict(
                 noise=self._noise, mask_size=self._mask_size,

diff --git a/nufhe/transform/computation.py b/nufhe/transform/computation.py
@@ -73,6 +73,7 @@ def _build_plan(self, plan_factory, device_params, output, input_):
                 plan.kernel_call(
                     TEMPLATE.get_def('standalone_transform'),
                         [output, input_, cdata],
+                        kernel_name="standalone_transform",
                         global_size=(
                             blocks_num,
                             self._transform.threads_per_transform * tpb),