NVIDIA · thirtiseven · Dec 18, 2023 · Oct 13, 2023 · Oct 18, 2023 · Oct 18, 2023
diff --git a/src/main/cpp/CMakeLists.txt b/src/main/cpp/CMakeLists.txt
@@ -164,6 +164,7 @@ add_library(
   src/ZOrderJni.cpp
   src/bloom_filter.cu
   src/cast_decimal_to_string.cu
+  src/format_float.cu
   src/cast_float_to_string.cu
   src/cast_string.cu
   src/cast_string_to_float.cu

diff --git a/src/main/cpp/src/CastStringJni.cpp b/src/main/cpp/src/CastStringJni.cpp
@@ -1,4 +1,5 @@
 /*
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
  * Copyright (c) 2022-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
@@ -125,6 +126,21 @@ JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_CastStrings_fromFloat(J
   CATCH_CAST_EXCEPTION(env, 0);
 }
 
+// JNI binding for CastStrings.fromFloatWithFormat; delegates to spark_rapids_jni::format_float.
+JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_CastStrings_fromFloatWithFormat(
+  JNIEnv* env, jclass, jlong input_column, jint digits)
+{
+  JNI_NULL_CHECK(env, input_column, "input column is null", 0);
+  try {
+    cudf::jni::auto_set_device(env);
+    auto const* floats_view = reinterpret_cast<cudf::column_view const*>(input_column);
+    auto const stream       = cudf::get_default_stream();
+    return cudf::jni::release_as_jlong(
+      spark_rapids_jni::format_float(*floats_view, digits, stream));
+  }
+  CATCH_CAST_EXCEPTION(env, 0);
+}
+
 JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_CastStrings_fromDecimal(JNIEnv* env,
                                                                                  jclass,
                                                                                  jlong input_column)

diff --git a/src/main/cpp/src/cast_string.hpp b/src/main/cpp/src/cast_string.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2023, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -115,6 +115,12 @@ std::unique_ptr<cudf::column> string_to_float(
   rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
+std::unique_ptr<cudf::column> format_float(
+  cudf::column_view const& input,  // floating-point column to render as strings
+  int digits,                      // presumably the fractional digit count -- TODO confirm
+  rmm::cuda_stream_view stream,
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
 std::unique_ptr<cudf::column> float_to_string(
   cudf::column_view const& input,
   rmm::cuda_stream_view stream,

diff --git a/src/main/cpp/src/format_float.cu b/src/main/cpp/src/format_float.cu
@@ -0,0 +1,131 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "cast_string.hpp"
+#include "ftos_converter.cuh"
+
+#include <cudf/column/column_device_view.cuh>
+#include <cudf/detail/null_mask.hpp>
+#include <cudf/detail/nvtx/ranges.hpp>
+#include <cudf/strings/detail/strings_children.cuh>
+#include <cudf/utilities/type_dispatcher.hpp>
+
+#include <rmm/cuda_stream_view.hpp>
+#include <rmm/exec_policy.hpp>
+
+namespace spark_rapids_jni {
+
+namespace detail {
+namespace {
+
+/**
+ * @brief Per-row functor with two modes selected by `d_chars`: while it is null the row's
+ * output size is stored in `d_offsets[idx]`; otherwise the formatted characters are written
+ * at `d_chars + d_offsets[idx]`. Null rows get size 0 and produce no characters.
+ */
+template <typename FloatType>
+struct format_float_fn {
+  cudf::column_device_view d_floats;
+  int const digits;
+  cudf::size_type* d_offsets;
+  char* d_chars;
+
+  // Size reported by the converter for `value` formatted with the `digits` member.
+  __device__ cudf::size_type row_size(FloatType value) const
+  {
+    bool constexpr is_float = std::is_same_v<FloatType, float>;
+    return static_cast<cudf::size_type>(
+      ftos_converter::compute_format_float_size(static_cast<double>(value), digits, is_float));
+  }
+
+  __device__ void operator()(cudf::size_type idx) const
+  {
+    bool const sizing = (d_chars == nullptr);
+    if (d_floats.is_null(idx)) {
+      if (sizing) { d_offsets[idx] = 0; }
+    } else if (sizing) {
+      d_offsets[idx] = row_size(d_floats.element<FloatType>(idx));
+    } else {
+      // Write mode: emit this row's characters starting at its recorded offset.
+      bool constexpr is_float = std::is_same_v<FloatType, float>;
+      auto const value        = static_cast<double>(d_floats.element<FloatType>(idx));
+      ftos_converter::format_float(value, digits, is_float, d_chars + d_offsets[idx]);
+    }
+  }
+};
+
+/**
+ * @brief Type-dispatch target that formats a floating-point column as a strings column.
+ *
+ * Only floating-point element types are supported; any other type ends in CUDF_FAIL.
+ */
+struct dispatch_format_float_fn {
+  // Every non-floating-point element type is rejected.
+  template <typename T, CUDF_ENABLE_IF(not std::is_floating_point_v<T>)>
+  std::unique_ptr<cudf::column> operator()(cudf::column_view const&,
+                                           int,
+                                           rmm::cuda_stream_view,
+                                           rmm::mr::device_memory_resource*) const
+  {
+    CUDF_FAIL("Values for format_float function must be a float type.");
+  }
+
+  // Floating-point input: build the children with format_float_fn, then assemble the column.
+  template <typename FloatType, CUDF_ENABLE_IF(std::is_floating_point_v<FloatType>)>
+  std::unique_ptr<cudf::column> operator()(cudf::column_view const& floats,
+                                           int digits,
+                                           rmm::cuda_stream_view stream,
+                                           rmm::mr::device_memory_resource* mr) const
+  {
+    auto const row_count = floats.size();
+    if (row_count == 0) { return cudf::make_empty_column(cudf::type_id::STRING); }
+
+    auto const d_input = cudf::column_device_view::create(floats, stream);
+    format_float_fn<FloatType> formatter{*d_input, digits};
+    auto [offsets_child, chars_child] = cudf::strings::detail::make_strings_children(
+      formatter, row_count, stream, mr);
+    return cudf::make_strings_column(row_count,
+                                     std::move(offsets_child),
+                                     std::move(chars_child),
+                                     floats.null_count(),
+                                     cudf::detail::copy_bitmask(floats, stream, mr));
+  }
+};
+
+}  // namespace
+
+std::unique_ptr<cudf::column> format_float(cudf::column_view const& floats,
+                                           int digits,
+                                           rmm::cuda_stream_view stream,
+                                           rmm::mr::device_memory_resource* mr)
+{
+  return cudf::type_dispatcher(  // selects the overload for the column's element type
+    floats.type(), dispatch_format_float_fn{}, floats, digits, stream, mr);
+}
+
+}  // namespace detail
+
+// Public API: opens a CUDF_FUNC_RANGE and forwards every argument to detail::format_float.
+std::unique_ptr<cudf::column> format_float(cudf::column_view const& floats,
+                                           int digits,
+                                           rmm::cuda_stream_view stream,
+                                           rmm::mr::device_memory_resource* mr)
+{
+  CUDF_FUNC_RANGE();
+  return detail::format_float(floats, digits, stream, mr);
+}
+
+}  // namespace spark_rapids_jni