apacheGH-41089: [C++] Clean up remaining tasks related to half float …

…casts (apache#41084) ### Rationale for this change Address remaining tasks brought up post-merge in this PR: apache#40067 ### What changes are included in this PR? Adds a couple of tests and cleans up cruft on another test. ### Are these changes tested? Yes, as these are test-only related changes. ### Are there any user-facing changes? No. * GitHub Issue: apache#41089 Authored-by: Clif Houck <[email protected]> Signed-off-by: Antoine Pitrou <[email protected]>
vibhatha · May 25, 2024 · 25ac0cf · 25ac0cf
1 parent 075d65c
commit 25ac0cf
Show file tree

Hide file tree

Showing 3 changed files with 68 additions and 26 deletions.
diff --git a/cpp/src/arrow/ipc/json_simple_test.cc b/cpp/src/arrow/ipc/json_simple_test.cc
@@ -189,21 +189,6 @@ class TestIntegers : public ::testing::Test {
 
 TYPED_TEST_SUITE_P(TestIntegers);
 
-template <typename DataType>
-std::vector<typename DataType::c_type> TestIntegersMutateIfNeeded(
-    std::vector<typename DataType::c_type> data) {
-  return data;
-}
-
-// TODO: This works, but is it the right way to do this?
-template <>
-std::vector<HalfFloatType::c_type> TestIntegersMutateIfNeeded<HalfFloatType>(
-    std::vector<HalfFloatType::c_type> data) {
-  std::for_each(data.begin(), data.end(),
-                [](HalfFloatType::c_type& value) { value = Float16(value).bits(); });
-  return data;
-}
-
 TYPED_TEST_P(TestIntegers, Basics) {
   using T = TypeParam;
   using c_type = typename T::c_type;
@@ -212,17 +197,16 @@ TYPED_TEST_P(TestIntegers, Basics) {
   auto type = this->type();
 
   AssertJSONArray<T>(type, "[]", {});
-  AssertJSONArray<T>(type, "[4, 0, 5]", TestIntegersMutateIfNeeded<T>({4, 0, 5}));
-  AssertJSONArray<T>(type, "[4, null, 5]", {true, false, true},
-                     TestIntegersMutateIfNeeded<T>({4, 0, 5}));
+  AssertJSONArray<T>(type, "[4, 0, 5]", {4, 0, 5});
+  AssertJSONArray<T>(type, "[4, null, 5]", {true, false, true}, {4, 0, 5});
 
   // Test limits
   const auto min_val = std::numeric_limits<c_type>::min();
   const auto max_val = std::numeric_limits<c_type>::max();
   std::string json_string = JSONArray(0, 1, min_val);
-  AssertJSONArray<T>(type, json_string, TestIntegersMutateIfNeeded<T>({0, 1, min_val}));
+  AssertJSONArray<T>(type, json_string, {0, 1, min_val});
   json_string = JSONArray(0, 1, max_val);
-  AssertJSONArray<T>(type, json_string, TestIntegersMutateIfNeeded<T>({0, 1, max_val}));
+  AssertJSONArray<T>(type, json_string, {0, 1, max_val});
 }
 
 TYPED_TEST_P(TestIntegers, Errors) {
@@ -289,12 +273,6 @@ INSTANTIATE_TYPED_TEST_SUITE_P(TestUInt8, TestIntegers, UInt8Type);
 INSTANTIATE_TYPED_TEST_SUITE_P(TestUInt16, TestIntegers, UInt16Type);
 INSTANTIATE_TYPED_TEST_SUITE_P(TestUInt32, TestIntegers, UInt32Type);
 INSTANTIATE_TYPED_TEST_SUITE_P(TestUInt64, TestIntegers, UInt64Type);
-// FIXME: I understand that HalfFloatType is backed by a uint16_t, but does it
-// make sense to run this test over it?
-// The way ConvertNumber for HalfFloatType is currently written, it allows the
-// conversion of floating point notation to a half float, which causes failures
-// in this test, one example is asserting 0.0 cannot be parsed as a half float.
-// INSTANTIATE_TYPED_TEST_SUITE_P(TestHalfFloat, TestIntegers, HalfFloatType);
 
 template <typename T>
 class TestStrings : public ::testing::Test {

diff --git a/cpp/src/arrow/util/formatting_util_test.cc b/cpp/src/arrow/util/formatting_util_test.cc
@@ -26,11 +26,13 @@
 #include "arrow/testing/gtest_util.h"
 #include "arrow/type.h"
 #include "arrow/util/decimal.h"
+#include "arrow/util/float16.h"
 #include "arrow/util/formatting.h"
 
 namespace arrow {
 
 using internal::StringFormatter;
+using util::Float16;
 
 class StringAppender {
  public:
@@ -280,6 +282,32 @@ TEST(Formatting, Double) {
   AssertFormatting(formatter, -HUGE_VAL, "-inf");
 }
 
+TEST(Formatting, HalfFloat) {
+  StringFormatter<HalfFloatType> formatter;
+
+  AssertFormatting(formatter, Float16(0.0f).bits(), "0");
+  AssertFormatting(formatter, Float16(-0.0f).bits(), "-0");
+  AssertFormatting(formatter, Float16(1.5f).bits(), "1.5");
+
+  // Slightly adapted from values present here
+  // https://blogs.mathworks.com/cleve/2017/05/08/half-precision-16-bit-floating-point-arithmetic/
+  AssertFormatting(formatter, 0x3c00, "1");
+  AssertFormatting(formatter, 0x3c01, "1.0009765625");
+  AssertFormatting(formatter, 0x0400, "0.00006103515625");
+  AssertFormatting(formatter, 0x0001, "5.960464477539063e-8");
+
+  // Can't avoid loss of precision here.
+  AssertFormatting(formatter, Float16(1234.567f).bits(), "1235");
+  AssertFormatting(formatter, Float16(1e3f).bits(), "1000");
+  AssertFormatting(formatter, Float16(1e4f).bits(), "10000");
+  AssertFormatting(formatter, Float16(1e10f).bits(), "inf");
+  AssertFormatting(formatter, Float16(1e15f).bits(), "inf");
+
+  AssertFormatting(formatter, 0xffff, "nan");
+  AssertFormatting(formatter, 0x7c00, "inf");
+  AssertFormatting(formatter, 0xfc00, "-inf");
+}
+
 template <typename T>
 void TestDecimalFormatter() {
   struct TestParam {

diff --git a/cpp/src/arrow/util/value_parsing_test.cc b/cpp/src/arrow/util/value_parsing_test.cc
@@ -24,9 +24,13 @@
 
 #include "arrow/testing/gtest_util.h"
 #include "arrow/type.h"
+#include "arrow/util/float16.h"
 #include "arrow/util/value_parsing.h"
 
 namespace arrow {
+
+using util::Float16;
+
 namespace internal {
 
 template <typename T>
@@ -152,6 +156,25 @@ TEST(StringConversion, ToDouble) {
   AssertConversionFails(&converter, "1.5");
 }
 
+TEST(StringConversion, ToHalfFloat) {
+  AssertConversion<HalfFloatType>("1.5", Float16(1.5f).bits());
+  AssertConversion<HalfFloatType>("0", Float16(0.0f).bits());
+  AssertConversion<HalfFloatType>("-0.0", Float16(-0.0f).bits());
+  AssertConversion<HalfFloatType>("-1e15", Float16(-1e15).bits());
+  AssertConversion<HalfFloatType>("+Infinity", 0x7c00);
+  AssertConversion<HalfFloatType>("-Infinity", 0xfc00);
+  AssertConversion<HalfFloatType>("Infinity", 0x7c00);
+
+  AssertConversionFails<HalfFloatType>("");
+  AssertConversionFails<HalfFloatType>("e");
+  AssertConversionFails<HalfFloatType>("1,5");
+
+  StringConverter<HalfFloatType> converter(/*decimal_point=*/',');
+  AssertConversion(&converter, "1,5", Float16(1.5f).bits());
+  AssertConversion(&converter, "0", Float16(0.0f).bits());
+  AssertConversionFails(&converter, "1.5");
+}
+
 #if !defined(_WIN32) || defined(NDEBUG)
 
 TEST(StringConversion, ToFloatLocale) {
@@ -180,6 +203,19 @@ TEST(StringConversion, ToDoubleLocale) {
   AssertConversionFails(&converter, "1,5");
 }
 
+TEST(StringConversion, ToHalfFloatLocale) {
+  // French locale uses the comma as decimal point
+  LocaleGuard locale_guard("fr_FR.UTF-8");
+
+  AssertConversion<HalfFloatType>("1.5", Float16(1.5).bits());
+  AssertConversionFails<HalfFloatType>("1,5");
+
+  StringConverter<HalfFloatType> converter(/*decimal_point=*/'#');
+  AssertConversion(&converter, "1#5", Float16(1.5).bits());
+  AssertConversionFails(&converter, "1.5");
+  AssertConversionFails(&converter, "1,5");
+}
+
 #endif  // _WIN32
 
 TEST(StringConversion, ToInt8) {