Skip to content

Commit

Permalink
Several improvements
Browse files Browse the repository at this point in the history
- address review comment
- CI fixes
- implemented scientific
- implemented fixed
- adjust pattern to subject sequence instead of allowing whitespace
- Implemented out of range issues
- Partly enabled the MSVC tests for from_chars floating-point
- Fixes exponent out of bounds reading
  • Loading branch information
mordante committed Aug 12, 2024
1 parent 8041880 commit f2072fc
Show file tree
Hide file tree
Showing 7 changed files with 706 additions and 79 deletions.
151 changes: 101 additions & 50 deletions libcxx/src/include/from_chars_floating_point.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
#include <charconv>
#include <concepts>
#include <limits>
#include <cstring>
#include <type_traits>

// Included for the _Floating_type_traits class
Expand All @@ -36,9 +35,9 @@ _LIBCPP_BEGIN_NAMESPACE_STD
// - __ptr is the current position is the input string. This is points beyond
// the initial I character.
// - __negative whether a valid string represents -inf or +inf.
template <floating_point _Tp>
template <floating_point _Fp>
from_chars_result __from_chars_floating_point_inf(
const char* const __first, const char* __last, _Tp& __value, const char* __ptr, bool __negative) {
const char* const __first, const char* __last, _Fp& __value, const char* __ptr, bool __negative) {
if (__last - __ptr < 2) [[unlikely]]
return {__first, errc::invalid_argument};

Expand All @@ -60,19 +59,19 @@ from_chars_result __from_chars_floating_point_inf(
&& std::tolower(__ptr[4]) == 'y')
__ptr += 5;

if constexpr (numeric_limits<_Tp>::has_infinity) {
if constexpr (numeric_limits<_Fp>::has_infinity) {
if (__negative)
__value = -std::numeric_limits<_Tp>::infinity();
__value = -std::numeric_limits<_Fp>::infinity();
else
__value = std::numeric_limits<_Tp>::infinity();
__value = std::numeric_limits<_Fp>::infinity();

return {__ptr, std::errc{}};
} else {
return {__ptr, errc::result_out_of_range};
}
}

// Parses an infinita string.
// Parses a nan string.
// Valid strings are case insentitive and contain INF or INFINITY.
//
// - __first is the first argument to std::from_chars. When the string is invalid
Expand All @@ -82,9 +81,9 @@ from_chars_result __from_chars_floating_point_inf(
// - __ptr is the current position is the input string. This is points beyond
// the initial N character.
// - __negative whether a valid string represents -nan or +nan.
template <floating_point _Tp>
template <floating_point _Fp>
from_chars_result __from_chars_floating_point_nan(
const char* const __first, const char* __last, _Tp& __value, const char* __ptr, bool __negative) {
const char* const __first, const char* __last, _Fp& __value, const char* __ptr, bool __negative) {
if (__last - __ptr < 2) [[unlikely]]
return {__first, errc::invalid_argument};

Expand All @@ -111,32 +110,33 @@ from_chars_result __from_chars_floating_point_nan(
}

if (__negative)
__value = -std::numeric_limits<_Tp>::quiet_NaN();
__value = -std::numeric_limits<_Fp>::quiet_NaN();
else
__value = std::numeric_limits<_Tp>::quiet_NaN();
__value = std::numeric_limits<_Fp>::quiet_NaN();

return {__ptr, std::errc{}};
}

template <floating_point _Tp>
template <floating_point _Fp>
from_chars_result __from_chars_floating_point_decimal(
const char* const __first,
const char* __last,
_Tp& __value,
_Fp& __value,
chars_format __fmt,
const char* __ptr,
bool __negative) {
using _Traits = _Floating_type_traits<_Tp>;
using _Traits = _Floating_type_traits<_Fp>;
using _Uint_type = typename _Traits::_Uint_type;
ptrdiff_t length = __last - __first;
_LIBCPP_ASSERT_INTERNAL(length > 0, "");

const char* src = __ptr; // rename to match the libc code copied for this section.
const char* src = __ptr; // rename to match the libc code copied for this section.
ptrdiff_t length = __last - src;
_LIBCPP_ASSERT_INTERNAL(length > 0, "");

_Uint_type mantissa = 0;
int exponent = 0;
bool truncated = false;
bool seen_digit = false;
bool has_valid_exponent = false;
bool after_decimal = false;
size_t index = 0;
const size_t BASE = 10;
Expand All @@ -145,6 +145,7 @@ from_chars_result __from_chars_floating_point_decimal(

// The loop fills the mantissa with as many digits as it can hold
const _Uint_type bitstype_max_div_by_base = numeric_limits<_Uint_type>::max() / BASE;

while (index < static_cast<size_t>(length)) {
if (LIBC_NAMESPACE::internal::isdigit(src[index])) {
uint32_t digit = src[index] - '0';
Expand Down Expand Up @@ -178,7 +179,7 @@ from_chars_result __from_chars_floating_point_decimal(
}

if (!seen_digit)
return {src + index, {}};
return {__first, errc::invalid_argument};

if (index < static_cast<size_t>(length) && LIBC_NAMESPACE::internal::tolower(src[index]) == EXPONENT_MARKER) {
bool has_sign = false;
Expand All @@ -187,8 +188,10 @@ from_chars_result __from_chars_floating_point_decimal(
}
if (index + 1 + static_cast<size_t>(has_sign) < static_cast<size_t>(length) &&
LIBC_NAMESPACE::internal::isdigit(src[index + 1 + static_cast<size_t>(has_sign)])) {
has_valid_exponent = true;
++index;
auto result = LIBC_NAMESPACE::internal::strtointeger<int32_t>(src + index, 10);
auto result =
LIBC_NAMESPACE::internal::strtointeger<int32_t>(src + index, 10, static_cast<size_t>(length) - index);
// if (result.has_error())
// output.error = result.error;
int32_t add_to_exponent = result.value;
Expand All @@ -199,57 +202,102 @@ from_chars_result __from_chars_floating_point_decimal(

// If the result is in the valid range, then we use it. The valid range is
// also within the int32 range, so this prevents overflow issues.
if (temp_exponent > LIBC_NAMESPACE::fputil::FPBits<_Tp>::MAX_BIASED_EXPONENT) {
exponent = LIBC_NAMESPACE::fputil::FPBits<_Tp>::MAX_BIASED_EXPONENT;
} else if (temp_exponent < -LIBC_NAMESPACE::fputil::FPBits<_Tp>::MAX_BIASED_EXPONENT) {
exponent = -LIBC_NAMESPACE::fputil::FPBits<_Tp>::MAX_BIASED_EXPONENT;
if (temp_exponent > LIBC_NAMESPACE::fputil::FPBits<_Fp>::MAX_BIASED_EXPONENT) {
exponent = LIBC_NAMESPACE::fputil::FPBits<_Fp>::MAX_BIASED_EXPONENT;
} else if (temp_exponent < -LIBC_NAMESPACE::fputil::FPBits<_Fp>::MAX_BIASED_EXPONENT) {
exponent = -LIBC_NAMESPACE::fputil::FPBits<_Fp>::MAX_BIASED_EXPONENT;
} else {
exponent = static_cast<int32_t>(temp_exponent);
}
}
}

LIBC_NAMESPACE::internal::ExpandedFloat<_Tp> expanded_float = {0, 0};
// [charconv.from.chars]
switch (__fmt) {
case chars_format::scientific:
// 6.2 if fmt has chars_format::scientific set but not chars_format::fixed,
// the otherwise optional exponent part shall appear;
if (!has_valid_exponent)
return {__first, errc::invalid_argument};
break;
case chars_format::fixed:
// 6.3 if fmt has chars_format::fixed set but not chars_format::scientific,
// the optional exponent part shall not appear;
if (has_valid_exponent)
return {__first, errc::invalid_argument};
break;
case chars_format::general:
case chars_format::hex: // impossible but it silences the compiler
break;
}

LIBC_NAMESPACE::internal::ExpandedFloat<_Fp> expanded_float = {0, 0};
errc status{};
if (mantissa != 0) {
auto temp = LIBC_NAMESPACE::shared::decimal_exp_to_float<_Tp>(
auto temp = LIBC_NAMESPACE::shared::decimal_exp_to_float<_Fp>(
{mantissa, exponent}, truncated, LIBC_NAMESPACE::internal::RoundDirection::Nearest, src, length);
expanded_float = temp.num;
// Note: there's also an error value in temp.error. I'm not doing that error handling right now though.
if (temp.error == ERANGE) {
status = errc::result_out_of_range;
}
}

auto result = LIBC_NAMESPACE::fputil::FPBits<_Tp>();
auto result = LIBC_NAMESPACE::fputil::FPBits<_Fp>();
result.set_mantissa(expanded_float.mantissa);
result.set_biased_exponent(expanded_float.exponent);

// C17 7.12.1/6
// The result underflows if the magnitude of the mathematical result is so
// small that the mathematical re- sult cannot be represented, without
// extraordinary roundoff error, in an object of the specified type.237) If
// the result underflows, the function returns an implementation-defined
// value whose magnitude is no greater than the smallest normalized positive
// number in the specified type; if the integer expression math_errhandling
// & MATH_ERRNO is nonzero, whether errno acquires the value ERANGE is
// implementation-defined; if the integer expression math_errhandling &
// MATH_ERREXCEPT is nonzero, whether the "underflow" floating-point
// exception is raised is implementation-defined.
//
// LLLVM-LIBC sets ERAGNE for subnormal values
//
// [charconv.from.chars]/1
// ... If the parsed value is not in the range representable by the type of
// value, value is unmodified and the member ec of the return value is
// equal to errc::result_out_of_range. ...
//
// Undo the ERANGE for subnormal values.
if (status == errc::result_out_of_range && result.is_subnormal() && !result.is_zero())
status = errc{};

if (__negative)
__value = -result.get_val();
else
__value = result.get_val();
return {src + index, {}};

return {src + index, status};
}

template <floating_point _Tp>
template <floating_point _Fp>
from_chars_result
__from_chars_floating_point(const char* const __first, const char* __last, _Tp& __value, chars_format __fmt) {
__from_chars_floating_point(const char* const __first, const char* __last, _Fp& __value, chars_format __fmt) {
if (__first == __last) [[unlikely]]
return {__first, errc::invalid_argument};

const char* __ptr = __first;

// skip whitespace
while (std::isspace(*__ptr)) {
++__ptr;
if (__ptr == __last) [[unlikely]]
return {__first, errc::invalid_argument}; // is this valid??
}

bool __negative = *__ptr == '-';
bool __negative = *__ptr == '-';
if (__negative) {
++__ptr;
if (__ptr == __last) [[unlikely]]
return {__first, errc::invalid_argument};
}

if (!std::isdigit(*__ptr)) {
// [charconv.from.chars]
// [Note 1: If the pattern allows for an optional sign, but the string has
// no digit characters following the sign, no characters match the pattern.
// — end note]
// This is true for integrals, floating point allows -.0
switch (std::tolower(*__ptr)) {
case 'i':
// TODO Evaluate the other implementations
// [charconv.from.chars]/6.2
// if fmt has chars_format::scientific set but not chars_format::fixed,
Expand All @@ -259,20 +307,23 @@ __from_chars_floating_point(const char* const __first, const char* __last, _Tp&
if (__fmt == chars_format::scientific)
return {__first, errc::invalid_argument};

switch (std::tolower(*__ptr)) {
case 'i':
return __from_chars_floating_point_inf(__first, __last, __value, __ptr + 1, __negative);
case 'n':
if constexpr (numeric_limits<_Tp>::has_quiet_NaN)
return __from_chars_floating_point_nan(__first, __last, __value, __ptr + 1, __negative);
[[fallthrough]];
default:
return __from_chars_floating_point_inf(__first, __last, __value, __ptr + 1, __negative);
case 'n':
// TODO Evaluate the other implementations
// [charconv.from.chars]/6.2
// if fmt has chars_format::scientific set but not chars_format::fixed,
// the otherwise optional exponent part shall appear;
// Since INF/NAN do not have an exponent this value is not valid.
// See LWG3456
if (__fmt == chars_format::scientific)
return {__first, errc::invalid_argument};
}
if constexpr (numeric_limits<_Fp>::has_quiet_NaN)
return __from_chars_floating_point_nan(__first, __last, __value, __ptr + 1, __negative);
return {__first, errc::invalid_argument};
}

#if 1
_LIBCPP_ASSERT_INTERNAL(__fmt == std::chars_format::general, "");
_LIBCPP_ASSERT_INTERNAL(__fmt != std::chars_format::hex, "");
#else
if (__fmt == chars_format::hex)
return std::__from_chars_floating_point_hex(__first, __last, __value);
Expand Down
Loading

0 comments on commit f2072fc

Please sign in to comment.