microsoft · StephanTLavavej · Oct 3, 2020 · Aug 7, 2020 · Aug 7, 2020 · Aug 14, 2020
diff --git a/stl/inc/charconv b/stl/inc/charconv
@@ -15,6 +15,7 @@
 #include <float.h>
 #include <intrin0.h>
 #include <string.h>
+#include <xbit_ops.h>
 #include <xcharconv.h>
 #include <xcharconv_ryu.h>
 #include <xutility>
@@ -450,40 +451,6 @@ _NODISCARD inline _Big_integer_flt _Make_big_integer_flt_power_of_two(const uint
     return _Xval;
 }
 
-_NODISCARD inline uint32_t _Bit_scan_reverse(const uint32_t _Value) noexcept {
-    unsigned long _Index; // Intentionally uninitialized for better codegen
-
-    if (_BitScanReverse(&_Index, _Value)) {
-        return _Index + 1;
-    }
-
-    return 0;
-}
-
-_NODISCARD inline uint32_t _Bit_scan_reverse(const uint64_t _Value) noexcept {
-    unsigned long _Index; // Intentionally uninitialized for better codegen
-
-#ifdef _WIN64
-    if (_BitScanReverse64(&_Index, _Value)) {
-        return _Index + 1;
-    }
-#else // ^^^ 64-bit ^^^ / vvv 32-bit vvv
-    uint32_t _Ui32 = static_cast<uint32_t>(_Value >> 32);
-
-    if (_BitScanReverse(&_Index, _Ui32)) {
-        return _Index + 1 + 32;
-    }
-
-    _Ui32 = static_cast<uint32_t>(_Value);
-
-    if (_BitScanReverse(&_Index, _Ui32)) {
-        return _Index + 1;
-    }
-#endif // ^^^ 32-bit ^^^
-
-    return 0;
-}
-
 _NODISCARD inline uint32_t _Bit_scan_reverse(const _Big_integer_flt& _Xval) noexcept {
     if (_Xval._Myused == 0) {
         return 0;

diff --git a/stl/inc/random b/stl/inc/random
@@ -7,12 +7,14 @@
 #ifndef _RANDOM_
 #define _RANDOM_
 #include <yvals_core.h>
+
 #if _STL_COMPILER_PREPROCESSOR
 #include <algorithm>
 #include <cmath>
 #include <iosfwd>
 #include <stdint.h>
 #include <vector>
+#include <xbit_ops.h>
 #include <xstring>
 
 #pragma pack(push, _CRT_PACKING)
@@ -2025,6 +2027,45 @@ basic_ostream<_Elem, _Traits>& operator<<(basic_ostream<_Elem, _Traits>& _Ostr,
     return _Dist._Write(_Ostr);
 }
 
+// FUNCTION TEMPLATE _Float_upper_bound
+
+// Returns smallest _Flt such that static_cast<_Ty>(_Result) > _Val.
+// First truncate to largest _Flt <= _Val, then add ceil(ulp).
+
+template <class _Flt, class _Ty>
+_NODISCARD _Flt _Float_upper_bound(_Ty _Val) {
+    static_assert(is_unsigned_v<_Ty> && is_integral_v<_Ty> && is_floating_point_v<_Flt>,
+        "invalid template argument for _Float_upper_bound");
+    constexpr auto _Ty_digits  = numeric_limits<_Ty>::digits;
+    constexpr auto _Flt_digits = numeric_limits<_Flt>::digits;
+    using _Ty_32or64           = conditional_t<_Ty_digits <= 32, uint32_t, uint64_t>;
+
+    if _CONSTEXPR_IF (_Ty_digits <= _Flt_digits) {
+        return static_cast<_Flt>(_Val) + _Flt{1};
+    } else {
+#pragma warning(push)
+#pragma warning(disable : 4146 4293) // unary minus of unsigned, negative shift
+        constexpr auto _Mask = static_cast<_Ty>(-1) << (_Ty_digits - _Flt_digits);
+#ifdef _M_CEE_PURE
+        constexpr auto _Ty_32or64_digits = numeric_limits<_Ty_32or64>::digits;
+        const auto _Log_plus1 = _Ty_32or64_digits - _Countl_zero_fallback(static_cast<_Ty_32or64>(_Val | _Ty{1}));
+#else // _M_CEE_PURE
+        const auto _Log_plus1 = _Bit_scan_reverse(static_cast<_Ty_32or64>(_Val | _Ty{1}));
+#endif // _M_CEE_PURE
+        const auto _Shifted_mask = _Mask >> (_Ty_digits - _Log_plus1);
+        const auto _Ceil_ulp     = _Shifted_mask & -_Shifted_mask;
+        _Val &= _Shifted_mask;
+        if (_Val == _Mask) {
+            // integer add would overflow
+            constexpr auto _Big_ulp = static_cast<_Flt>(_Mask & -_Mask);
+            return static_cast<_Flt>(_Val) + _Big_ulp;
+        } else {
+            return static_cast<_Flt>(_Val + _Ceil_ulp);
+        }
+#pragma warning(pop)
+    }
+}
+
 // CLASS TEMPLATE geometric_distribution
 template <class _Ty = int>
 class geometric_distribution { // geometric distribution
@@ -2123,7 +2164,15 @@ public:
 private:
     template <class _Engine>
     result_type _Eval(_Engine& _Eng, const param_type& _Par0) const {
-        return static_cast<_Ty>(_CSTD log(_NRAND(_Eng, _Ty1)) / _Par0._Log_1_p);
+        using _Uty = make_unsigned_t<_Ty>;
+        constexpr auto _Ty_max{(numeric_limits<_Ty>::max)()};
+        const auto _Ty1_max{_Float_upper_bound<_Ty1>(static_cast<_Uty>(_Ty_max))};
+
+        _Ty1 _Val;
+        do {
+            _Val = _CSTD log(_NRAND(_Eng, _Ty1)) / _Par0._Log_1_p;
+        } while (_Val >= _Ty1_max);
+        return static_cast<_Ty>(_Val);
     }
 
     param_type _Par;
@@ -2287,12 +2336,17 @@ private:
         }
 
         for (;;) { // generate and reject
+            using _Uty = make_unsigned_t<_Ty>;
+            constexpr auto _Ty_max{(numeric_limits<_Ty>::max)()};
+            const auto _Ty1_max{_Float_upper_bound<_Ty1>(static_cast<_Uty>(_Ty_max))};
+
             _Ty _Res;
             _Ty1 _Yx;
             for (;;) { // generate a tentative value
-                _Yx  = static_cast<_Ty1>(_CSTD tan(_Pi * _NRAND(_Eng, _Ty1)));
-                _Res = static_cast<_Ty>(_Par0._Sqrt * _Yx + _Par0._Mean);
-                if (_Ty{0} <= _Res) {
+                _Yx = static_cast<_Ty1>(_CSTD tan(_Pi * _NRAND(_Eng, _Ty1)));
+                const _Ty1 _Mx{_Par0._Sqrt * _Yx + _Par0._Mean};
+                if (0.0 <= _Mx && _Mx < _Ty1_max) {
+                    _Res = static_cast<_Ty>(_Mx);
                     break;
                 }
             }
@@ -2469,12 +2523,16 @@ private:
             // events are rare, use Poisson distribution
             _Res = _Par0._Small(_Eng);
         } else { // no shortcuts
+            using _Uty = make_unsigned_t<_Ty>;
+            const auto _Ty1_Tx{_Float_upper_bound<_Ty1>(static_cast<_Uty>(_Par0._Tx))};
+
             for (;;) { // generate and reject
                 _Ty1 _Yx;
                 for (;;) { // generate a tentative value
-                    _Yx  = static_cast<_Ty1>(_CSTD tan(_Pi * _NRAND(_Eng, _Ty1)));
-                    _Res = static_cast<_Ty>(_Par0._Sqrt * _Yx + _Par0._Mean);
-                    if (_Ty{0} <= _Res && _Res <= _Par0._Tx) {
+                    _Yx = static_cast<_Ty1>(_CSTD tan(_Pi * _NRAND(_Eng, _Ty1)));
+                    const _Ty1 _Mx{_Par0._Sqrt * _Yx + _Par0._Mean};
+                    if (0.0 <= _Mx && _Mx < _Ty1_Tx) {
+                        _Res = static_cast<_Ty>(_Mx);
                         break;
                     }
                 }
@@ -3122,7 +3180,7 @@ private:
             return _Par0._Beta * _Par0._Exp(_Eng);
         }
 
-        if ((_Count = static_cast<int>(_Par0._Alpha)) == _Par0._Alpha && _Count < 20) {
+        if (_Par0._Alpha < 20.0 && (_Count = static_cast<int>(_Par0._Alpha)) == _Par0._Alpha) {
             // _Alpha is small integer, compute directly
             _Yx = _NRAND(_Eng, _Ty);
             while (--_Count) { // adjust result

diff --git a/stl/inc/xbit_ops.h b/stl/inc/xbit_ops.h
@@ -9,6 +9,7 @@
 #include <yvals.h>
 #if _STL_COMPILER_PREPROCESSOR
 
+#include <cstdint>
 #include <intrin0.h>
 
 #pragma pack(push, _CRT_PACKING)
@@ -51,6 +52,40 @@ _NODISCARD inline unsigned long _Ceiling_of_log_2(const size_t _Value) noexcept
     return 1 + _Floor_of_log_2(_Value - 1);
 }
 
+_NODISCARD inline uint32_t _Bit_scan_reverse(const uint32_t _Value) noexcept {
+    unsigned long _Index; // Intentionally uninitialized for better codegen
+
+    if (_BitScanReverse(&_Index, _Value)) {
+        return _Index + 1;
+    }
+
+    return 0;
+}
+
+_NODISCARD inline uint32_t _Bit_scan_reverse(const uint64_t _Value) noexcept {
+    unsigned long _Index; // Intentionally uninitialized for better codegen
+
+#ifdef _WIN64
+    if (_BitScanReverse64(&_Index, _Value)) {
+        return _Index + 1;
+    }
+#else // ^^^ 64-bit ^^^ / vvv 32-bit vvv
+    uint32_t _Ui32 = static_cast<uint32_t>(_Value >> 32);
+
+    if (_BitScanReverse(&_Index, _Ui32)) {
+        return _Index + 1 + 32;
+    }
+
+    _Ui32 = static_cast<uint32_t>(_Value);
+
+    if (_BitScanReverse(&_Index, _Ui32)) {
+        return _Index + 1;
+    }
+#endif // ^^^ 32-bit ^^^
+
+    return 0;
+}
+
 _STD_END
 
 #pragma pop_macro("new")

diff --git a/tests/std/test.lst b/tests/std/test.lst
@@ -162,10 +162,12 @@ tests\GH_000685_condition_variable_any
 tests\GH_000690_overaligned_function
 tests\GH_000890_pow_template
 tests\GH_000940_missing_valarray_copy
+tests\GH_001001_random_rejection_rounding
 tests\GH_001010_filesystem_error_encoding
 tests\GH_001017_discrete_distribution_out_of_range
 tests\GH_001086_partial_sort_copy
 tests\GH_001103_countl_zero_correctness
+tests\GH_001123_random_cast_out_of_range
 tests\LWG2597_complex_branch_cut
 tests\LWG3018_shared_ptr_function
 tests\P0019R8_atomic_ref

diff --git a/tests/std/tests/GH_001001_random_rejection_rounding/env.lst b/tests/std/tests/GH_001001_random_rejection_rounding/env.lst
@@ -0,0 +1,4 @@
+# Copyright (c) Microsoft Corporation.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+RUNALL_INCLUDE ..\usual_matrix.lst
diff --git a/tests/std/tests/GH_001001_random_rejection_rounding/test.cpp b/tests/std/tests/GH_001001_random_rejection_rounding/test.cpp
@@ -0,0 +1,43 @@
+// Copyright (c) Microsoft Corporation.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include <cassert>
+#include <cmath>
+#include <cstdint>
+#include <map>
+#include <random>
+
+void Test_GH1001() {
+    constexpr int N{1000};
+    constexpr double p{.001238};
+    constexpr int seed{12345};
+    constexpr int iters{1'000'000};
+    std::map<int, int> frequency;
+
+    std::mt19937 mt_rand(seed);
+
+    std::binomial_distribution<int> distribution(N, p);
+
+    for (int i = 0; i < iters; ++i) {
+        ++frequency[distribution(mt_rand)];
+    }
+
+    double mean_x{0.0};
+    for (const auto& valueCountPair : frequency) {
+        mean_x += valueCountPair.first * static_cast<double>(valueCountPair.second) / iters;
+    }
+    const double p0_x{static_cast<double>(frequency[0]) / iters};
+    const double p1_x{static_cast<double>(frequency[1]) / iters};
+
+    const double p0{std::pow(1.0 - p, static_cast<double>(N))};
+    const double p1{1000.0 * p * std::pow(1.0 - p, static_cast<double>(N - 1))};
+    const double mean{p * N};
+
+    assert(std::abs(mean_x / mean - 1.0) < 0.01);
+    assert(std::abs(p0_x / p0 - 1.0) < 0.01);
+    assert(std::abs(p1_x / p1 - 1.0) < 0.01);
+}
+
+int main() {
+    Test_GH1001();
+}
diff --git a/tests/std/tests/GH_001123_random_cast_out_of_range/env.lst b/tests/std/tests/GH_001123_random_cast_out_of_range/env.lst
@@ -0,0 +1,4 @@
+# Copyright (c) Microsoft Corporation.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+RUNALL_INCLUDE ..\usual_matrix.lst
diff --git a/tests/std/tests/GH_001123_random_cast_out_of_range/test.cpp b/tests/std/tests/GH_001123_random_cast_out_of_range/test.cpp
@@ -0,0 +1,86 @@
+// Copyright (c) Microsoft Corporation.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include <cassert>
+#include <cmath>
+#include <cstdint>
+#include <limits>
+#include <random>
+
+#pragma warning(disable : 4984) // if constexpr is a C++17 language extension
+#ifdef __clang__
+#pragma clang diagnostic ignored "-Wc++17-extensions"
+#endif // __clang__
+
+template <class T>
+using lim = std::numeric_limits<T>;
+
+template <class IntType, class FltType>
+void CheckUpperBound(IntType i, FltType fmax) {
+    const auto x{std::_Float_upper_bound<FltType>(i)};
+    const auto y{std::nextafter(x, FltType{0})}; // lower bound, <= i
+
+    assert(y < fmax);
+    assert(static_cast<IntType>(y) <= i);
+    assert(x <= fmax);
+    if (x < fmax) {
+        assert(static_cast<IntType>(x) > i);
+    }
+}
+
+template <class IntType, class FltType>
+void TestUpperBoundExhaustive() {
+    const auto fmax{exp2(static_cast<FltType>(lim<IntType>::digits))};
+    IntType i{0};
+    do {
+        CheckUpperBound(i, fmax);
+    } while (++i != IntType{0});
+}
+
+template <class T>
+constexpr T FillLsb(int n) {
+    if (n <= 0) {
+        return 0;
+    }
+    T x{T{1} << (n - 1)};
+    return (x - 1) ^ x;
+}
+
+template <class IntType, class FltType>
+void TestUpperBoundSelective() {
+    const auto fmax{exp2(static_cast<FltType>(lim<IntType>::digits))};
+    CheckUpperBound(IntType{0}, fmax);
+    CheckUpperBound(IntType{1}, fmax);
+    CheckUpperBound(lim<IntType>::max(), fmax);
+
+    constexpr int diff{lim<IntType>::digits - lim<FltType>::digits};
+    if constexpr (diff > 0) {
+        // crossover from ulp < 1 to ulp = 1
+        constexpr auto a{FillLsb<IntType>(lim<FltType>::digits - 1)};
+        CheckUpperBound(a - 1, fmax);
+        CheckUpperBound(a, fmax);
+
+        // crossover from ulp = 1 to ulp > 1
+        constexpr auto b{FillLsb<IntType>(lim<FltType>::digits)};
+        CheckUpperBound(b, fmax);
+        CheckUpperBound(b + 1, fmax);
+        CheckUpperBound(b + 2, fmax);
+
+        // saturation at the largest representable IntType
+        constexpr auto c{FillLsb<IntType>(lim<FltType>::digits) << diff};
+        CheckUpperBound(c - 1, fmax);
+        CheckUpperBound(c, fmax);
+        CheckUpperBound(c + 1, fmax);
+    }
+}
+
+int main() {
+    TestUpperBoundExhaustive<std::uint8_t, float>();
+    TestUpperBoundExhaustive<std::uint16_t, float>();
+    TestUpperBoundSelective<std::uint32_t, float>();
+
+    TestUpperBoundExhaustive<unsigned short, double>();
+    TestUpperBoundSelective<unsigned int, double>();
+    TestUpperBoundSelective<unsigned long, double>();
+    TestUpperBoundSelective<unsigned long long, double>();
+}