From 8d298aada04645ffcaf54463117c97b6d3e8ef70 Mon Sep 17 00:00:00 2001
From: Casey Carter <cacarter@microsoft.com>
Date: Tue, 13 Apr 2021 13:57:32 -0700
Subject: [PATCH 1/7] `format` assumes strings are encoded in the active code
 page

Refactors all `format` functionality with knowledge of character encodings into a new class `_Fmt_codec`. `_Fmt_codec` internally caches the `_Cvtvec` structure that other functions previously passed around, and makes use of a new "pure C ABI function" `__std_get_cvt` to retrieve character conversion info for the active codepage.

The ABI function reuses the `__std_code_page` and `__std_win_error` types from `<xfilesystem_abi.h>`. These could be extracted into a more general `__msvc_win_abi.hpp` header, but I think `<xfilesystem_abi.h>` is lightweight enough to just include directly for now.

Drive-by:
* BUG: `_Parse_precision` was not skipping the trailing `}` in a dynamic precision.
* Improvement: `test_parse_helper` now verifies that the parse consumes the entire input when passed an expected length of `npos`.
---
 stl/CMakeLists.txt                            |   1 +
 stl/inc/format                                | 397 ++++++++++--------
 stl/inc/xfilesystem_abi.h                     |   2 +-
 stl/src/format.cpp                            |  41 ++
 tests/std/include/test_format_support.hpp     |   5 +-
 .../test.cpp                                  |  27 --
 .../env.lst                                   |   7 +-
 .../test.cpp                                  |  67 ++-
 .../P0645R10_text_formatting_parsing/test.cpp |  18 +-
 .../P0645R10_text_formatting_utf8/env.lst     |  26 +-
 .../P0645R10_text_formatting_utf8/test.cpp    |   4 +
 11 files changed, 326 insertions(+), 269 deletions(-)
 create mode 100644 stl/src/format.cpp
diff --git a/stl/CMakeLists.txt b/stl/CMakeLists.txt
index 751c6ad78a..12fa2b9255 100644
--- a/stl/CMakeLists.txt
+++ b/stl/CMakeLists.txt
@@ -249,6 +249,7 @@ endforeach()
 # Objs that exist in both libcpmt[d][01].lib and msvcprt[d].lib.
 set(IMPLIB_SOURCES
     ${CMAKE_CURRENT_LIST_DIR}/src/filesystem.cpp
+    ${CMAKE_CURRENT_LIST_DIR}/src/format.cpp
     ${CMAKE_CURRENT_LIST_DIR}/src/locale0_implib.cpp
     ${CMAKE_CURRENT_LIST_DIR}/src/nothrow.cpp
     ${CMAKE_CURRENT_LIST_DIR}/src/sharedmutex.cpp
diff --git a/stl/inc/format b/stl/inc/format
index 939581aff9..dea6e05e8c 100644
--- a/stl/inc/format
+++ b/stl/inc/format
@@ -53,6 +53,7 @@
 #include <stdexcept>
 #include <string>
 #include <string_view>
+#include <xfilesystem_abi.h>
 #include <xutility>
 
 #pragma pack(push, _CRT_PACKING)
@@ -62,6 +63,9 @@ _STL_DISABLE_CLANG_WARNINGS
 #pragma push_macro("new")
 #undef new
 
+extern "C" _NODISCARD __std_win_error __stdcall __std_get_cvt(
+    const __std_code_page _Codepage, _Cvtvec* const _Pcvt) noexcept;
+
 _STD_BEGIN
 
 class format_error : public runtime_error {
@@ -444,95 +448,231 @@ _NODISCARD constexpr bool _Is_execution_charset_utf8() {
 #pragma warning(pop)
 }
 
-inline constexpr bool _Is_execution_charset_utf8_v = _Is_execution_charset_utf8();
+inline constexpr char16_t _Width_estimate_low_intervals[] = { // Per N4885 [format.string.std]/11
+    0x1100u, 0x1160u, 0x2329u, 0x232Bu, 0x2E80u, 0x303Fu, 0x3040u, 0xA4D0u, 0xAC00u, 0xD7A4u, 0xF900u, 0xFB00u, 0xFE10u,
+    0xFE1Au, 0xFE30u, 0xFE70u, 0xFF00u, 0xFF61u, 0xFFE0u, 0xFFE7u};
 
-_NODISCARD constexpr int _Utf8_code_units_in_next_character(
-    const char* const _First, const char* const _Last) noexcept {
-    // Returns a count of the number of UTF-8 code units that compose the first encoded character in [_First, _Last),
-    // or -1 if [_First, _Last) doesn't contain an entire encoded character or *_First is not a valid lead byte.
-    const auto _Ch = static_cast<unsigned char>(*_First);
-    if (_Ch < 0b1000'0000u) {
-        return 1;
+inline constexpr char32_t _Width_estimate_high_intervals[] = { // Per N4885 [format.string.std]/11
+    0x1F300u, 0x1F650u, 0x1F900u, 0x1FA00u, 0x20000u, 0x2FFFEu, 0x30000u, 0x3FFFEu};
+
+template <auto& _Bounds>
+_NODISCARD constexpr int _Unicode_width_estimate(const char32_t _Ch) noexcept {
+    // Computes the width estimation for Unicode characters from N4885 [format.string.std]/11
+    int _Result = 1;
+    for (const auto& _Bound : _Bounds) {
+        if (_Ch < _Bound) {
+            return _Result;
+        }
+        _Result ^= 1;
     }
 
-    const auto _Len = static_cast<size_t>(_Last - _First);
+    return 1;
+}
+
+template <class _CharT, bool _Statically_Utf8 = _Is_execution_charset_utf8()>
+class _Fmt_codec;
+
+template <bool _Statically_Utf8>
+class _Fmt_codec_base {};
 
-    if (_Ch < 0b1110'0000u) {
-        // check for non-lead byte or partial 2-byte encoded character
-        return (_Ch >= 0b1100'0000u && _Len >= 2) ? 2 : -1;
+template <>
+class _Fmt_codec_base<false> {
+protected:
+    _Cvtvec _Cvt;
+
+    _NODISCARD int _Double_byte_encoding_code_units_in_next_character(
+        const char* const _First, const char* const _Last) const {
+        // Returns a count of the number of code units that compose the first encoded character in [_First, _Last),
+        // or -1 if [_First, _Last) doesn't contain an entire encoded character or *_First is not a valid lead byte.
+        wchar_t _Wide;
+        mbstate_t _St{};
+        const auto _Len   = static_cast<size_t>(_Last - _First);
+        const int _Result = _Mbrtowc(&_Wide, _First, _Len, &_St, &_Cvt);
+        if (_Result > 0) {
+            return _Result;
+        } else if (_Result < 0) { // invalid or incomplete encoded character
+            return -1;
+        } else { // next code unit is '\0'
+            return 1;
+        }
     }
 
-    if (_Ch < 0b1111'0000u) {
-        // check for partial 3-byte encoded character
-        return (_Len >= 3) ? 3 : -1;
+    _Fmt_codec_base() {
+        if (__std_get_cvt(__std_code_page::_Acp, &_Cvt) != __std_win_error::_Success) {
+            _THROW(format_error("FIXME"));
+        }
     }
+};
 
-    // check for partial 4-byte encoded character
-    return (_Len >= 4) ? 4 : -1;
-}
+template <bool _Statically_Utf8>
+class _Fmt_codec<char, _Statically_Utf8> : private _Fmt_codec_base<_Statically_Utf8> {
+private:
+    _NODISCARD static constexpr int _Utf8_code_units_in_next_character(
+        const char* const _First, const char* const _Last) noexcept {
+        // Returns a count of the number of UTF-8 code units that compose the first encoded character in [_First,
+        // _Last), or -1 if [_First, _Last) doesn't contain an entire encoded character or *_First is not a valid lead
+        // byte.
+        const auto _Ch = static_cast<unsigned char>(*_First);
+        if (_Ch < 0b1000'0000u) {
+            return 1;
+        }
 
-_NODISCARD inline int _Double_byte_encoding_code_units_in_next_character(
-    const char* const _First, const char* const _Last, const _Cvtvec& _Cvt) {
-    // Returns a count of the number of code units that compose the first encoded character in [_First, _Last),
-    // or -1 if [_First, _Last) doesn't contain an entire encoded character or *_First is not a valid lead byte.
-    wchar_t _Wide;
-    mbstate_t _St{};
-    const auto _Len   = static_cast<size_t>(_Last - _First);
-    const int _Result = _Mbrtowc(&_Wide, _First, _Len, &_St, &_Cvt);
-    if (_Result > 0) {
-        return _Result;
-    } else if (_Result < 0) { // invalid or incomplete encoded character
-        return -1;
-    } else { // next code unit is '\0'
-        return 1;
-    }
-}
+        const auto _Len = static_cast<size_t>(_Last - _First);
 
-_NODISCARD inline int _Code_units_in_next_character(const char* _First, const char* _Last, const _Cvtvec& _Cvt) {
-    // Returns a count of the number of code units that compose the first encoded character in
-    // [_First, _Last), or -1 if [_First, _Last) doesn't contain an entire encoded character or
-    // *_First is not a valid lead byte.
-    _STL_INTERNAL_CHECK(_First < _Last);
+        if (_Ch < 0b1110'0000u) {
+            // check for non-lead byte or partial 2-byte encoded character
+            return (_Ch >= 0b1100'0000u && _Len >= 2) ? 2 : -1;
+        }
 
-    if constexpr (_Is_execution_charset_utf8_v) {
-        return _Utf8_code_units_in_next_character(_First, _Last);
-    } else {
-        switch (_Cvt._Mbcurmax) {
+        if (_Ch < 0b1111'0000u) {
+            // check for partial 3-byte encoded character
+            return (_Len >= 3) ? 3 : -1;
+        }
+
+        // check for partial 4-byte encoded character
+        return (_Len >= 4) ? 4 : -1;
+    }
+
+    _NODISCARD static constexpr int _Estimate_utf8_character_width(const char* const _Ptr, const int _Units) noexcept {
+        // Return an estimate for the width of the character composed of _Units code units,
+        // whose first code unit is denoted by _Ptr.
+        auto _Ch = static_cast<char32_t>(*_Ptr);
+        switch (_Units) {
         default:
-            _STL_INTERNAL_CHECK(!"Bad number of encoding units for this code page");
-            [[fallthrough]];
         case 1:
-            return 1; // all characters have only one code unit
-
         case 2:
-            return _Double_byte_encoding_code_units_in_next_character(_First, _Last, _Cvt);
+            return 1;
+        case 3:
+            _Ch &= 0b1111u;
+            break;
+        case 4:
+            _Ch &= 0b111u;
+            break;
+        }
+
+        for (int _Idx = 1; _Idx < _Units; ++_Idx) {
+            _Ch = _Ch << 6 | (_Ptr[_Idx] & 0b11'1111u);
+        }
 
-        case 4: // Assume UTF-8 (as does _Mbrtowc)
+        if (_Units == 3) {
+            return _Unicode_width_estimate<_Width_estimate_low_intervals>(_Ch);
+        }
+
+        return _Unicode_width_estimate<_Width_estimate_high_intervals>(_Ch);
+    }
+
+public:
+    _NODISCARD int _Units_in_next_character(const char* _First, const char* _Last) const noexcept {
+        // Returns a count of the number of code units that compose the first encoded character in
+        // [_First, _Last), or -1 if [_First, _Last) doesn't contain an entire encoded character or
+        // *_First is not a valid lead byte.
+        _STL_INTERNAL_CHECK(_First < _Last);
+
+        if constexpr (_Statically_Utf8) {
             return _Utf8_code_units_in_next_character(_First, _Last);
+        } else {
+            switch (this->_Cvt._Mbcurmax) {
+            default:
+                _STL_INTERNAL_CHECK(!"Bad number of encoding units for this code page");
+                [[fallthrough]];
+            case 1:
+                return 1; // all characters have only one code unit
+
+            case 2:
+                return this->_Double_byte_encoding_code_units_in_next_character(_First, _Last);
+
+            case 4: // Assume UTF-8 (as does _Mbrtowc)
+                return _Utf8_code_units_in_next_character(_First, _Last);
+            }
         }
     }
-}
 
-_NODISCARD inline int _Code_units_in_next_character(const wchar_t* _First, const wchar_t* _Last, const _Cvtvec&) {
-    // Returns a count of the number of code units that compose the first encoded character in
-    // [_First, _Last), or -1 if [_First, _Last) doesn't contain an entire encoded character or
-    // *_First is an unpaired surrogate.
-    _STL_INTERNAL_CHECK(_First < _Last);
+    _NODISCARD const char* _Find_encoded(const char* _First, const char* _Last, const char _Val) const {
+        // Returns the first occurrence of _Val as an encoded character (and not, for example, as a
+        // continuation byte) in [_First, _Last).
+        if constexpr (_Statically_Utf8) {
+            return _Find_unchecked(_First, _Last, _Val);
+        } else {
+            if (this->_Cvt._Mbcurmax == 1 || this->_Cvt._Mbcurmax == 4) {
+                // As above and in _Mbrtowc, assume 4-byte encodings are UTF-8
+                return _Find_unchecked(_First, _Last, _Val);
+            }
 
-    if (*_First < 0xD800u || *_First >= 0xE000u) {
-        return 1;
+            while (_First != _Last && *_First != _Val) {
+                const int _Units = _Units_in_next_character(_First, _Last);
+                if (_Units < 0) {
+                    _THROW(format_error("Invalid encoded character in format string."));
+                }
+                _First += _Units;
+            }
+
+            return _First;
+        }
     }
 
-    if (*_First >= 0xDC00u) { // unpaired low surrogate
-        return -1;
+    _NODISCARD int _Estimate_width(const char* const _Ptr, const int _Units) const {
+        // Return an estimate for the width of the character composed of _Units code units,
+        // whose first code unit is denoted by _Ptr.
+        if constexpr (_Statically_Utf8) {
+            return _Estimate_utf8_character_width(_Ptr, _Units);
+        } else {
+            if (this->_Cvt._Mbcurmax != 4) {
+                // not a Unicode encoding; estimate width == number of code units
+                return _Units;
+            }
+
+            // assume UTF-8
+            return _Estimate_utf8_character_width(_Ptr, _Units);
+        }
     }
+};
 
-    if (++_First == _Last || *_First < 0xDC00u || *_First >= 0xE000u) { // unpaired high surrogate
-        return -1;
+template <bool _Statically_Utf8>
+class _Fmt_codec<wchar_t, _Statically_Utf8> {
+private:
+public:
+    _NODISCARD int _Units_in_next_character(const wchar_t* _First, const wchar_t* _Last) const noexcept {
+        // Returns a count of the number of code units that compose the first encoded character in
+        // [_First, _Last), or -1 if [_First, _Last) doesn't contain an entire encoded character or
+        // *_First is an unpaired surrogate.
+        _STL_INTERNAL_CHECK(_First < _Last);
+
+        if (*_First < 0xD800u || *_First >= 0xE000u) {
+            return 1;
+        }
+
+        if (*_First >= 0xDC00u) { // unpaired low surrogate
+            return -1;
+        }
+
+        if (++_First == _Last || *_First < 0xDC00u || *_First >= 0xE000u) { // unpaired high surrogate
+            return -1;
+        }
+
+        return 2; // surrogate pair
     }
 
-    return 2; // surrogate pair
-}
+    _NODISCARD const wchar_t* _Find_encoded(const wchar_t* _First, const wchar_t* _Last, const wchar_t _Val) const {
+        // Returns the first occurrence of _Val as an encoded character (and not, for example, as a
+        // continuation byte) in [_First, _Last).
+        return _Find_unchecked(_First, _Last, _Val);
+    }
+
+    _NODISCARD int _Estimate_width(const wchar_t* const _Ptr, const int _Units) const {
+        // Return an estimate for the width of the character composed of _Units code units,
+        // whose first code unit is denoted by _Ptr.
+        auto _Ch = static_cast<char32_t>(*_Ptr);
+        if (_Units == 1) {
+            return _Unicode_width_estimate<_Width_estimate_low_intervals>(_Ch);
+        }
+
+        // surrogate pair
+        _Ch = (_Ch - 0xD8000u) << 10;
+        _Ch += static_cast<char32_t>(_Ptr[1]) - 0xDC00u;
+        _Ch += 0x10000u;
+        return _Unicode_width_estimate<_Width_estimate_high_intervals>(_Ch);
+    }
+};
 
 template <class _CharT, _Parse_align_callbacks<_CharT> _Callbacks_type>
 _NODISCARD const _CharT* _Parse_align(const _CharT* _Begin, const _CharT* _End, _Callbacks_type&& _Callbacks) {
@@ -540,7 +680,7 @@ _NODISCARD const _CharT* _Parse_align(const _CharT* _Begin, const _CharT* _End,
     _STL_INTERNAL_CHECK(_Begin != _End && *_Begin != '}');
     auto _Parsed_align = _Fmt_align::_None;
 
-    const int _Units = _Code_units_in_next_character(_Begin, _End, _Getcvt());
+    const int _Units = _Fmt_codec<_CharT>{}._Units_in_next_character(_Begin, _End);
     if (_Units < 0) { // invalid fill character encoding
         _THROW(format_error("Invalid format string."));
     }
@@ -800,44 +940,19 @@ _NODISCARD constexpr const _CharT* _Parse_replacement_field(
     return _Begin + 1;
 }
 
-template <class _CharT>
-_NODISCARD const _CharT* _Find_encoded(
-    const _CharT* _First, const _CharT* _Last, const _CharT _Val, const _Cvtvec& _Cvt) {
-    // Returns the first occurrence of _Val as an encoded character (and not, for example, as a
-    // continuation byte) in [_First, _Last).
-    if constexpr (_Is_execution_charset_utf8_v) {
-        return _Find_unchecked(_First, _Last, _Val);
-    } else {
-        if (_Cvt._Mbcurmax == 1 || _Cvt._Mbcurmax == 4) {
-            // As above and in _Mbrtowc, assume 4-byte encodings are UTF-8
-            return _Find_unchecked(_First, _Last, _Val);
-        }
-
-        while (_First != _Last && *_First != _Val) {
-            const int _Units = _Code_units_in_next_character(_First, _Last, _Cvt);
-            if (_Units < 0) {
-                _THROW(format_error("Invalid encoded character in format string."));
-            }
-            _First += _Units;
-        }
-
-        return _First;
-    }
-}
-
 template <class _CharT, _Parse_replacement_field_callbacks<_CharT> _HandlerT>
 void _Parse_format_string(basic_string_view<_CharT> _Format_str, _HandlerT&& _Handler) {
-    auto _Begin         = _Format_str.data();
-    auto _End           = _Begin + _Format_str.size();
-    const _Cvtvec& _Cvt = _Getcvt();
+    auto _Begin = _Format_str.data();
+    auto _End   = _Begin + _Format_str.size();
+    const _Fmt_codec<_CharT> _Counter{};
 
     while (_Begin != _End) {
         const _CharT* _OpeningCurl = _Begin;
         if (*_Begin != '{') {
-            _OpeningCurl = _Find_encoded(_Begin, _End, _CharT{'{'}, _Cvt);
+            _OpeningCurl = _Counter._Find_encoded(_Begin, _End, _CharT{'{'});
 
             for (;;) {
-                const _CharT* _ClosingCurl = _Find_encoded(_Begin, _OpeningCurl, _CharT{'}'}, _Cvt);
+                const _CharT* _ClosingCurl = _Counter._Find_encoded(_Begin, _OpeningCurl, _CharT{'}'});
 
                 // In this case there are neither closing nor opening curls in [_Begin, _OpeningCurl)
                 // Write the whole thing out.
@@ -2214,95 +2329,15 @@ _NODISCARD _OutputIt _Fmt_write(
     return _Fmt_write(_STD move(_Out), basic_string_view<_CharT>{_Value}, _Specs, _Locale);
 }
 
-inline constexpr char16_t _Width_estimate_low_intervals[] = { // Per N4885 [format.string.std]/11
-    0x1100u, 0x1160u, 0x2329u, 0x232Bu, 0x2E80u, 0x303Fu, 0x3040u, 0xA4D0u, 0xAC00u, 0xD7A4u, 0xF900u, 0xFB00u, 0xFE10u,
-    0xFE1Au, 0xFE30u, 0xFE70u, 0xFF00u, 0xFF61u, 0xFFE0u, 0xFFE7u};
-
-inline constexpr char32_t _Width_estimate_high_intervals[] = { // Per N4885 [format.string.std]/11
-    0x1F300u, 0x1F650u, 0x1F900u, 0x1FA00u, 0x20000u, 0x2FFFEu, 0x30000u, 0x3FFFEu};
-
-template <auto& _Bounds>
-_NODISCARD constexpr int _Unicode_width_estimate(const char32_t _Ch) noexcept {
-    // Computes the width estimation for Unicode characters from N4885 [format.string.std]/11
-    int _Result = 1;
-    for (const auto& _Bound : _Bounds) {
-        if (_Ch < _Bound) {
-            return _Result;
-        }
-        _Result ^= 1;
-    }
-
-    return 1;
-}
-
-_NODISCARD inline int _Estimate_utf8_character_width(const char* const _Ptr, const int _Units) noexcept {
-    // Return an estimate for the width of the character composed of _Units code units,
-    // whose first code unit is denoted by _Ptr.
-    auto _Ch = static_cast<char32_t>(*_Ptr);
-    switch (_Units) {
-    default:
-    case 1:
-    case 2:
-        return 1;
-    case 3:
-        _Ch &= 0b1111u;
-        break;
-    case 4:
-        _Ch &= 0b111u;
-        break;
-    }
-
-    for (int _Idx = 1; _Idx < _Units; ++_Idx) {
-        _Ch = _Ch << 6 | (_Ptr[_Idx] & 0b11'1111u);
-    }
-
-    if (_Units == 3) {
-        return _Unicode_width_estimate<_Width_estimate_low_intervals>(_Ch);
-    }
-
-    return _Unicode_width_estimate<_Width_estimate_high_intervals>(_Ch);
-}
-
-_NODISCARD inline int _Estimate_character_width(const char* _Ptr, const int _Units, const _Cvtvec& _Cvt) {
-    // Return an estimate for the width of the character composed of _Units code units,
-    // whose first code unit is denoted by _Ptr.
-    if constexpr (_Is_execution_charset_utf8_v) {
-        return _Estimate_utf8_character_width(_Ptr, _Units);
-    } else {
-        if (_Cvt._Mbcurmax != 4) {
-            // not a Unicode encoding; estimate width == number of code units
-            return _Units;
-        }
-
-        // assume UTF-8
-        return _Estimate_utf8_character_width(_Ptr, _Units);
-    }
-}
-
-_NODISCARD inline int _Estimate_character_width(const wchar_t* _Ptr, const int _Units, const _Cvtvec&) {
-    // Return an estimate for the width of the character composed of _Units code units,
-    // whose first code unit is denoted by _Ptr.
-    auto _Ch = static_cast<char32_t>(*_Ptr);
-    if (_Units == 1) {
-        return _Unicode_width_estimate<_Width_estimate_low_intervals>(_Ch);
-    }
-
-    // surrogate pair
-    _Ch = (_Ch - 0xD8000u) << 10;
-    _Ch += static_cast<char32_t>(_Ptr[1]) - 0xDC00u;
-    _Ch += 0x10000u;
-    return _Unicode_width_estimate<_Width_estimate_high_intervals>(_Ch);
-}
-
 template <class _CharT>
 _NODISCARD const _CharT* _Measure_string_prefix(const basic_string_view<_CharT> _Value, int& _Width) {
     // Returns a pointer past-the-end of the largest prefix of _Value that fits in _Width, or all
     // of _Value if _Width is negative. Updates _Width to the estimated width of that prefix.
-    const int _Max_width    = _Width;
-    auto _Pos               = _Value.data();
-    const auto _Last        = _Pos + _Value.size();
-    int _Estimated_width    = 0; // the estimated width of [_Value.data(), _Pos)
-    const _Cvtvec& _Cvt     = _Getcvt();
+    const int _Max_width = _Width;
+    auto _Pos            = _Value.data();
+    const auto _Last     = _Pos + _Value.size();
+    int _Estimated_width = 0; // the estimated width of [_Value.data(), _Pos)
+    const _Fmt_codec<_CharT> _Codec;
     constexpr auto _Max_int = (numeric_limits<int>::max)();
 
     while (_Pos != _Last) {
@@ -2312,8 +2347,8 @@ _NODISCARD const _CharT* _Measure_string_prefix(const basic_string_view<_CharT>
         }
 
         // TRANSITION, extended grapheme clustering
-        const int _Units           = _Code_units_in_next_character(_Pos, _Last, _Cvt);
-        const int _Character_width = _Estimate_character_width(_Pos, _Units, _Cvt);
+        const int _Units           = _Codec._Units_in_next_character(_Pos, _Last);
+        const int _Character_width = _Codec._Estimate_width(_Pos, _Units);
 
         if (_Max_int - _Character_width < _Estimated_width) { // avoid overflow
             // Either _Max_width isn't set, or adding this character will exceed it.
diff --git a/stl/inc/xfilesystem_abi.h b/stl/inc/xfilesystem_abi.h
index 052af6cc93..f0b456bd69 100644
--- a/stl/inc/xfilesystem_abi.h
+++ b/stl/inc/xfilesystem_abi.h
@@ -205,7 +205,7 @@ _BITMASK_OPS(__std_fs_file_flags)
 
 enum class __std_fs_file_handle : intptr_t { _Invalid = -1 };
 
-enum class __std_code_page : unsigned int { _Utf8 = 65001 };
+enum class __std_code_page : unsigned int { _Acp = 0, _Utf8 = 65001 };
 
 struct __std_fs_convert_result {
     int _Len;
diff --git a/stl/src/format.cpp b/stl/src/format.cpp
new file mode 100644
index 0000000000..03bee36200
--- /dev/null
+++ b/stl/src/format.cpp
@@ -0,0 +1,41 @@
+// Copyright (c) Microsoft Corporation.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include <xfilesystem_abi.h>
+#include <xlocinfo.h>
+
+#include <Windows.h>
+
+static_assert(__std_code_page::_Acp == __std_code_page{CP_ACP});
+
+// CODEPAGE is conditionally defined so tests can override it.
+#ifndef CODEPAGE
+#define CODEPAGE(x) static_cast<UINT>(x)
+#endif
+
+extern "C" [[nodiscard]] __std_win_error __stdcall __std_get_cvt(
+    const __std_code_page _Codepage, _Cvtvec* const _Pcvt) noexcept {
+    // get conversion info for an arbitrary codepage
+    *_Pcvt = {};
+
+    CPINFOEXW _Info{};
+    const DWORD _Flags = 0; // reserved, must be zero
+    if (GetCPInfoExW(CODEPAGE(_Codepage), _Flags, &_Info) == 0) {
+        return __std_win_error{GetLastError()};
+    }
+
+    _Pcvt->_Page     = _Info.CodePage;
+    _Pcvt->_Mbcurmax = _Info.MaxCharSize;
+
+    for (int _Idx = 0; _Idx < MAX_LEADBYTES; _Idx += 2) {
+        if (_Info.LeadByte[_Idx] == 0 && _Info.LeadByte[_Idx + 1] == 0) {
+            break;
+        }
+
+        for (unsigned char _First = _Info.LeadByte[_Idx], _Last = _Info.LeadByte[_Idx + 1]; _First != _Last; ++_First) {
+            _Pcvt->_Isleadbyte[_First >> 3] |= 1u << (_First & 0b111u);
+        }
+    }
+
+    return __std_win_error::_Success;
+}
diff --git a/tests/std/include/test_format_support.hpp b/tests/std/include/test_format_support.hpp
index 89dd541e85..77cc36eb85 100644
--- a/tests/std/include/test_format_support.hpp
+++ b/tests/std/include/test_format_support.hpp
@@ -117,9 +117,10 @@ void test_parse_helper(const CharT* (*func)(const CharT*, const CharT*, callback
     callback_type&& callbacks                                               = {}) {
     try {
         auto end = func(view.data(), view.data() + view.size(), std::move(callbacks));
-        if (expected_end_position != std::basic_string_view<CharT>::npos) {
-            assert(end == view.data() + expected_end_position);
+        if (expected_end_position == std::basic_string_view<CharT>::npos) {
+            expected_end_position = view.size();
         }
+        assert(end == view.data() + expected_end_position);
         assert(!err_expected);
     } catch (const std::format_error&) {
         assert(err_expected);
diff --git a/tests/std/tests/P0645R10_text_formatting_formatting/test.cpp b/tests/std/tests/P0645R10_text_formatting_formatting/test.cpp
index ab37ef41f5..abee200168 100644
--- a/tests/std/tests/P0645R10_text_formatting_formatting/test.cpp
+++ b/tests/std/tests/P0645R10_text_formatting_formatting/test.cpp
@@ -975,31 +975,6 @@ void test_size() {
     test_size_helper<charT>(8, STR("{:8}"), STR("scully"));
 }
 
-void test_multibyte_format_strings() {
-#ifndef MSVC_INTERNAL_TESTING // TRANSITION, the Windows version on Contest VMs doesn't always understand ".UTF-8"
-    {
-        assert(setlocale(LC_ALL, ".UTF-8") != nullptr);
-        // Filling with footballs ("\xf0\x9f\x8f\x88" is U+1F3C8 AMERICAN FOOTBALL)
-        assert(format("{:\xf0\x9f\x8f\x88>4}"sv, 42) == "\xf0\x9f\x8f\x88\xf0\x9f\x8f\x88\x34\x32");
-
-        assert(format("{:\xf0\x9f\x8f\x88<4.2}", "1") == "\x31\xf0\x9f\x8f\x88\xf0\x9f\x8f\x88\xf0\x9f\x8f\x88"sv);
-        assert(format("{:\xf0\x9f\x8f\x88^4.2}", "1") == "\xf0\x9f\x8f\x88\x31\xf0\x9f\x8f\x88\xf0\x9f\x8f\x88"sv);
-        assert(format("{:\xf0\x9f\x8f\x88>4.2}", "1") == "\xf0\x9f\x8f\x88\xf0\x9f\x8f\x88\xf0\x9f\x8f\x88\x31"sv);
-    }
-
-    {
-        assert(setlocale(LC_ALL, ".UTF-8") != nullptr);
-        try {
-            (void) format("{:\x9f\x8f\x88<10}"sv, 42); // Bad fill character encoding: missing lead byte before \x9f
-            assert(false);
-        } catch (const format_error&) {
-        }
-    }
-#endif // MSVC_INTERNAL_TESTING
-
-    assert(setlocale(LC_ALL, "C") != nullptr);
-}
-
 // The libfmt_ tests are derived from tests in
 // libfmt, Copyright (c) 2012 - present, Victor Zverovich
 // See NOTICE.txt for more information.
@@ -1318,8 +1293,6 @@ void test() {
     test_size<char>();
     test_size<wchar_t>();
 
-    test_multibyte_format_strings();
-
     libfmt_formatter_test_escape<char>();
     libfmt_formatter_test_escape<wchar_t>();
 
diff --git a/tests/std/tests/P0645R10_text_formatting_legacy_text_encoding/env.lst b/tests/std/tests/P0645R10_text_formatting_legacy_text_encoding/env.lst
index 9aa5f2a5cd..8be668471a 100644
--- a/tests/std/tests/P0645R10_text_formatting_legacy_text_encoding/env.lst
+++ b/tests/std/tests/P0645R10_text_formatting_legacy_text_encoding/env.lst
@@ -1,7 +1,8 @@
 # Copyright (c) Microsoft Corporation.
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
-# This is `concepts_matrix.lst` with `/execution-charset:.932` added.
+# This is `concepts_matrix.lst` with `/execution-charset:.932` added, and with
+# `/Za` configurations disabled to support inclusion of `<Windows.h>`.
 # clang is excluded since it doesn't support non-UTF-8 execution charsets.
 
 RUNALL_INCLUDE ..\prefix.lst
@@ -21,7 +22,7 @@ PM_CL="/MTd /D_ITERATOR_DEBUG_LEVEL=0 /permissive- /fp:strict"
 PM_CL="/MTd /D_ITERATOR_DEBUG_LEVEL=1 /permissive-"
 PM_CL="/MTd /D_ITERATOR_DEBUG_LEVEL=2 /permissive"
 PM_CL="/MTd /D_ITERATOR_DEBUG_LEVEL=2 /permissive- /analyze:only /analyze:autolog-"
-PM_CL="/permissive- /Za /MD"
-PM_CL="/permissive- /Za /MDd"
+# PM_CL="/permissive- /Za /MD"
+# PM_CL="/permissive- /Za /MDd"
 # PM_CL="/permissive- /BE /c /MD"
 # PM_CL="/permissive- /BE /c /MTd"
diff --git a/tests/std/tests/P0645R10_text_formatting_legacy_text_encoding/test.cpp b/tests/std/tests/P0645R10_text_formatting_legacy_text_encoding/test.cpp
index beb4a499c6..4a2a2e8057 100644
--- a/tests/std/tests/P0645R10_text_formatting_legacy_text_encoding/test.cpp
+++ b/tests/std/tests/P0645R10_text_formatting_legacy_text_encoding/test.cpp
@@ -8,55 +8,48 @@
 
 #include "test_format_support.hpp"
 
+#pragma warning(disable : 5105) // macro expansion producing 'defined' has undefined behavior
+#define CODEPAGE(x) (static_cast<unsigned int>(x) ? static_cast<unsigned int>(x) : 932u)
+#include "../../../../stl/src/format.cpp"
+
 using namespace std;
 
 void test_multibyte_format_strings() {
-    {
-        assert(setlocale(LC_ALL, ".932") != nullptr);
-        const auto s =
-            "\x93\xfa\x96{\x92\x6e\x90}"sv; // Note the use of `{` and `}` as continuation bytes (from GH-1576)
-        assert(format(s) == s);
-
-        assert(format("{:.2}", s) == "\x93\xfa"sv);
-        assert(format("{:4.2}", s) == "\x93\xfa  "sv);
+    const auto s = "\x93\xfa\x96{\x92\x6e\x90}"sv; // Note the use of `{` and `}` as continuation bytes (from GH-1576)
+    assert(format(s) == s);
 
-        assert(format("{:<4.2}", s) == "\x93\xfa  "sv);
-        assert(format("{:^4.2}", s) == " \x93\xfa "sv);
-        assert(format("{:>4.2}", s) == "  \x93\xfa"sv);
+    assert(format("{:.2}", s) == "\x93\xfa"sv);
+    assert(format("{:4.2}", s) == "\x93\xfa  "sv);
 
-        assert(format("{:\x90}<4.2}", s) == "\x93\xfa\x90}\x90}"sv);
-        assert(format("{:\x90}^4.2}", s) == "\x90}\x93\xfa\x90}"sv);
-        assert(format("{:\x90}>4.2}", s) == "\x90}\x90}\x93\xfa"sv);
+    assert(format("{:<4.2}", s) == "\x93\xfa  "sv);
+    assert(format("{:^4.2}", s) == " \x93\xfa "sv);
+    assert(format("{:>4.2}", s) == "  \x93\xfa"sv);
 
-        assert(format("{:.3}", s) == "\x93\xfa"sv);
-        assert(format("{:4.3}", s) == "\x93\xfa  "sv);
+    assert(format("{:\x90}<4.2}", s) == "\x93\xfa\x90}\x90}"sv);
+    assert(format("{:\x90}^4.2}", s) == "\x90}\x93\xfa\x90}"sv);
+    assert(format("{:\x90}>4.2}", s) == "\x90}\x90}\x93\xfa"sv);
 
-        assert(format("{:<4.3}", s) == "\x93\xfa  "sv);
-        assert(format("{:^4.3}", s) == " \x93\xfa "sv);
-        assert(format("{:>4.3}", s) == "  \x93\xfa"sv);
+    assert(format("{:.3}", s) == "\x93\xfa"sv);
+    assert(format("{:4.3}", s) == "\x93\xfa  "sv);
 
-        assert(format("{:\x90}<4.3}", s) == "\x93\xfa\x90}\x90}"sv);
-        assert(format("{:\x90}^4.3}", s) == "\x90}\x93\xfa\x90}"sv);
-        assert(format("{:\x90}>4.3}", s) == "\x90}\x90}\x93\xfa"sv);
-    }
+    assert(format("{:<4.3}", s) == "\x93\xfa  "sv);
+    assert(format("{:^4.3}", s) == " \x93\xfa "sv);
+    assert(format("{:>4.3}", s) == "  \x93\xfa"sv);
 
-    assert(setlocale(LC_ALL, "C") != nullptr);
+    assert(format("{:\x90}<4.3}", s) == "\x93\xfa\x90}\x90}"sv);
+    assert(format("{:\x90}^4.3}", s) == "\x90}\x93\xfa\x90}"sv);
+    assert(format("{:\x90}>4.3}", s) == "\x90}\x90}\x93\xfa"sv);
 }
 
 void test_parse_align() {
-    auto parse_align_fn = _Parse_align<char, testing_callbacks<char>>;
-
-    {
-        assert(setlocale(LC_ALL, ".932") != nullptr);
-        test_parse_helper(parse_align_fn, "\x93\xfa<X"sv, false, 3,
-            {.expected_alignment = _Fmt_align::_Left, .expected_fill = "\x93\xfa"sv});
-        test_parse_helper(parse_align_fn, "\x96\x7b>X"sv, false, 3,
-            {.expected_alignment = _Fmt_align::_Right, .expected_fill = "\x96\x7b"sv});
-        test_parse_helper(parse_align_fn, "\x92\x6e^X"sv, false, 3,
-            {.expected_alignment = _Fmt_align::_Center, .expected_fill = "\x92\x6e"sv});
-    }
-
-    assert(setlocale(LC_ALL, "C") != nullptr);
+    const auto parse_align_fn = _Parse_align<char, testing_callbacks<char>>;
+
+    test_parse_helper(parse_align_fn, "\x93\xfa<X"sv, false, 3, //
+        {.expected_alignment = _Fmt_align::_Left, .expected_fill = "\x93\xfa"sv});
+    test_parse_helper(parse_align_fn, "\x96\x7b>X"sv, false, 3,
+        {.expected_alignment = _Fmt_align::_Right, .expected_fill = "\x96\x7b"sv});
+    test_parse_helper(parse_align_fn, "\x92\x6e^X"sv, false, 3,
+        {.expected_alignment = _Fmt_align::_Center, .expected_fill = "\x92\x6e"sv});
 }
 
 int main() {
diff --git a/tests/std/tests/P0645R10_text_formatting_parsing/test.cpp b/tests/std/tests/P0645R10_text_formatting_parsing/test.cpp
index ddf204c0db..39fe7bd217 100644
--- a/tests/std/tests/P0645R10_text_formatting_parsing/test.cpp
+++ b/tests/std/tests/P0645R10_text_formatting_parsing/test.cpp
@@ -36,7 +36,7 @@ bool test_parse_align() {
         // \x343E (which is from CJK unified ideographs extension A) and similar characters to parse as
         // an alignment specifier.
         auto s4 = L"*\x343E"sv;
-        test_parse_helper(parse_align_fn, s4, false, view_typ::npos, {.expected_fill = L"*"sv});
+        test_parse_helper(parse_align_fn, s4, false, 0, {.expected_fill = L"*"sv});
 
         // test multi-code-unit fill characters
         {
@@ -47,22 +47,6 @@ bool test_parse_align() {
             test_parse_helper(parse_align_fn, L"\U0001F3C8^X"sv, false, 3,
                 {.expected_alignment = _Fmt_align::_Center, .expected_fill = L"\U0001F3C8"sv});
         }
-    } else {
-        // test multibyte fill characters
-#ifndef MSVC_INTERNAL_TESTING // TRANSITION, the Windows version on Contest VMs doesn't always understand ".UTF-8"
-        {
-            assert(setlocale(LC_ALL, ".UTF-8") != nullptr);
-            // "\xf0\x9f\x8f\x88" is U+1F3C8 AMERICAN FOOTBALL
-            test_parse_helper(parse_align_fn, "\xf0\x9f\x8f\x88<X"sv, false, 5,
-                {.expected_alignment = _Fmt_align::_Left, .expected_fill = "\xf0\x9f\x8f\x88"sv});
-            test_parse_helper(parse_align_fn, "\xf0\x9f\x8f\x88>X"sv, false, 5,
-                {.expected_alignment = _Fmt_align::_Right, .expected_fill = "\xf0\x9f\x8f\x88"sv});
-            test_parse_helper(parse_align_fn, "\xf0\x9f\x8f\x88^X"sv, false, 5,
-                {.expected_alignment = _Fmt_align::_Center, .expected_fill = "\xf0\x9f\x8f\x88"sv});
-        }
-#endif // MSVC_INTERNAL_TESTING
-
-        assert(setlocale(LC_ALL, "C") != nullptr);
     }
 
     return true;
diff --git a/tests/std/tests/P0645R10_text_formatting_utf8/env.lst b/tests/std/tests/P0645R10_text_formatting_utf8/env.lst
index 42da0946d2..d5bd9f429e 100644
--- a/tests/std/tests/P0645R10_text_formatting_utf8/env.lst
+++ b/tests/std/tests/P0645R10_text_formatting_utf8/env.lst
@@ -1,6 +1,30 @@
 # Copyright (c) Microsoft Corporation.
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
-RUNALL_INCLUDE ..\concepts_matrix.lst
+# This is `concepts_matrix.lst` with `/utf-8` added, and with
+# `/Za` configurations disabled to support inclusion of `<Windows.h>`.
+# clang is excluded since it doesn't support non-UTF-8 execution charsets.
+
+RUNALL_INCLUDE ..\prefix.lst
+RUNALL_CROSSLIST
+PM_CL="/w14640 /Zc:threadSafeInit- /EHsc /std:c++latest"
+RUNALL_CROSSLIST
+PM_CL="/MD /D_ITERATOR_DEBUG_LEVEL=0 /permissive- /Zc:noexceptTypes-"
+PM_CL="/MD /D_ITERATOR_DEBUG_LEVEL=1 /permissive-"
+PM_CL="/MD /D_ITERATOR_DEBUG_LEVEL=0 /permissive- /Zc:char8_t- /Zc:preprocessor"
+PM_CL="/MDd /D_ITERATOR_DEBUG_LEVEL=0 /permissive- /Zc:wchar_t-"
+PM_CL="/MDd /D_ITERATOR_DEBUG_LEVEL=1 /permissive-"
+PM_CL="/MDd /D_ITERATOR_DEBUG_LEVEL=2 /permissive- /fp:except /Zc:preprocessor"
+PM_CL="/MT /D_ITERATOR_DEBUG_LEVEL=0 /permissive-"
+PM_CL="/MT /D_ITERATOR_DEBUG_LEVEL=0 /permissive- /analyze:only /analyze:autolog-"
+PM_CL="/MT /D_ITERATOR_DEBUG_LEVEL=1 /permissive-"
+PM_CL="/MTd /D_ITERATOR_DEBUG_LEVEL=0 /permissive- /fp:strict"
+PM_CL="/MTd /D_ITERATOR_DEBUG_LEVEL=1 /permissive-"
+PM_CL="/MTd /D_ITERATOR_DEBUG_LEVEL=2 /permissive"
+PM_CL="/MTd /D_ITERATOR_DEBUG_LEVEL=2 /permissive- /analyze:only /analyze:autolog-"
+# PM_CL="/permissive- /Za /MD"
+# PM_CL="/permissive- /Za /MDd"
+# PM_CL="/permissive- /BE /c /MD"
+# PM_CL="/permissive- /BE /c /MTd"
 RUNALL_CROSSLIST
 PM_CL="/utf-8"
diff --git a/tests/std/tests/P0645R10_text_formatting_utf8/test.cpp b/tests/std/tests/P0645R10_text_formatting_utf8/test.cpp
index 5b98879423..f071e56642 100644
--- a/tests/std/tests/P0645R10_text_formatting_utf8/test.cpp
+++ b/tests/std/tests/P0645R10_text_formatting_utf8/test.cpp
@@ -8,6 +8,10 @@
 
 #include "test_format_support.hpp"
 
+#pragma warning(disable : 5105) // macro expansion producing 'defined' has undefined behavior
+#define CODEPAGE(x) (static_cast<unsigned int>(x) ? static_cast<unsigned int>(x) : 65001u)
+#include "../../../../stl/src/format.cpp"
+
 using namespace std;
 
 void test_multibyte_format_strings() {

From eaa454f39e7300d90bf1fde516bec954f65f1bc5 Mon Sep 17 00:00:00 2001
From: Casey Carter <cacarter@microsoft.com>
Date: Wed, 14 Apr 2021 08:39:44 -0700
Subject: [PATCH 2/7] More hygienic testing method

---
 stl/inc/format                                |  8 ++++--
 stl/src/format.cpp                            |  2 ++
 .../env.lst                                   |  7 +++--
 .../test.cpp                                  |  6 ++---
 .../P0645R10_text_formatting_utf8/env.lst     | 26 +------------------
 .../P0645R10_text_formatting_utf8/test.cpp    |  6 ++---
 6 files changed, 16 insertions(+), 39 deletions(-)

diff --git a/stl/inc/format b/stl/inc/format
index dea6e05e8c..b8e193a55a 100644
--- a/stl/inc/format
+++ b/stl/inc/format
@@ -498,9 +498,13 @@ protected:
     }
 
     _Fmt_codec_base() {
-        if (__std_get_cvt(__std_code_page::_Acp, &_Cvt) != __std_win_error::_Success) {
-            _THROW(format_error("FIXME"));
+#ifndef _FORMAT_CODEPAGE
+#define _FORMAT_CODEPAGE __std_code_page::_Acp
+#endif // _FORMAT_CODEPAGE
+        if (__std_get_cvt(_FORMAT_CODEPAGE, &_Cvt) != __std_win_error::_Success) {
+            _THROW(format_error("failed to retrieve codepage info"));
         }
+#undef _FORMAT_CODEPAGE
     }
 };
 
diff --git a/stl/src/format.cpp b/stl/src/format.cpp
index 03bee36200..98966a180e 100644
--- a/stl/src/format.cpp
+++ b/stl/src/format.cpp
@@ -21,6 +21,8 @@ extern "C" [[nodiscard]] __std_win_error __stdcall __std_get_cvt(
     CPINFOEXW _Info{};
     const DWORD _Flags = 0; // reserved, must be zero
     if (GetCPInfoExW(CODEPAGE(_Codepage), _Flags, &_Info) == 0) {
+        // NB: the only documented failure mode for GetCPInfoExW is ERROR_INVALID_PARAMETER,
+        // so in practice it should never fail for CP_ACP.
         return __std_win_error{GetLastError()};
     }
 
diff --git a/tests/std/tests/P0645R10_text_formatting_legacy_text_encoding/env.lst b/tests/std/tests/P0645R10_text_formatting_legacy_text_encoding/env.lst
index 8be668471a..9aa5f2a5cd 100644
--- a/tests/std/tests/P0645R10_text_formatting_legacy_text_encoding/env.lst
+++ b/tests/std/tests/P0645R10_text_formatting_legacy_text_encoding/env.lst
@@ -1,8 +1,7 @@
 # Copyright (c) Microsoft Corporation.
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
-# This is `concepts_matrix.lst` with `/execution-charset:.932` added, and with
-# `/Za` configurations disabled to support inclusion of `<Windows.h>`.
+# This is `concepts_matrix.lst` with `/execution-charset:.932` added.
 # clang is excluded since it doesn't support non-UTF-8 execution charsets.
 
 RUNALL_INCLUDE ..\prefix.lst
@@ -22,7 +21,7 @@ PM_CL="/MTd /D_ITERATOR_DEBUG_LEVEL=0 /permissive- /fp:strict"
 PM_CL="/MTd /D_ITERATOR_DEBUG_LEVEL=1 /permissive-"
 PM_CL="/MTd /D_ITERATOR_DEBUG_LEVEL=2 /permissive"
 PM_CL="/MTd /D_ITERATOR_DEBUG_LEVEL=2 /permissive- /analyze:only /analyze:autolog-"
-# PM_CL="/permissive- /Za /MD"
-# PM_CL="/permissive- /Za /MDd"
+PM_CL="/permissive- /Za /MD"
+PM_CL="/permissive- /Za /MDd"
 # PM_CL="/permissive- /BE /c /MD"
 # PM_CL="/permissive- /BE /c /MTd"
diff --git a/tests/std/tests/P0645R10_text_formatting_legacy_text_encoding/test.cpp b/tests/std/tests/P0645R10_text_formatting_legacy_text_encoding/test.cpp
index 4a2a2e8057..9ed6a591d8 100644
--- a/tests/std/tests/P0645R10_text_formatting_legacy_text_encoding/test.cpp
+++ b/tests/std/tests/P0645R10_text_formatting_legacy_text_encoding/test.cpp
@@ -1,6 +1,8 @@
 // Copyright (c) Microsoft Corporation.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
+#define _FORMAT_CODEPAGE (__std_code_page{932})
+
 #include <cassert>
 #include <clocale>
 #include <format>
@@ -8,10 +10,6 @@
 
 #include "test_format_support.hpp"
 
-#pragma warning(disable : 5105) // macro expansion producing 'defined' has undefined behavior
-#define CODEPAGE(x) (static_cast<unsigned int>(x) ? static_cast<unsigned int>(x) : 932u)
-#include "../../../../stl/src/format.cpp"
-
 using namespace std;
 
 void test_multibyte_format_strings() {
diff --git a/tests/std/tests/P0645R10_text_formatting_utf8/env.lst b/tests/std/tests/P0645R10_text_formatting_utf8/env.lst
index d5bd9f429e..42da0946d2 100644
--- a/tests/std/tests/P0645R10_text_formatting_utf8/env.lst
+++ b/tests/std/tests/P0645R10_text_formatting_utf8/env.lst
@@ -1,30 +1,6 @@
 # Copyright (c) Microsoft Corporation.
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
-# This is `concepts_matrix.lst` with `/utf-8` added, and with
-# `/Za` configurations disabled to support inclusion of `<Windows.h>`.
-# clang is excluded since it doesn't support non-UTF-8 execution charsets.
-
-RUNALL_INCLUDE ..\prefix.lst
-RUNALL_CROSSLIST
-PM_CL="/w14640 /Zc:threadSafeInit- /EHsc /std:c++latest"
-RUNALL_CROSSLIST
-PM_CL="/MD /D_ITERATOR_DEBUG_LEVEL=0 /permissive- /Zc:noexceptTypes-"
-PM_CL="/MD /D_ITERATOR_DEBUG_LEVEL=1 /permissive-"
-PM_CL="/MD /D_ITERATOR_DEBUG_LEVEL=0 /permissive- /Zc:char8_t- /Zc:preprocessor"
-PM_CL="/MDd /D_ITERATOR_DEBUG_LEVEL=0 /permissive- /Zc:wchar_t-"
-PM_CL="/MDd /D_ITERATOR_DEBUG_LEVEL=1 /permissive-"
-PM_CL="/MDd /D_ITERATOR_DEBUG_LEVEL=2 /permissive- /fp:except /Zc:preprocessor"
-PM_CL="/MT /D_ITERATOR_DEBUG_LEVEL=0 /permissive-"
-PM_CL="/MT /D_ITERATOR_DEBUG_LEVEL=0 /permissive- /analyze:only /analyze:autolog-"
-PM_CL="/MT /D_ITERATOR_DEBUG_LEVEL=1 /permissive-"
-PM_CL="/MTd /D_ITERATOR_DEBUG_LEVEL=0 /permissive- /fp:strict"
-PM_CL="/MTd /D_ITERATOR_DEBUG_LEVEL=1 /permissive-"
-PM_CL="/MTd /D_ITERATOR_DEBUG_LEVEL=2 /permissive"
-PM_CL="/MTd /D_ITERATOR_DEBUG_LEVEL=2 /permissive- /analyze:only /analyze:autolog-"
-# PM_CL="/permissive- /Za /MD"
-# PM_CL="/permissive- /Za /MDd"
-# PM_CL="/permissive- /BE /c /MD"
-# PM_CL="/permissive- /BE /c /MTd"
+RUNALL_INCLUDE ..\concepts_matrix.lst
 RUNALL_CROSSLIST
 PM_CL="/utf-8"
diff --git a/tests/std/tests/P0645R10_text_formatting_utf8/test.cpp b/tests/std/tests/P0645R10_text_formatting_utf8/test.cpp
index f071e56642..2fa79b83e0 100644
--- a/tests/std/tests/P0645R10_text_formatting_utf8/test.cpp
+++ b/tests/std/tests/P0645R10_text_formatting_utf8/test.cpp
@@ -1,6 +1,8 @@
 // Copyright (c) Microsoft Corporation.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
+#define _FORMAT_CODEPAGE __std_code_page::_Utf8
+
 #include <cassert>
 #include <clocale>
 #include <format>
@@ -8,10 +10,6 @@
 
 #include "test_format_support.hpp"
 
-#pragma warning(disable : 5105) // macro expansion producing 'defined' has undefined behavior
-#define CODEPAGE(x) (static_cast<unsigned int>(x) ? static_cast<unsigned int>(x) : 65001u)
-#include "../../../../stl/src/format.cpp"
-
 using namespace std;
 
 void test_multibyte_format_strings() {

From 9494a9cc80121e766cacb0d07652128d14bfb9a7 Mon Sep 17 00:00:00 2001
From: Casey Carter <cacarter@microsoft.com>
Date: Wed, 14 Apr 2021 11:35:38 -0700
Subject: [PATCH 3/7] Review comments

---
 stl/inc/format                                  | 17 +++++++----------
 stl/msbuild/stl_base/stl.files.settings.targets |  1 +
 stl/src/format.cpp                              | 14 ++++++++------
 3 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/stl/inc/format b/stl/inc/format
index b8e193a55a..fb942333a3 100644
--- a/stl/inc/format
+++ b/stl/inc/format
@@ -63,8 +63,7 @@ _STL_DISABLE_CLANG_WARNINGS
 #pragma push_macro("new")
 #undef new
 
-extern "C" _NODISCARD __std_win_error __stdcall __std_get_cvt(
-    const __std_code_page _Codepage, _Cvtvec* const _Pcvt) noexcept;
+extern "C" _NODISCARD __std_win_error __stdcall __std_get_cvt(__std_code_page _Codepage, _Cvtvec* _Pcvt) noexcept;
 
 _STD_BEGIN
 
@@ -501,9 +500,8 @@ protected:
 #ifndef _FORMAT_CODEPAGE
 #define _FORMAT_CODEPAGE __std_code_page::_Acp
 #endif // _FORMAT_CODEPAGE
-        if (__std_get_cvt(_FORMAT_CODEPAGE, &_Cvt) != __std_win_error::_Success) {
-            _THROW(format_error("failed to retrieve codepage info"));
-        }
+        [[maybe_unused]] const __std_win_error _Result = __std_get_cvt(_FORMAT_CODEPAGE, &_Cvt);
+        _STL_INTERNAL_CHECK(_Result == __std_win_error::_Success);
 #undef _FORMAT_CODEPAGE
     }
 };
@@ -537,7 +535,7 @@ private:
         return (_Len >= 4) ? 4 : -1;
     }
 
-    _NODISCARD static constexpr int _Estimate_utf8_character_width(const char* const _Ptr, const int _Units) noexcept {
+    _NODISCARD static int _Estimate_utf8_character_width(const char* const _Ptr, const int _Units) noexcept {
         // Return an estimate for the width of the character composed of _Units code units,
         // whose first code unit is denoted by _Ptr.
         auto _Ch = static_cast<char32_t>(*_Ptr);
@@ -633,7 +631,6 @@ public:
 
 template <bool _Statically_Utf8>
 class _Fmt_codec<wchar_t, _Statically_Utf8> {
-private:
 public:
     _NODISCARD int _Units_in_next_character(const wchar_t* _First, const wchar_t* _Last) const noexcept {
         // Returns a count of the number of code units that compose the first encoded character in
@@ -948,15 +945,15 @@ template <class _CharT, _Parse_replacement_field_callbacks<_CharT> _HandlerT>
 void _Parse_format_string(basic_string_view<_CharT> _Format_str, _HandlerT&& _Handler) {
     auto _Begin = _Format_str.data();
     auto _End   = _Begin + _Format_str.size();
-    const _Fmt_codec<_CharT> _Counter{};
+    const _Fmt_codec<_CharT> _Codec;
 
     while (_Begin != _End) {
         const _CharT* _OpeningCurl = _Begin;
         if (*_Begin != '{') {
-            _OpeningCurl = _Counter._Find_encoded(_Begin, _End, _CharT{'{'});
+            _OpeningCurl = _Codec._Find_encoded(_Begin, _End, _CharT{'{'});
 
             for (;;) {
-                const _CharT* _ClosingCurl = _Counter._Find_encoded(_Begin, _OpeningCurl, _CharT{'}'});
+                const _CharT* _ClosingCurl = _Codec._Find_encoded(_Begin, _OpeningCurl, _CharT{'}'});
 
                 // In this case there are neither closing nor opening curls in [_Begin, _OpeningCurl)
                 // Write the whole thing out.
diff --git a/stl/msbuild/stl_base/stl.files.settings.targets b/stl/msbuild/stl_base/stl.files.settings.targets
index 05ff1f5ddb..15aa0fbc46 100644
--- a/stl/msbuild/stl_base/stl.files.settings.targets
+++ b/stl/msbuild/stl_base/stl.files.settings.targets
@@ -161,6 +161,7 @@ SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
              (controlled by IncludeInLink and IncludeInImportLib). -->
         <BuildFiles Include="
             $(CrtRoot)\github\stl\src\filesystem.cpp;
+            $(CrtRoot)\github\stl\src\format.cpp;
             $(CrtRoot)\github\stl\src\locale0_implib.cpp;
             $(CrtRoot)\github\stl\src\nothrow.cpp;
             $(CrtRoot)\github\stl\src\sharedmutex.cpp;
diff --git a/stl/src/format.cpp b/stl/src/format.cpp
index 98966a180e..2d8d5d9fec 100644
--- a/stl/src/format.cpp
+++ b/stl/src/format.cpp
@@ -1,6 +1,13 @@
 // Copyright (c) Microsoft Corporation.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
+// Implements a win32 API wrapper for <format>
+
+// This must be as small as possible, because its contents are
+// injected into the msvcprt.lib and msvcprtd.lib import libraries.
+// Do not include or define anything else here.
+// In particular, basic_string must not be included here.
+
 #include <xfilesystem_abi.h>
 #include <xlocinfo.h>
 
@@ -8,11 +15,6 @@
 
 static_assert(__std_code_page::_Acp == __std_code_page{CP_ACP});
 
-// CODEPAGE is conditionally defined so tests can override it.
-#ifndef CODEPAGE
-#define CODEPAGE(x) static_cast<UINT>(x)
-#endif
-
 extern "C" [[nodiscard]] __std_win_error __stdcall __std_get_cvt(
     const __std_code_page _Codepage, _Cvtvec* const _Pcvt) noexcept {
     // get conversion info for an arbitrary codepage
@@ -20,7 +22,7 @@ extern "C" [[nodiscard]] __std_win_error __stdcall __std_get_cvt(
 
     CPINFOEXW _Info{};
     const DWORD _Flags = 0; // reserved, must be zero
-    if (GetCPInfoExW(CODEPAGE(_Codepage), _Flags, &_Info) == 0) {
+    if (GetCPInfoExW(static_cast<UINT>(_Codepage), _Flags, &_Info) == 0) {
         // NB: the only documented failure mode for GetCPInfoExW is ERROR_INVALID_PARAMETER,
         // so in practice it should never fail for CP_ACP.
         return __std_win_error{GetLastError()};

From 799a55a525fd9c5e083f1b6c2b9dbc55561956a7 Mon Sep 17 00:00:00 2001
From: Casey Carter <cacarter@microsoft.com>
Date: Thu, 15 Apr 2021 14:20:43 -0700
Subject: [PATCH 4/7] Remove confusing comment

---
 stl/inc/format | 2 --
 1 file changed, 2 deletions(-)

diff --git a/stl/inc/format b/stl/inc/format
index fb942333a3..ed5c45dae7 100644
--- a/stl/inc/format
+++ b/stl/inc/format
@@ -654,8 +654,6 @@ public:
     }
 
     _NODISCARD const wchar_t* _Find_encoded(const wchar_t* _First, const wchar_t* _Last, const wchar_t _Val) const {
-        // Returns the first occurrence of _Val as an encoded character (and not, for example, as a
-        // continuation byte) in [_First, _Last).
         return _Find_unchecked(_First, _Last, _Val);
     }
 

From c6efbdba4d3b69a1fc06af86a03f63cb9536cb96 Mon Sep 17 00:00:00 2001
From: Casey Carter <cacarter@microsoft.com>
Date: Fri, 16 Apr 2021 11:07:10 -0700
Subject: [PATCH 5/7] Add test coverage for width estimation

... fix 2 bugs exposed by that coverage, and address review comments. Drive-by: Remove the `_FORMAT_CODEPAGE` override from the `formatting_utf8` test which is unnecessary since we compile it with `/utf-8`.
---
 stl/inc/format                                |  11 +-
 .../test.cpp                                  |  29 +++++
 .../P0645R10_text_formatting_utf8/test.cpp    | 119 +++++++++++++++---
 3 files changed, 140 insertions(+), 19 deletions(-)

diff --git a/stl/inc/format b/stl/inc/format
index ed5c45dae7..4275f77a71 100644
--- a/stl/inc/format
+++ b/stl/inc/format
@@ -462,7 +462,7 @@ _NODISCARD constexpr int _Unicode_width_estimate(const char32_t _Ch) noexcept {
         if (_Ch < _Bound) {
             return _Result;
         }
-        _Result ^= 1;
+        _Result ^= 0b11u; // Flip between 1 and 2 on each iteration
     }
 
     return 1;
@@ -564,7 +564,7 @@ private:
     }
 
 public:
-    _NODISCARD int _Units_in_next_character(const char* _First, const char* _Last) const noexcept {
+    _NODISCARD int _Units_in_next_character(const char* const _First, const char* const _Last) const noexcept {
         // Returns a count of the number of code units that compose the first encoded character in
         // [_First, _Last), or -1 if [_First, _Last) doesn't contain an entire encoded character or
         // *_First is not a valid lead byte.
@@ -589,7 +589,7 @@ public:
         }
     }
 
-    _NODISCARD const char* _Find_encoded(const char* _First, const char* _Last, const char _Val) const {
+    _NODISCARD const char* _Find_encoded(const char* _First, const char* const _Last, const char _Val) const {
         // Returns the first occurrence of _Val as an encoded character (and not, for example, as a
         // continuation byte) in [_First, _Last).
         if constexpr (_Statically_Utf8) {
@@ -632,7 +632,7 @@ public:
 template <bool _Statically_Utf8>
 class _Fmt_codec<wchar_t, _Statically_Utf8> {
 public:
-    _NODISCARD int _Units_in_next_character(const wchar_t* _First, const wchar_t* _Last) const noexcept {
+    _NODISCARD int _Units_in_next_character(const wchar_t* _First, const wchar_t* const _Last) const noexcept {
         // Returns a count of the number of code units that compose the first encoded character in
         // [_First, _Last), or -1 if [_First, _Last) doesn't contain an entire encoded character or
         // *_First is an unpaired surrogate.
@@ -653,7 +653,8 @@ public:
         return 2; // surrogate pair
     }
 
-    _NODISCARD const wchar_t* _Find_encoded(const wchar_t* _First, const wchar_t* _Last, const wchar_t _Val) const {
+    _NODISCARD const wchar_t* _Find_encoded(
+        const wchar_t* const _First, const wchar_t* const _Last, const wchar_t _Val) const {
         return _Find_unchecked(_First, _Last, _Val);
     }
 
diff --git a/tests/std/tests/P0645R10_text_formatting_legacy_text_encoding/test.cpp b/tests/std/tests/P0645R10_text_formatting_legacy_text_encoding/test.cpp
index 9ed6a591d8..bd81958f9c 100644
--- a/tests/std/tests/P0645R10_text_formatting_legacy_text_encoding/test.cpp
+++ b/tests/std/tests/P0645R10_text_formatting_legacy_text_encoding/test.cpp
@@ -50,7 +50,36 @@ void test_parse_align() {
         {.expected_alignment = _Fmt_align::_Center, .expected_fill = "\x92\x6e"sv});
 }
 
+void test_width_estimation() {
+    // Format strings of known width with a trailing delimiter using a precision large enough to
+    // include all but the delimiter to validate the width estimation code.
+    struct test_case {
+        const char* str;
+        int width;
+    };
+    constexpr test_case test_cases[] = {
+        // Pick a "short" and "long" codepoints (\x20 and \x96\x7b), then form
+        // all permutations of 3-codepoint prefixes. This gives us coverage of
+        // all transitions (e.g. short-to-long, long-to-long).
+        {"\x20\x20\x20\x58", 4},
+        {"\x20\x20\x96\x7b\x58", 5},
+        {"\x20\x96\x7b\x20\x58", 5},
+        {"\x96\x7b\x20\x20\x58", 5},
+        {"\x20\x96\x7b\x96\x7b\x58", 6},
+        {"\x96\x7b\x20\x96\x7b\x58", 6},
+        {"\x96\x7b\x96\x7b\x20\x58", 6},
+        {"\x96\x7b\x96\x7b\x96\x7b\x58", 7},
+    };
+
+    for (const auto& test : test_cases) {
+        basic_string_view sv{test.str};
+        sv = sv.substr(0, sv.size() - 1);
+        assert(format("{:.{}}", test.str, test.width - 1) == sv);
+    }
+}
+
 int main() {
     test_multibyte_format_strings();
     test_parse_align();
+    test_width_estimation();
 }
diff --git a/tests/std/tests/P0645R10_text_formatting_utf8/test.cpp b/tests/std/tests/P0645R10_text_formatting_utf8/test.cpp
index 2fa79b83e0..9f33b368de 100644
--- a/tests/std/tests/P0645R10_text_formatting_utf8/test.cpp
+++ b/tests/std/tests/P0645R10_text_formatting_utf8/test.cpp
@@ -1,8 +1,6 @@
 // Copyright (c) Microsoft Corporation.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
-#define _FORMAT_CODEPAGE __std_code_page::_Utf8
-
 #include <cassert>
 #include <clocale>
 #include <format>
@@ -14,12 +12,12 @@ using namespace std;
 
 void test_multibyte_format_strings() {
     {
-        // Filling with footballs ("\xf0\x9f\x8f\x88" is U+1F3C8 AMERICAN FOOTBALL)
-        assert(format("{:\xf0\x9f\x8f\x88>4}"sv, 42) == "\xf0\x9f\x8f\x88\xf0\x9f\x8f\x88\x34\x32");
+        // Filling with footballs ("\U0001f3c8" is U+1F3C8 AMERICAN FOOTBALL)
+        assert(format("{:\U0001f3c8>4}"sv, 42) == "\U0001f3c8\U0001f3c8\x34\x32");
 
-        assert(format("{:\xf0\x9f\x8f\x88<4.2}", "1") == "\x31\xf0\x9f\x8f\x88\xf0\x9f\x8f\x88\xf0\x9f\x8f\x88"sv);
-        assert(format("{:\xf0\x9f\x8f\x88^4.2}", "1") == "\xf0\x9f\x8f\x88\x31\xf0\x9f\x8f\x88\xf0\x9f\x8f\x88"sv);
-        assert(format("{:\xf0\x9f\x8f\x88>4.2}", "1") == "\xf0\x9f\x8f\x88\xf0\x9f\x8f\x88\xf0\x9f\x8f\x88\x31"sv);
+        assert(format("{:\U0001f3c8<4.2}", "1") == "\x31\U0001f3c8\U0001f3c8\U0001f3c8"sv);
+        assert(format("{:\U0001f3c8^4.2}", "1") == "\U0001f3c8\x31\U0001f3c8\U0001f3c8"sv);
+        assert(format("{:\U0001f3c8>4.2}", "1") == "\U0001f3c8\U0001f3c8\U0001f3c8\x31"sv);
     }
 
     {
@@ -35,19 +33,112 @@ void test_parse_align() {
     auto parse_align_fn = _Parse_align<char, testing_callbacks<char>>;
 
     {
-        // "\xf0\x9f\x8f\x88" is U+1F3C8 AMERICAN FOOTBALL
-        test_parse_helper(parse_align_fn, "\xf0\x9f\x8f\x88<X"sv, false, 5,
-            {.expected_alignment = _Fmt_align::_Left, .expected_fill = "\xf0\x9f\x8f\x88"sv});
-        test_parse_helper(parse_align_fn, "\xf0\x9f\x8f\x88>X"sv, false, 5,
-            {.expected_alignment = _Fmt_align::_Right, .expected_fill = "\xf0\x9f\x8f\x88"sv});
-        test_parse_helper(parse_align_fn, "\xf0\x9f\x8f\x88^X"sv, false, 5,
-            {.expected_alignment = _Fmt_align::_Center, .expected_fill = "\xf0\x9f\x8f\x88"sv});
+        test_parse_helper(parse_align_fn, "\U0001f3c8<X"sv, false, 5,
+            {.expected_alignment = _Fmt_align::_Left, .expected_fill = "\U0001f3c8"sv});
+        test_parse_helper(parse_align_fn, "\U0001f3c8>X"sv, false, 5,
+            {.expected_alignment = _Fmt_align::_Right, .expected_fill = "\U0001f3c8"sv});
+        test_parse_helper(parse_align_fn, "\U0001f3c8^X"sv, false, 5,
+            {.expected_alignment = _Fmt_align::_Center, .expected_fill = "\U0001f3c8"sv});
+    }
+}
+
+template <class CharT>
+void test_width_estimation() {
+    // Format strings of known width with a trailing delimiter using a precision large enough to
+    // include all but the delimiter to validate the width estimation code.
+    struct test_case {
+        const CharT* str;
+        int width;
+    };
+    constexpr test_case test_cases[] = {
+        {TYPED_LITERAL(CharT, "\x58"), 1},
+        {TYPED_LITERAL(CharT, "x\x58"), 2},
+        // test the boundaries of the intervals defined in n4885 [format.str.std]/11
+        {TYPED_LITERAL(CharT, "\u10ff\x58"), 2},
+        {TYPED_LITERAL(CharT, "\u1100\x58"), 3},
+        {TYPED_LITERAL(CharT, "\u115f\x58"), 3},
+        {TYPED_LITERAL(CharT, "\u1160\x58"), 2},
+        {TYPED_LITERAL(CharT, "\u2328\x58"), 2},
+        {TYPED_LITERAL(CharT, "\u2329\x58"), 3},
+        {TYPED_LITERAL(CharT, "\u232a\x58"), 3},
+        {TYPED_LITERAL(CharT, "\u232b\x58"), 2},
+        {TYPED_LITERAL(CharT, "\u2e7f\x58"), 2},
+        {TYPED_LITERAL(CharT, "\u2e80\x58"), 3},
+        {TYPED_LITERAL(CharT, "\u303e\x58"), 3},
+        {TYPED_LITERAL(CharT, "\u303f\x58"), 2},
+        {TYPED_LITERAL(CharT, "\u3040\x58"), 3},
+        {TYPED_LITERAL(CharT, "\ua4cf\x58"), 3},
+        {TYPED_LITERAL(CharT, "\ua4d0\x58"), 2},
+        {TYPED_LITERAL(CharT, "\uabff\x58"), 2},
+        {TYPED_LITERAL(CharT, "\uac00\x58"), 3},
+        {TYPED_LITERAL(CharT, "\ud7a3\x58"), 3},
+        {TYPED_LITERAL(CharT, "\ud7a4\x58"), 2},
+        {TYPED_LITERAL(CharT, "\ud7ff\x58"), 2},
+        // skip over the surrogate pair range (\ud800-\udfff)
+        {TYPED_LITERAL(CharT, "\ue000\x58"), 2},
+        {TYPED_LITERAL(CharT, "\uf8ff\x58"), 2},
+        {TYPED_LITERAL(CharT, "\uf900\x58"), 3},
+        {TYPED_LITERAL(CharT, "\ufaff\x58"), 3},
+        {TYPED_LITERAL(CharT, "\ufb00\x58"), 2},
+        {TYPED_LITERAL(CharT, "\ufe0f\x58"), 2},
+        {TYPED_LITERAL(CharT, "\ufe10\x58"), 3},
+        {TYPED_LITERAL(CharT, "\ufe19\x58"), 3},
+        {TYPED_LITERAL(CharT, "\ufe1a\x58"), 2},
+        {TYPED_LITERAL(CharT, "\ufe2f\x58"), 2},
+        {TYPED_LITERAL(CharT, "\ufe30\x58"), 3},
+        {TYPED_LITERAL(CharT, "\ufe6f\x58"), 3},
+        {TYPED_LITERAL(CharT, "\ufe70\x58"), 2},
+        {TYPED_LITERAL(CharT, "\ufeff\x58"), 2},
+        {TYPED_LITERAL(CharT, "\uff00\x58"), 3},
+        {TYPED_LITERAL(CharT, "\uff60\x58"), 3},
+        {TYPED_LITERAL(CharT, "\uff61\x58"), 2},
+        {TYPED_LITERAL(CharT, "\uffdf\x58"), 2},
+        {TYPED_LITERAL(CharT, "\uffe0\x58"), 3},
+        {TYPED_LITERAL(CharT, "\uffe6\x58"), 3},
+        {TYPED_LITERAL(CharT, "\uffe7\x58"), 2},
+        {TYPED_LITERAL(CharT, "\U0001f2ff\x58"), 2},
+        {TYPED_LITERAL(CharT, "\U0001f300\x58"), 3},
+        {TYPED_LITERAL(CharT, "\U0001f64f\x58"), 3},
+        {TYPED_LITERAL(CharT, "\U0001f650\x58"), 2},
+        {TYPED_LITERAL(CharT, "\U0001f8ff\x58"), 2},
+        {TYPED_LITERAL(CharT, "\U0001f900\x58"), 3},
+        {TYPED_LITERAL(CharT, "\U0001f9ff\x58"), 3},
+        {TYPED_LITERAL(CharT, "\U0001fa00\x58"), 2},
+        {TYPED_LITERAL(CharT, "\U0001ffff\x58"), 2},
+        {TYPED_LITERAL(CharT, "\U00020000\x58"), 3},
+        {TYPED_LITERAL(CharT, "\U0002fffd\x58"), 3},
+        {TYPED_LITERAL(CharT, "\U0002fffe\x58"), 2},
+        {TYPED_LITERAL(CharT, "\U0002ffff\x58"), 2},
+        {TYPED_LITERAL(CharT, "\U00030000\x58"), 3},
+        {TYPED_LITERAL(CharT, "\U0003fffd\x58"), 3},
+        {TYPED_LITERAL(CharT, "\U0003fffe\x58"), 2},
+        {TYPED_LITERAL(CharT, "\U0010ffff\x58"), 2},
+
+        // Pick a "short" and "long" codepoints (\u2000 and \ufe40), then form
+        // all permutations of 3-codepoint prefixes. This gives us coverage of
+        // all transitions (e.g. short-to-long, long-to-long).
+        {TYPED_LITERAL(CharT, "\u2000\u2000\u2000\x58"), 4},
+        {TYPED_LITERAL(CharT, "\u2000\u2000\ufe40\x58"), 5},
+        {TYPED_LITERAL(CharT, "\u2000\ufe40\u2000\x58"), 5},
+        {TYPED_LITERAL(CharT, "\ufe40\u2000\u2000\x58"), 5},
+        {TYPED_LITERAL(CharT, "\u2000\ufe40\ufe40\x58"), 6},
+        {TYPED_LITERAL(CharT, "\ufe40\u2000\ufe40\x58"), 6},
+        {TYPED_LITERAL(CharT, "\ufe40\ufe40\u2000\x58"), 6},
+        {TYPED_LITERAL(CharT, "\ufe40\ufe40\ufe40\x58"), 7},
+    };
+
+    for (const auto& test : test_cases) {
+        basic_string_view sv{test.str};
+        sv = sv.substr(0, sv.size() - 1);
+        assert(format(TYPED_LITERAL(CharT, "{:.{}}"), test.str, test.width - 1) == sv);
     }
 }
 
 void run_tests() {
     test_multibyte_format_strings();
     test_parse_align();
+    test_width_estimation<char>();
+    test_width_estimation<wchar_t>();
 }
 
 int main() {

From 16c599c712bb99400ade37f705a84e20965ad64f Mon Sep 17 00:00:00 2001
From: Casey Carter <cacarter@microsoft.com>
Date: Fri, 16 Apr 2021 13:14:38 -0700
Subject: [PATCH 6/7] *sigh* commit the fix for the second bug

---
 stl/inc/format | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/stl/inc/format b/stl/inc/format
index 4275f77a71..d09850c03f 100644
--- a/stl/inc/format
+++ b/stl/inc/format
@@ -667,7 +667,7 @@ public:
         }
 
         // surrogate pair
-        _Ch = (_Ch - 0xD8000u) << 10;
+        _Ch = (_Ch - 0xD800u) << 10;
         _Ch += static_cast<char32_t>(_Ptr[1]) - 0xDC00u;
         _Ch += 0x10000u;
         return _Unicode_width_estimate<_Width_estimate_high_intervals>(_Ch);

From 3f2944f6860bb8ce9ea19c1db899a358a0bc8825 Mon Sep 17 00:00:00 2001
From: Casey Carter <cacarter@microsoft.com>
Date: Fri, 16 Apr 2021 15:15:50 -0700
Subject: [PATCH 7/7] More review comments

---
 .../test.cpp                                          | 11 +++++++----
 .../std/tests/P0645R10_text_formatting_utf8/test.cpp  | 11 +++++++----
 2 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/tests/std/tests/P0645R10_text_formatting_legacy_text_encoding/test.cpp b/tests/std/tests/P0645R10_text_formatting_legacy_text_encoding/test.cpp
index bd81958f9c..081eecd7ce 100644
--- a/tests/std/tests/P0645R10_text_formatting_legacy_text_encoding/test.cpp
+++ b/tests/std/tests/P0645R10_text_formatting_legacy_text_encoding/test.cpp
@@ -58,9 +58,12 @@ void test_width_estimation() {
         int width;
     };
     constexpr test_case test_cases[] = {
-        // Pick a "short" and "long" codepoints (\x20 and \x96\x7b), then form
-        // all permutations of 3-codepoint prefixes. This gives us coverage of
-        // all transitions (e.g. short-to-long, long-to-long).
+        {"\x58", 1},
+        {"x\x58", 2},
+
+        // Pick "short" and "long" codepoints (\x20 and \x96\x7b), then form all permutations of
+        // 3-codepoint prefixes with the same fixed delimiter as above. This gives us coverage of
+        // all adjacent pairings (short/short, short/long, long/short, long/long).
         {"\x20\x20\x20\x58", 4},
         {"\x20\x20\x96\x7b\x58", 5},
         {"\x20\x96\x7b\x20\x58", 5},
@@ -72,7 +75,7 @@ void test_width_estimation() {
     };
 
     for (const auto& test : test_cases) {
-        basic_string_view sv{test.str};
+        string_view sv{test.str};
         sv = sv.substr(0, sv.size() - 1);
         assert(format("{:.{}}", test.str, test.width - 1) == sv);
     }
diff --git a/tests/std/tests/P0645R10_text_formatting_utf8/test.cpp b/tests/std/tests/P0645R10_text_formatting_utf8/test.cpp
index 9f33b368de..08a70afaac 100644
--- a/tests/std/tests/P0645R10_text_formatting_utf8/test.cpp
+++ b/tests/std/tests/P0645R10_text_formatting_utf8/test.cpp
@@ -53,7 +53,8 @@ void test_width_estimation() {
     constexpr test_case test_cases[] = {
         {TYPED_LITERAL(CharT, "\x58"), 1},
         {TYPED_LITERAL(CharT, "x\x58"), 2},
-        // test the boundaries of the intervals defined in n4885 [format.str.std]/11
+
+        // test the boundaries of the intervals defined in n4885 [format.string.std]/11
         {TYPED_LITERAL(CharT, "\u10ff\x58"), 2},
         {TYPED_LITERAL(CharT, "\u1100\x58"), 3},
         {TYPED_LITERAL(CharT, "\u115f\x58"), 3},
@@ -74,7 +75,9 @@ void test_width_estimation() {
         {TYPED_LITERAL(CharT, "\ud7a3\x58"), 3},
         {TYPED_LITERAL(CharT, "\ud7a4\x58"), 2},
         {TYPED_LITERAL(CharT, "\ud7ff\x58"), 2},
+
         // skip over the surrogate pair range (\ud800-\udfff)
+
         {TYPED_LITERAL(CharT, "\ue000\x58"), 2},
         {TYPED_LITERAL(CharT, "\uf8ff\x58"), 2},
         {TYPED_LITERAL(CharT, "\uf900\x58"), 3},
@@ -114,9 +117,9 @@ void test_width_estimation() {
         {TYPED_LITERAL(CharT, "\U0003fffe\x58"), 2},
         {TYPED_LITERAL(CharT, "\U0010ffff\x58"), 2},
 
-        // Pick a "short" and "long" codepoints (\u2000 and \ufe40), then form
-        // all permutations of 3-codepoint prefixes. This gives us coverage of
-        // all transitions (e.g. short-to-long, long-to-long).
+        // Pick "short" and "long" codepoints (\u2000 and \ufe40), then form all permutations of
+        // 3-codepoint prefixes with the same fixed delimiter as above. This gives us coverage of
+        // all adjacent pairings (short/short, short/long, long/short, long/long).
         {TYPED_LITERAL(CharT, "\u2000\u2000\u2000\x58"), 4},
         {TYPED_LITERAL(CharT, "\u2000\u2000\ufe40\x58"), 5},
         {TYPED_LITERAL(CharT, "\u2000\ufe40\u2000\x58"), 5},