From 6c46855590efbb062fc9a87e176d765326597ab1 Mon Sep 17 00:00:00 2001 From: David Puglielli Date: Mon, 19 Nov 2018 14:10:08 -0800 Subject: [PATCH 1/6] Added performance improvements to utf-16 conversions --- source/shared/core_stmt.cpp | 21 +- source/shared/core_util.cpp | 15 +- source/shared/globalization.h | 2 + source/shared/localization.hpp | 6 +- source/shared/localizationimpl.cpp | 309 +++++++++++++++++++++++++++++ 5 files changed, 338 insertions(+), 15 deletions(-) diff --git a/source/shared/core_stmt.cpp b/source/shared/core_stmt.cpp index ddb1b981a..2fc5b96e8 100644 --- a/source/shared/core_stmt.cpp +++ b/source/shared/core_stmt.cpp @@ -1814,22 +1814,22 @@ bool convert_input_param_to_utf16( _In_ zval* input_param_z, _Inout_ zval* conve std::size_t buffer_len = Z_STRLEN_P( input_param_z ); int wchar_size; - if (buffer_len > INT_MAX) - { - LOG(SEV_ERROR, "Convert input parameter to utf16: buffer length exceeded."); - throw core::CoreException(); - } + if (buffer_len > INT_MAX) + { + LOG(SEV_ERROR, "Convert input parameter to utf16: buffer length exceeded."); + throw core::CoreException(); + } // if the string is empty, then just return that the conversion succeeded as // MultiByteToWideChar will "fail" on an empty string. 
if( buffer_len == 0 ) { - core::sqlsrv_zval_stringl( converted_param_z, "", 0 ); + core::sqlsrv_zval_stringl( converted_param_z, "", 0 ); return true; } // if the parameter is an input parameter, calc the size of the necessary buffer from the length of the string #ifndef _WIN32 - wchar_size = SystemLocale::ToUtf16Strict( CP_UTF8, reinterpret_cast( buffer ), static_cast( buffer_len ), NULL, 0 ); + wchar_size = buffer_len;//SystemLocale::ToUtf16Strict( CP_UTF8, reinterpret_cast( buffer ), static_cast( buffer_len ), NULL, 0 ); #else wchar_size = MultiByteToWideChar( CP_UTF8, MB_ERR_INVALID_CHARS, reinterpret_cast( buffer ), static_cast( buffer_len ), NULL, 0 ); #endif // !_WIN32 @@ -1842,7 +1842,7 @@ bool convert_input_param_to_utf16( _In_ zval* input_param_z, _Inout_ zval* conve wbuffer = reinterpret_cast( sqlsrv_malloc( (wchar_size + 1) * sizeof( SQLWCHAR ) )); // convert the utf-8 string to a wchar string in the new buffer #ifndef _WIN32 - int r = SystemLocale::ToUtf16Strict( CP_UTF8, reinterpret_cast( buffer ), static_cast( buffer_len ), wbuffer, wchar_size ); + int r = SystemLocale::ToUtf16( CP_UTF8, reinterpret_cast( buffer ), static_cast( buffer_len ), wbuffer, wchar_size ); #else int r = MultiByteToWideChar( CP_UTF8, MB_ERR_INVALID_CHARS, reinterpret_cast( buffer ), static_cast( buffer_len ), wbuffer, wchar_size ); #endif // !_WIN32 @@ -1850,9 +1850,10 @@ bool convert_input_param_to_utf16( _In_ zval* input_param_z, _Inout_ zval* conve if( r == 0 ) { return false; } + wchar_size = r; // null terminate the string, set the size within the zval, and return success - wbuffer[wchar_size] = L'\0'; + wbuffer[ wchar_size ] = L'\0'; core::sqlsrv_zval_stringl( converted_param_z, reinterpret_cast( wbuffer.get() ), wchar_size * sizeof( SQLWCHAR ) ); sqlsrv_free(wbuffer); wbuffer.transferred(); @@ -2214,7 +2215,7 @@ void get_field_as_string( _Inout_ sqlsrv_stmt* stmt, _In_ SQLUSMALLINT field_ind { SQLRETURN r; SQLSMALLINT c_type; - SQLLEN sql_field_type = 0; + SQLSMALLINT 
sql_field_type = 0; SQLSMALLINT extra = 0; SQLLEN field_len_temp = 0; SQLLEN sql_display_size = 0; diff --git a/source/shared/core_util.cpp b/source/shared/core_util.cpp index ca097a24b..33089e871 100644 --- a/source/shared/core_util.cpp +++ b/source/shared/core_util.cpp @@ -148,7 +148,8 @@ bool convert_string_from_utf16( _In_ SQLSRV_ENCODING encoding, _In_reads_bytes_( // calculate the number of characters needed #ifndef _WIN32 - cchOutLen = SystemLocale::FromUtf16Strict( encoding, inString, cchInLen, NULL, 0 ); + //~ cchOutLen = SystemLocale::FromUtf16Strict( encoding, inString, cchInLen, NULL, 0 ); + cchOutLen = 4*cchInLen; #else cchOutLen = WideCharToMultiByte( encoding, flags, inString, cchInLen, @@ -161,6 +162,7 @@ bool convert_string_from_utf16( _In_ SQLSRV_ENCODING encoding, _In_reads_bytes_( // Create a buffer to fit the encoded string char* newString = reinterpret_cast( sqlsrv_malloc( cchOutLen + 1 /* NULL char*/ )); + memset(newString, '\0', cchOutLen+1); #ifndef _WIN32 int rc = SystemLocale::FromUtf16( encoding, inString, cchInLen, newString, static_cast(cchOutLen)); @@ -172,9 +174,14 @@ bool convert_string_from_utf16( _In_ SQLSRV_ENCODING encoding, _In_reads_bytes_( sqlsrv_free( newString ); return false; } - - *outString = newString; - newString[cchOutLen] = '\0'; // null terminate the encoded string + char* newString2 = reinterpret_cast( sqlsrv_malloc( rc + 1 /* NULL char*/ )); + memset(newString2, '\0', rc+1); + memcpy(newString2, newString, rc+1); + sqlsrv_free( newString ); + + *outString = newString2; + newString2[rc] = '\0'; // null terminate the encoded string + cchOutLen = rc; return true; } diff --git a/source/shared/globalization.h b/source/shared/globalization.h index 98619d61a..fb9eb7ab2 100644 --- a/source/shared/globalization.h +++ b/source/shared/globalization.h @@ -261,6 +261,8 @@ class EncodingConverter return 0; } } + //if a shift sequence is encountered, we need to advance output buffer + iconv_ret = iconv( m_pCvtCache->GetIConv(), 
NULL, NULL, &dest.m_pBytes, &dest.m_nBytesLeft ); } return cchDest - (dest.m_nBytesLeft / sizeof(DestType)); diff --git a/source/shared/localization.hpp b/source/shared/localization.hpp index 79bd860e2..3a49d4602 100644 --- a/source/shared/localization.hpp +++ b/source/shared/localization.hpp @@ -169,8 +169,12 @@ class SystemLocale static size_t FromUtf16Strict(UINT destCodePage, const WCHAR * src, SSIZE_T cchSrc, __out_ecount_opt(cchDest) char * dest, size_t cchDest, bool * pHasDataLoss = NULL, DWORD * pErrorCode = NULL); + // CP1252 to UTF16 conversion which does not involve iconv + static size_t CP1252ToUtf16( const char *src, SSIZE_T cchSrc, WCHAR *dest, size_t cchDest, DWORD *pErrorCode ); - + // UTF8/16 conversion which does not involve iconv + static size_t Utf8To16( const char *src, SSIZE_T cchSrc, WCHAR *dest, size_t cchDest, DWORD *pErrorCode ); + static size_t Utf8From16( const WCHAR *src, SSIZE_T cchSrc, char *dest, size_t cchDest, DWORD *pErrorCode ); // ----------------------------------------------------------------------- // Public Member Functions diff --git a/source/shared/localizationimpl.cpp b/source/shared/localizationimpl.cpp index 669462abc..afc6c2e0b 100644 --- a/source/shared/localizationimpl.cpp +++ b/source/shared/localizationimpl.cpp @@ -336,9 +336,174 @@ const SystemLocale & SystemLocale::Singleton() return s_Default; } + +// Convert CP1252 to UTF-16 without requiring iconv or taking a lock. +// This is trivial because, except for the 80-9F range, CP1252 bytes +// directly map to the corresponding UTF-16 codepoint. 
+size_t SystemLocale::CP1252ToUtf16( const char *src, SSIZE_T cchSrc, WCHAR *dest, size_t cchDest, DWORD *pErrorCode ) +{ + const static WCHAR s_1252Map[] = + { + 0x20AC, 0x003F, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x003F, 0x017D, 0x003F, + 0x003F, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x003F, 0x017E, 0x0178 + }; + const unsigned char *usrc = reinterpret_cast(src); + const unsigned char *srcEnd = usrc + cchSrc; + const WCHAR *destEnd = dest + cchDest; + + while(usrc < srcEnd && dest < destEnd) + { + DWORD ucode = *usrc++; + *dest++ = (ucode <= 127 || ucode >= 160) ? ucode : s_1252Map[ucode - 128]; + } + pErrorCode && (*pErrorCode = (dest == destEnd && usrc != srcEnd) ? ERROR_INSUFFICIENT_BUFFER : ERROR_SUCCESS); + return cchDest - (destEnd - dest); +} + +// Convert UTF-8 to UTF-16 without requiring iconv or taking a lock. +// 0abcdefg -> 0abcdefg 00000000 +// 110abcde 10fghijk -> defghijk 00000abc +// 1110abcd 10efghij 10klmnop -> ijklmnop abcdefgh +// 11110abc 10defghi 10jklmno 10pqrstu -> cdfghijk 110110ab nopqrstu 11011lm +size_t SystemLocale::Utf8To16( const char *src, SSIZE_T cchSrc, WCHAR *dest, size_t cchDest, DWORD *pErrorCode ) +{ + const unsigned char *usrc = reinterpret_cast(src); + const unsigned char *srcEnd = usrc + cchSrc; + const WCHAR *destEnd = dest + cchDest; + DWORD dummyError; + if (!pErrorCode) + { + pErrorCode = &dummyError; + } + *pErrorCode = 0; + + while(usrc < srcEnd && dest < destEnd) + { + DWORD ucode = *usrc++; + if(ucode <= 127) // Most common case for ASCII + { + *dest++ = ucode; + } + else if(ucode < 0xC0) // unexpected trailing byte 10xxxxxx + { + goto Invalid; + } + else if(ucode < 0xE0) // 110abcde 10fghijk + { + if (usrc >= srcEnd || *usrc < 0x80 || *usrc > 0xBF || + (*dest = (ucode & 0x1F)<<6 | (*usrc++ & 0x3F)) < 0x80) + { + *dest = 0xFFFD; + } + dest++; + } + else if(ucode < 0xF0) // 1110abcd 10efghij 10klmnop + { + 
if (usrc >= srcEnd) + { + goto Invalid; + } + DWORD c1 = *usrc; + if (c1 < 0x80 || c1 > 0xBF) + { + goto Invalid; + } + usrc++; + if (usrc >= srcEnd) + { + goto Invalid; + } + DWORD c2 = *usrc; + if (c2 < 0x80 || c2 > 0xBF) + { + goto Invalid; + } + usrc++; + ucode = (ucode&15)<<12 | (c1&0x3F)<<6 | (c2&0x3F); + if (ucode < 0x800 || ucode >= 0xD800 && ucode <= 0xDFFF) + { + goto Invalid; + } + *dest++ = ucode; + } + else if(ucode < 0xF8) // 11110abc 10defghi 10jklmno 10pqrstu + { + if (usrc >= srcEnd) + { + goto Invalid; + } + DWORD c1 = *usrc; + if (c1 < 0x80 || c1 > 0xBF) + { + goto Invalid; + } + usrc++; + if (usrc >= srcEnd) + { + goto Invalid; + } + DWORD c2 = *usrc; + if (c2 < 0x80 || c2 > 0xBF) + { + goto Invalid; + } + usrc++; + if (usrc >= srcEnd) + { + goto Invalid; + } + DWORD c3 = *usrc; + if (c3 < 0x80 || c3 > 0xBF) + { + goto Invalid; + } + usrc++; + ucode = (ucode&7)<<18 | (c1&0x3F)<<12 | (c2&0x3F)<<6 | (c3&0x3F); + + if (ucode < 0x10000 // overlong encoding + || ucode > 0x10FFFF // exceeds Unicode range + || ucode >= 0xD800 && ucode <= 0xDFFF) // surrogate pairs + { + goto Invalid; + } + if (dest >= destEnd - 1) + { + *pErrorCode = ERROR_INSUFFICIENT_BUFFER; + return cchDest - (destEnd - dest); + } + ucode -= 0x10000; + // Lead surrogate + *dest++ = 0xD800 + (ucode >> 10); + // Trail surrogate + *dest++ = 0xDC00 + (ucode & 0x3FF); + } + else // invalid + { + Invalid: + *dest++ = 0xFFFD; + } + } + if (!*pErrorCode) + { + *pErrorCode = (dest == destEnd && usrc != srcEnd) ? ERROR_INSUFFICIENT_BUFFER : ERROR_SUCCESS; + } + return cchDest - (destEnd - dest); +} + size_t SystemLocale::ToUtf16( UINT srcCodePage, const char * src, SSIZE_T cchSrc, WCHAR * dest, size_t cchDest, DWORD * pErrorCode ) { srcCodePage = ExpandSpecialCP( srcCodePage ); + if ( dest ) + { + if ( srcCodePage == CP_UTF8 ) + { + return SystemLocale::Utf8To16( src, cchSrc < 0 ? 
(1+strlen(src)) : cchSrc, dest, cchDest, pErrorCode ); + } + else if ( srcCodePage == 1252 ) + { + return SystemLocale::CP1252ToUtf16( src, cchSrc < 0 ? (1+strlen(src)) : cchSrc, dest, cchDest, pErrorCode ); + } + } EncodingConverter cvt( CP_UTF16, srcCodePage ); if ( !cvt.Initialize() ) { @@ -366,9 +531,153 @@ size_t SystemLocale::ToUtf16Strict( UINT srcCodePage, const char * src, SSIZE_T return cvt.Convert( dest, cchDest, src, cchSrcActual, true, &hasLoss, pErrorCode ); } +size_t SystemLocale::Utf8From16( const WCHAR *src, SSIZE_T cchSrc, char *dest, size_t cchDest, DWORD *pErrorCode ) +{ + const WCHAR *srcEnd = src + cchSrc; + char *destEnd = dest + cchDest; + DWORD dummyError; + if (!pErrorCode) + { + pErrorCode = &dummyError; + } + *pErrorCode = 0; + + // null dest is a special mode to calculate the output size required. + if (!dest) + { + size_t cbOut = 0; + while (src < srcEnd) + { + DWORD wch = *src++; + if (wch < 128) // most common case. + { + cbOut++; + } + else if (wch < 0x800) // 127 to 2047: 2 bytes + { + cbOut += 2; + } + else if (wch < 0xD800 || wch > 0xDFFF) // 2048 to 55295 and 57344 to 65535: 3 bytes + { + cbOut += 3; + } + else if (wch < 0xDC00) // 65536 to end of Unicode: 4 bytes + { + if (src >= srcEnd) + { + cbOut += 3; // lone surrogate at end + } + else if (*src < 0xDC00 || *src > 0xDFFF) + { + cbOut += 3; // low surrogate not followed by high + } + else + { + cbOut += 4; + } + } + else // unexpected trail surrogate + { + cbOut += 3; + } + } + return cbOut; + } + while ( src < srcEnd && dest < destEnd ) + { + DWORD wch = *src++; + if (wch < 128) // most common case. 
+ { + *dest++ = wch; + } + else if (wch < 0x800) // 127 to 2047: 2 bytes + { + if (destEnd - dest < 2) + { + *pErrorCode = ERROR_INSUFFICIENT_BUFFER; + return 0; + } + *dest++ = 0xC0 | (wch >> 6); + *dest++ = 0x80 | (wch & 0x3F); + } + else if (wch < 0xD800 || wch > 0xDFFF) // 2048 to 55295 and 57344 to 65535: 3 bytes + { + if (destEnd - dest < 3) + { + *pErrorCode = ERROR_INSUFFICIENT_BUFFER; + return 0; + } + *dest++ = 0xE0 | (wch >> 12); + *dest++ = 0x80 | (wch >> 6)&0x3F; + *dest++ = 0x80 | (wch &0x3F); + } + else if (wch < 0xDC00) // 65536 to end of Unicode: 4 bytes + { + if (src >= srcEnd) + { + *pErrorCode = ERROR_NO_UNICODE_TRANSLATION; // lone surrogate at end + if (destEnd - dest < 3) + { + *pErrorCode = ERROR_INSUFFICIENT_BUFFER; + return 0; + } + *dest++ = 0xEF; + *dest++ = 0xBF; + *dest++ = 0xBD; + continue; + } + if (*src < 0xDC00 || *src > 0xDFFF) + { + // low surrogate not followed by high + if (destEnd - dest < 3) + { + *pErrorCode = ERROR_INSUFFICIENT_BUFFER; + return 0; + } + *dest++ = 0xEF; + *dest++ = 0xBF; + *dest++ = 0xBD; + continue; + } + wch = 0x10000 + ((wch - 0xD800)<<10) + *src++ - 0xDC00; + if (destEnd - dest < 4) + { + *pErrorCode = ERROR_INSUFFICIENT_BUFFER; + return 0; + } + *dest++ = 0xF0 | (wch >> 18); + *dest++ = 0x80 | (wch >>12)&0x3F; + *dest++ = 0x80 | (wch >> 6)&0x3F; + *dest++ = 0x80 | wch&0x3F; + } + else // unexpected trail surrogate + { + *pErrorCode = ERROR_NO_UNICODE_TRANSLATION; // lone surrogate at end + if (destEnd - dest < 3) + { + *pErrorCode = ERROR_INSUFFICIENT_BUFFER; + return 0; + } + *dest++ = 0xEF; + *dest++ = 0xBF; + *dest++ = 0xBD; + } + } + if (!*pErrorCode) + { + *pErrorCode = (dest == destEnd && src != srcEnd) ? ERROR_INSUFFICIENT_BUFFER : ERROR_SUCCESS; + } + return *pErrorCode == ERROR_INSUFFICIENT_BUFFER ? 
0 : cchDest - (destEnd - dest); +} + size_t SystemLocale::FromUtf16( UINT destCodePage, const WCHAR * src, SSIZE_T cchSrc, char * dest, size_t cchDest, bool * pHasDataLoss, DWORD * pErrorCode ) { destCodePage = ExpandSpecialCP( destCodePage ); + if ( destCodePage == CP_UTF8 ) + { + pHasDataLoss && (*pHasDataLoss = 0); + return SystemLocale::Utf8From16( src, cchSrc < 0 ? 1+mplat_wcslen(src) : cchSrc, dest, cchDest, pErrorCode ); + } EncodingConverter cvt( destCodePage, CP_UTF16 ); if ( !cvt.Initialize() ) { From 992b05f4073a853a5464c9700cd6a9288fc846a4 Mon Sep 17 00:00:00 2001 From: David Puglielli Date: Wed, 21 Nov 2018 14:24:32 -0800 Subject: [PATCH 2/6] Modified conversion functions to be strict about failed Unicode translations --- source/shared/core_stmt.cpp | 4 +- source/shared/core_util.cpp | 3 +- source/shared/localization.hpp | 2 + source/shared/localizationimpl.cpp | 271 +++++++++++++++++++++++++++++ 4 files changed, 276 insertions(+), 4 deletions(-) diff --git a/source/shared/core_stmt.cpp b/source/shared/core_stmt.cpp index 2fc5b96e8..e9158af6d 100644 --- a/source/shared/core_stmt.cpp +++ b/source/shared/core_stmt.cpp @@ -1829,7 +1829,7 @@ bool convert_input_param_to_utf16( _In_ zval* input_param_z, _Inout_ zval* conve // if the parameter is an input parameter, calc the size of the necessary buffer from the length of the string #ifndef _WIN32 - wchar_size = buffer_len;//SystemLocale::ToUtf16Strict( CP_UTF8, reinterpret_cast( buffer ), static_cast( buffer_len ), NULL, 0 ); + wchar_size = buffer_len; #else wchar_size = MultiByteToWideChar( CP_UTF8, MB_ERR_INVALID_CHARS, reinterpret_cast( buffer ), static_cast( buffer_len ), NULL, 0 ); #endif // !_WIN32 @@ -1842,7 +1842,7 @@ bool convert_input_param_to_utf16( _In_ zval* input_param_z, _Inout_ zval* conve wbuffer = reinterpret_cast( sqlsrv_malloc( (wchar_size + 1) * sizeof( SQLWCHAR ) )); // convert the utf-8 string to a wchar string in the new buffer #ifndef _WIN32 - int r = SystemLocale::ToUtf16( CP_UTF8, 
reinterpret_cast( buffer ), static_cast( buffer_len ), wbuffer, wchar_size ); + int r = SystemLocale::ToUtf16Strict( CP_UTF8, reinterpret_cast( buffer ), static_cast( buffer_len ), wbuffer, wchar_size ); #else int r = MultiByteToWideChar( CP_UTF8, MB_ERR_INVALID_CHARS, reinterpret_cast( buffer ), static_cast( buffer_len ), wbuffer, wchar_size ); #endif // !_WIN32 diff --git a/source/shared/core_util.cpp b/source/shared/core_util.cpp index 33089e871..005543af8 100644 --- a/source/shared/core_util.cpp +++ b/source/shared/core_util.cpp @@ -148,7 +148,6 @@ bool convert_string_from_utf16( _In_ SQLSRV_ENCODING encoding, _In_reads_bytes_( // calculate the number of characters needed #ifndef _WIN32 - //~ cchOutLen = SystemLocale::FromUtf16Strict( encoding, inString, cchInLen, NULL, 0 ); cchOutLen = 4*cchInLen; #else cchOutLen = WideCharToMultiByte( encoding, flags, @@ -165,7 +164,7 @@ bool convert_string_from_utf16( _In_ SQLSRV_ENCODING encoding, _In_reads_bytes_( memset(newString, '\0', cchOutLen+1); #ifndef _WIN32 - int rc = SystemLocale::FromUtf16( encoding, inString, cchInLen, newString, static_cast(cchOutLen)); + int rc = SystemLocale::FromUtf16Strict( encoding, inString, cchInLen, newString, static_cast(cchOutLen)); #else int rc = WideCharToMultiByte( encoding, flags, inString, cchInLen, newString, static_cast(cchOutLen), NULL, NULL ); #endif // !_WIN32 diff --git a/source/shared/localization.hpp b/source/shared/localization.hpp index 3a49d4602..41518471e 100644 --- a/source/shared/localization.hpp +++ b/source/shared/localization.hpp @@ -175,6 +175,8 @@ class SystemLocale // UTF8/16 conversion which does not involve iconv static size_t Utf8To16( const char *src, SSIZE_T cchSrc, WCHAR *dest, size_t cchDest, DWORD *pErrorCode ); static size_t Utf8From16( const WCHAR *src, SSIZE_T cchSrc, char *dest, size_t cchDest, DWORD *pErrorCode ); + static size_t Utf8To16Strict( const char *src, SSIZE_T cchSrc, WCHAR *dest, size_t cchDest, DWORD *pErrorCode ); + static size_t 
Utf8From16Strict( const WCHAR *src, SSIZE_T cchSrc, char *dest, size_t cchDest, DWORD *pErrorCode ); // ----------------------------------------------------------------------- // Public Member Functions diff --git a/source/shared/localizationimpl.cpp b/source/shared/localizationimpl.cpp index afc6c2e0b..22695d0d9 100644 --- a/source/shared/localizationimpl.cpp +++ b/source/shared/localizationimpl.cpp @@ -490,6 +490,132 @@ size_t SystemLocale::Utf8To16( const char *src, SSIZE_T cchSrc, WCHAR *dest, siz return cchDest - (destEnd - dest); } +size_t SystemLocale::Utf8To16Strict( const char *src, SSIZE_T cchSrc, WCHAR *dest, size_t cchDest, DWORD *pErrorCode ) +{ + const unsigned char *usrc = reinterpret_cast(src); + const unsigned char *srcEnd = usrc + cchSrc; + const WCHAR *destEnd = dest + cchDest; + DWORD dummyError; + if (!pErrorCode) + { + pErrorCode = &dummyError; + } + *pErrorCode = 0; + + while(usrc < srcEnd && dest < destEnd) + { + DWORD ucode = *usrc++; + if(ucode <= 127) // Most common case for ASCII + { + *dest++ = ucode; + } + else if(ucode < 0xC0) // unexpected trailing byte 10xxxxxx + { + goto Invalid; + } + else if(ucode < 0xE0) // 110abcde 10fghijk + { + if (usrc >= srcEnd || *usrc < 0x80 || *usrc > 0xBF || + (*dest = (ucode & 0x1F)<<6 | (*usrc++ & 0x3F)) < 0x80) + { + *dest = 0xFFFD; + } + dest++; + } + else if(ucode < 0xF0) // 1110abcd 10efghij 10klmnop + { + if (usrc >= srcEnd) + { + goto Invalid; + } + DWORD c1 = *usrc; + if (c1 < 0x80 || c1 > 0xBF) + { + goto Invalid; + } + usrc++; + if (usrc >= srcEnd) + { + goto Invalid; + } + DWORD c2 = *usrc; + if (c2 < 0x80 || c2 > 0xBF) + { + goto Invalid; + } + usrc++; + ucode = (ucode&15)<<12 | (c1&0x3F)<<6 | (c2&0x3F); + if (ucode < 0x800 || ucode >= 0xD800 && ucode <= 0xDFFF) + { + goto Invalid; + } + *dest++ = ucode; + } + else if(ucode < 0xF8) // 11110abc 10defghi 10jklmno 10pqrstu + { + if (usrc >= srcEnd) + { + goto Invalid; + } + DWORD c1 = *usrc; + if (c1 < 0x80 || c1 > 0xBF) + { + goto Invalid; + 
} + usrc++; + if (usrc >= srcEnd) + { + goto Invalid; + } + DWORD c2 = *usrc; + if (c2 < 0x80 || c2 > 0xBF) + { + goto Invalid; + } + usrc++; + if (usrc >= srcEnd) + { + goto Invalid; + } + DWORD c3 = *usrc; + if (c3 < 0x80 || c3 > 0xBF) + { + goto Invalid; + } + usrc++; + ucode = (ucode&7)<<18 | (c1&0x3F)<<12 | (c2&0x3F)<<6 | (c3&0x3F); + + if (ucode < 0x10000 // overlong encoding + || ucode > 0x10FFFF // exceeds Unicode range + || ucode >= 0xD800 && ucode <= 0xDFFF) // surrogate pairs + { + goto Invalid; + } + if (dest >= destEnd - 1) + { + *pErrorCode = ERROR_INSUFFICIENT_BUFFER; + return cchDest - (destEnd - dest); + } + ucode -= 0x10000; + // Lead surrogate + *dest++ = 0xD800 + (ucode >> 10); + // Trail surrogate + *dest++ = 0xDC00 + (ucode & 0x3FF); + } + else // invalid + { + Invalid: + *pErrorCode = ERROR_NO_UNICODE_TRANSLATION; + return 0 ; + } + } + if (!*pErrorCode) + { + *pErrorCode = (dest == destEnd && usrc != srcEnd) ? ERROR_INSUFFICIENT_BUFFER : ERROR_SUCCESS; + } + return cchDest - (destEnd - dest); +} + size_t SystemLocale::ToUtf16( UINT srcCodePage, const char * src, SSIZE_T cchSrc, WCHAR * dest, size_t cchDest, DWORD * pErrorCode ) { srcCodePage = ExpandSpecialCP( srcCodePage ); @@ -519,6 +645,17 @@ size_t SystemLocale::ToUtf16( UINT srcCodePage, const char * src, SSIZE_T cchSrc size_t SystemLocale::ToUtf16Strict( UINT srcCodePage, const char * src, SSIZE_T cchSrc, WCHAR * dest, size_t cchDest, DWORD * pErrorCode ) { srcCodePage = ExpandSpecialCP( srcCodePage ); + if ( dest ) + { + if ( srcCodePage == CP_UTF8 ) + { + return SystemLocale::Utf8To16Strict( src, cchSrc < 0 ? (1+strlen(src)) : cchSrc, dest, cchDest, pErrorCode ); + } + else if ( srcCodePage == 1252 ) + { + return SystemLocale::CP1252ToUtf16( src, cchSrc < 0 ? 
(1+strlen(src)) : cchSrc, dest, cchDest, pErrorCode ); + } + } EncodingConverter cvt( CP_UTF16, srcCodePage ); if ( !cvt.Initialize() ) { @@ -670,6 +807,135 @@ size_t SystemLocale::Utf8From16( const WCHAR *src, SSIZE_T cchSrc, char *dest, s return *pErrorCode == ERROR_INSUFFICIENT_BUFFER ? 0 : cchDest - (destEnd - dest); } +size_t SystemLocale::Utf8From16Strict( const WCHAR *src, SSIZE_T cchSrc, char *dest, size_t cchDest, DWORD *pErrorCode ) +{ + const WCHAR *srcEnd = src + cchSrc; + char *destEnd = dest + cchDest; + DWORD dummyError; + if (!pErrorCode) + { + pErrorCode = &dummyError; + } + *pErrorCode = 0; + + // null dest is a special mode to calculate the output size required. + if (!dest) + { + size_t cbOut = 0; + while (src < srcEnd) + { + DWORD wch = *src++; + if (wch < 128) // most common case. + { + cbOut++; + } + else if (wch < 0x800) // 127 to 2047: 2 bytes + { + cbOut += 2; + } + else if (wch < 0xD800 || wch > 0xDFFF) // 2048 to 55295 and 57344 to 65535: 3 bytes + { + cbOut += 3; + } + else if (wch < 0xDC00) // 65536 to end of Unicode: 4 bytes + { + if (src >= srcEnd) + { + cbOut += 3; // lone surrogate at end + } + else if (*src < 0xDC00 || *src > 0xDFFF) + { + cbOut += 3; // low surrogate not followed by high + } + else + { + cbOut += 4; + } + } + else // unexpected trail surrogate + { + cbOut += 3; + } + } + return cbOut; + } + while ( src < srcEnd && dest < destEnd ) + { + DWORD wch = *src++; + if (wch < 128) // most common case. 
+ { + *dest++ = wch; + } + else if (wch < 0x800) // 127 to 2047: 2 bytes + { + if (destEnd - dest < 2) + { + *pErrorCode = ERROR_INSUFFICIENT_BUFFER; + return 0; + } + *dest++ = 0xC0 | (wch >> 6); + *dest++ = 0x80 | (wch & 0x3F); + } + else if (wch < 0xD800 || wch > 0xDFFF) // 2048 to 55295 and 57344 to 65535: 3 bytes + { + if (destEnd - dest < 3) + { + *pErrorCode = ERROR_INSUFFICIENT_BUFFER; + return 0; + } + *dest++ = 0xE0 | (wch >> 12); + *dest++ = 0x80 | (wch >> 6)&0x3F; + *dest++ = 0x80 | (wch &0x3F); + } + else if (wch < 0xDC00) // 65536 to end of Unicode: 4 bytes + { + if (src >= srcEnd) + { + *pErrorCode = ERROR_NO_UNICODE_TRANSLATION; // lone surrogate at end + if (destEnd - dest < 3) + { + *pErrorCode = ERROR_INSUFFICIENT_BUFFER; + } + + return 0; + } + if (*src < 0xDC00 || *src > 0xDFFF) + { + // low surrogate not followed by high + if (destEnd - dest < 3) + { + *pErrorCode = ERROR_INSUFFICIENT_BUFFER; + } + return 0; + } + wch = 0x10000 + ((wch - 0xD800)<<10) + *src++ - 0xDC00; + if (destEnd - dest < 4) + { + *pErrorCode = ERROR_INSUFFICIENT_BUFFER; + return 0; + } + *dest++ = 0xF0 | (wch >> 18); + *dest++ = 0x80 | (wch >>12)&0x3F; + *dest++ = 0x80 | (wch >> 6)&0x3F; + *dest++ = 0x80 | wch&0x3F; + } + else // unexpected trail surrogate + { + *pErrorCode = ERROR_NO_UNICODE_TRANSLATION; // lone surrogate at end + if (destEnd - dest < 3) + { + *pErrorCode = ERROR_INSUFFICIENT_BUFFER; + } + return 0; + } + } + if (!*pErrorCode) + { + *pErrorCode = (dest == destEnd && src != srcEnd) ? ERROR_INSUFFICIENT_BUFFER : ERROR_SUCCESS; + } + return *pErrorCode == ERROR_INSUFFICIENT_BUFFER ? 
0 : cchDest - (destEnd - dest); +} + size_t SystemLocale::FromUtf16( UINT destCodePage, const WCHAR * src, SSIZE_T cchSrc, char * dest, size_t cchDest, bool * pHasDataLoss, DWORD * pErrorCode ) { destCodePage = ExpandSpecialCP( destCodePage ); @@ -693,6 +959,11 @@ size_t SystemLocale::FromUtf16( UINT destCodePage, const WCHAR * src, SSIZE_T cc size_t SystemLocale::FromUtf16Strict(UINT destCodePage, const WCHAR * src, SSIZE_T cchSrc, char * dest, size_t cchDest, bool * pHasDataLoss, DWORD * pErrorCode) { destCodePage = ExpandSpecialCP(destCodePage); + if ( destCodePage == CP_UTF8 ) + { + pHasDataLoss && (*pHasDataLoss = 0); + return SystemLocale::Utf8From16Strict( src, cchSrc < 0 ? 1+mplat_wcslen(src) : cchSrc, dest, cchDest, pErrorCode ); + } EncodingConverter cvt(destCodePage, CP_UTF16); if (!cvt.Initialize()) { From a701ea1b61f8bc65366d863fb5737e8d8bbc6af9 Mon Sep 17 00:00:00 2001 From: David Puglielli Date: Wed, 21 Nov 2018 15:24:10 -0800 Subject: [PATCH 3/6] Added an error code --- source/shared/localizationimpl.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/shared/localizationimpl.cpp b/source/shared/localizationimpl.cpp index 22695d0d9..de61f2c26 100644 --- a/source/shared/localizationimpl.cpp +++ b/source/shared/localizationimpl.cpp @@ -901,7 +901,7 @@ size_t SystemLocale::Utf8From16Strict( const WCHAR *src, SSIZE_T cchSrc, char *d } if (*src < 0xDC00 || *src > 0xDFFF) { - // low surrogate not followed by high + *pErrorCode = ERROR_NO_UNICODE_TRANSLATION; // low surrogate not followed by high if (destEnd - dest < 3) { *pErrorCode = ERROR_INSUFFICIENT_BUFFER; From 729bee27f7a6d8c6df934fbd328ad123613f06b2 Mon Sep 17 00:00:00 2001 From: David Puglielli Date: Mon, 26 Nov 2018 16:49:13 -0800 Subject: [PATCH 4/6] Fixed stream bugs --- source/shared/core_stmt.cpp | 252 ++++++++++++++--------------- source/shared/localizationimpl.cpp | 2 +- 2 files changed, 127 insertions(+), 127 deletions(-) diff --git a/source/shared/core_stmt.cpp 
b/source/shared/core_stmt.cpp index 2b6f46065..043809492 100644 --- a/source/shared/core_stmt.cpp +++ b/source/shared/core_stmt.cpp @@ -97,7 +97,7 @@ size_t calc_utf8_missing( _Inout_ sqlsrv_stmt* stmt, _In_reads_(buffer_end) cons bool check_for_next_stream_parameter( _Inout_ sqlsrv_stmt* stmt TSRMLS_DC ); bool convert_input_param_to_utf16( _In_ zval* input_param_z, _Inout_ zval* convert_param_z ); void core_get_field_common(_Inout_ sqlsrv_stmt* stmt, _In_ SQLUSMALLINT field_index, _Inout_ sqlsrv_phptype - sqlsrv_php_type, _Inout_updates_bytes_(*field_len) void*& field_value, _Inout_ SQLLEN* field_len TSRMLS_DC); + sqlsrv_php_type, _Inout_updates_bytes_(*field_len) void*& field_value, _Inout_ SQLLEN* field_len TSRMLS_DC); // returns the ODBC C type constant that matches the PHP type and encoding given SQLSMALLINT default_c_type( _Inout_ sqlsrv_stmt* stmt, _In_opt_ SQLULEN paramno, _In_ zval const* param_z, _In_ SQLSRV_ENCODING encoding TSRMLS_DC ); void default_sql_size_and_scale( _Inout_ sqlsrv_stmt* stmt, _In_opt_ unsigned int paramno, _In_ zval* param_z, _In_ SQLSRV_ENCODING encoding, @@ -110,7 +110,7 @@ void field_cache_dtor( _Inout_ zval* data_z ); void format_decimal_numbers(_In_ SQLSMALLINT decimals_digits, _In_ SQLSMALLINT field_scale, _Inout_updates_bytes_(*field_len) char*& field_value, _Inout_ SQLLEN* field_len); void finalize_output_parameters( _Inout_ sqlsrv_stmt* stmt TSRMLS_DC ); void get_field_as_string( _Inout_ sqlsrv_stmt* stmt, _In_ SQLUSMALLINT field_index, _Inout_ sqlsrv_phptype sqlsrv_php_type, - _Inout_updates_bytes_(*field_len) void*& field_value, _Inout_ SQLLEN* field_len TSRMLS_DC ); + _Inout_updates_bytes_(*field_len) void*& field_value, _Inout_ SQLLEN* field_len TSRMLS_DC ); stmt_option const* get_stmt_option( sqlsrv_conn const* conn, _In_ zend_ulong key, _In_ const stmt_option stmt_opts[] TSRMLS_DC ); bool is_valid_sqlsrv_phptype( _In_ sqlsrv_phptype type ); // assure there is enough space for the output parameter string @@ -149,7 
+149,7 @@ sqlsrv_stmt::sqlsrv_stmt( _In_ sqlsrv_conn* c, _In_ SQLHANDLE handle, _In_ error current_stream( NULL, SQLSRV_ENCODING_DEFAULT ), current_stream_read( 0 ) { - ZVAL_UNDEF( &active_stream ); + ZVAL_UNDEF( &active_stream ); // initialize the input string parameters array (which holds zvals) core::sqlsrv_array_init( *conn, ¶m_input_strings TSRMLS_CC ); @@ -261,7 +261,7 @@ void sqlsrv_stmt::new_result_set( TSRMLS_D ) sqlsrv_stmt* core_sqlsrv_create_stmt( _Inout_ sqlsrv_conn* conn, _In_ driver_stmt_factory stmt_factory, _In_opt_ HashTable* options_ht, _In_opt_ const stmt_option valid_stmt_opts[], _In_ error_callback const err, _In_opt_ void* driver TSRMLS_DC ) { - sqlsrv_malloc_auto_ptr stmt; + sqlsrv_malloc_auto_ptr stmt; SQLHANDLE stmt_h = SQL_NULL_HANDLE; sqlsrv_stmt* return_stmt = NULL; @@ -279,26 +279,26 @@ sqlsrv_stmt* core_sqlsrv_create_stmt( _Inout_ sqlsrv_conn* conn, _In_ driver_stm // process the options array given to core_sqlsrv_prepare. if( options_ht && zend_hash_num_elements( options_ht ) > 0 && valid_stmt_opts ) { - zend_ulong index = -1; - zend_string *key = NULL; - zval* value_z = NULL; + zend_ulong index = -1; + zend_string *key = NULL; + zval* value_z = NULL; - ZEND_HASH_FOREACH_KEY_VAL( options_ht, index, key, value_z ) { + ZEND_HASH_FOREACH_KEY_VAL( options_ht, index, key, value_z ) { - int type = key ? HASH_KEY_IS_STRING : HASH_KEY_IS_LONG; + int type = key ? HASH_KEY_IS_STRING : HASH_KEY_IS_LONG; - // The driver layer should ensure a valid key. - DEBUG_SQLSRV_ASSERT(( type == HASH_KEY_IS_LONG ), "allocate_stmt: Invalid statment option key provided." ); + // The driver layer should ensure a valid key. + DEBUG_SQLSRV_ASSERT(( type == HASH_KEY_IS_LONG ), "allocate_stmt: Invalid statment option key provided." 
); - const stmt_option* stmt_opt = get_stmt_option( stmt->conn, index, valid_stmt_opts TSRMLS_CC ); + const stmt_option* stmt_opt = get_stmt_option( stmt->conn, index, valid_stmt_opts TSRMLS_CC ); - // if the key didn't match, then return the error to the script. - // The driver layer should ensure that the key is valid. - DEBUG_SQLSRV_ASSERT( stmt_opt != NULL, "allocate_stmt: unexpected null value for statement option." ); + // if the key didn't match, then return the error to the script. + // The driver layer should ensure that the key is valid. + DEBUG_SQLSRV_ASSERT( stmt_opt != NULL, "allocate_stmt: unexpected null value for statement option." ); - // perform the actions the statement option needs done. - (*stmt_opt->func)( stmt, stmt_opt, value_z TSRMLS_CC ); - } ZEND_HASH_FOREACH_END(); + // perform the actions the statement option needs done. + (*stmt_opt->func)( stmt, stmt_opt, value_z TSRMLS_CC ); + } ZEND_HASH_FOREACH_END(); } return_stmt = stmt; @@ -494,7 +494,7 @@ void core_sqlsrv_bind_param( _Inout_ sqlsrv_stmt* stmt, _In_ SQLUSMALLINT param_ ind_ptr = buffer_len; if( direction != SQL_PARAM_INPUT ){ // save the parameter so that 1) the buffer doesn't go away, and 2) we can set it to NULL if returned - sqlsrv_output_param output_param( param_ref, static_cast( param_num ), zval_was_bool, php_out_type); + sqlsrv_output_param output_param( param_ref, static_cast( param_num ), zval_was_bool, php_out_type); save_output_param_for_later( stmt, output_param TSRMLS_CC ); } } @@ -502,11 +502,11 @@ void core_sqlsrv_bind_param( _Inout_ sqlsrv_stmt* stmt, _In_ SQLUSMALLINT param_ case IS_DOUBLE: { buffer = ¶m_z->value; - buffer_len = sizeof( Z_DVAL_P( param_z )); + buffer_len = sizeof( Z_DVAL_P( param_z )); ind_ptr = buffer_len; if( direction != SQL_PARAM_INPUT ){ // save the parameter so that 1) the buffer doesn't go away, and 2) we can set it to NULL if returned - sqlsrv_output_param output_param( param_ref, static_cast( param_num ), zval_was_bool, php_out_type); 
+ sqlsrv_output_param output_param( param_ref, static_cast( param_num ), zval_was_bool, php_out_type); save_output_param_for_later( stmt, output_param TSRMLS_CC ); } } @@ -620,10 +620,10 @@ void core_sqlsrv_bind_param( _Inout_ sqlsrv_stmt* stmt, _In_ SQLUSMALLINT param_ zval buffer_z; zval format_z; zval params[1]; - ZVAL_UNDEF( &function_z ); - ZVAL_UNDEF( &buffer_z ); - ZVAL_UNDEF( &format_z ); - ZVAL_UNDEF( params ); + ZVAL_UNDEF( &function_z ); + ZVAL_UNDEF( &buffer_z ); + ZVAL_UNDEF( &format_z ); + ZVAL_UNDEF( params ); bool valid_class_name_found = false; @@ -652,23 +652,23 @@ void core_sqlsrv_bind_param( _Inout_ sqlsrv_stmt* stmt, _In_ SQLUSMALLINT param_ // meaning there is too much information in the character string. If the user specifies the 'datetimeoffset' // sql type, it lacks the timezone. if( sql_type == SQL_SS_TIMESTAMPOFFSET ){ - core::sqlsrv_zval_stringl( &format_z, const_cast( DateTime::DATETIMEOFFSET_FORMAT ), + core::sqlsrv_zval_stringl( &format_z, const_cast( DateTime::DATETIMEOFFSET_FORMAT ), DateTime::DATETIMEOFFSET_FORMAT_LEN ); } else if( sql_type == SQL_TYPE_DATE ){ - core::sqlsrv_zval_stringl( &format_z, const_cast( DateTime::DATE_FORMAT ), DateTime::DATE_FORMAT_LEN ); + core::sqlsrv_zval_stringl( &format_z, const_cast( DateTime::DATE_FORMAT ), DateTime::DATE_FORMAT_LEN ); } else{ - core::sqlsrv_zval_stringl( &format_z, const_cast( DateTime::DATETIME_FORMAT ), DateTime::DATETIME_FORMAT_LEN ); + core::sqlsrv_zval_stringl( &format_z, const_cast( DateTime::DATETIME_FORMAT ), DateTime::DATETIME_FORMAT_LEN ); } // call the DateTime::format member function to convert the object to a string that SQL Server understands - core::sqlsrv_zval_stringl( &function_z, "format", sizeof( "format" ) - 1 ); + core::sqlsrv_zval_stringl( &function_z, "format", sizeof( "format" ) - 1 ); params[0] = format_z; // This is equivalent to the PHP code: $param_z->format( $format_z ); where param_z is the // DateTime object and $format_z is the format string. 
int zr = call_user_function( EG( function_table ), param_z, &function_z, &buffer_z, 1, params TSRMLS_CC ); - zend_string_release( Z_STR( format_z )); - zend_string_release( Z_STR( function_z )); + zend_string_release( Z_STR( format_z )); + zend_string_release( Z_STR( function_z )); CHECK_CUSTOM_ERROR( zr == FAILURE, stmt, SQLSRV_ERROR_INVALID_PARAMETER_PHPTYPE, param_num + 1 ){ throw core::CoreException(); } @@ -695,7 +695,7 @@ void core_sqlsrv_bind_param( _Inout_ sqlsrv_stmt* stmt, _In_ SQLUSMALLINT param_ } core::SQLBindParameter( stmt, param_num + 1, direction, - c_type, sql_type, column_size, decimal_digits, buffer, buffer_len, &ind_ptr TSRMLS_CC ); + c_type, sql_type, column_size, decimal_digits, buffer, buffer_len, &ind_ptr TSRMLS_CC ); if ( stmt->conn->ce_option.enabled && sql_type == SQL_TYPE_TIMESTAMP ) { if( decimal_digits == 3 ) @@ -884,14 +884,14 @@ field_meta_data* core_sqlsrv_field_metadata( _Inout_ sqlsrv_stmt* stmt, _In_ SQL meta_data = new ( sqlsrv_malloc( sizeof( field_meta_data ))) field_meta_data(); field_name_temp = static_cast( sqlsrv_malloc( ( SS_MAXCOLNAMELEN + 1 ) * sizeof( SQLWCHAR ) )); SQLSRV_ENCODING encoding = ( (stmt->encoding() == SQLSRV_ENCODING_DEFAULT ) ? 
stmt->conn->encoding() : stmt->encoding()); - try{ + try{ core::SQLDescribeColW( stmt, colno + 1, field_name_temp, SS_MAXCOLNAMELEN + 1, &field_len_temp, &( meta_data->field_type ), & ( meta_data->field_size ), & ( meta_data->field_scale ), &( meta_data->field_is_nullable ) TSRMLS_CC ); - } - catch ( core::CoreException& e ) { - throw e; - } + } + catch ( core::CoreException& e ) { + throw e; + } bool converted = convert_string_from_utf16( encoding, field_name_temp, field_len_temp, ( char** ) &( meta_data->field_name ), field_name_len ); @@ -946,50 +946,50 @@ field_meta_data* core_sqlsrv_field_metadata( _Inout_ sqlsrv_stmt* stmt, _In_ SQL // Nothing, excpetion thrown if an error occurs void core_sqlsrv_get_field( _Inout_ sqlsrv_stmt* stmt, _In_ SQLUSMALLINT field_index, _In_ sqlsrv_phptype sqlsrv_php_type_in, _In_ bool prefer_string, - _Outref_result_bytebuffer_maybenull_(*field_len) void*& field_value, _Inout_ SQLLEN* field_len, _In_ bool cache_field, - _Out_ SQLSRV_PHPTYPE *sqlsrv_php_type_out TSRMLS_DC) + _Outref_result_bytebuffer_maybenull_(*field_len) void*& field_value, _Inout_ SQLLEN* field_len, _In_ bool cache_field, + _Out_ SQLSRV_PHPTYPE *sqlsrv_php_type_out TSRMLS_DC) { - try { - - // close the stream to release the resource - close_active_stream(stmt TSRMLS_CC); - - // if the field has been retrieved before, return the previous result - field_cache* cached = NULL; - if (NULL != ( cached = static_cast( zend_hash_index_find_ptr( Z_ARRVAL( stmt->field_cache ), static_cast( field_index ))))) { - // the field value is NULL - if( cached->value == NULL ) { - field_value = NULL; - *field_len = 0; - if( sqlsrv_php_type_out ) { *sqlsrv_php_type_out = SQLSRV_PHPTYPE_NULL; } - } - else { - - field_value = sqlsrv_malloc( cached->len, sizeof( char ), 1 ); - memcpy_s( field_value, ( cached->len * sizeof( char )), cached->value, cached->len ); - if( cached->type.typeinfo.type == SQLSRV_PHPTYPE_STRING) { - // prevent the 'string not null terminated' warning - 
reinterpret_cast( field_value )[cached->len] = '\0'; - } - *field_len = cached->len; - if( sqlsrv_php_type_out) { *sqlsrv_php_type_out = static_cast(cached->type.typeinfo.type); } - } - return; - } - - sqlsrv_phptype sqlsrv_php_type = sqlsrv_php_type_in; - - SQLLEN sql_field_type = 0; - SQLLEN sql_field_len = 0; - - // Make sure that the statement was executed and not just prepared. - CHECK_CUSTOM_ERROR( !stmt->executed, stmt, SQLSRV_ERROR_STATEMENT_NOT_EXECUTED ) { - throw core::CoreException(); - } - - // if the field is to be cached, and this field is being retrieved out of order, cache prior fields so they - // may also be retrieved. - if( cache_field && (field_index - stmt->last_field_index ) >= 2 ) { + try { + + // close the stream to release the resource + close_active_stream(stmt TSRMLS_CC); + + // if the field has been retrieved before, return the previous result + field_cache* cached = NULL; + if (NULL != ( cached = static_cast( zend_hash_index_find_ptr( Z_ARRVAL( stmt->field_cache ), static_cast( field_index ))))) { + // the field value is NULL + if( cached->value == NULL ) { + field_value = NULL; + *field_len = 0; + if( sqlsrv_php_type_out ) { *sqlsrv_php_type_out = SQLSRV_PHPTYPE_NULL; } + } + else { + + field_value = sqlsrv_malloc( cached->len, sizeof( char ), 1 ); + memcpy_s( field_value, ( cached->len * sizeof( char )), cached->value, cached->len ); + if( cached->type.typeinfo.type == SQLSRV_PHPTYPE_STRING) { + // prevent the 'string not null terminated' warning + reinterpret_cast( field_value )[cached->len] = '\0'; + } + *field_len = cached->len; + if( sqlsrv_php_type_out) { *sqlsrv_php_type_out = static_cast(cached->type.typeinfo.type); } + } + return; + } + + sqlsrv_phptype sqlsrv_php_type = sqlsrv_php_type_in; + + SQLLEN sql_field_type = 0; + SQLLEN sql_field_len = 0; + + // Make sure that the statement was executed and not just prepared. 
+ CHECK_CUSTOM_ERROR( !stmt->executed, stmt, SQLSRV_ERROR_STATEMENT_NOT_EXECUTED ) { + throw core::CoreException(); + } + + // if the field is to be cached, and this field is being retrieved out of order, cache prior fields so they + // may also be retrieved. + if( cache_field && (field_index - stmt->last_field_index ) >= 2 ) { sqlsrv_phptype invalid; invalid.typeinfo.type = SQLSRV_PHPTYPE_INVALID; for( int i = stmt->last_field_index + 1; i < field_index; ++i ) { @@ -1019,27 +1019,27 @@ void core_sqlsrv_get_field( _Inout_ sqlsrv_stmt* stmt, _In_ SQLUSMALLINT field_i sqlsrv_php_type = stmt->sql_type_to_php_type(static_cast(sql_field_type), static_cast(sql_field_len), prefer_string); } - // Verify that we have an acceptable type to convert. - CHECK_CUSTOM_ERROR( !is_valid_sqlsrv_phptype( sqlsrv_php_type ), stmt, SQLSRV_ERROR_INVALID_TYPE ) { - throw core::CoreException(); - } + // Verify that we have an acceptable type to convert. + CHECK_CUSTOM_ERROR( !is_valid_sqlsrv_phptype( sqlsrv_php_type ), stmt, SQLSRV_ERROR_INVALID_TYPE ) { + throw core::CoreException(); + } - if( sqlsrv_php_type_out != NULL ) - *sqlsrv_php_type_out = static_cast( sqlsrv_php_type.typeinfo.type ); + if( sqlsrv_php_type_out != NULL ) + *sqlsrv_php_type_out = static_cast( sqlsrv_php_type.typeinfo.type ); - // Retrieve the data - core_get_field_common( stmt, field_index, sqlsrv_php_type, field_value, field_len TSRMLS_CC ); + // Retrieve the data + core_get_field_common( stmt, field_index, sqlsrv_php_type, field_value, field_len TSRMLS_CC ); - // if the user wants us to cache the field, we'll do it - if( cache_field ) { - field_cache cache( field_value, *field_len, sqlsrv_php_type ); - core::sqlsrv_zend_hash_index_update_mem( *stmt, Z_ARRVAL( stmt->field_cache ), field_index, &cache, sizeof(field_cache) TSRMLS_CC ); - } - } + // if the user wants us to cache the field, we'll do it + if( cache_field ) { + field_cache cache( field_value, *field_len, sqlsrv_php_type ); + 
core::sqlsrv_zend_hash_index_update_mem( *stmt, Z_ARRVAL( stmt->field_cache ), field_index, &cache, sizeof(field_cache) TSRMLS_CC ); + } + } - catch( core::CoreException& e ) { - throw e; - } + catch( core::CoreException& e ) { + throw e; + } } // core_sqlsrv_has_any_result @@ -1332,14 +1332,14 @@ bool core_sqlsrv_send_stream_packet( _Inout_ sqlsrv_stmt* stmt TSRMLS_DC ) // read the data from the stream, send it via SQLPutData and track how much we've sent. else { char buffer[PHP_STREAM_BUFFER_SIZE + 1] = {'\0'}; - std::size_t buffer_size = sizeof( buffer ) - 3; // -3 to preserve enough space for a cut off UTF-8 character + std::size_t buffer_size = sizeof( buffer ) - 3; // -3 to preserve enough space for a cut off UTF-8 character std::size_t read = php_stream_read( param_stream, buffer, buffer_size ); - if (read > UINT_MAX) - { - LOG(SEV_ERROR, "PHP stream: buffer length exceeded."); - throw core::CoreException(); - } + if (read > UINT_MAX) + { + LOG(SEV_ERROR, "PHP stream: buffer length exceeded."); + throw core::CoreException(); + } stmt->current_stream_read += static_cast( read ); if (read == 0) { @@ -1359,8 +1359,8 @@ bool core_sqlsrv_send_stream_packet( _Inout_ sqlsrv_stmt* stmt TSRMLS_DC ) // expansion of 2x the UTF-8 size. SQLWCHAR wbuffer[PHP_STREAM_BUFFER_SIZE + 1] = {L'\0'}; int wbuffer_size = static_cast( sizeof( wbuffer ) / sizeof( SQLWCHAR )); - DWORD last_error_code = ERROR_SUCCESS; - // buffer_size is the # of wchars. Since it set to stmt->param_buffer_size / 2, this is accurate + DWORD last_error_code = ERROR_SUCCESS; + // buffer_size is the # of wchars. 
Since it set to stmt->param_buffer_size / 2, this is accurate #ifndef _WIN32 int wsize = SystemLocale::ToUtf16Strict( stmt->current_stream.encoding, buffer, static_cast(read), wbuffer, wbuffer_size, &last_error_code ); #else @@ -1368,7 +1368,7 @@ bool core_sqlsrv_send_stream_packet( _Inout_ sqlsrv_stmt* stmt TSRMLS_DC ) last_error_code = GetLastError(); #endif // !_WIN32 - if( wsize == 0 && last_error_code == ERROR_NO_UNICODE_TRANSLATION ) { + if( wsize == 0 && last_error_code == ERROR_NO_UNICODE_TRANSLATION ) { // this will calculate how many bytes were cut off from the last UTF-8 character and read that many more // in, then reattempt the conversion. If it fails the second time, then an error is returned. @@ -1971,7 +1971,7 @@ void default_sql_type( _Inout_ sqlsrv_stmt* stmt, _In_opt_ SQLULEN paramno, _In_ _Out_ SQLSMALLINT& sql_type TSRMLS_DC ) { sql_type = SQL_UNKNOWN_TYPE; - int php_type = Z_TYPE_P(param_z); + int php_type = Z_TYPE_P(param_z); switch( php_type ) { case IS_NULL: @@ -2111,7 +2111,7 @@ void field_cache_dtor( _Inout_ zval* data_z ) { sqlsrv_free( cache->value ); } - sqlsrv_free( cache ); + sqlsrv_free( cache ); } // To be called for formatting decimal / numeric fetched values from finalize_output_parameters() and/or get_field_as_string() @@ -2251,13 +2251,13 @@ void finalize_output_parameters( _Inout_ sqlsrv_stmt* stmt TSRMLS_DC ) return; HashTable* params_ht = Z_ARRVAL( stmt->output_params ); - zend_ulong index = -1; - zend_string* key = NULL; - void* output_param_temp = NULL; + zend_ulong index = -1; + zend_string* key = NULL; + void* output_param_temp = NULL; - ZEND_HASH_FOREACH_KEY_PTR( params_ht, index, key, output_param_temp ) { - sqlsrv_output_param* output_param = static_cast( output_param_temp ); - zval* value_z = Z_REFVAL_P( output_param->param_z ); + ZEND_HASH_FOREACH_KEY_PTR( params_ht, index, key, output_param_temp ) { + sqlsrv_output_param* output_param = static_cast( output_param_temp ); + zval* value_z = Z_REFVAL_P( 
output_param->param_z ); switch( Z_TYPE_P( value_z )) { case IS_STRING: { @@ -2377,7 +2377,7 @@ void finalize_output_parameters( _Inout_ sqlsrv_stmt* stmt TSRMLS_DC ) DIE( "Illegal or unknown output parameter type. This should have been caught in core_sqlsrv_bind_parameter." ); break; } - value_z = NULL; + value_z = NULL; } ZEND_HASH_FOREACH_END(); // empty the hash table since it's been processed @@ -2772,24 +2772,24 @@ void resize_output_buffer_if_necessary( _Inout_ sqlsrv_stmt* stmt, _Inout_ zval* // allocate enough space to ALWAYS include the NULL regardless of the type being retrieved since // we set the last byte(s) to be NULL to avoid the debug build warning from the Zend engine about // not having a NULL terminator on a string. - zend_string* param_z_string = zend_string_realloc( Z_STR_P(param_z), expected_len, 0 ); + zend_string* param_z_string = zend_string_realloc( Z_STR_P(param_z), expected_len, 0 ); // A zval string len doesn't include the null. This calculates the length it should be // regardless of whether the ODBC type contains the NULL or not. // null terminate the string to avoid a warning in debug PHP builds - ZSTR_VAL(param_z_string)[without_null_len] = '\0'; - ZVAL_NEW_STR(param_z, param_z_string); + ZSTR_VAL(param_z_string)[without_null_len] = '\0'; + ZVAL_NEW_STR(param_z, param_z_string); - // buffer_len is the length passed to SQLBindParameter. It must contain the space for NULL in the - // buffer when retrieving anything but SQLSRV_ENC_BINARY/SQL_C_BINARY - buffer_len = Z_STRLEN_P(param_z) - buffer_null_extra; + // buffer_len is the length passed to SQLBindParameter. 
It must contain the space for NULL in the + // buffer when retrieving anything but SQLSRV_ENC_BINARY/SQL_C_BINARY + buffer_len = Z_STRLEN_P(param_z) - buffer_null_extra; - // Zend string length doesn't include the null terminator - ZSTR_LEN(Z_STR_P(param_z)) -= elem_size; + // Zend string length doesn't include the null terminator + ZSTR_LEN(Z_STR_P(param_z)) -= elem_size; } - buffer = Z_STRVAL_P(param_z); + buffer = Z_STRVAL_P(param_z); // The StrLen_Ind_Ptr parameter of SQLBindParameter should contain the length of the data to send, which // may be less than the size of the buffer since the output may be more than the input. If it is greater, @@ -2973,7 +2973,7 @@ void sqlsrv_output_param_dtor( _Inout_ zval* data ) { sqlsrv_output_param *output_param = static_cast( Z_PTR_P( data )); zval_ptr_dtor( output_param->param_z ); // undo the reference to the string we will no longer hold - sqlsrv_free( output_param ); + sqlsrv_free( output_param ); } // called by Zend for each stream in the sqlsrv_stmt::param_streams hash table when it is cleaned/destroyed @@ -2981,7 +2981,7 @@ void sqlsrv_stream_dtor( _Inout_ zval* data ) { sqlsrv_stream* stream_encoding = static_cast( Z_PTR_P( data )); zval_ptr_dtor( stream_encoding->stream_z ); // undo the reference to the stream we will no longer hold - sqlsrv_free( stream_encoding ); + sqlsrv_free( stream_encoding ); } } diff --git a/source/shared/localizationimpl.cpp b/source/shared/localizationimpl.cpp index de61f2c26..9939f13ed 100644 --- a/source/shared/localizationimpl.cpp +++ b/source/shared/localizationimpl.cpp @@ -518,7 +518,7 @@ size_t SystemLocale::Utf8To16Strict( const char *src, SSIZE_T cchSrc, WCHAR *des if (usrc >= srcEnd || *usrc < 0x80 || *usrc > 0xBF || (*dest = (ucode & 0x1F)<<6 | (*usrc++ & 0x3F)) < 0x80) { - *dest = 0xFFFD; + goto Invalid; } dest++; } From d24a910234cb2cd676d7964a1d968d9994e8034d Mon Sep 17 00:00:00 2001 From: David Puglielli Date: Tue, 27 Nov 2018 17:19:37 -0800 Subject: [PATCH 5/6] Switched to 
safe memcpy --- source/shared/core_util.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/shared/core_util.cpp b/source/shared/core_util.cpp index 071303abe..4a2b084a2 100644 --- a/source/shared/core_util.cpp +++ b/source/shared/core_util.cpp @@ -156,7 +156,7 @@ bool convert_string_from_utf16( _In_ SQLSRV_ENCODING encoding, _In_reads_bytes_( } char* newString2 = reinterpret_cast( sqlsrv_malloc( rc + 1 /* NULL char*/ )); memset(newString2, '\0', rc+1); - memcpy(newString2, newString, rc+1); + memcpy_s(newString2, rc+1, newString, rc+1); sqlsrv_free( newString ); *outString = newString2; From 0bf58f8ce53ac1f6df7e730ed0be52afa1cbd842 Mon Sep 17 00:00:00 2001 From: David Puglielli Date: Fri, 30 Nov 2018 15:33:28 -0800 Subject: [PATCH 6/6] Small fixes --- source/shared/core_stmt.cpp | 13 ++++++++----- source/shared/core_util.cpp | 8 +++++--- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/source/shared/core_stmt.cpp b/source/shared/core_stmt.cpp index 51c648fe9..3b263fe22 100644 --- a/source/shared/core_stmt.cpp +++ b/source/shared/core_stmt.cpp @@ -1886,10 +1886,13 @@ bool convert_input_param_to_utf16( _In_ zval* input_param_z, _Inout_ zval* conve return true; } - // if the parameter is an input parameter, calc the size of the necessary buffer from the length of the string #ifndef _WIN32 + // Declare wchar_size to be the largest possible number of UTF-16 characters after + // conversion, to avoid the performance penalty of calling ToUtf16 wchar_size = buffer_len; #else + // Calculate the size of the necessary buffer from the length of the string - + // no performance penalty because MultiByteToWidechar is highly optimised wchar_size = MultiByteToWideChar( CP_UTF8, MB_ERR_INVALID_CHARS, reinterpret_cast( buffer ), static_cast( buffer_len ), NULL, 0 ); #endif // !_WIN32 @@ -1901,15 +1904,15 @@ bool convert_input_param_to_utf16( _In_ zval* input_param_z, _Inout_ zval* conve wbuffer = reinterpret_cast( sqlsrv_malloc( (wchar_size + 
1) * sizeof( SQLWCHAR ) )); // convert the utf-8 string to a wchar string in the new buffer #ifndef _WIN32 - int r = SystemLocale::ToUtf16Strict( CP_UTF8, reinterpret_cast( buffer ), static_cast( buffer_len ), wbuffer, wchar_size ); + int rc = SystemLocale::ToUtf16Strict( CP_UTF8, reinterpret_cast( buffer ), static_cast( buffer_len ), wbuffer, wchar_size ); #else - int r = MultiByteToWideChar( CP_UTF8, MB_ERR_INVALID_CHARS, reinterpret_cast( buffer ), static_cast( buffer_len ), wbuffer, wchar_size ); + int rc = MultiByteToWideChar( CP_UTF8, MB_ERR_INVALID_CHARS, reinterpret_cast( buffer ), static_cast( buffer_len ), wbuffer, wchar_size ); #endif // !_WIN32 // if there was a problem converting the string, then free the memory and return false - if( r == 0 ) { + if( rc == 0 ) { return false; } - wchar_size = r; + wchar_size = rc; // null terminate the string, set the size within the zval, and return success wbuffer[ wchar_size ] = L'\0'; diff --git a/source/shared/core_util.cpp b/source/shared/core_util.cpp index 255f335a6..4aa2b37fd 100644 --- a/source/shared/core_util.cpp +++ b/source/shared/core_util.cpp @@ -127,10 +127,13 @@ bool convert_string_from_utf16( _In_ SQLSRV_ENCODING encoding, _In_reads_bytes_( flags = WC_ERR_INVALID_CHARS; } - // calculate the number of characters needed #ifndef _WIN32 + // Allocate enough space to hold the largest possible number of bytes for UTF-8 conversion + // instead of calling FromUtf16, for performance reasons cchOutLen = 4*cchInLen; #else + // Calculate the number of output bytes required - no performance hit here because + // WideCharToMultiByte is highly optimised cchOutLen = WideCharToMultiByte( encoding, flags, inString, cchInLen, NULL, 0, NULL, NULL ); @@ -156,11 +159,10 @@ bool convert_string_from_utf16( _In_ SQLSRV_ENCODING encoding, _In_reads_bytes_( } char* newString2 = reinterpret_cast( sqlsrv_malloc( rc + 1 /* NULL char*/ )); memset(newString2, '\0', rc+1); - memcpy_s(newString2, rc+1, newString, rc+1); + 
memcpy_s(newString2, rc, newString, rc); sqlsrv_free( newString ); *outString = newString2; - newString2[rc] = '\0'; // null terminate the encoded string cchOutLen = rc; return true;