Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Model locale fix #1

Merged
merged 8 commits into from
Sep 15, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
[submodule "include/boost/compute"]
path = compute
url = https://github.com/boostorg/compute
[submodule "external_libs/fmt"]
path = external_libs/fmt
url = https://github.com/fmtlib/fmt.git
6 changes: 6 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,7 @@ if(WIN32 AND (MINGW OR CYGWIN))
TARGET_LINK_LIBRARIES(_lightgbm IPHLPAPI)
endif()


if(BUILD_FOR_R)
if(MSVC)
TARGET_LINK_LIBRARIES(_lightgbm ${LIBR_MSVC_CORE_LIBRARY})
Expand All @@ -323,6 +324,11 @@ if(BUILD_FOR_R)
endif(MSVC)
endif(BUILD_FOR_R)

# fmtlib/fmt
add_subdirectory(external_libs/fmt)
TARGET_LINK_LIBRARIES(lightgbm PUBLIC fmt::fmt)
TARGET_LINK_LIBRARIES(_lightgbm PUBLIC fmt::fmt)

install(TARGETS lightgbm _lightgbm
RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/bin
LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}/lib
Expand Down
1 change: 1 addition & 0 deletions external_libs/fmt
Submodule fmt added at f67443
329 changes: 323 additions & 6 deletions include/LightGBM/utils/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#ifndef LIGHTGBM_UTILS_COMMON_FUN_H_
#define LIGHTGBM_UTILS_COMMON_FUN_H_

#include "../../../fmt/include/fmt/core.h"
#include <LightGBM/utils/log.h>
#include <LightGBM/utils/openmp_wrapper.h>

Expand Down Expand Up @@ -408,13 +409,11 @@ inline static void Int32ToStr(int32_t value, char* buffer) {
Uint32ToStr(u, buffer);
}



// Writes the shortest-up-to-17-significant-digit ("{:.17g}") text of `value`
// into `buffer` as a NUL-terminated C string.
//
// value      - number to format.
// buffer     - destination character array supplied by the caller.
// buffer_len - capacity of `buffer` in bytes, including room for the NUL.
//
// The text is produced with fmt::format instead of snprintf/sprintf_s.
// NOTE(review): presumably chosen because fmt does not consult the C locale;
// confirm against the fmt documentation.
// If the formatted text does not fit, it is truncated to buffer_len - 1
// characters; the result is always terminated. A zero-length buffer is left
// untouched.
inline static void DoubleToStr(double value, char* buffer, size_t buffer_len) {
  if (buffer_len == 0) {
    return;
  }
  const std::string s = fmt::format("{:.17g}", value);
  // std::string::copy never appends a terminator, so keep one byte for it.
  const size_t copied = s.copy(buffer, buffer_len - 1);
  buffer[copied] = '\0';
}

inline static const char* SkipSpaceAndTab(const char* p) {
Expand Down Expand Up @@ -1137,6 +1136,324 @@ class FunctionTimer {

extern Common::Timer global_timer;



namespace Common2 {


// Streams every element of `strs` into one string, placing `delimiter`
// between consecutive elements. Formatting uses the "C" locale and a
// precision of numeric_limits<double>::digits10 + 2 significant digits.
// An empty input yields an empty string.
template<typename T>
inline static std::string Join(const std::vector<T>& strs, const char* delimiter) {
  if (strs.empty()) {
    return std::string("");
  }
  std::stringstream joined;
  joined.imbue(std::locale("C"));
  joined << std::setprecision(std::numeric_limits<double>::digits10 + 2);
  auto it = strs.begin();
  joined << *it;
  for (++it; it != strs.end(); ++it) {
    joined << delimiter << *it;
  }
  return joined.str();
}

// int8_t specialization of Join: each element is widened to int16_t before
// streaming so it is written as a number rather than as a character.
// Uses the same "C" locale and precision settings as the generic version.
template<>
inline std::string Join<int8_t>(const std::vector<int8_t>& strs, const char* delimiter) {
  if (strs.empty()) {
    return std::string("");
  }
  std::stringstream joined;
  joined.imbue(std::locale("C"));
  joined << std::setprecision(std::numeric_limits<double>::digits10 + 2);
  auto it = strs.begin();
  joined << static_cast<int16_t>(*it);
  for (++it; it != strs.end(); ++it) {
    joined << delimiter << static_cast<int16_t>(*it);
  }
  return joined.str();
}

// Joins the elements of `strs` in the half-open index range [start, end),
// separated by `delimiter`.
//
// strs      - source values; each is written with operator<<.
// start     - index of the first element; a value past the last element is
//             clamped to the last valid index.
// end       - one past the last element; clamped to strs.size().
// delimiter - text inserted between consecutive elements.
//
// Returns an empty string when `strs` is empty or the range is empty or
// inverted (end <= start). Both indices are unsigned, so the range test must
// be a comparison: a subtraction would wrap around instead of going negative.
// Output uses the "C" locale and numeric_limits<double>::digits10 + 2
// significant digits.
template<typename T>
inline static std::string Join(const std::vector<T>& strs, size_t start, size_t end, const char* delimiter) {
  if (strs.empty() || end <= start) {
    return std::string("");
  }
  // Safe: strs is non-empty, so size() - 1 cannot wrap.
  start = std::min(start, static_cast<size_t>(strs.size()) - 1);
  end = std::min(end, static_cast<size_t>(strs.size()));
  std::stringstream str_buf;
  str_buf.imbue(std::locale("C"));
  str_buf << std::setprecision(std::numeric_limits<double>::digits10 + 2);
  str_buf << strs[start];
  for (size_t i = start + 1; i < end; ++i) {
    str_buf << delimiter;
    str_buf << strs[i];
  }
  return str_buf.str();
}

// Parses one floating-point token starting at `p` and stores it in `*out`.
//
// p   - NUL-terminated text to read from.
// out - receives the parsed value; it is set to NAN first and stays NAN when
//       nothing is recognised.
//
// Returns a pointer to the first character after the token and any spaces
// that follow it.
//
// Accepted forms: optional sign, digits, optional '.' fraction, optional
// 'e'/'E' exponent. Otherwise the token is compared case-insensitively with
// "na"/"nan"/"null" (-> NAN) and "inf"/"infinity" (-> sign * 1e308); any
// other non-empty token is reported through Log::Fatal.
inline static const char* Atof(const char* p, double* out) {
int frac;
double sign, value, scale;
// Default result until a value has been parsed.
*out = NAN;
// Skip leading spaces, if any (only ' ' is skipped here, not tabs).
while (*p == ' ') {
++p;
}
// Get sign, if any.
sign = 1.0;
if (*p == '-') {
sign = -1.0;
++p;
} else if (*p == '+') {
++p;
}

// Numeric branch: the token starts with a digit, '.', 'e' or 'E'.
if ((*p >= '0' && *p <= '9') || *p == '.' || *p == 'e' || *p == 'E') {
// Get digits before decimal point or exponent, if any.
for (value = 0.0; *p >= '0' && *p <= '9'; ++p) {
value = value * 10.0 + (*p - '0');
}

// Get digits after decimal point, if any.
if (*p == '.') {
// `right` accumulates the fractional digits as an integer and `nn` counts
// them, so the fraction is right / 10^nn.
double right = 0.0;
int nn = 0;
++p;
while (*p >= '0' && *p <= '9') {
right = (*p - '0') + right * 10.0;
++nn;
++p;
}
value += right / Common::Pow(10.0, nn);
}

// Handle exponent, if any.
// `frac` is set to 1 for a negative exponent, which makes the final step
// divide by `scale` instead of multiplying.
frac = 0;
scale = 1.0;
if ((*p == 'e') || (*p == 'E')) {
uint32_t expon;
// Get sign of exponent, if any.
++p;
if (*p == '-') {
frac = 1;
++p;
} else if (*p == '+') {
++p;
}
// Get digits of exponent, if any.
for (expon = 0; *p >= '0' && *p <= '9'; ++p) {
expon = expon * 10 + (*p - '0');
}
// Clamp the exponent to 308.
if (expon > 308) expon = 308;
// Calculate scaling factor 10^expon by repeated multiplication, in steps
// of 1E50, then 1E8, then 10.
while (expon >= 50) { scale *= 1E50; expon -= 50; }
while (expon >= 8) { scale *= 1E8; expon -= 8; }
while (expon > 0) { scale *= 10.0; expon -= 1; }
}
// Return signed and scaled floating point result.
*out = sign * (frac ? (value / scale) : (value * scale));
} else {
// Non-numeric branch: measure the token up to the next NUL, space, tab,
// comma, newline, carriage return or colon.
size_t cnt = 0;
while (*(p + cnt) != '\0' && *(p + cnt) != ' '
&& *(p + cnt) != '\t' && *(p + cnt) != ','
&& *(p + cnt) != '\n' && *(p + cnt) != '\r'
&& *(p + cnt) != ':') {
++cnt;
}
// An empty token leaves *out as NAN and does not advance p.
if (cnt > 0) {
// Lower-case a copy of the token so the keyword comparison ignores case.
std::string tmp_str(p, cnt);
std::transform(tmp_str.begin(), tmp_str.end(), tmp_str.begin(), Common::tolower);
if (tmp_str == std::string("na") || tmp_str == std::string("nan") ||
tmp_str == std::string("null")) {
*out = NAN;
} else if (tmp_str == std::string("inf") || tmp_str == std::string("infinity")) {
// Infinity is stored as the finite value +/-1e308.
*out = sign * 1e308;
} else {
Log::Fatal("Unknown token %s in data file", tmp_str.c_str());
}
p += cnt;
}
}

// Skip trailing spaces so the caller lands on the next token or delimiter.
while (*p == ' ') {
++p;
}

return p;
}

// Fast token parser, primary template. The <T, true> partial specialization
// handles the is_float == true case, so this version serves the remaining
// case and forwards to LightGBM::Common::Atoi.
//
// operator() reads a value from `text` into `*result` and returns whatever
// position Atoi reports.
template<typename T, bool is_float>
struct __StringToTHelperFast {
  const char* operator()(const char* text, T* result) const {
    return LightGBM::Common::Atoi(text, result);
  }
};

// Fast token parser for is_float == true: the text is parsed as a double by
// Atof and then narrowed to T.
//
// operator() stores the converted value in `*result` and returns the
// position Atof stopped at.
template<typename T>
struct __StringToTHelperFast<T, true> {
  const char* operator()(const char* text, T* result) const {
    double parsed = 0.0;
    const char* const next = Atof(text, &parsed);
    *result = static_cast<T>(parsed);
    return next;
  }
};

template<typename T, bool is_float>
struct __StringToTHelper {
T operator()(const std::string& str) const {
T ret = 0;
Atoi(str.c_str(), &ret);
return ret;
}
};

// String-to-value converter for is_float == true.
//
// The text is extracted through a stringstream imbued with the "C" locale,
// so the stream is explicitly bound to that locale rather than the global one.
//
// operator() returns the value extracted from `str`, or 0 when nothing could
// be extracted (for example an empty or whitespace-only string). The result
// is zero-initialised because a failed extraction may leave the target
// untouched.
template<typename T>
struct __StringToTHelper<T, true> {
  T operator()(const std::string& str) const {
    std::stringstream ss;
    ss.imbue(std::locale("C"));
    ss << str;
    T tmp = 0;
    ss >> tmp;
    return tmp;
  }
};

// Fast value-to-text writer, primary template. Partial specializations exist
// for <T, true, false> and <T, false, true>; every other combination lands
// here and is written with LightGBM::Common::Int32ToStr.
//
// operator() writes `number` into `out`; the buffer-length argument is not
// used by this version.
template<typename T, bool is_float, bool is_unsign>
struct __TToStringHelperFast {
  void operator()(T number, char* out, size_t) const {
    LightGBM::Common::Int32ToStr(number, out);
  }
};

// Fast value-to-text writer for is_float == true, is_unsign == false.
//
// operator() formats `value` with "%g" into `buffer`, which holds `buf_len`
// bytes, and checks that the formatting call did not report an error.
//
// "%g" consumes a double from the variadic argument list, so the value is
// converted to double explicitly; this keeps the call well-defined for every
// T, including long double.
template<typename T>
struct __TToStringHelperFast<T, true, false> {
  void operator()(T value, char* buffer, size_t buf_len) const {
    const double as_double = static_cast<double>(value);
    #ifdef _MSC_VER
    int num_chars = sprintf_s(buffer, buf_len, "%g", as_double);
    #else
    int num_chars = snprintf(buffer, buf_len, "%g", as_double);
    #endif
    CHECK_GE(num_chars, 0);
  }
};

// Fast value-to-text writer for is_float == false, is_unsign == true:
// forwards to LightGBM::Common::Uint32ToStr.
//
// operator() writes `number` into `out`; the buffer-length argument is not
// used by this version.
template<typename T>
struct __TToStringHelperFast<T, false, true> {
  void operator()(T number, char* out, size_t) const {
    LightGBM::Common::Uint32ToStr(number, out);
  }
};

// Formats `value` with "%.17g" into `buffer` (capacity `buffer_len` bytes).
// sprintf_s is used when _MSC_VER is defined and snprintf otherwise; a
// negative return from either call fails the CHECK_GE assertion.
inline static void DoubleToStr(double value, char* buffer, size_t buffer_len) {
#ifdef _MSC_VER
  const int written = sprintf_s(buffer, buffer_len, "%.17g", value);
#else
  const int written = snprintf(buffer, buffer_len, "%.17g", value);
#endif
  CHECK_GE(written, 0);
}

template<typename T>
inline static std::vector<T> StringToArrayFast(const std::string& str, int n) {
if (n == 0) {
return std::vector<T>();
}
auto p_str = str.c_str();
__StringToTHelperFast<T, std::is_floating_point<T>::value> helper;
std::vector<T> ret(n);
for (int i = 0; i < n; ++i) {
p_str = helper(p_str, &ret[i]);
}
return ret;
}

template<typename T>
inline static std::vector<T> StringToArray(const std::string& str, char delimiter) {
std::vector<std::string> strs = LightGBM::Common::Split(str.c_str(), delimiter);
std::vector<T> ret;
ret.reserve(strs.size());
__StringToTHelper<T, std::is_floating_point<T>::value> helper;
for (const auto& s : strs) {
ret.push_back(helper(s));
}
return ret;
}

template<typename T>
inline static std::vector<T> StringToArray(const std::string& str, int n) {
if (n == 0) {
return std::vector<T>();
}
std::vector<std::string> strs = LightGBM::Common::Split(str.c_str(), ' ');
CHECK_EQ(strs.size(), static_cast<size_t>(n));
std::vector<T> ret;
ret.reserve(strs.size());
__StringToTHelper<T, std::is_floating_point<T>::value> helper;
for (const auto& s : strs) {
ret.push_back(helper(s));
}
return ret;
}

template<typename T>
inline static std::string ArrayToStringFast(const std::vector<T>& arr, size_t n) {
if (arr.empty() || n == 0) {
return std::string("");
}
__TToStringHelperFast<T, std::is_floating_point<T>::value, std::is_unsigned<T>::value> helper;
const size_t buf_len = 16;
std::vector<char> buffer(buf_len);
std::stringstream str_buf;
str_buf.imbue(std::locale("C"));
helper(arr[0], buffer.data(), buf_len);
str_buf << buffer.data();
for (size_t i = 1; i < std::min(n, arr.size()); ++i) {
helper(arr[i], buffer.data(), buf_len);
str_buf << ' ' << buffer.data();
}
return str_buf.str();
}

// Writes the first min(n, arr.size()) doubles of `arr` as text separated by
// single spaces. Each value is formatted by DoubleToStr into a 32-byte
// scratch buffer and appended to a "C"-locale stringstream.
// Returns an empty string when `arr` is empty or n == 0.
inline static std::string ArrayToString(const std::vector<double>& arr, size_t n) {
  if (arr.empty() || n == 0) {
    return std::string("");
  }
  const size_t buf_len = 32;
  std::vector<char> scratch(buf_len);
  std::stringstream out;
  out.imbue(std::locale("C"));
  const size_t count = std::min(n, arr.size());
  for (size_t i = 0; i < count; ++i) {
    if (i != 0) {
      out << ' ';
    }
    DoubleToStr(arr[i], scratch.data(), buf_len);
    out << scratch.data();
  }
  return out.str();
}





#include <algorithm>

// Checks that two vectors have the same length and equal elements, reporting
// any difference through Log::Fatal.
//
// a - first vector; its size is printed first in the size-mismatch message.
// b - second vector; its size is printed as "expected".
//
// Both vectors are taken by const reference to avoid copying them. The sizes
// are size_t, so they are printed with %zu.
// NOTE(review): the element comparison reads b over a's length, so it relies
// on Log::Fatal not returning after a size mismatch - confirm.
template <class T>
void cmp(const std::vector<T>& a, const std::vector<T>& b) {
  if (a.size() != b.size()) {
    Log::Fatal("Different array sizes! %zu (expected=%zu)", a.size(), b.size());
  }

  if (!std::equal(a.begin(), a.end(), b.begin())) {
    Log::Fatal("Different array contents!");
  }
}

} // Namespace Common2


} // namespace LightGBM

#endif // LightGBM_UTILS_COMMON_FUN_H_
1 change: 1 addition & 0 deletions include/LightGBM/utils/text_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ class TextReader {
Log::Fatal("Could not open %s", filename);
}
std::stringstream str_buf;
// TODO: decide whether str_buf should be imbued with the "C" locale here (possibly controlled by a parameter).
char read_c;
size_t nread = reader->Read(&read_c, 1);
while (nread == 1) {
Expand Down
Loading