Skip to content

Commit

Permalink
Pull AlbertoEAF/model-locale-fix code into this fork
Browse files Browse the repository at this point in the history
Port all the custom code from AlbertoEAF/LightGBM code to this Feedzai fork
  • Loading branch information
AlbertoEAF authored Sep 15, 2020
2 parents 164818b + 244ff1d commit 1b74a97
Show file tree
Hide file tree
Showing 7 changed files with 412 additions and 58 deletions.
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
[submodule "include/boost/compute"]
path = compute
url = https://github.com/boostorg/compute
[submodule "external_libs/fmt"]
path = external_libs/fmt
url = https://github.com/fmtlib/fmt.git
6 changes: 6 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,7 @@ if(WIN32 AND (MINGW OR CYGWIN))
TARGET_LINK_LIBRARIES(_lightgbm IPHLPAPI)
endif()


if(BUILD_FOR_R)
if(MSVC)
TARGET_LINK_LIBRARIES(_lightgbm ${LIBR_MSVC_CORE_LIBRARY})
Expand All @@ -323,6 +324,11 @@ if(BUILD_FOR_R)
endif(MSVC)
endif(BUILD_FOR_R)

# fmtlib/fmt
add_subdirectory(external_libs/fmt)
TARGET_LINK_LIBRARIES(lightgbm PUBLIC fmt::fmt)
TARGET_LINK_LIBRARIES(_lightgbm PUBLIC fmt::fmt)

install(TARGETS lightgbm _lightgbm
RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/bin
LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}/lib
Expand Down
1 change: 1 addition & 0 deletions external_libs/fmt
Submodule fmt added at f67443
329 changes: 323 additions & 6 deletions include/LightGBM/utils/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#ifndef LIGHTGBM_UTILS_COMMON_FUN_H_
#define LIGHTGBM_UTILS_COMMON_FUN_H_

#include "../../../fmt/include/fmt/core.h"
#include <LightGBM/utils/log.h>
#include <LightGBM/utils/openmp_wrapper.h>

Expand Down Expand Up @@ -408,13 +409,11 @@ inline static void Int32ToStr(int32_t value, char* buffer) {
Uint32ToStr(u, buffer);
}



/*!
 * \brief Write `value` into `buffer` as text using the "{:.17g}" format of fmt.
 *
 * The text is produced by fmt::format, so it does not go through the C
 * `printf` family that the previous implementation relied on.
 *
 * \param value Number to format.
 * \param buffer Destination character buffer.
 * \param buffer_len Capacity of `buffer` in bytes, including the terminator.
 *
 * The result is always NUL-terminated. std::string::copy does not append a
 * terminator, so without the explicit '\0' a caller that reuses one buffer
 * for several numbers would see leftover characters of an earlier, longer
 * number. Output longer than `buffer_len - 1` characters is truncated.
 */
inline static void DoubleToStr(double value, char* buffer, size_t buffer_len) {
  if (buffer == nullptr || buffer_len == 0) {
    return;  // nowhere to write, not even a terminator
  }
  const std::string s = fmt::format("{:.17g}", value);
  // Reserve one byte for the terminator; copy() returns the number of chars written.
  const size_t copied = s.copy(buffer, buffer_len - 1);
  buffer[copied] = '\0';
}

inline static const char* SkipSpaceAndTab(const char* p) {
Expand Down Expand Up @@ -1137,6 +1136,324 @@ class FunctionTimer {

extern Common::Timer global_timer;



namespace Common2 {


/*!
 * \brief Concatenate all elements of `strs`, separated by `delimiter`.
 *
 * Elements are written with operator<< to a stream imbued with the "C"
 * locale and a precision of digits10 + 2 for double.
 * An empty vector yields an empty string.
 */
template<typename T>
inline static std::string Join(const std::vector<T>& strs, const char* delimiter) {
  std::stringstream joined;
  joined.imbue(std::locale("C"));
  joined << std::setprecision(std::numeric_limits<double>::digits10 + 2);
  bool is_first = true;
  for (const auto& item : strs) {
    if (!is_first) {
      joined << delimiter;
    }
    joined << item;
    is_first = false;
  }
  return joined.str();
}

/*!
 * \brief Join for int8_t: every element is widened to int16_t before it is
 *        streamed, so it is printed as a number instead of as a character.
 */
template<>
inline std::string Join<int8_t>(const std::vector<int8_t>& strs, const char* delimiter) {
  std::stringstream joined;
  joined.imbue(std::locale("C"));
  joined << std::setprecision(std::numeric_limits<double>::digits10 + 2);
  for (size_t idx = 0; idx < strs.size(); ++idx) {
    if (idx != 0) {
      joined << delimiter;
    }
    joined << static_cast<int16_t>(strs[idx]);
  }
  return joined.str();
}

/*!
 * \brief Concatenate the elements of `strs` in the half-open index range
 *        [start, end), separated by `delimiter`.
 *
 * Elements are written with operator<< to a stream imbued with the "C"
 * locale and a precision of digits10 + 2 for double.
 *
 * \param strs Source elements.
 * \param start Index of the first element to emit; clamped to the last valid index.
 * \param end One past the last element to emit; clamped to strs.size().
 * \param delimiter Text inserted between consecutive elements.
 * \return The joined text, or an empty string when `strs` is empty or the
 *         requested range is empty or inverted (end <= start).
 */
template<typename T>
inline static std::string Join(const std::vector<T>& strs, size_t start, size_t end, const char* delimiter) {
  // Both operands are unsigned, so "end - start <= 0" would only catch
  // end == start; compare directly. The empty-vector check must come before
  // "size() - 1", which would otherwise wrap around.
  if (strs.empty() || end <= start) {
    return std::string("");
  }
  start = std::min(start, static_cast<size_t>(strs.size()) - 1);
  end = std::min(end, static_cast<size_t>(strs.size()));
  std::stringstream str_buf;
  str_buf.imbue(std::locale("C"));
  str_buf << std::setprecision(std::numeric_limits<double>::digits10 + 2);
  str_buf << strs[start];
  for (size_t i = start + 1; i < end; ++i) {
    str_buf << delimiter;
    str_buf << strs[i];
  }
  return str_buf.str();
}

// Parses a floating point value from the text at `p` and stores it in `*out`.
//
// Accepted forms, after optional leading spaces and an optional '+'/'-' sign:
//   * a decimal literal: digits, an optional '.' fraction and an optional
//     'e'/'E' exponent (the exponent magnitude is clamped to 308);
//   * the case-insensitive tokens "na", "nan", "null" (stored as NAN) and
//     "inf", "infinity" (stored as sign * 1e308, not as an IEEE infinity).
// Any other token is reported through Log::Fatal.
//
// `*out` is set to NAN up front, so it stays NAN when no token is found.
// Returns a pointer to the first character after the parsed token and any
// spaces that follow it.
inline static const char* Atof(const char* p, double* out) {
// frac: set to 1 when the exponent is negative, i.e. the value is divided by scale.
int frac;
double sign, value, scale;
*out = NAN;
// Skip leading spaces, if any (only ' ' is skipped here, not tabs).
while (*p == ' ') {
++p;
}
// Get sign, if any.
sign = 1.0;
if (*p == '-') {
sign = -1.0;
++p;
} else if (*p == '+') {
++p;
}

// Numeric branch: taken when the token starts with a digit, '.', 'e' or 'E'.
if ((*p >= '0' && *p <= '9') || *p == '.' || *p == 'e' || *p == 'E') {
// Get digits before decimal point or exponent, if any.
for (value = 0.0; *p >= '0' && *p <= '9'; ++p) {
value = value * 10.0 + (*p - '0');
}

// Get digits after decimal point, if any.
if (*p == '.') {
// The fraction digits are accumulated as an integer in `right`;
// `nn` counts how many digits were read.
double right = 0.0;
int nn = 0;
++p;
while (*p >= '0' && *p <= '9') {
right = (*p - '0') + right * 10.0;
++nn;
++p;
}
// Shift the accumulated digits behind the decimal point
// (Common::Pow(10.0, nn) is presumably 10^nn; defined outside this view).
value += right / Common::Pow(10.0, nn);
}

// Handle exponent, if any.
frac = 0;
scale = 1.0;
if ((*p == 'e') || (*p == 'E')) {
uint32_t expon;
// Get sign of exponent, if any.
++p;
if (*p == '-') {
frac = 1;
++p;
} else if (*p == '+') {
++p;
}
// Get digits of exponent, if any.
for (expon = 0; *p >= '0' && *p <= '9'; ++p) {
expon = expon * 10 + (*p - '0');
}
// Clamp the exponent magnitude to 308.
if (expon > 308) expon = 308;
// Calculate scaling factor: 10^expon built up in steps of 1e50, 1e8 and 10.
while (expon >= 50) { scale *= 1E50; expon -= 50; }
while (expon >= 8) { scale *= 1E8; expon -= 8; }
while (expon > 0) { scale *= 10.0; expon -= 1; }
}
// Return signed and scaled floating point result.
*out = sign * (frac ? (value / scale) : (value * scale));
} else {
// Non-numeric branch: measure the token up to the next NUL, space, tab,
// comma, newline, carriage return or colon.
size_t cnt = 0;
while (*(p + cnt) != '\0' && *(p + cnt) != ' '
&& *(p + cnt) != '\t' && *(p + cnt) != ','
&& *(p + cnt) != '\n' && *(p + cnt) != '\r'
&& *(p + cnt) != ':') {
++cnt;
}
if (cnt > 0) {
// Compare against the known keywords on a lower-cased copy of the token.
std::string tmp_str(p, cnt);
std::transform(tmp_str.begin(), tmp_str.end(), tmp_str.begin(), Common::tolower);
if (tmp_str == std::string("na") || tmp_str == std::string("nan") ||
tmp_str == std::string("null")) {
*out = NAN;
} else if (tmp_str == std::string("inf") || tmp_str == std::string("infinity")) {
// Infinity is stored as +/-1e308, a large finite value.
*out = sign * 1e308;
} else {
Log::Fatal("Unknown token %s in data file", tmp_str.c_str());
}
p += cnt;
}
}

// Skip trailing spaces so the caller resumes at the next token or delimiter.
while (*p == ' ') {
++p;
}

return p;
}

/*!
 * \brief Functor that parses one value of type T from the front of a C string
 *        and returns the position at which parsing stopped.
 *
 * Primary template (is_float == false): forwards to LightGBM::Common::Atoi.
 */
template<typename T, bool is_float>
struct __StringToTHelperFast {
  const char* operator()(const char*p, T* out) const {
    const char* next = LightGBM::Common::Atoi(p, out);
    return next;
  }
};

/*!
 * \brief Floating point variant: parses into a double with Atof and then
 *        converts the result to T.
 */
template<typename T>
struct __StringToTHelperFast<T, true> {
  const char* operator()(const char*p, T* out) const {
    double parsed = 0.0f;
    const char* next = Atof(p, &parsed);
    *out = static_cast<T>(parsed);
    return next;
  }
};

template<typename T, bool is_float>
struct __StringToTHelper {
T operator()(const std::string& str) const {
T ret = 0;
Atoi(str.c_str(), &ret);
return ret;
}
};

/*!
 * \brief Floating point variant: extracts a T from the string through a
 *        stream imbued with the "C" locale.
 *
 * \return The extracted value. The result is initialised to 0 because the
 *         extraction leaves its target untouched when the stream is already
 *         exhausted (e.g. an empty string), and returning an uninitialised
 *         value would be undefined behaviour.
 */
template<typename T>
struct __StringToTHelper<T, true> {
  T operator()(const std::string& str) const {
    std::stringstream ss;
    ss.imbue(std::locale("C"));
    ss << str;
    T tmp = 0;
    ss >> tmp;
    return tmp;
  }
};

/*!
 * \brief Functor that writes `value` as text into a caller supplied buffer.
 *
 * Primary template: forwards to LightGBM::Common::Int32ToStr.
 * The buffer length argument is not used by this variant.
 */
template<typename T, bool is_float, bool is_unsign>
struct __TToStringHelperFast {
  void operator()(T value, char* buffer, size_t /*buf_len*/) const {
    LightGBM::Common::Int32ToStr(value, buffer);
  }
};

/*!
 * \brief Floating point variant: formats with "%g" through
 *        sprintf_s (MSVC) or snprintf (elsewhere) and checks that the
 *        call did not report an error.
 */
template<typename T>
struct __TToStringHelperFast<T, true, false> {
  void operator()(T value, char* buffer, size_t buf_len) const {
    int num_chars;
#ifdef _MSC_VER
    num_chars = sprintf_s(buffer, buf_len, "%g", value);
#else
    num_chars = snprintf(buffer, buf_len, "%g", value);
#endif
    CHECK_GE(num_chars, 0);
  }
};

/*!
 * \brief Unsigned integer variant: forwards to LightGBM::Common::Uint32ToStr.
 *        The buffer length argument is not used by this variant.
 */
template<typename T>
struct __TToStringHelperFast<T, false, true> {
  void operator()(T value, char* buffer, size_t /*buf_len*/) const {
    LightGBM::Common::Uint32ToStr(value, buffer);
  }
};

/*!
 * \brief Format `value` with "%.17g" into `buffer`.
 *
 * Uses sprintf_s on MSVC and snprintf elsewhere, then checks that the
 * formatting call did not return a negative value.
 *
 * \param value Number to format.
 * \param buffer Destination character buffer.
 * \param buffer_len Capacity of `buffer` in bytes.
 */
inline static void DoubleToStr(double value, char* buffer, size_t buffer_len) {
  int num_chars;
#ifdef _MSC_VER
  num_chars = sprintf_s(buffer, buffer_len, "%.17g", value);
#else
  num_chars = snprintf(buffer, buffer_len, "%.17g", value);
#endif
  CHECK_GE(num_chars, 0);
}

template<typename T>
inline static std::vector<T> StringToArrayFast(const std::string& str, int n) {
if (n == 0) {
return std::vector<T>();
}
auto p_str = str.c_str();
__StringToTHelperFast<T, std::is_floating_point<T>::value> helper;
std::vector<T> ret(n);
for (int i = 0; i < n; ++i) {
p_str = helper(p_str, &ret[i]);
}
return ret;
}

template<typename T>
inline static std::vector<T> StringToArray(const std::string& str, char delimiter) {
std::vector<std::string> strs = LightGBM::Common::Split(str.c_str(), delimiter);
std::vector<T> ret;
ret.reserve(strs.size());
__StringToTHelper<T, std::is_floating_point<T>::value> helper;
for (const auto& s : strs) {
ret.push_back(helper(s));
}
return ret;
}

template<typename T>
inline static std::vector<T> StringToArray(const std::string& str, int n) {
if (n == 0) {
return std::vector<T>();
}
std::vector<std::string> strs = LightGBM::Common::Split(str.c_str(), ' ');
CHECK_EQ(strs.size(), static_cast<size_t>(n));
std::vector<T> ret;
ret.reserve(strs.size());
__StringToTHelper<T, std::is_floating_point<T>::value> helper;
for (const auto& s : strs) {
ret.push_back(helper(s));
}
return ret;
}

template<typename T>
inline static std::string ArrayToStringFast(const std::vector<T>& arr, size_t n) {
if (arr.empty() || n == 0) {
return std::string("");
}
__TToStringHelperFast<T, std::is_floating_point<T>::value, std::is_unsigned<T>::value> helper;
const size_t buf_len = 16;
std::vector<char> buffer(buf_len);
std::stringstream str_buf;
str_buf.imbue(std::locale("C"));
helper(arr[0], buffer.data(), buf_len);
str_buf << buffer.data();
for (size_t i = 1; i < std::min(n, arr.size()); ++i) {
helper(arr[i], buffer.data(), buf_len);
str_buf << ' ' << buffer.data();
}
return str_buf.str();
}

/*!
 * \brief Write the first min(n, arr.size()) doubles of `arr` as text,
 *        separated by single spaces.
 *
 * Each element is formatted by DoubleToStr into a 32 byte scratch buffer
 * that is then appended to the output.
 *
 * \return The joined text; empty when `arr` is empty or `n` is 0.
 */
inline static std::string ArrayToString(const std::vector<double>& arr, size_t n) {
  if (arr.empty() || n == 0) {
    return std::string("");
  }
  const size_t buf_len = 32;
  const size_t count = std::min(n, arr.size());
  std::vector<char> scratch(buf_len);
  std::stringstream out;
  out.imbue(std::locale("C"));
  for (size_t i = 0; i < count; ++i) {
    DoubleToStr(arr[i], scratch.data(), buf_len);
    if (i != 0) {
      out << ' ';
    }
    out << scratch.data();
  }
  return out.str();
}





#include <algorithm>

/*!
 * \brief Check that two vectors have the same size and equal elements.
 *
 * A size mismatch and a content mismatch are each reported through
 * Log::Fatal. Judging by the message text, `b` is the expected vector.
 *
 * \param a Vector under test.
 * \param b Vector to compare against.
 *
 * The vectors are taken by const reference so that a check does not copy
 * both of them.
 */
template <class T>
void cmp(const std::vector<T>& a, const std::vector<T>& b) {
  if (a.size() != b.size()) {
    // "%d" consumes an int; passing size_t through varargs is a type
    // mismatch, so convert explicitly.
    Log::Fatal("Different array sizes! %d (expected=%d)",
               static_cast<int>(a.size()), static_cast<int>(b.size()));
  }

  if (!std::equal(a.begin(), a.end(), b.begin())) {
    Log::Fatal("Different array contents!");
  }
}

} // Namespace Common2


} // namespace LightGBM

#endif // LIGHTGBM_UTILS_COMMON_FUN_H_
1 change: 1 addition & 0 deletions include/LightGBM/utils/text_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ class TextReader {
Log::Fatal("Could not open %s", filename);
}
std::stringstream str_buf;
// TODO: should str_buf be imbued with the "C" locale here, possibly controlled by a parameter?
char read_c;
size_t nread = reader->Read(&read_c, 1);
while (nread == 1) {
Expand Down
Loading

0 comments on commit 1b74a97

Please sign in to comment.