Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Gpugraph v2 #57

Closed
wants to merge 28 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
a26eaa5
sync gpugraph to gpugraph_v2 (#86)
xuewujiao Aug 18, 2022
5529e61
[GPUGraph] graph sample v2 (#87)
Thunderbrook Aug 22, 2022
6ef6d45
Release cpu graph
Aug 23, 2022
a087a6d
uniq nodeid (#89)
Thunderbrook Aug 23, 2022
1372dc6
compatible whole HBM mode (#91)
chao9527 Aug 25, 2022
1ccd5e0
Gpugraph v2 (#93)
chao9527 Aug 29, 2022
0e04d15
split generate batch into multi stage (#92)
miaoli06 Aug 29, 2022
6a8ea82
[GpuGraph] Uniq feature (#95)
Thunderbrook Aug 29, 2022
591978f
[GpuGraph] global startid (#98)
Thunderbrook Aug 31, 2022
58a3ae2
load node edge seperately and release graph (#99)
miaoli06 Sep 2, 2022
854959e
v2 infer (#102)
Thunderbrook Sep 6, 2022
2bc6bf7
optimize begin pass and end pass (#106)
chao9527 Sep 8, 2022
b8b0d26
fix ins no (#104)
Thunderbrook Sep 9, 2022
582d236
[GPUGraph] fix FillOneStep args (#107)
Thunderbrook Sep 9, 2022
cc71f56
fix bug for whole hbm mode (#110)
chao9527 Sep 13, 2022
1462c54
[GPUGraph] fix infer && add infer_table_cap (#108)
Thunderbrook Sep 14, 2022
aed3925
【PSCORE】perform ssd sparse table (#111)
danleifeng Sep 15, 2022
2d61dde
fix sample core (#114)
Thunderbrook Sep 15, 2022
eef65cd
[GpuGraph] optimize shuffle batch (#115)
Thunderbrook Sep 16, 2022
057885a
release gpu mem when sample end (#116)
miaoli06 Sep 16, 2022
7d65319
fix class not found err (#118)
miaoli06 Sep 16, 2022
04110a5
optimize sample (#117)
chao9527 Sep 16, 2022
357a451
fix clear gpu mem (#119)
miaoli06 Sep 17, 2022
9f9c0ab
fix sample core (#121)
chao9527 Sep 20, 2022
bdd64ef
add ssd cache (#123)
danleifeng Sep 27, 2022
c6a07b2
add multi epoch train & fix train table change ins & save infer embed…
miaoli06 Sep 28, 2022
4380355
Add debug log (#131)
lxsbupt Oct 9, 2022
baf678f
optimize mem in uniq slot feature (#130)
Thunderbrook Oct 10, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions cmake/external/jemalloc.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Downloads, configures, builds and installs jemalloc 5.1.0 as an external
# project, then exposes the resulting static archive as the imported target
# `jemalloc`.
include(ExternalProject)

# Name of the external project and the release tarball it is fetched from.
set(JEMALLOC_PROJECT "extern_jemalloc")
set(JEMALLOC_URL
https://github.com/jemalloc/jemalloc/releases/download/5.1.0/jemalloc-5.1.0.tar.bz2
)
# Directory layout, all rooted at THIRD_PARTY_PATH.
# NOTE(review): JEMALLOC_BUILD is set here but not referenced again in this
# file — confirm whether another file consumes it.
set(JEMALLOC_BUILD ${THIRD_PARTY_PATH}/jemalloc/src/extern_jemalloc)
set(JEMALLOC_SOURCE_DIR "${THIRD_PARTY_PATH}/jemalloc")
set(JEMALLOC_INSTALL ${THIRD_PARTY_PATH}/install/jemalloc)
set(JEMALLOC_INCLUDE_DIR ${JEMALLOC_INSTALL}/include)
set(JEMALLOC_DOWNLOAD_DIR "${JEMALLOC_SOURCE_DIR}/src/${JEMALLOC_PROJECT}")

# Both variables point at the same archive (libjemalloc_pic.a) under the
# install prefix.
set(JEMALLOC_STATIC_LIBRARIES
${THIRD_PARTY_PATH}/install/jemalloc/lib/libjemalloc_pic.a)
set(JEMALLOC_LIBRARIES
${THIRD_PARTY_PATH}/install/jemalloc/lib/libjemalloc_pic.a)

# Runs jemalloc's own configure script with the install prefix and
# --disable-initial-exec-tls, then builds and installs in the source tree.
# NOTE(review): $(MAKE) is make-variable syntax, so this presumably only works
# with Makefile generators — confirm before using another generator.
ExternalProject_Add(
extern_jemalloc
PREFIX ${JEMALLOC_SOURCE_DIR}
URL ${JEMALLOC_URL}
INSTALL_DIR ${JEMALLOC_INSTALL}
DOWNLOAD_DIR "${JEMALLOC_DOWNLOAD_DIR}"
BUILD_COMMAND $(MAKE)
BUILD_IN_SOURCE 1
INSTALL_COMMAND $(MAKE) install
CONFIGURE_COMMAND "${JEMALLOC_DOWNLOAD_DIR}/configure"
--prefix=${JEMALLOC_INSTALL} --disable-initial-exec-tls)

# Imported static library target whose location is the installed archive.
add_library(jemalloc STATIC IMPORTED GLOBAL)
set_property(TARGET jemalloc PROPERTY IMPORTED_LOCATION
${JEMALLOC_STATIC_LIBRARIES})

# Add the jemalloc headers to the include path and make the imported target
# depend on the external build step.
include_directories(${JEMALLOC_INCLUDE_DIR})
add_dependencies(jemalloc extern_jemalloc)
34 changes: 30 additions & 4 deletions cmake/external/rocksdb.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,13 @@

include(ExternalProject)

# find_package(jemalloc REQUIRED)

set(JEMALLOC_INCLUDE_DIR ${THIRD_PARTY_PATH}/install/jemalloc/include)
set(JEMALLOC_LIBRARIES
${THIRD_PARTY_PATH}/install/jemalloc/lib/libjemalloc_pic.a)
message(STATUS "rocksdb jemalloc:" ${JEMALLOC_LIBRARIES})

set(ROCKSDB_PREFIX_DIR ${THIRD_PARTY_PATH}/rocksdb)
set(ROCKSDB_INSTALL_DIR ${THIRD_PARTY_PATH}/install/rocksdb)
set(ROCKSDB_INCLUDE_DIR
Expand All @@ -22,22 +29,41 @@ set(ROCKSDB_INCLUDE_DIR
set(ROCKSDB_LIBRARIES
"${ROCKSDB_INSTALL_DIR}/lib/librocksdb.a"
CACHE FILEPATH "rocksdb library." FORCE)
set(ROCKSDB_CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC")
set(ROCKSDB_COMMON_FLAGS
"-g -pipe -O2 -W -Wall -Wno-unused-parameter -fPIC -fno-builtin-memcmp -fno-omit-frame-pointer"
)
set(ROCKSDB_FLAGS
"-DNDEBUG -DROCKSDB_JEMALLOC -DJEMALLOC_NO_DEMANGLE -DROCKSDB_PLATFORM_POSIX -DROCKSDB_LIB_IO_POSIX -DOS_LINUX -DROCKSDB_FALLOCATE_PRESENT -DHAVE_SSE42 -DHAVE_PCLMUL -DZLIB -DROCKSDB_MALLOC_USABLE_SIZE -DROCKSDB_PTHREAD_ADAPTIVE_MUTEX -DROCKSDB_BACKTRACE -DROCKSDB_SUPPORT_THREAD_LOCAL -DROCKSDB_USE_RTTI -DROCKSDB_SCHED_GETCPU_PRESENT -DROCKSDB_RANGESYNC_PRESENT -DROCKSDB_AUXV_GETAUXVAL_PRESENT"
)
set(ROCKSDB_CMAKE_CXX_FLAGS
"${ROCKSDB_COMMON_FLAGS} -DROCKSDB_LIBAIO_PRESENT -msse -msse4.2 -mpclmul ${ROCKSDB_FLAGS} -fPIC -I${JEMALLOC_INCLUDE_DIR}"
)
set(ROCKSDB_CMAKE_C_FLAGS
"${ROCKSDB_COMMON_FLAGS} ${ROCKSDB_FLAGS} -DROCKSDB_LIBAIO_PRESENT -fPIC -I${JEMALLOC_INCLUDE_DIR}"
)
include_directories(${ROCKSDB_INCLUDE_DIR})

set(CMAKE_CXX_LINK_EXECUTABLE
"${CMAKE_CXX_LINK_EXECUTABLE} -pthread -ldl -lrt -lz")
ExternalProject_Add(
extern_rocksdb
${EXTERNAL_PROJECT_LOG_ARGS}
PREFIX ${ROCKSDB_PREFIX_DIR}
GIT_REPOSITORY "https://github.com/facebook/rocksdb"
GIT_TAG v6.10.1
GIT_REPOSITORY "https://github.com/Thunderbrook/rocksdb"
GIT_TAG 6.19.fb
UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
-DWITH_BZ2=OFF
-DWITH_GFLAGS=OFF
-DWITH_TESTS=OFF
-DWITH_JEMALLOC=ON
-DWITH_BENCHMARK_TOOLS=OFF
-DJeMalloc_LIBRARIES=${JEMALLOC_LIBRARIES}
-DJeMalloc_INCLUDE_DIRS=${JEMALLOC_INCLUDE_DIR}
-DCMAKE_CXX_FLAGS=${ROCKSDB_CMAKE_CXX_FLAGS}
-DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
-DCMAKE_C_FLAGS=${ROCKSDB_CMAKE_C_FLAGS}
-DCMAKE_CXX_LINK_EXECUTABLE=${CMAKE_CXX_LINK_EXECUTABLE}
# BUILD_BYPRODUCTS ${ROCKSDB_PREFIX_DIR}/src/extern_rocksdb/librocksdb.a
INSTALL_COMMAND
mkdir -p ${ROCKSDB_INSTALL_DIR}/lib/ && cp
Expand Down
3 changes: 3 additions & 0 deletions cmake/third_party.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -422,6 +422,9 @@ if(WITH_PSCORE)

include(external/rocksdb) # download, build, install rocksdb
list(APPEND third_party_deps extern_rocksdb)

include(external/jemalloc) # download, build, install jemalloc
list(APPEND third_party_deps extern_jemalloc)
endif()

if(WITH_XBYAK)
Expand Down
15 changes: 13 additions & 2 deletions paddle/fluid/distributed/ps/service/ps_client.h
Original file line number Diff line number Diff line change
Expand Up @@ -148,10 +148,12 @@ class PSClient {
return fut;
}

virtual ::std::future<int32_t> PullSparsePtr(char **select_values,
virtual ::std::future<int32_t> PullSparsePtr(int shard_id,
char **select_values,
size_t table_id,
const uint64_t *keys,
size_t num) {
size_t num,
uint16_t pass_id) {
VLOG(0) << "Did not implement";
std::promise<int32_t> promise;
std::future<int> fut = promise.get_future();
Expand All @@ -160,6 +162,15 @@ class PSClient {
}

virtual std::future<int32_t> PrintTableStat(uint32_t table_id) = 0;
// Default SaveCacheTable: logs that the operation is not implemented and
// returns a future that is already fulfilled with -1. All three arguments are
// ignored here; subclasses may override.
virtual std::future<int32_t> SaveCacheTable(uint32_t table_id,
                                            uint16_t pass_id,
                                            size_t threshold) {
  VLOG(0) << "Did not implement";
  std::promise<int32_t> unimplemented;
  // Fulfil the promise up front so the returned future is immediately ready.
  unimplemented.set_value(-1);
  return unimplemented.get_future();
}

// Ensure that every request still being accumulated is actually sent.
virtual std::future<int32_t> Flush() = 0;
Expand Down
30 changes: 28 additions & 2 deletions paddle/fluid/distributed/ps/service/ps_local_client.cc
Original file line number Diff line number Diff line change
Expand Up @@ -260,10 +260,12 @@ ::std::future<int32_t> PsLocalClient::PushDense(const Region* regions,
// return done();
//}

::std::future<int32_t> PsLocalClient::PullSparsePtr(char** select_values,
::std::future<int32_t> PsLocalClient::PullSparsePtr(int shard_id,
char** select_values,
size_t table_id,
const uint64_t* keys,
size_t num) {
size_t num,
uint16_t pass_id) {
// FIXME
// auto timer =
// std::make_shared<CostTimer>("pslib_downpour_client_pull_sparse");
Expand All @@ -278,13 +280,37 @@ ::std::future<int32_t> PsLocalClient::PullSparsePtr(char** select_values,
table_context.pull_context.ptr_values = select_values;
table_context.use_ptr = true;
table_context.num = num;
table_context.shard_id = shard_id;
table_context.pass_id = pass_id;

// table_ptr->PullSparsePtr(select_values, keys, num);
table_ptr->Pull(table_context);

return done();
}

::std::future<int32_t> PsLocalClient::PrintTableStat(uint32_t table_id) {
auto* table_ptr = GetTable(table_id);
std::pair<int64_t, int64_t> ret = table_ptr->PrintTableStat();
VLOG(0) << "table id: " << table_id << ", feasign size: " << ret.first
<< ", mf size: " << ret.second;
return done();
}

// Logs the table's statistics and, when the feasign count is strictly greater
// than `threshold`, asks the table to cache itself for `pass_id`.
//
// table_id:  id passed to GetTable() to look up the table.
// pass_id:   forwarded unchanged to the table's CacheTable().
// threshold: feasign-count limit above which caching is triggered.
// Returns whatever done() yields.
::std::future<int32_t> PsLocalClient::SaveCacheTable(uint32_t table_id,
                                                     uint16_t pass_id,
                                                     size_t threshold) {
  auto* table_ptr = GetTable(table_id);
  std::pair<int64_t, int64_t> ret = table_ptr->PrintTableStat();
  VLOG(0) << "table id: " << table_id << ", feasign size: " << ret.first
          << ", mf size: " << ret.second;
  // ret.first is signed while threshold is unsigned. Comparing them directly
  // converts a negative count into a huge unsigned value, which would trigger
  // caching spuriously. Reject negatives first, then compare as unsigned.
  const bool over_threshold =
      ret.first >= 0 && static_cast<uint64_t>(ret.first) > threshold;
  if (over_threshold) {
    VLOG(0) << "run cache table";
    table_ptr->CacheTable(pass_id);
  }
  return done();
}

::std::future<int32_t> PsLocalClient::PushSparseRawGradient(
size_t table_id,
const uint64_t* keys,
Expand Down
17 changes: 9 additions & 8 deletions paddle/fluid/distributed/ps/service/ps_local_client.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,18 +76,19 @@ class PsLocalClient : public PSClient {
return fut;
}

virtual ::std::future<int32_t> PullSparsePtr(char** select_values,
virtual ::std::future<int32_t> PullSparsePtr(int shard_id,
char** select_values,
size_t table_id,
const uint64_t* keys,
size_t num);
size_t num,
uint16_t pass_id);

virtual ::std::future<int32_t> PrintTableStat(uint32_t table_id) {
std::promise<int32_t> prom;
std::future<int32_t> fut = prom.get_future();
prom.set_value(0);
virtual ::std::future<int32_t> PrintTableStat(uint32_t table_id);

virtual ::std::future<int32_t> SaveCacheTable(uint32_t table_id,
uint16_t pass_id,
size_t threshold);

return fut;
}
virtual ::std::future<int32_t> PushSparse(size_t table_id,
const uint64_t* keys,
const float** update_values,
Expand Down
9 changes: 9 additions & 0 deletions paddle/fluid/distributed/ps/table/accessor.h
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,15 @@ class ValueAccessor {
return 0;
}

// Base-class default for SaveMemCache: ignores every argument and returns
// true. Declared virtual so subclasses may override.
// NOTE(review): presumably the result decides whether `value` is kept in the
// memory cache for `pass_id` — confirm against the overriding accessors.
virtual bool SaveMemCache(float* value,
int param,
double global_cache_threshold,
uint16_t pass_id) {
return true;
}

// Base-class default for UpdatePassId: does nothing with `value` or `pass_id`.
// Declared virtual so subclasses may override.
virtual void UpdatePassId(float* value, uint16_t pass_id) {}

virtual float GetField(float* value, const std::string& name) { return 0.0; }
#define DEFINE_GET_INDEX(class, field) \
virtual int get_##field##_index() override { return class ::field##_index(); }
Expand Down
Loading