Skip to content
This repository has been archived by the owner on Jun 23, 2022. It is now read-only.

feat: optimize tcmalloc release memory #343

Merged
merged 10 commits into from
Nov 29, 2019
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 13 additions & 5 deletions src/dist/replication/common/replication_common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,8 @@ replication_options::replication_options()
config_sync_interval_ms = 30000;

mem_release_enabled = true;
mem_release_interval_ms = 86400000;
mem_release_check_interval_ms = 3600000;
mem_release_tcmalloc_max_reserved_memory_percentage = 10;
hycdong marked this conversation as resolved.
Show resolved Hide resolved

lb_interval_ms = 10000;

Expand Down Expand Up @@ -479,11 +480,18 @@ void replication_options::initialize()
mem_release_enabled,
"whether to enable periodic memory release");

mem_release_interval_ms = (int)dsn_config_get_value_uint64(
mem_release_check_interval_ms = (int)dsn_config_get_value_uint64(
"replication",
"mem_release_interval_ms",
mem_release_interval_ms,
"the replica releases its idle memory to the system every this period of time(ms)");
"mem_release_check_interval_ms",
mem_release_check_interval_ms,
"the replica check if should release memory to the system every this period of time(ms)");

mem_release_tcmalloc_max_reserved_memory_percentage = (int)dsn_config_get_value_uint64(
"replication",
"mem_release_tcmalloc_max_reserved_memory_percentage",
mem_release_tcmalloc_max_reserved_memory_percentage,
"if tcmalloc reserved but not-used memory exceed this percentage of application allocated "
"memory, replica server will release the exceeding memory back to operating system");

lb_interval_ms = (int)dsn_config_get_value_uint64(
"replication",
Expand Down
3 changes: 2 additions & 1 deletion src/dist/replication/common/replication_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,8 @@ class replication_options
int32_t config_sync_interval_ms;

bool mem_release_enabled;
int32_t mem_release_interval_ms;
int32_t mem_release_check_interval_ms;
int32_t mem_release_tcmalloc_max_reserved_memory_percentage;

int32_t lb_interval_ms;

Expand Down
87 changes: 76 additions & 11 deletions src/dist/replication/lib/replica_stub.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,11 +68,13 @@ replica_stub::replica_stub(replica_state_subscriber subscriber /*= nullptr*/,
_query_compact_command(nullptr),
_query_app_envs_command(nullptr),
_useless_dir_reserve_seconds_command(nullptr),
_mem_release_max_reserved_percentage_command(nullptr),
_deny_client(false),
_verbose_client_log(false),
_verbose_commit_log(false),
_gc_disk_error_replica_interval_seconds(3600),
_gc_disk_garbage_replica_interval_seconds(3600),
_mem_release_tcmalloc_max_reserved_memory_percentage(10),
_learn_app_concurrent_count(0),
_fs_manager(false)
{
Expand Down Expand Up @@ -317,6 +319,8 @@ void replica_stub::initialize(const replication_options &opts, bool clear /* = f
_verbose_commit_log = _options.verbose_commit_log_on_start;
_gc_disk_error_replica_interval_seconds = _options.gc_disk_error_replica_interval_seconds;
_gc_disk_garbage_replica_interval_seconds = _options.gc_disk_garbage_replica_interval_seconds;
_mem_release_tcmalloc_max_reserved_memory_percentage =
_options.mem_release_tcmalloc_max_reserved_memory_percentage;

// clear dirs if need
if (clear) {
Expand Down Expand Up @@ -638,17 +642,13 @@ void replica_stub::initialize_start()

#ifdef DSN_ENABLE_GPERF
if (_options.mem_release_enabled) {
_mem_release_timer_task =
tasking::enqueue_timer(LPC_MEM_RELEASE,
&_tracker,
[]() {
ddebug("Memory release has started...");
::MallocExtension::instance()->ReleaseFreeMemory();
ddebug("Memory release has ended...");
},
std::chrono::milliseconds(_options.mem_release_interval_ms),
0,
std::chrono::milliseconds(_options.mem_release_interval_ms));
_mem_release_timer_task = tasking::enqueue_timer(
LPC_MEM_RELEASE,
&_tracker,
std::bind(&replica_stub::gc_tcmalloc_memory, this),
std::chrono::milliseconds(_options.mem_release_check_interval_ms),
0,
std::chrono::milliseconds(_options.mem_release_check_interval_ms));
}
#endif

Expand Down Expand Up @@ -2053,6 +2053,33 @@ void replica_stub::open_service()
}
return result;
});

_mem_release_max_reserved_percentage_command =
dsn::command_manager::instance().register_app_command(
{"mem-release-max-reserved-percentage"},
"mem-release-max-reserved-percentage [num | DEFAULT]",
"control tcmalloc max reserved but not-used memory percentage",
[this](const std::vector<std::string> &args) {
std::string result("OK");
if (args.empty()) {
hycdong marked this conversation as resolved.
Show resolved Hide resolved
result = "mem-release-max-reserved-percentage=" +
std::to_string(_mem_release_tcmalloc_max_reserved_memory_percentage);
} else {
if (args[0] == "DEFAULT") {
_mem_release_tcmalloc_max_reserved_memory_percentage =
_options.mem_release_tcmalloc_max_reserved_memory_percentage;
} else {
int32_t percentage = 0;
if (!dsn::buf2int32(args[0], percentage) || percentage <= 0 ||
percentage >= 100) {
result = std::string("ERR: invalid arguments");
} else {
_mem_release_tcmalloc_max_reserved_memory_percentage = percentage;
}
}
}
return result;
});
}

std::string
Expand Down Expand Up @@ -2178,6 +2205,8 @@ void replica_stub::close()
dsn::command_manager::instance().deregister_command(_query_compact_command);
dsn::command_manager::instance().deregister_command(_query_app_envs_command);
dsn::command_manager::instance().deregister_command(_useless_dir_reserve_seconds_command);
dsn::command_manager::instance().deregister_command(
_mem_release_max_reserved_percentage_command);

_kill_partition_command = nullptr;
_deny_client_command = nullptr;
Expand All @@ -2187,6 +2216,7 @@ void replica_stub::close()
_query_compact_command = nullptr;
_query_app_envs_command = nullptr;
_useless_dir_reserve_seconds_command = nullptr;
_mem_release_max_reserved_percentage_command = nullptr;

if (_config_sync_timer_task != nullptr) {
_config_sync_timer_task->cancel(true);
Expand Down Expand Up @@ -2304,6 +2334,41 @@ replica_stub::get_child_dir(const char *app_type, gpid child_pid, const std::str
return child_dir;
}

#ifdef DSN_ENABLE_GPERF
// Reads the tcmalloc numeric property whose name is `prop` through MallocExtension.
// On success the property value is returned; if the lookup fails, an error is logged
// and -1 is returned so callers can tell the value is unavailable.
int64_t replica_stub::get_tcmalloc_numeric_property(const char *prop)
{
    size_t prop_value = 0;
    const bool found = ::MallocExtension::instance()->GetNumericProperty(prop, &prop_value);
    if (found) {
        return static_cast<int64_t>(prop_value);
    }

    derror_f("Failed to get tcmalloc property {}", prop);
    return -1;
}

// Checks how many free bytes tcmalloc currently holds in its page heap and, when that
// amount exceeds `_mem_release_tcmalloc_max_reserved_memory_percentage` percent of the
// bytes allocated by the application, hands the excess back to the operating system.
// Does nothing if either tcmalloc property cannot be read.
void replica_stub::gc_tcmalloc_memory()
{
    int64_t total_allocated_bytes =
        get_tcmalloc_numeric_property("generic.current_allocated_bytes");
    int64_t reserved_bytes = get_tcmalloc_numeric_property("tcmalloc.pageheap_free_bytes");
    // -1 signals that the property lookup failed; skip this round.
    if (total_allocated_bytes == -1 || reserved_bytes == -1) {
        return;
    }

    // Pure integer arithmetic: avoids the round trip through double and the implicit
    // double -> int64_t narrowing of the result.
    const int64_t max_reserved_bytes =
        total_allocated_bytes * _mem_release_tcmalloc_max_reserved_memory_percentage / 100;
    if (reserved_bytes <= max_reserved_bytes) {
        return;
    }

    int64_t release_bytes = reserved_bytes - max_reserved_bytes;
    ddebug_f("Memory release started, almost {} bytes will be released", release_bytes);

    // tcmalloc releasing memory will lock page heap, release at most 1MB at a time to avoid
    // locking page heap for long time
    const int64_t max_step_bytes = 1024 * 1024;
    while (release_bytes > 0) {
        // Clamp the last chunk so that no more than the computed excess is requested.
        const int64_t step_bytes = release_bytes < max_step_bytes ? release_bytes : max_step_bytes;
        ::MallocExtension::instance()->ReleaseToSystem(static_cast<size_t>(step_bytes));
        release_bytes -= step_bytes;
    }
}
#endif

//
// partition split
//
Expand Down
12 changes: 12 additions & 0 deletions src/dist/replication/lib/replica_stub.h
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,16 @@ class replica_stub : public serverlet<replica_stub>, public ref_counter
partition_status::type status,
error_code error);

#ifdef DSN_ENABLE_GPERF
// Get the value of the tcmalloc numeric property named "prop".
// Returns -1 if the property cannot be read (the properties we query are never negative).
// Property names can be found in 'gperftools/malloc_extension.h'.
int64_t get_tcmalloc_numeric_property(const char *prop);
hycdong marked this conversation as resolved.
Show resolved Hide resolved

// Try to release tcmalloc memory back to operating system
void gc_tcmalloc_memory();
#endif

private:
friend class ::dsn::replication::replication_checker;
friend class ::dsn::replication::test::test_checker;
Expand Down Expand Up @@ -305,12 +315,14 @@ class replica_stub : public serverlet<replica_stub>, public ref_counter
dsn_handle_t _query_compact_command;
dsn_handle_t _query_app_envs_command;
dsn_handle_t _useless_dir_reserve_seconds_command;
dsn_handle_t _mem_release_max_reserved_percentage_command;

bool _deny_client;
bool _verbose_client_log;
bool _verbose_commit_log;
int32_t _gc_disk_error_replica_interval_seconds;
int32_t _gc_disk_garbage_replica_interval_seconds;
int32_t _mem_release_tcmalloc_max_reserved_memory_percentage;

// we limit the max concurrent count of LT_APP, because the nfs service implementation is
// too simple: it does not support priority.
Expand Down