Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Track memory of RAM filesystem #352

Open
wants to merge 12 commits into
base: master
Choose a base branch
from
4 changes: 4 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,10 @@ configure_file("${CMAKE_CURRENT_SOURCE_DIR}/Make.helper.cmake"
configure_file("${CMAKE_CURRENT_SOURCE_DIR}/lib/structure_tree.cpp.cmake"
"${CMAKE_CURRENT_SOURCE_DIR}/lib/structure_tree.cpp" @ONLY)

# Generate lib/memory_management.cpp from its .cmake template. The output is
# written next to the template inside the source directory; @ONLY limits
# substitution to @VAR@ references so ${...} in the C++ text is left untouched.
configure_file("${CMAKE_CURRENT_SOURCE_DIR}/lib/memory_management.cpp.cmake"
"${CMAKE_CURRENT_SOURCE_DIR}/lib/memory_management.cpp" @ONLY)


find_package(Git)
if(GIT_FOUND)
message("git found: ${GIT_EXECUTABLE}")
Expand Down
40 changes: 34 additions & 6 deletions examples/int-vector-mapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,20 @@

using namespace sdsl;
using namespace std;


int main(int argc, char* argv[])
{
std::cout << "main() " << std::endl;
if (argc < 1) {
cout << "Usage: " << argv[0] << endl;
cout << "(1) Writes an int_vector sequentially to a file" << endl;
cout << "(2) Streams the content from file" << endl;
cout << "(3) Remove the file" << endl;
return 1;
}
string tmp_file = "tmp_file.sdsl";
std::cout << "A " << std::endl;
string tmp_file = "@tmp_file.sdsl";
size_t size = 10000000;
std::mt19937_64 rng(13);
uint8_t width = 0;
Expand All @@ -35,10 +38,13 @@ int main(int argc, char* argv[])
width = iv.width();
store_to_file(iv,tmp_file);
}
std::cout << "store to file " << tmp_file << std::endl;

// (2) open readonly! memory map the content of tmp_file
{
std::cout << "start mapper " << tmp_file << std::endl;
const int_vector_mapper<0,std::ios_base::in> ivm(tmp_file);
std::cout << "stop mapper " << tmp_file << std::endl;
if (ivm.size() != size) {
std::cerr << "ERROR: ivm.size()="<< ivm.size() << " != " << size << std::endl;
return 1;
Expand Down Expand Up @@ -103,16 +109,38 @@ int main(int argc, char* argv[])
sdsl::remove(tmp_file);
}

int_vector<> v(1000000,0,16);
for(size_t i=0; i<v.size(); ++i){
v[i] = i;
}

{
auto tmp_buf = temp_file_buffer<64>::create();
for (const auto& val : stdv) {
tmp_buf.push_back(val);
auto tmp_buf = write_out_mapper<0>::create("@test",v.size(),v.width());
std::cout<<"tmp_buf.size()="<<tmp_buf.size()<<" v.size()="<<v.size()<<std::endl;
std::cout<<"tmp_buf.width()="<<(size_t)tmp_buf.width()<<" v.width()="<<(size_t)v.width()<<std::endl;
for (size_t i=0; i<v.size(); ++i) {
tmp_buf[i] = v[i];
if ( tmp_buf[i] != v[i] ) {
std::cout<<"i="<<i<<" tmp_buf[i]="<<tmp_buf[i]<<" != "<<v[i]<<std::endl;
break;
}
}
if (tmp_buf != stdv) {
if (tmp_buf != v) {
std::cerr << "ERROR: tmp_buf CMP failed." << std::endl;
}
}

// tmp buf file is deleted automatically
{
std::cout<<"file_size="<<ram_fs::file_size("@test")<<std::endl;
int_vector<> vv;
load_from_file(vv, "@test");
std::cout<<"v.size()="<<v.size()<<" ? "<<vv.size()<<std::endl;
for(size_t i=0; i<v.size(); ++i){
if (v[i] != vv[i]) {
std::cout<<"i="<<i<<"v[i]="<<v[i]<<" != "<<vv[i]<<std::endl;
break;
}
}
}

return 0;
Expand Down
64 changes: 64 additions & 0 deletions examples/mem-vis.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
#include <sdsl/suffix_trees.hpp>
#include <iostream>

// Bring the sdsl and std names used in main() into scope.
using namespace sdsl;
using namespace std;

// Timing helpers: duration_cast and seconds come from std::chrono, and `timer`
// is the clock sampled before and after index construction.
using namespace std::chrono;
using timer = std::chrono::high_resolution_clock;

/// Example driver: constructs a csa_sada<> for the file named by argv[1]
/// between memory_monitor::start() and memory_monitor::stop(), prints the
/// construction time in seconds, and writes the memory log as HTML.
///
/// @param argc  Number of command-line arguments; fewer than 2 prints usage.
/// @param argv  argv[1] is the path of the input file.
/// @return 1 when the input file argument is missing, otherwise 0 (implicit).
int main(int argc, char** argv)
{
    if (argc < 2) {
        cout << "Usage: " << argv[0] << " file" << endl;
        // NOTE(review): the text mentions a CST, but only a CSA is built below.
        cout << " Creates a CST and CSA for a byte file and visualizes the memory utilization during construction." << endl;
        return 1;
    }

    // Variant 1 (currently switched off): construct directly from the file.
    if (0) {
        const char* html_file = "csa-construction_file.html";
        memory_monitor::start();

        csa_sada<> csa;
        auto start = timer::now();
        construct(csa, argv[1], 1);
        auto stop = timer::now();
        cout << "construction csa time in seconds: " << duration_cast<seconds>(stop-start).count() << endl;

        memory_monitor::stop();
        std::ofstream csaofs(html_file);
        // Report the file that is actually opened, so the message cannot
        // drift away from the real output name.
        cout << "writing memory usage visualization to " << html_file << "\n";
        memory_monitor::write_memory_log<HTML_FORMAT>(csaofs);
        csaofs.close();
    }

    // Variant 2: the input is first read into memory, before monitoring starts.
    {
        const char* html_file = "csa-construction_im.html";

        // Read the whole file into `text`.
        // NOTE(review): `text` is only consumed by the commented-out
        // construct_im() call below; the active path re-reads argv[1].
        std::string text;
        {
            std::ostringstream ss;
            std::ifstream ifs(argv[1]);
            ss << ifs.rdbuf();
            text = ss.str();
        }

        memory_monitor::start();

        csa_sada<> csa;
        auto start = timer::now();
        /*
        {
        construct_im(csa, std::move(text), 1);
        }
        */
        construct(csa, argv[1], 1);
        auto stop = timer::now();
        cout << "construction csa time in seconds: " << duration_cast<seconds>(stop-start).count() << endl;

        memory_monitor::stop();
        std::ofstream csaofs(html_file);
        // Report the file that is actually opened (see variant 1).
        cout << "writing memory usage visualization to " << html_file << "\n";
        memory_monitor::write_memory_log<HTML_FORMAT>(csaofs);
        csaofs.close();
    }

}
7 changes: 7 additions & 0 deletions include/sdsl/config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,12 @@
#include <map>
#include <string>

// SDSL_UNUSED: expands to the GCC-style __attribute__ ((unused)) annotation
// unless MSVC_COMPILER is defined, in which case it expands to nothing.
#ifndef MSVC_COMPILER
#define SDSL_UNUSED __attribute__ ((unused))
#else
#define SDSL_UNUSED
#endif

namespace sdsl
{
namespace conf // namespace for library constant
Expand Down Expand Up @@ -35,6 +41,7 @@ enum byte_sa_algo_type {LIBDIVSUFSORT, SE_SAIS};
//! Helper class for construction process
struct cache_config {
bool delete_files; // Flag which indicates if all files which were created
// during construction should be deleted.
bool delete_data; // Flag which indicates if the original data can be deleted
std::string dir; // Directory for temporary files.
std::string id; // Identifier is part of temporary file names. If
Expand Down
34 changes: 26 additions & 8 deletions include/sdsl/construct.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,14 @@
#ifndef INCLUDED_SDSL_CONSTRUCT
#define INCLUDED_SDSL_CONSTRUCT

#include "int_vector_mapper.hpp"
#include "sdsl_concepts.hpp"
#include "int_vector.hpp"
#include "construct_lcp.hpp"
#include "construct_bwt.hpp"
#include "construct_sa.hpp"
#include <string>
#include <type_traits>

namespace sdsl
{
Expand All @@ -53,22 +55,23 @@ void append_zero_symbol(int_vector& text)


template<class t_index>
void construct(t_index& idx, std::string file, uint8_t num_bytes=0)
void construct(t_index& idx, std::string file, uint8_t num_bytes=0, bool move_input=false)
{
tMSS file_map;
cache_config config;
if (is_ram_file(file)) {
config.dir = "@";
config.delete_data = move_input;
}
construct(idx, file, config, num_bytes);
}

template<class t_index, class t_data>
void construct_im(t_index& idx, t_data data, uint8_t num_bytes=0)
void construct_im(t_index& idx, t_data&& data, uint8_t num_bytes=0)
{
std::string tmp_file = ram_file_name(util::to_string(util::pid())+"_"+util::to_string(util::id()));
store_to_file(data, tmp_file);
construct(idx, tmp_file, num_bytes);
construct(idx, tmp_file, num_bytes, std::is_rvalue_reference<t_data&&>::value);
ram_fs::remove(tmp_file);
}

Expand Down Expand Up @@ -121,22 +124,37 @@ template<class t_index>
void construct(t_index& idx, const std::string& file, cache_config& config, uint8_t num_bytes, csa_tag)
{
auto event = memory_monitor::event("construct CSA");
const char* KEY_TEXT = key_text_trait<t_index::alphabet_category::WIDTH>::KEY_TEXT;
const char* KEY_BWT = key_bwt_trait<t_index::alphabet_category::WIDTH>::KEY_BWT;
typedef int_vector<t_index::alphabet_category::WIDTH> text_type;
constexpr auto width = t_index::alphabet_category::WIDTH;
const char* KEY_TEXT = key_text_trait<width>::KEY_TEXT;
const char* KEY_BWT = key_bwt_trait<width>::KEY_BWT;
typedef int_vector<width> text_type;
{
auto event = memory_monitor::event("parse input text");
// (1) check, if the text is cached
if (!cache_file_exists(KEY_TEXT, config)) {
text_type text;
load_vector_from_file(text, file, num_bytes);
if (contains_no_zero_symbol(text, file)) {
append_zero_symbol(text);
store_to_cache(text,KEY_TEXT, config);
if ( !is_ram_file(file) ) {
append_zero_symbol(text);
store_to_cache(text, KEY_TEXT, config);
} else {
auto text_mapper = write_out_mapper<width>::create(
cache_file_name(KEY_TEXT, config),
text.size()+1,
text.width()
);
std::copy(text.begin(), text.end(), text_mapper.begin());
text_mapper[text.size()] = 0;
}
}
}
register_cache_file(KEY_TEXT, config);
}
if ( config.delete_data )
{
sdsl::remove(file);
}
{
// (2) check, if the suffix array is cached
auto event = memory_monitor::event("SA");
Expand Down
24 changes: 14 additions & 10 deletions include/sdsl/construct_bwt.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#define INCLUDED_SDSL_CONSTRUCT_BWT

#include "int_vector.hpp"
#include "int_vector_mapper.hpp"
#include "sfstream.hpp"
#include "util.hpp"
#include "config.hpp" // for cache_config
Expand Down Expand Up @@ -57,23 +58,26 @@ void construct_bwt(cache_config& config)
const char* KEY_BWT = key_bwt_trait<t_width>::KEY_BWT;

// (1) Load text from disk
text_type text;
load_from_cache(text, KEY_TEXT, config);
read_only_mapper<t_width> text(KEY_TEXT, config);
// text_type text;
// load_from_cache(text, KEY_TEXT, config);
// std::cout<<"TEXT="<<(char*)text.data()<<std::endl;
size_type n = text.size();
uint8_t bwt_width = text.width();

// (2) Prepare to stream SA from disc and BWT to disc
size_type buffer_size = 1000000; // buffer_size is a multiple of 8!, TODO: still true?
size_type buffer_size = 1000000; // buffer_size is a multiple of 8!
int_vector_buffer<> sa_buf(cache_file_name(conf::KEY_SA, config), std::ios::in, buffer_size);
std::string bwt_file = cache_file_name(KEY_BWT, config);
bwt_type bwt_buf(bwt_file, std::ios::out, buffer_size, bwt_width);

// (3) Construct BWT sequentially by streaming SA and random access to text
size_type to_add[2] = {(size_type)-1,n-1};
for (size_type i=0; i < n; ++i) {
bwt_buf[i] = text[ sa_buf[i]+to_add[sa_buf[i]==0] ];
// bwt_type bwt_buf(bwt_file, std::ios::out, buffer_size, bwt_width);
{
auto bwt_mapper = write_out_mapper<t_width>::create(bwt_file, n, bwt_width);
// (3) Construct BWT sequentially by streaming SA and random access to text
size_type to_add[2] = {(size_type)-1,n-1};
for (size_type i=0; i < n; ++i) {
bwt_mapper[i] = text[ sa_buf[i]+to_add[sa_buf[i]==0] ];
}
}
bwt_buf.close();
register_cache_file(KEY_BWT, config);
}

Expand Down
Loading