Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Wt_Topk #226

Open
wants to merge 22 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 24 additions & 21 deletions benchmark/wavelet_trees/src/wt_time_and_space.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,26 +46,27 @@ uint64_t test_inverse_select(const t_wt& wt, const vector<size_type>& is, uint64
return cnt;
}

// test interval_symbols
// test ys_in_x_range
template<class t_wt>
uint64_t
test_interval_symbols(typename enable_if<!(has_node_type<t_wt>::value),
t_wt>::type&, const vector<size_type>&, const vector<size_type>&, size_type&, uint64_t, uint64_t)
test_ys_in_x_range(typename enable_if<!(has_node_type<t_wt>::value),
t_wt>::type&, const vector<size_type>&, const vector<size_type>&, uint64_t, uint64_t)
{
return 0; // interval_symbols not implemented
return 0; // ys_in_x_range not implemented
}

template<class t_wt>
uint64_t
test_interval_symbols(typename enable_if<has_node_type<t_wt>::value,
t_wt>::type& wt, const vector<size_type>& is, const vector<size_type>& js, size_type& k, uint64_t mask, uint64_t times=100000000)
test_ys_in_x_range(typename enable_if<has_node_type<t_wt>::value,
t_wt>::type& wt, const vector<size_type>& is, const vector<size_type>& js, uint64_t mask, uint64_t times=100000000)
{
vector<value_type> tmp(wt.sigma);
vector<size_type> tmp2(wt.sigma);
uint64_t cnt=0;
for (uint64_t i=0; i<times; ++i) {
interval_symbols(wt, is[i&mask], js[i&mask], k, tmp, tmp2, tmp2);
cnt += k;
auto y_it = ys_in_x_range(wt, is[i&mask], js[i&mask]);
while (y_it) {
cnt += (std::get<2>(*y_it)-std::get<1>(*y_it));
++y_it;
}
}
return cnt;
}
Expand Down Expand Up @@ -171,20 +172,24 @@ void prepare_for_select(const t_iv& iv, vector<value_type>& cs, vector<size_type

template<class t_wt>
struct wt_trait {
static uint64_t test_access(const t_wt& wt, const vector<size_type>& is, uint64_t mask, uint64_t times=100000000) {
static uint64_t test_access(const t_wt& wt, const vector<size_type>& is, uint64_t mask, uint64_t times=100000000)
{
return ::test_access(wt, is, mask, times);
}
static uint64_t test_inverse_select(const t_wt& wt, const vector<size_type>& is, uint64_t mask, uint64_t times=100000000) {
static uint64_t test_inverse_select(const t_wt& wt, const vector<size_type>& is, uint64_t mask, uint64_t times=100000000)
{
return ::test_inverse_select(wt, is, mask, times);
}
};

template<class t_rac, class t_bitvector, class t_select, class t_select_zero>
struct wt_trait<wt_gmr_rs<t_rac, t_bitvector, t_select, t_select_zero>> {
static uint64_t test_access(const wt_gmr_rs<t_rac, t_bitvector, t_select, t_select_zero>&, const vector<size_type>&, uint64_t, uint64_t) {
static uint64_t test_access(const wt_gmr_rs<t_rac, t_bitvector, t_select, t_select_zero>&, const vector<size_type>&, uint64_t, uint64_t)
{
return 0;
}
static uint64_t test_inverse_select(const wt_gmr_rs<t_rac, t_bitvector, t_select, t_select_zero>&, const vector<size_type>&, uint64_t, uint64_t) {
static uint64_t test_inverse_select(const wt_gmr_rs<t_rac, t_bitvector, t_select, t_select_zero>&, const vector<size_type>&, uint64_t, uint64_t)
{
return 0;
}
};
Expand All @@ -203,8 +208,6 @@ int main(int argc, char* argv[])
uint64_t check = 0;
uint64_t size = 1<<log_s;

// create values
size_type k = 0;
vector<value_type> cs(size);
vector<size_type> is(size);
vector<size_type> is2(size);
Expand Down Expand Up @@ -255,13 +258,13 @@ int main(int argc, char* argv[])
cout << "# inverse_select_time = " << duration_cast<microseconds>(stop-start).count()/(double)reps << endl;
cout << "# inverse_select_check = " << check << endl;

// interval_symbols
const uint64_t reps_interval_symbols = wt.sigma < 10000 ? reps : reps/100;
// ys_in_x_range
const uint64_t reps_ys_in_x_range = wt.sigma < 10000 ? reps : reps/100;
start = timer::now();
check = test_interval_symbols<WT_TYPE>(wt, is, js, k, mask, reps_interval_symbols);
check = test_ys_in_x_range<WT_TYPE>(wt, is, js, mask, reps_ys_in_x_range);
stop = timer::now();
cout << "# interval_symbols_time = " << duration_cast<microseconds>(stop-start).count()/(double)reps_interval_symbols << endl;
cout << "# interval_symbols_check = " << check << endl;
cout << "# ys_in_x_range_time = " << duration_cast<microseconds>(stop-start).count()/(double)reps_ys_in_x_range << endl;
cout << "# ys_in_x_range_check = " << check << endl;

// lex_count
start = timer::now();
Expand Down
4 changes: 2 additions & 2 deletions benchmark/wavelet_trees/visualize/wt.R
Original file line number Diff line number Diff line change
Expand Up @@ -154,9 +154,9 @@ for(tc in unique(maindata$TC_ID)){
widths=c(1.35,1), heights=c(1,1,1))

#interval-symbols-plot
ivs <-data['interval_symbols_time']
ivs <-data['ys_in_x_range_time']
rownames(ivs)<-id
plot_time_figure(t(ivs),"\\tt{interval\\_symbols}",xmax=max(xmax,max(ivs)))
plot_time_figure(t(ivs),"\\tt{ys\\_in\\_x\\_range}",xmax=max(xmax,max(ivs)))

#constructor-plot
con <-data['constructs_time']
Expand Down
31 changes: 31 additions & 0 deletions examples/top_wt.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#include <iostream>
#include <sdsl/wt_topk.hpp>
#include <sdsl/wt_topk_algorithm.hpp>

using namespace std;
using namespace sdsl;

int main()
{
wt_topk<> wtk;
construct_im(wtk, {{0,0,2},{1,2,3},{2,1,2},{3,0,2},{4,0,1},{5,1,4},{6,0,1},{7,1,1},{8,0,8},{9,2,5}});
wtk.print_info();

auto topk_it = top_k(wtk, {2,0}, {7,1});
while (topk_it) {
auto point_weight = *topk_it;
cout << point_weight.first <<" weight: "<<point_weight.second << endl;
++topk_it;
}

wt_topk<wt_int<>,rmq_succinct_sct<false>, dac_vector<>, false> wtk2;
construct_im(wtk2, {{0,0,2},{1,2,3},{2,1,2},{3,0,2},{4,0,1},{5,1,4},{6,0,1},{7,1,1},{8,0,8},{9,2,5}});
wtk2.print_info();

auto topk_it2 = top_k(wtk2, {2,0}, {7,1});
while (topk_it2) {
auto point_weight = *topk_it2;
cout << point_weight.first <<" weight: "<<point_weight.second << endl;
++topk_it2;
}
}
36 changes: 36 additions & 0 deletions examples/ys_in_xrange.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#include <sdsl/wavelet_trees.hpp>
#include <sdsl/wt_algorithm.hpp>
#include <iostream>
#include <stack>

using namespace sdsl;
using namespace std;


// Example driver: builds an integer wavelet tree from a decimal string and
// demonstrates ys_in_x_range, count and map_to_sorted_sequence on it.
int main()
{
    using t_wt = wt_int<>;
    t_wt wt;
    construct_im(wt, "9 4 3 2 1 4 6 3 1 4 6 5 3 2 1 3 5 3 2 3 4",'d');
    cout << wt << endl;

    // Iterate over the result of ys_in_x_range for the range [0, wt.size());
    // each dereferenced element is a 3-tuple printed as "a (b,c)".
    auto sym_it = ys_in_x_range(wt, 0, wt.size());
    for (; sym_it; ++sym_it) {
        const auto entry = *sym_it;
        cout << get<0>(entry) << " ("<< get<1>(entry) << "," << get<2>(entry) << ")" << endl;
    }

    // Index of the last position; used both in the labels and in the queries.
    const auto last = wt.size()-1;
    cout << "count[0,"<<last<<"][1,16] = " << count(wt, {0, last}, {1,16}) << endl;
    cout << "count[0,"<<last<<"][1,8] = " << count(wt, {0, last}, {1,8}) << endl;
    cout << "count[0,"<<last<<"][2,3] = " << count(wt, {0, last}, {2,3}) << endl;


    cout << "map_to_sorted( {"<<wt<<"}, [5,13], [0,5] )" << endl;
    // Each element pairs a value with a two-component range, printed as
    // "value [lo,hi]".
    auto sorted_it = map_to_sorted_sequence(wt, {5, 13}, {0,5});
    for (; sorted_it; ++sorted_it) {
        const auto& item = *sorted_it;
        const auto& span = get<1>(item);
        cout << get<0>(item);
        cout << " ["<<get<0>(span) <<","<<get<1>(span)<<"]"<<endl;
    }
}

13 changes: 4 additions & 9 deletions include/sdsl/construct_lcp_helper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,11 @@ void insert_lcp_values(int_vector<>& partial_lcp, bit_vector& index_done, std::s
template<class tWT>
void create_C_array(std::vector<uint64_t>& C, const tWT& wt)
{
uint64_t quantity; // quantity of characters in interval
std::vector<unsigned char> cs(wt.sigma); // list of characters in the interval
std::vector<uint64_t> rank_c_i(wt.sigma); // number of occurrence of character in [0 .. i-1]
std::vector<uint64_t> rank_c_j(wt.sigma); // number of occurrence of character in [0 .. j-1]

C = std::vector<uint64_t>(257, 0);
interval_symbols(wt, 0, wt.size(), quantity, cs, rank_c_i, rank_c_j);
for (uint64_t i=0; i<quantity; ++i) {
unsigned char c = cs[i];
C[c+1] = rank_c_j[i];
auto y_it = ys_in_x_range(wt, 0, wt.size());
for (uint64_t i=0; y_it; ++i, ++y_it) {
unsigned char c = std::get<0>(*y_it);
C[c+1] = std::get<2>(*y_it);
}
for (uint64_t i=1; i<C.size()-1; ++i) {
C[i+1] += C[i];
Expand Down
2 changes: 2 additions & 0 deletions include/sdsl/k2_treap.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,8 @@ class k2_treap
static_assert(t_k<=16, "t_k has to be smaller than 17.");

public:
enum { permuted_x = false };

typedef int_vector<>::size_type size_type;
using node_type = k2_treap_ns::node_type;
using point_type = k2_treap_ns::point_type;
Expand Down
91 changes: 54 additions & 37 deletions include/sdsl/k2_treap_algorithm.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,15 @@
namespace sdsl
{

// forward declaration
template<uint8_t t_k,
typename t_bv,
typename t_rank,
typename t_max_vec>
class k2_treap;



namespace k2_treap_ns
{

Expand Down Expand Up @@ -271,13 +280,17 @@ class range_iterator
* \return Iterator to result in decreasing order.
* \pre real(p1) <= real(p2) and imag(p1)<=imag(p2)
*/
template<typename t_k2_treap>
k2_treap_ns::top_k_iterator<t_k2_treap>
top_k(const t_k2_treap& t,
k2_treap_ns::point_type p1,
k2_treap_ns::point_type p2)

template<uint8_t t_k,
typename t_bv,
typename t_rank,
typename t_max_vec>
k2_treap_ns::top_k_iterator<k2_treap<t_k, t_bv, t_rank, t_max_vec>>
top_k(const k2_treap<t_k, t_bv, t_rank, t_max_vec>& t,
k2_treap_ns::point_type p1,
k2_treap_ns::point_type p2)
{
return k2_treap_ns::top_k_iterator<t_k2_treap>(t, p1, p2);
return k2_treap_ns::top_k_iterator<k2_treap<t_k, t_bv, t_rank, t_max_vec>>(t, p1, p2);
}


Expand All @@ -290,25 +303,32 @@ top_k(const t_k2_treap& t,
* \pre real(p1) <= real(p2) and imag(p1)<=imag(p2)
* real(range) <= imag(range)
*/
template<typename t_k2_treap>
k2_treap_ns::range_iterator<t_k2_treap>
range_3d(const t_k2_treap& t,
k2_treap_ns::point_type p1,
k2_treap_ns::point_type p2,
k2_treap_ns::range_type range)
template<uint8_t t_k,
typename t_bv,
typename t_rank,
typename t_max_vec>
k2_treap_ns::range_iterator<k2_treap<t_k, t_bv, t_rank, t_max_vec>>
range_3d(const k2_treap<t_k, t_bv, t_rank, t_max_vec>& t,
k2_treap_ns::point_type p1,
k2_treap_ns::point_type p2,
k2_treap_ns::range_type range)
{
return k2_treap_ns::range_iterator<t_k2_treap>(t, p1, p2, range);
return k2_treap_ns::range_iterator<k2_treap<t_k, t_bv, t_rank, t_max_vec>>(t, p1, p2, range);
}

namespace k2_treap_ns
{

// forward declaration
template<typename t_k2_treap>
uint64_t __count(const t_k2_treap&, typename t_k2_treap::node_type);

// forward declaration
template<typename t_k2_treap>
uint64_t _count(const t_k2_treap&, k2_treap_ns::point_type,
k2_treap_ns::point_type, typename t_k2_treap::node_type);
uint64_t _count(const t_k2_treap&, point_type,
point_type, typename t_k2_treap::node_type);

}

//! Count how many points are in the rectangle (p1,p2)
/*! \param treap k2-treap
Expand All @@ -317,34 +337,38 @@ uint64_t _count(const t_k2_treap&, k2_treap_ns::point_type,
* \return The number of points in rectangle (p1,p2).
* \pre real(p1) <= real(p2) and imag(p1)<=imag(p2)
*/
template<typename t_k2_treap>
template<uint8_t t_k,
typename t_bv,
typename t_rank,
typename t_max_vec>
uint64_t
count(const t_k2_treap& treap,
count(const k2_treap<t_k, t_bv, t_rank, t_max_vec>& t,
k2_treap_ns::point_type p1,
k2_treap_ns::point_type p2)
{
if (treap.size() > 0) {
return _count(treap, p1, p2, treap.root());
if (t.size() > 0) {
return k2_treap_ns::_count(t, p1, p2, t.root());
}
return 0;
}

namespace k2_treap_ns
{

template<typename t_k2_treap>
uint64_t
_count(const t_k2_treap& treap,
k2_treap_ns::point_type p1,
k2_treap_ns::point_type p2,
_count(const t_k2_treap& t,
point_type p1,
point_type p2,
typename t_k2_treap::node_type v)
{
using namespace k2_treap_ns;
if (contained<t_k2_treap::k>(p1, p2, v)) {
return __count(treap, v);
return __count(t, v);
} else if (overlap<t_k2_treap::k>(p1, p2, v)) {
uint64_t res = contained(v.max_p, p1, p2);
auto nodes = treap.children(v);
auto nodes = t.children(v);
for (auto node : nodes) {
res += _count(treap, p1, p2, node);
res += _count(t, p1, p2, node);
}
return res;
}
Expand All @@ -354,24 +378,17 @@ _count(const t_k2_treap& treap,

template<typename t_k2_treap>
uint64_t
__count(const t_k2_treap& treap,
__count(const t_k2_treap& t,
typename t_k2_treap::node_type v)
{
uint64_t res = 1; // count the point at the node
auto nodes = treap.children(v);
auto nodes = t.children(v);
for (auto node : nodes)
res += __count(treap, node);
res += __count(t, node);
return res;
}


// forward declaration
template<uint8_t t_k,
typename t_bv,
typename t_rank,
typename t_max_vec>
class k2_treap;

}

//! Specialized version of method ,,construct'' for k2_treaps.
template<uint8_t t_k,
Expand Down
Loading