Skip to content

Commit

Permalink
bug fix: widen search breadth in alignment extension to always explor…
Browse files Browse the repository at this point in the history
…e equal paths
  • Loading branch information
hmusta committed Aug 3, 2021
1 parent 7ddb063 commit 70e6bb6
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 13 deletions.
32 changes: 19 additions & 13 deletions metagraph/src/graph/alignment/aligner_extender_methods.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -412,15 +412,19 @@ std::vector<Alignment> DefaultColumnExtender::extend(score_t min_path_score) {
queue.emplace(best_score);

while (queue.size()) {
size_t i = std::get<2>(queue.top());
std::vector<TableIt> next_nodes{ queue.top() };
queue.pop();

// If a node has a single outgoing edge, which has the best score, then
// that node is taken without updating the heap. This loop ensures that
// that happens
bool nofork = true;
while (nofork) {
nofork = false;
// try all paths which have the same best partial alignment score
// (this performs a BFS-like search)
while (queue.size() && std::get<0>(queue.top()) == std::get<0>(next_nodes.back())) {
next_nodes.push_back(queue.top());
queue.pop();
}

while (next_nodes.size()) {
size_t i = std::get<2>(next_nodes.back());
next_nodes.pop_back();

std::vector<std::pair<node_index, char>> outgoing;
size_t next_offset = -1;
Expand Down Expand Up @@ -495,6 +499,7 @@ std::vector<Alignment> DefaultColumnExtender::extend(score_t min_path_score) {
ssize_t begin = prev_begin;
size_t end = std::min(prev_end, window.size()) + 1;

std::vector<TableIt> to_push;
for (const auto &[next, c] : outgoing) {
assert(std::get<0>(best_score) > xdrop_cutoff);

Expand Down Expand Up @@ -585,13 +590,14 @@ std::vector<Alignment> DefaultColumnExtender::extend(score_t min_path_score) {
// if this node has not been reached by a different
// alignment with a better score, continue
if (this->update_seed_filter(next, vec_offset, s_begin, s_end)) {
// if there is only one outgoing edge, which is the best,
// don't update the node traversal heap
if (outgoing.size() == 1 && max_val >= std::get<0>(queue.top())) {
nofork = true;
i = table.size() - 1;
// if this next node is the only next option, or if it's
// better than all other options, take it without pushing
// to the queue
if (outgoing.size() == 1 && (queue.empty() || next_score > queue.top())
&& (next_nodes.empty() || next_score > next_nodes.back())) {
next_nodes.emplace_back(std::move(next_score));
} else {
queue.emplace(next_score);
queue.emplace(std::move(next_score));
}
}
}
Expand Down
20 changes: 20 additions & 0 deletions metagraph/tests/graph/test_aligner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1614,6 +1614,26 @@ TYPED_TEST(DBGAlignerTest, align_seed_to_end) {
check_extend(graph, aligner.get_config(), paths, query);
}

TYPED_TEST(DBGAlignerTest, align_bfs_vs_dfs_xdrop) {
size_t k = 31;
std::string reference_1 = "TCGGGGCAAGAAACACACAGCCTTCTCATCCAAGGGCCTCAGTGATGAAGAGTACGATGAGTACAAGAGGATCAGAGAAGAAAGGAATGGCAAATACTCCATAGAAGAGTACCTTCAGGACAGGGACAGATACTATGAGGAGGTGGCCAT";
std::string reference_2 = "TCGGGGCAAGAAACACACAGCCTTCTCATCCAAGGGCCTCAGTGATGAAGAGTACGATGAGTACAAGAGAATCAGAGAGGAGAGGAATGGCAAATACTCAATAGAGGAATACCTCCAAGATAGGGACAGATACTATGAAGAGCTTGCCAT";
std::string query = "TCGGGGCAAGAAACACACAGCCTTCTCATCCAAGGGCCTCAGTGATGATGAGTACGATGAGTACAAGAGCATCAGAGAGGAGAGGAATGGCAAATACTCAATAGAGGAATACCTCCAAGATAGGGACAGATACTATGAAGAGCTTGCCAT";

auto graph = build_graph_batch<TypeParam>(k, { reference_1, reference_2 });
DBGAlignerConfig config(DBGAlignerConfig::dna_scoring_matrix(2, -3, -3));
config.xdrop = 27;
config.min_seed_length = 0;
config.max_seed_length = 0;
config.rel_score_cutoff = 0.8;
DBGAligner<> aligner(*graph, config);
auto paths = aligner.align(query);
ASSERT_EQ(1ull, paths.size());
auto path = paths[0];
EXPECT_EQ("48=1X20=1X80=", path.get_cigar().to_string());
check_json_dump_load(*graph, path, paths.get_query(), paths.get_query(PICK_REV_COMP));
}

TEST(DBGAlignerTest, align_dummy) {
size_t k = 7;
std::string reference = "AAAAGCTTTCGAGGCCAA";
Expand Down

0 comments on commit 70e6bb6

Please sign in to comment.