From a70f8403307bd711a61e2146d6ce70b5db58b60d Mon Sep 17 00:00:00 2001 From: kcleal Date: Mon, 18 Apr 2022 19:08:31 +0100 Subject: [PATCH] v1.3.10 Fixed X/= style cigar compatibility issues. Improvements to merging --- dysgu/assembler.pyx | 90 ++-- dysgu/call_component.pyx | 131 +++-- dysgu/cluster.pyx | 72 ++- dysgu/graph.pyx | 135 +++-- dysgu/robin_hood.h | 1063 +++++++++++++++++++++++++------------- setup.py | 2 +- 6 files changed, 947 insertions(+), 546 deletions(-) diff --git a/dysgu/assembler.pyx b/dysgu/assembler.pyx index 50fa7d7..b4a450c 100644 --- a/dysgu/assembler.pyx +++ b/dysgu/assembler.pyx @@ -147,7 +147,7 @@ cdef void add_to_graph(DiGraph& G, AlignedSegment r, cpp_vector[int]& nweight, T cigar, current_pos, i = trim_cigar(cigar, pos, approx_position) for opp, length in cigar: - with nogil: + # with nogil: if done: break @@ -225,10 +225,10 @@ cdef void add_to_graph(DiGraph& G, AlignedSegment r, cpp_vector[int]& nweight, T G.updateEdge(prev_node, n, qual) prev_node = n - current_pos += 1 # <-- Reference pos increases 1 + # current_pos += 1 # <-- Reference pos increases 1 elif opp == 2: # deletion - current_pos += length + 1 + current_pos += length # + 1 elif opp == 0 or opp == 7 or opp == 8 or opp == 3: # All match, match (=), mis-match (X), N's @@ -270,10 +270,12 @@ cdef void add_to_graph(DiGraph& G, AlignedSegment r, cpp_vector[int]& nweight, T G.updateEdge(prev_node, n, qual) prev_node = n + current_pos += 1 + start = False -cdef int topo_sort2(DiGraph& G, cpp_deque[int]& order): # except -1: +cdef int topo_sort2(DiGraph& G, cpp_deque[int]& order, r): # except -1: cdef unordered_set[int] seen cdef unordered_set[int] explored @@ -286,51 +288,53 @@ cdef int topo_sort2(DiGraph& G, cpp_deque[int]& order): # except -1: cdef cpp_vector[int] debug_res - with nogil: + # with nogil: - for v in range(G.numberOfNodes()): # process all vertices in G - if explored.find(v) != explored.end(): - continue + for v in range(G.numberOfNodes()): # process all vertices in G + if explored.find(v) != explored.end(): + continue - fringe.clear() - fringe.push_back(v) # nodes yet to look at + fringe.clear() + fringe.push_back(v) # nodes yet to look at - while fringe.size() != 0: + while fringe.size() != 0: - w = fringe.back() # depth first search - if explored.find(w) != explored.end(): # already looked down this branch - fringe.pop_back() - continue + w = fringe.back() # depth first search + if explored.find(w) != explored.end(): # already looked down this branch + fringe.pop_back() + continue - seen.insert(w) + seen.insert(w) - # Check successors for cycles and for new nodes - if new_nodes.size() > 0: - new_nodes.clear() + # Check successors for cycles and for new nodes + if new_nodes.size() > 0: + new_nodes.clear() - neighbors = G.neighbors(w) - for n in neighbors: - if explored.find(n) == explored.end(): + neighbors = G.neighbors(w) + for n in neighbors: + if explored.find(n) == explored.end(): - if seen.find(n) != seen.end(): #CYCLE !! - order.clear() - order.push_back(-1) - order.push_back(n) - order.push_back(w) - # return order - graph_node_2_vec(n, debug_res) - raise ValueError("Graph contains a cycle. Please report this. n={}, w={}, v={}. Node info n was: {}, {}, {}, {}".format(n, w, v, debug_res[0], debug_res[1], debug_res[2], debug_res[4])) + if seen.find(n) != seen.end(): #CYCLE !! + order.clear() + order.push_back(-1) + order.push_back(n) + order.push_back(w) + # return order + graph_node_2_vec(n, debug_res) + # echo("Graph contains a cycle. Please report this. n={}, w={}, v={}. Node info n was: {}, {}, {}, {}".format(n, w, v, debug_res[0], debug_res[1], debug_res[2], debug_res[4])) - new_nodes.push_back(n) + raise ValueError("Graph contains a cycle. Please report this. n={}, w={}, v={}. Node info n was: {}, {}, {}, {}".format(n, w, v, debug_res[0], debug_res[1], debug_res[2], debug_res[4])) - if new_nodes.size() > 0: # Add new_nodes to fringe - fringe.insert(fringe.end(), new_nodes.begin(), new_nodes.end()) # Extend + new_nodes.push_back(n) - else: # No new nodes so w is fully explored - explored.insert(w) + if new_nodes.size() > 0: # Add new_nodes to fringe + fringe.insert(fringe.end(), new_nodes.begin(), new_nodes.end()) # Extend - order.push_front(w) - fringe.pop_back() # done considering this node + else: # No new nodes so w is fully explored + explored.insert(w) + + order.push_front(w) + fringe.pop_back() # done considering this node cdef cpp_deque[int] score_best_path(DiGraph& G, cpp_deque[int]& nodes_to_visit, cpp_vector[int]& n_weights): @@ -418,7 +422,7 @@ cdef dict get_consensus(rd, int position, int max_distance): cdef DiGraph G = DiGraph() cdef TwoWayMap ndict_r2 cdef cpp_vector[int] node_weights - + r = None for r in rd: if r.seq is None: continue @@ -428,7 +432,15 @@ cdef dict get_consensus(rd, int position, int max_distance): add_to_graph(G, r, node_weights, ndict_r2, position, max_distance) cdef cpp_deque[int] nodes_to_visit2 - return_code = topo_sort2(G, nodes_to_visit2) + + # try: + return_code = topo_sort2(G, nodes_to_visit2, r) + + # except ValueError: + # echo("was -1") + # for r in rd: + # echo(r.qname, r.pos) + if return_code == -1 or nodes_to_visit2.size() < 50: return {} diff --git a/dysgu/call_component.pyx b/dysgu/call_component.pyx index c80fb98..2daf041 100644 --- a/dysgu/call_component.pyx +++ b/dysgu/call_component.pyx @@ -304,7 +304,7 @@ cdef guess_informative_pair(aligns): b_cigar_info, b = aligns[1] # check for paired-end read through with no SA tag - if a.flag & 1 and a_cigar_info[5] == -1 and b_cigar_info[5] == -1: + if a.flag & 1 and a_cigar_info.cigar_index == -1 and b_cigar_info.cigar_index == -1: if a.pos == b.pos and a.reference_end == b.reference_end: extent_left_same = True extent_right_same = True @@ -316,9 +316,9 @@ cdef guess_informative_pair(aligns): return None # within read sv - if 0 < a_cigar_info[5] < len(a.cigartuples) - 1: - cigar_index = a_cigar_info[5] - event_pos = a_cigar_info[6] + if 0 < a_cigar_info.cigar_index < len(a.cigartuples) - 1: + cigar_index = a_cigar_info.cigar_index + event_pos = a_cigar_info.event_pos ci = a.cigartuples[cigar_index] return (ci[0], a.rname, @@ -328,9 +328,9 @@ cdef guess_informative_pair(aligns): a, cigar_index) - elif 0 < b_cigar_info[5] < len(b.cigartuples) - 1: - cigar_index = b_cigar_info[5] - event_pos = b_cigar_info[6] + elif 0 < b_cigar_info.cigar_index < len(b.cigartuples) - 1: + cigar_index = b_cigar_info.cigar_index + event_pos = b_cigar_info.event_pos ci = b.cigartuples[cigar_index] return (ci[0], b.rname, @@ -340,8 +340,8 @@ cdef guess_informative_pair(aligns): b, cigar_index) - a_pos = a_cigar_info[6] # Position may have been inferred from SA tag, use this if available - b_pos = b_cigar_info[6] + a_pos = a_cigar_info.event_pos # Position may have been inferred from SA tag, use this if available + b_pos = b_cigar_info.event_pos if a_pos == b_pos: # make sure different breaks are mapped if (a.cigartuples[0][0] == 4 and b.cigartuples[0][0] == 4) or (a.cigartuples[-1][0] == 4 and b.cigartuples[-1][0] == 4): @@ -802,7 +802,7 @@ cdef single(rds, int insert_size, int insert_stdev, float insert_ppf, int clip_l if n_templates == 1: # Filter uninteresting reads - if not any(not i.flag & 1 or not i.flag & 2 or i.rname != i.rnext or node_info[5] != 2 or + if not any(not i.flag & 1 or not i.flag & 2 or i.rname != i.rnext or node_info.cigar_index != 2 or (i.flag & 1 and abs(i.tlen) > min_distance) for node_info, i in rds): @@ -842,10 +842,10 @@ cdef single(rds, int insert_size, int insert_stdev, float insert_ppf, int clip_l else: # Single alignment, check spanning cigar_info, a = alignments[0] - cigar_index = cigar_info[5] + cigar_index = cigar_info.cigar_index #[5] if 0 < cigar_index < len(a.cigartuples) - 1: # Alignment spans SV - event_pos = cigar_info[6] + event_pos = cigar_info.event_pos #[6] ci = a.cigartuples[cigar_index] spanning_alignments.append((ci[0], a.rname, @@ -1261,14 +1261,14 @@ cdef void make_call(informative, breakA_precise, breakB_precise, svtype, jointyp else: lens.append(i.inferred_sv_len) if len(lens) > 0: - svlen = value_closest_to_mean(lens) #int(np.mean(lens)) + svlen = value_closest_to_mean(lens) if main_svlen > 0 and (svlen / main_svlen) > 0.7: svlen_precise = 1 else: svlen = main_svlen else: if len(inferred_lens) > 0: - svlen = value_closest_to_mean(inferred_lens) #int(np.mean(inferred_lens)) + svlen = value_closest_to_mean(inferred_lens) else: svlen = main_svlen @@ -1581,7 +1581,9 @@ cdef call_from_reads(u_reads, v_reads, int insert_size, int insert_stdev, float v_reads.append(v_item.read_b) er = EventResult() - + # for item in sub_informative: + # if item.read_a.qname == "D00360:18:H8VC6ADXX:1:1210:7039:44052": + # echo("here") make_call(sub_informative, precise_a, precise_b, svtype, jointype, insert_size, insert_stdev, er) count_attributes2(u_reads, v_reads, [], extended_tags, insert_ppf, [], er) @@ -1612,9 +1614,43 @@ cdef call_from_reads(u_reads, v_reads, int insert_size, int insert_stdev, float return results +cdef filter_single_partitions(u_reads, v_reads): + # rare, but single reads with >2 alignments can have multiple alignments end up in one block. These should be + # processed as singles + u_counts = defaultdict(list) + v_counts = defaultdict(list) + any_u_grouped = False + any_v_grouped = False + + for cigar_info, a in u_reads: + u_counts[(a.is_read1, a.qname)].append((cigar_info, a)) + if len(u_counts[(a.is_read1, a.qname)]) > 1: + any_u_grouped = True + for cigar_info, a in v_reads: + v_counts[(a.is_read1, a.qname)].append((cigar_info, a)) + if len(v_counts[(a.is_read1, a.qname)]) > 1: + any_v_grouped = True + if not any_u_grouped and not any_v_grouped: + return u_reads, v_reads, None, None + + single_u, single_v, actual_u, actual_v = [], [], [], [] + for k, v in u_counts.items(): + if len(v) == 1: + actual_u += v + else: + single_u += v + for k, v in v_counts.items(): + if len(v) == 1: + actual_v += v + else: + single_v += v + + return actual_u, actual_v, single_u, single_v + + cdef one_edge(u_reads_info, v_reads_info, int clip_length, int insert_size, int insert_stdev, float insert_ppf, int min_support, int block_edge, int assemble, int extended_tags, info): - #print("starting one edge") + spanning_alignments = [] u_reads = [] v_reads = [] @@ -1624,9 +1660,9 @@ cdef one_edge(u_reads_info, v_reads_info, int clip_length, int insert_size, int if not a.cigartuples: continue u_reads.append(a) - cigar_index = cigar_info[5] + cigar_index = cigar_info.cigar_index if 0 < cigar_index < len(a.cigartuples) - 1: # Alignment spans SV - event_pos = cigar_info[6] + event_pos = cigar_info.event_pos ci = a.cigartuples[cigar_index] spanning_alignments.append((ci[0], a.rname, @@ -1640,9 +1676,9 @@ cdef one_edge(u_reads_info, v_reads_info, int clip_length, int insert_size, int if not a.cigartuples: continue v_reads.append(a) - cigar_index = cigar_info[5] + cigar_index = cigar_info.cigar_index if 0 < cigar_index < len(a.cigartuples) - 1: # Alignment spans SV - event_pos = cigar_info[6] + event_pos = cigar_info.event_pos ci = a.cigartuples[cigar_index] spanning_alignments.append((ci[0], a.rname, @@ -1748,7 +1784,7 @@ cdef one_edge(u_reads_info, v_reads_info, int clip_length, int insert_size, int def fpos_srt(x): - return x[0][4] + return x[0].tell cdef get_reads(infile, nodes_info, buffered_reads, n2n, bint add_to_buffer, sites_index): @@ -1770,28 +1806,27 @@ cdef get_reads(infile, nodes_info, buffered_reads, n2n, bint add_to_buffer, site if int_node in buffered_reads: aligns.append((n, buffered_reads[int_node])) continue + # # def as_tuple(self): + # # return self.h, self.f, self.p, self.c, self.t, self.cigar_index, self.event_pos + # p = n[4] + # node = (n[0], n[1], n[2], n[3], p) # drop cigar index and event pos - p = n[4] - node = (n[0], n[1], n[2], n[3], p) # drop cigar index and event pos - - fpos.append((node, int_node, n)) - - # improve chances of reading nearby locations sequentially - fpos = sorted(fpos, key=fpos_srt) + fpos.append((n, int_node)) - for node, int_node, save_node in fpos: + for node, int_node in sorted(fpos, key=fpos_srt): - p = node[4] + # p = node[4] - infile.seek(p) + infile.seek(node.tell) try: a = next(infile) except StopIteration: return aligns v = xxhasher(bam_get_qname(a._delegate), len(a.qname), 42) - if (v, a.flag, a.pos, a.rname, p) == node: - aligns.append((save_node, a)) + # if (v, a.flag, a.pos, a.rname, p) == node: + if v == node.hash_name and a.flag == node.flag and a.pos == node.pos and a.rname == node.chrom: + aligns.append((node, a)) if add_to_buffer: buffered_reads[int_node] = a # Add to buffer, then block nodes with multi-edges dont need collecting twice @@ -1806,8 +1841,9 @@ cdef get_reads(infile, nodes_info, buffered_reads, n2n, bint add_to_buffer, site steps += 1 v = xxhasher(bam_get_qname(a._delegate), len(a.qname), 42) - if (v, a.flag, a.pos, a.rname, p) == node: - aligns.append((save_node, a)) + # if (v, a.flag, a.pos, a.rname, p) == node: + if v == node.hash_name and a.flag == node.flag and a.pos == node.pos and a.rname == node.chrom: + aligns.append((node, a)) if add_to_buffer: buffered_reads[int_node] = a @@ -1841,6 +1877,7 @@ cdef list multi(data, bam, int insert_size, int insert_stdev, float insert_ppf, sites_info = [] # u and v are the part ids, d[0] and d[1] are the lists of nodes for those parts + for (u, v), d in data["s_between"].items(): rd_u = get_reads(bam, d[0], data["reads"], data["n2n"], add_to_buffer, info) # [(Nodeinfo, alignment)..] @@ -1862,8 +1899,28 @@ cdef list multi(data, bam, int insert_size, int insert_stdev, float insert_ppf, if v in seen: seen.remove(v) - events += one_edge(rd_u, rd_v, clip_length, insert_size, insert_stdev, insert_ppf, min_support, 1, assemble_contigs, - extended_tags, sites_info) + # finds reads that should be a single partition + u_reads, v_reads, u_single, v_single = filter_single_partitions(rd_u, rd_v) + + if len(u_reads) > 0 and len(v_reads) > 0: + events += one_edge(rd_u, rd_v, clip_length, insert_size, insert_stdev, insert_ppf, min_support, 1, assemble_contigs, + extended_tags, sites_info) + if u_single: + res = single(u_single, insert_size, insert_stdev, insert_ppf, clip_length, min_support, assemble_contigs, + extended_tags, sites_info) + if res: + if isinstance(res, EventResult): + events.append(res) + else: + events += res + if v_single: + res = single(v_single, insert_size, insert_stdev, insert_ppf, clip_length, min_support, assemble_contigs, + extended_tags, sites_info) + if res: + if isinstance(res, EventResult): + events.append(res) + else: + events += res # Process any singles / unconnected blocks if seen: diff --git a/dysgu/cluster.pyx b/dysgu/cluster.pyx index 5e3f48c..4e9df04 100644 --- a/dysgu/cluster.pyx +++ b/dysgu/cluster.pyx @@ -151,6 +151,7 @@ def enumerate_events(G, potential, max_dist, try_rev, tree, paired_end=False, re seen = set([]) pad = 100 + disjoint_nodes = set([]) # if a component has more than one disjoint nodes it needs to be broken apart for ei, ej, idx, jdx in event_iter: @@ -228,8 +229,13 @@ def enumerate_events(G, potential, max_dist, try_rev, tree, paired_end=False, re continue recpi_overlap = is_reciprocal_overlapping(ei.posA, ei.posB, ej.posA, ej.posB) - overlap = max(ei.posA, ej.posA) - min(ei.posB, ej.posB) + overlap = max(0, min(ei.posA, ej.posA) - max(ei.posB, ej.posB)) + if paired_end: + if ei.spanning > 0 and ej.spanning > 0 and overlap == 0: + disjoint_nodes.add(i_id) + disjoint_nodes.add(j_id) + continue # If long reads only rely on reciprocal overlap, seems to work better if paired_end: spd = span_similarity(ei, ej) @@ -247,21 +253,6 @@ def enumerate_events(G, potential, max_dist, try_rev, tree, paired_end=False, re else: l_ratio = min(ei.svlen, ej.svlen) / ml - # this didnt work - # merge events that might be deletion of tandem duplication - # if paired_end and ei.svtype == "DEL": - # if l_ratio < 0.9: - # if both_imprecise and min(ei.remap_score, ej.remap_score) > 50: - # continue - # elif overlap < -15: - # continue - - # this worked ok - # echo(ei.remap_score, ej.remap_score, recpi_overlap, l_ratio) - # if max(ei.remap_score, ej.remap_score) > 50 and not recpi_overlap: - # # if max(ei.remap_score, ej.remap_score) > 50 and max(ei.spanning, ej.spanning) > 0: - # continue - if ei.svtype == "INS": if aggressive_ins_merge: m = True @@ -287,8 +278,8 @@ def enumerate_events(G, potential, max_dist, try_rev, tree, paired_end=False, re if not m: continue # if ei.posA == 66323 and ej.posA == 66323: - # echo(ml, l_ratio, one_is_imprecise, any_contigs_to_check, (ei.remap_score, ej.remap_score), - # (ei.svlen, ej.svlen), (ei.event_id, ej.event_id), recpi_overlap, spd, loci_similar, loci_same, "overlap", overlap) + # echo((ei.event_id, ej.event_id), ml, l_ratio, one_is_imprecise, any_contigs_to_check, (ei.remap_score, ej.remap_score), + # (ei.svlen, ej.svlen), recpi_overlap, spd, loci_similar, loci_same, "overlap", overlap) # Loci are similar, check contig match or reciprocal overlap if not any_contigs_to_check: @@ -335,24 +326,48 @@ def enumerate_events(G, potential, max_dist, try_rev, tree, paired_end=False, re G.add_edge(i_id, j_id, loci_same=True) continue - return G + return G, disjoint_nodes -def cut_components(G): +def cut_components(G, disjoint_nodes): e = G.edges(data=True) G2 = nx.Graph([i for i in e if i[2]["loci_same"] == True]) for u in G.nodes(): if u not in G2: e0 = next(G.edges(u).__iter__()) # Use first edge out of u to connect G2.add_edge(*e0) - return nx.algorithms.components.connected_components(G2) + components = nx.algorithms.components.connected_components(G2) + if len(disjoint_nodes) > 0: + # try split this component into disjoint sets. This method works for small cluster sizes (most of the time) + # but can fail when there are many disjoint nodes. Label propagation might be needed for these + components2 = [] + for c in components: + n_disjoin = set([]) + for node in c: + if node in disjoint_nodes: + n_disjoin.add(node) + if len(n_disjoin) <= 1: + components2.append(c) + continue + + out_e = defaultdict(list) + for node in n_disjoin: + for neigh in G.neighbors(node): + out_e[neigh].append(node) + + G3 = nx.Graph() + for k, v in out_e.items(): + G3.add_edge(k, random.choice(v)) # randomly assign to one of the sets + + components2 += list(nx.algorithms.components.connected_components(G3)) + return components2 + return components cpdef srt_func(c): - # keeper_bias = 0 if not c.site_info else 10000 if c.type != "pe" and c.type != "": - return 100 + c.su # + keeper_bias - return c.su + (3 * c.spanning) # + keeper_bias + return 100 + c.su + return c.su + (3 * c.spanning) def merge_events(potential, max_dist, tree, paired_end=False, try_rev=False, pick_best=False, add_partners=False, @@ -367,9 +382,9 @@ def merge_events(potential, max_dist, tree, paired_end=False, try_rev=False, pic # Cluster events on graph G = nx.Graph() - G = enumerate_events(G, potential, max_dist, try_rev, tree, paired_end, rel_diffs, diffs, same_sample, - aggressive_ins_merge=aggressive_ins_merge, - debug=debug) + G, disjoint_nodes = enumerate_events(G, potential, max_dist, try_rev, tree, paired_end, rel_diffs, diffs, same_sample, + aggressive_ins_merge=aggressive_ins_merge, + debug=debug) found = [] for item in potential: # Add singletons, non-merged @@ -378,8 +393,9 @@ def merge_events(potential, max_dist, tree, paired_end=False, try_rev=False, pic # Try and merge SVs with identical breaks, then merge ones with less accurate breaks - this helps prevent # over merging SVs that are close together - components = cut_components(G) + components = cut_components(G, disjoint_nodes) node_to_event = {i.event_id: i for i in potential} + cdef int k # Only keep edges with loci_same==False if removing the edge leads to an isolated node for grp in components: diff --git a/dysgu/graph.pyx b/dysgu/graph.pyx index df84940..03c3a03 100644 --- a/dysgu/graph.pyx +++ b/dysgu/graph.pyx @@ -6,6 +6,7 @@ from collections import defaultdict, deque, namedtuple import numpy as np cimport numpy as np import sortedcontainers +import cython from cpython cimport array import array import re @@ -542,7 +543,7 @@ cdef class PairedEndScoper: cdef class TemplateEdges: - cdef unordered_map[string, vector[int]] templates_s # robin map was buggy for iterating + cdef public unordered_map[string, vector[int]] templates_s # robin map was buggy for iterating def __init__(self): pass @@ -555,27 +556,29 @@ cdef class TemplateEdges: val.push_back(flag) self.templates_s[key].insert(self.templates_s[key].end(), val.begin(), val.end()) - def iterate_map(self): - - cdef unordered_map[string, vector[int]].iterator it = self.templates_s.begin() - cdef string first - cdef vector[int] second - while it != self.templates_s.end(): - first = dereference(it).first - second = dereference(it).second - yield str(dereference(it).first), list(dereference(it).second) # Array values are flag, node name, query start - postincrement(it) - cdef void add_template_edges(G, TemplateEdges template_edges): # this function joins up template reads (read 1, read 2, plus any supplementary) cdef int ii, u_start, v_start, u, v, uflag, vflag + # normally 2 reads for paired end, or >2 if supplementary reads - for qname, arr in template_edges.iterate_map(): + cdef unordered_map[string, vector[int]].iterator it = template_edges.templates_s.begin() + # cdef string qname + cdef vector[int] arr + while it != template_edges.templates_s.end(): + + # qname = str(dereference(it).first) + + arr = dereference(it).second + # Array values are query start, node-name, flag + # if qname == "D00360:18:H8VC6ADXX:1:1210:7039:44052": + # echo(arr) + postincrement(it) + read1_aligns = [] read2_aligns = [] - for ii in range(0, len(arr), 3): - if arr[ii + 2] & 64: + for ii in range(0, arr.size(), 3): + if arr[ii + 2] & 64: # first in pair read1_aligns.append(arr[ii:ii + 3]) else: read2_aligns.append(arr[ii:ii + 3]) @@ -607,7 +610,7 @@ cdef void add_template_edges(G, TemplateEdges template_edges): primary2 = read2_aligns[0][1] else: if len(read2_aligns) > 2: - read2_aligns = sorted(read2_aligns) + read2_aligns = sorted(read2_aligns) # sorted by query pos for ii in range(len(read2_aligns) - 1): u_start, u, uflag = read2_aligns[ii] if not uflag & 2304: # Is primary @@ -624,6 +627,27 @@ cdef void add_template_edges(G, TemplateEdges template_edges): G.addEdge(primary1, primary2, w=1) +@cython.auto_pickle(True) +cdef class NodeName: + cdef public uint64_t hash_name + cdef public uint64_t tell + cdef public uint32_t pos + cdef public int32_t cigar_index + cdef public uint32_t event_pos + cdef public uint16_t flag + cdef public uint16_t chrom + def __init__(self, h, f, p, c, t, cigar_index, event_pos): + self.hash_name = h + self.flag = f + self.pos = p + self.chrom = c + self.tell = t + self.cigar_index = cigar_index + self.event_pos = event_pos + + # def as_tuple(self): + # return self.h, self.f, self.p, self.c, self.t, self.cigar_index, self.event_pos + cdef class NodeToName: # Index these vectors to get the unique 'template_name' cdef vector[uint64_t] h @@ -650,7 +674,7 @@ cdef class NodeToName: self.event_pos.push_back(g) def __getitem__(self, idx): - return self.h[idx], self.f[idx], self.p[idx], self.c[idx], self.t[idx], self.cigar_index[idx], self.event_pos[idx] + return NodeName(self.h[idx], self.f[idx], self.p[idx], self.c[idx], self.t[idx], self.cigar_index[idx], self.event_pos[idx]) cdef get_query_pos_from_cigarstring(cigar, pos): @@ -660,19 +684,18 @@ cdef get_query_pos_from_cigarstring(cigar, pos): cdef bint i = 0 cdef int ref_end = pos cdef int slen - for slen, opp in cigar: if not i and opp in "SH": start += slen end += slen i = 1 - elif opp == "M": - end += slen - ref_end += slen elif opp == "D": ref_end += slen elif opp == "I": end += slen + elif opp in "M=X": + end += slen + ref_end += slen i = 1 return start, end, pos, ref_end @@ -714,7 +737,7 @@ cdef alignments_from_sa_tag(r, gettid, thresh, paired_end, mapq_thresh): cigar = sa[3] matches = [(int(slen), opp) for slen, opp in re.findall(r'(\d+)([A-Z]{1})', sa[3])] # parse cigar - query_start, query_end, ref_start, ref_end = get_query_pos_from_cigarstring(matches, start_pos2) + query_start, query_end, ref_start, ref_end = get_query_pos_from_cigarstring(matches, start_pos2) #, strand == current_strand) if current_strand != strand: # count from end start_temp = query_length - query_end @@ -722,9 +745,9 @@ cdef alignments_from_sa_tag(r, gettid, thresh, paired_end, mapq_thresh): query_start = start_temp # If another local alignment is found use only this, usually corresponds to the other side of an insertion/dup - if aln_chrom == chrom2 and position_distance(aln_start, aln_end, ref_start, ref_end) < thresh: - query_aligns = [query_aligns[0], (query_start, query_end, ref_start, ref_end, chrom2, mq, strand == current_strand)] - break + # if aln_chrom == chrom2 and position_distance(aln_start, aln_end, ref_start, ref_end) < thresh: + # query_aligns = [query_aligns[0], (query_start, query_end, ref_start, ref_end, chrom2, mq, strand == current_strand)] + # break query_aligns.append((query_start, query_end, ref_start, ref_end, chrom2, mq, strand == current_strand)) @@ -848,7 +871,7 @@ cdef void add_to_graph(G, AlignedSegment r, PairedEndScoper_t pe_scope, Template # # if r.qname in look: # if node_name in node_look: # echo(r.qname, r.pos) - # if r.qname == "m64004_190803_004451/154077992/ccs": + # if r.qname == "D00360:18:H8VC6ADXX:1:1210:7039:44052": # echo("@", r.flag, node_name, chrom, event_pos, chrom2, pos2, list(other_nodes), # count_sc_edges, cigar_index, length_from_cigar) # echo() @@ -994,7 +1017,7 @@ cdef void process_alignment(G, AlignedSegment r, int clip_l, int loci_dist, gett if read_enum == SPLIT: # Parse SA tag. For paired reads - if r.has_tag("SA") and good_clip: # Parse SA, first alignment is the other read primary line + if r.has_tag("SA") and good_clip: # Parse SA, first alignment is the other read primary alignment all_aligns, index = alignments_from_sa_tag(r, gettid, loci_dist, paired_end, mapq_thresh) event = all_aligns[index] if len(all_aligns) == 1: @@ -1392,41 +1415,16 @@ cpdef tuple construct_graph(genome_scanner, infile, int max_dist, int clustering return G, node_to_name, bad_clip_counter, site_adder -cpdef dict get_reads(infile, sub_graph_reads): - - rd = dict() - cdef int j, int_node - cdef long int p - cdef uint64_t v - cdef AlignedSegment a - for int_node, node in sub_graph_reads.items(): - node = tuple(node[:-2]) # drop cigar index and event pos - p = node[4] - infile.seek(p) - a = next(infile) - v = xxhasher(bam_get_qname(a._delegate), len(a.qname), 42) - n1 = (v, a.flag, a.pos, a.rname, p) - # Try next few reads, sometimes they are on top of one another - if n1 != node: - for j in range(5): - a = next(infile) - n2 = (xxhasher(bam_get_qname(a._delegate), len(a.qname), 42), a.flag, a.pos, a.rname, p) - if n2 == node: - rd[int_node] = a - break - else: - rd[int_node] = a - return rd - - cdef BFS_local(G, int source, unordered_set[int]& visited ): # Create a queue for BFS cdef array.array queue = array.array("L", [source]) nodes_found = set([]) cdef int u, v + cdef vector[int] neighbors while queue: u = queue.pop(0) - for v in G.neighbors(u): + neighbors = G.neighbors(u) + for v in neighbors: if visited.find(v) == visited.end(): if G.weight(u, v) > 1: if u not in nodes_found: @@ -1442,12 +1440,13 @@ cdef get_partitions(G, nodes): cdef unordered_set[int] seen cdef int u, v, i + cdef vector[int] neighbors parts = [] for u in nodes: if seen.find(u) != seen.end(): continue - - for v in G.neighbors(u): + neighbors = G.neighbors(u) + for v in neighbors: if seen.find(v) != seen.end(): continue @@ -1464,17 +1463,16 @@ cdef tuple count_support_between(G, parts, int min_support): cdef int i, j, node, child, any_out_edges cdef tuple t + cdef unsigned long[:] p if len(parts) == 0: return {}, {} elif len(parts) == 1: return {}, {0: parts[0]} - # return {}, {list(parts.keys())[0]: array.array("L", list(parts.values())[0])} # Make a table to count from, int-int cdef Py_Int2IntMap p2i = map_set_utils.Py_Int2IntMap() for i, p in enumerate(parts): - # for i, p in parts.items(): for node in p: p2i.insert(node, i) @@ -1485,13 +1483,14 @@ cdef tuple count_support_between(G, parts, int min_support): self_counts = {} seen_t = set([]) + cdef vector[int] neighbors for i, p in enumerate(parts): - # for i, p in parts.items(): current_t = set([]) for node in p: any_out_edges = 0 # Keeps track of number of outgoing pairs, or self edges - for child in G.neighbors(node): + neighbors = G.neighbors(node) + for child in neighbors: if not p2i.has_key(child): continue # Exterior child, not in any partition @@ -1529,7 +1528,7 @@ cdef tuple count_support_between(G, parts, int min_support): seen_t.update(current_t) # Only count edge once - # save memory by converting support_between to 2d array + # save memory by converting support_between to array for t in current_t: counts[t] = [np.fromiter(m, dtype="uint32", count=len(m)) for m in counts[t]] @@ -1552,7 +1551,6 @@ cpdef break_large_component(G, component, int min_support): # Make a table to count from, int-int cdef Py_Int2IntMap p2i = map_set_utils.Py_Int2IntMap() for i, p in enumerate(parts): - # for i, p in parts.items(): for node in p: p2i.insert(node, i) @@ -1563,13 +1561,14 @@ cpdef break_large_component(G, component, int min_support): self_counts = defaultdict(int) seen_t = set([]) + cdef vector[int] neighbors for i, p in enumerate(parts): - # for i, p in parts.items(): current_t = set([]) for node in p: any_out_edges = 0 # Keeps track of number of outgoing pairs, or self edges - for child in G.neighbors(node): + neighbors = G.neighbors(node) + for child in neighbors: if not p2i.has_key(child): continue # Exterior child, not in any partition @@ -1636,7 +1635,7 @@ cpdef proc_component(node_to_name, component, read_buffer, infile, G, int min_su # Need to keep a record of all node info, and cigar indexes key = node_to_name[v] - if key[5] != -1: + if key.cigar_index != -1: support_estimate += 2 else: support_estimate += 1 @@ -1647,10 +1646,8 @@ cpdef proc_component(node_to_name, component, read_buffer, infile, G, int min_su # Explore component for locally interacting nodes; create partitions using these partitions = get_partitions(G, component) - # partitions = {i: p for i, p in enumerate(partitions)} - support_between, support_within = count_support_between(G, partitions, min_support) - # echo("support between", len(support_between), len(support_within), info, partitions, len(n2n), info) + if len(support_between) == 0 and len(support_within) == 0: if not paired_end: diff --git a/dysgu/robin_hood.h b/dysgu/robin_hood.h index 481a084..0af031f 100755 --- a/dysgu/robin_hood.h +++ b/dysgu/robin_hood.h @@ -6,12 +6,11 @@ // _/_____/ // // Fast & memory efficient hashtable based on robin hood hashing for C++11/14/17/20 -// version 3.6.0 // https://github.com/martinus/robin-hood-hashing // // Licensed under the MIT License . // SPDX-License-Identifier: MIT -// Copyright (c) 2018-2020 Martin Ankerl +// Copyright (c) 2018-2021 Martin Ankerl // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -35,23 +34,29 @@ #define ROBIN_HOOD_H_INCLUDED // see https://semver.org/ -#define ROBIN_HOOD_VERSION_MAJOR 3 // for incompatible API changes -#define ROBIN_HOOD_VERSION_MINOR 6 // for adding functionality in a backwards-compatible manner -#define ROBIN_HOOD_VERSION_PATCH 0 // for backwards-compatible bug fixes +#define ROBIN_HOOD_VERSION_MAJOR 3 // for incompatible API changes +#define ROBIN_HOOD_VERSION_MINOR 11 // for adding functionality in a backwards-compatible manner +#define ROBIN_HOOD_VERSION_PATCH 5 // for backwards-compatible bug fixes #include #include #include #include +#include +#include // only to support hash of smart pointers #include #include #include #include +#if __cplusplus >= 201703L +# include +#endif // #define ROBIN_HOOD_LOG_ENABLED #ifdef ROBIN_HOOD_LOG_ENABLED # include -# define ROBIN_HOOD_LOG(x) std::cout << __FUNCTION__ << "@" << __LINE__ << ": " << x << std::endl +# define ROBIN_HOOD_LOG(...) \ + std::cout << __FUNCTION__ << "@" << __LINE__ << ": " << __VA_ARGS__ << std::endl; #else # define ROBIN_HOOD_LOG(x) #endif @@ -59,8 +64,8 @@ // #define ROBIN_HOOD_TRACE_ENABLED #ifdef ROBIN_HOOD_TRACE_ENABLED # include -# define ROBIN_HOOD_TRACE(x) \ - std::cout << __FUNCTION__ << "@" << __LINE__ << ": " << x << std::endl +# define ROBIN_HOOD_TRACE(...) \ + std::cout << __FUNCTION__ << "@" << __LINE__ << ": " << __VA_ARGS__ << std::endl; #else # define ROBIN_HOOD_TRACE(x) #endif @@ -128,30 +133,32 @@ static Counts& counts() { #endif // count leading/trailing bits -#ifdef _MSC_VER -# if ROBIN_HOOD(BITNESS) == 32 -# define ROBIN_HOOD_PRIVATE_DEFINITION_BITSCANFORWARD() _BitScanForward -# else -# define ROBIN_HOOD_PRIVATE_DEFINITION_BITSCANFORWARD() _BitScanForward64 -# endif -# include -# pragma intrinsic(ROBIN_HOOD(BITSCANFORWARD)) -# define ROBIN_HOOD_COUNT_TRAILING_ZEROES(x) \ - [](size_t mask) noexcept -> int { \ - unsigned long index; \ - return ROBIN_HOOD(BITSCANFORWARD)(&index, mask) ? static_cast(index) \ - : ROBIN_HOOD(BITNESS); \ - }(x) -#else -# if ROBIN_HOOD(BITNESS) == 32 -# define ROBIN_HOOD_PRIVATE_DEFINITION_CTZ() __builtin_ctzl -# define ROBIN_HOOD_PRIVATE_DEFINITION_CLZ() __builtin_clzl +#if !defined(ROBIN_HOOD_DISABLE_INTRINSICS) +# ifdef _MSC_VER +# if ROBIN_HOOD(BITNESS) == 32 +# define ROBIN_HOOD_PRIVATE_DEFINITION_BITSCANFORWARD() _BitScanForward +# else +# define ROBIN_HOOD_PRIVATE_DEFINITION_BITSCANFORWARD() _BitScanForward64 +# endif +# include +# pragma intrinsic(ROBIN_HOOD(BITSCANFORWARD)) +# define ROBIN_HOOD_COUNT_TRAILING_ZEROES(x) \ + [](size_t mask) noexcept -> int { \ + unsigned long index; \ + return ROBIN_HOOD(BITSCANFORWARD)(&index, mask) ? static_cast(index) \ + : ROBIN_HOOD(BITNESS); \ + }(x) # else -# define ROBIN_HOOD_PRIVATE_DEFINITION_CTZ() __builtin_ctzll -# define ROBIN_HOOD_PRIVATE_DEFINITION_CLZ() __builtin_clzll +# if ROBIN_HOOD(BITNESS) == 32 +# define ROBIN_HOOD_PRIVATE_DEFINITION_CTZ() __builtin_ctzl +# define ROBIN_HOOD_PRIVATE_DEFINITION_CLZ() __builtin_clzl +# else +# define ROBIN_HOOD_PRIVATE_DEFINITION_CTZ() __builtin_ctzll +# define ROBIN_HOOD_PRIVATE_DEFINITION_CLZ() __builtin_clzll +# endif +# define ROBIN_HOOD_COUNT_LEADING_ZEROES(x) ((x) ? ROBIN_HOOD(CLZ)(x) : ROBIN_HOOD(BITNESS)) +# define ROBIN_HOOD_COUNT_TRAILING_ZEROES(x) ((x) ? ROBIN_HOOD(CTZ)(x) : ROBIN_HOOD(BITNESS)) # endif -# define ROBIN_HOOD_COUNT_LEADING_ZEROES(x) ((x) ? ROBIN_HOOD(CLZ)(x) : ROBIN_HOOD(BITNESS)) -# define ROBIN_HOOD_COUNT_TRAILING_ZEROES(x) ((x) ? ROBIN_HOOD(CTZ)(x) : ROBIN_HOOD(BITNESS)) #endif // fallthrough @@ -175,6 +182,28 @@ static Counts& counts() { # define ROBIN_HOOD_UNLIKELY(condition) __builtin_expect(condition, 0) #endif +// detect if native wchar_t type is availiable in MSVC +#ifdef _MSC_VER +# ifdef _NATIVE_WCHAR_T_DEFINED +# define ROBIN_HOOD_PRIVATE_DEFINITION_HAS_NATIVE_WCHART() 1 +# else +# define ROBIN_HOOD_PRIVATE_DEFINITION_HAS_NATIVE_WCHART() 0 +# endif +#else +# define ROBIN_HOOD_PRIVATE_DEFINITION_HAS_NATIVE_WCHART() 1 +#endif + +// detect if MSVC supports the pair(std::piecewise_construct_t,...) consructor being constexpr +#ifdef _MSC_VER +# if _MSC_VER <= 1900 +# define ROBIN_HOOD_PRIVATE_DEFINITION_BROKEN_CONSTEXPR() 1 +# else +# define ROBIN_HOOD_PRIVATE_DEFINITION_BROKEN_CONSTEXPR() 0 +# endif +#else +# define ROBIN_HOOD_PRIVATE_DEFINITION_BROKEN_CONSTEXPR() 0 +#endif + // workaround missing "is_trivially_copyable" in g++ < 5.0 // See https://stackoverflow.com/a/31798726/48181 #if defined(__GNUC__) && __GNUC__ < 5 @@ -274,39 +303,18 @@ using index_sequence_for = make_index_sequence; namespace detail { -// umul -#if defined(__SIZEOF_INT128__) -# define ROBIN_HOOD_PRIVATE_DEFINITION_HAS_UMUL128() 1 -# if defined(__GNUC__) || defined(__clang__) -# pragma GCC diagnostic push -# pragma GCC diagnostic ignored "-Wpedantic" -using uint128_t = unsigned __int128; -# pragma GCC diagnostic pop -# endif -inline uint64_t umul128(uint64_t a, uint64_t b, uint64_t* high) noexcept { - auto result = static_cast(a) * static_cast(b); - *high = static_cast(result >> 64U); - return static_cast(result); -} -#elif (defined(_MSC_VER) && ROBIN_HOOD(BITNESS) == 64) -# define ROBIN_HOOD_PRIVATE_DEFINITION_HAS_UMUL128() 1 -# include // for __umulh -# pragma intrinsic(__umulh) -# ifndef _M_ARM64 -# pragma intrinsic(_umul128) -# endif -inline uint64_t umul128(uint64_t a, uint64_t b, uint64_t* high) noexcept { -# ifdef _M_ARM64 - *high = __umulh(a, b); - return ((uint64_t)(a)) * (b); -# else - return _umul128(a, b, high); -# endif -} +// make sure we static_cast to the correct type for hash_int +#if ROBIN_HOOD(BITNESS) == 64 +using SizeT = uint64_t; #else -# define ROBIN_HOOD_PRIVATE_DEFINITION_HAS_UMUL128() 0 +using SizeT = uint32_t; #endif +template +T rotr(T x, unsigned k) { + return (x >> k) | (x << (8U * sizeof(T) - k)); +} + // This cast gets rid of warnings like "cast from 'uint8_t*' {aka 'unsigned char*'} to // 'uint64_t*' {aka 'long unsigned int*'} increases required alignment of target type". Use with // care! @@ -323,14 +331,14 @@ inline T reinterpret_cast_no_cast_align_warning(void const* ptr) noexcept { // make sure this is not inlined as it is slow and dramatically enlarges code, thus making other // inlinings more difficult. Throws are also generally the slow path. template -ROBIN_HOOD(NOINLINE) +[[noreturn]] ROBIN_HOOD(NOINLINE) #if ROBIN_HOOD(HAS_EXCEPTIONS) -void doThrow(Args&&... args) { + void doThrow(Args&&... args) { // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-array-to-pointer-decay) throw E(std::forward(args)...); } #else -void doThrow(Args&&... ROBIN_HOOD_UNUSED(args) /*unused*/) { + void doThrow(Args&&... ROBIN_HOOD_UNUSED(args) /*unused*/) { abort(); } #endif @@ -396,7 +404,8 @@ class BulkPoolAllocator { void reset() noexcept { while (mListForFree) { T* tmp = *mListForFree; - free(mListForFree); + ROBIN_HOOD_LOG("std::free") + std::free(mListForFree); mListForFree = reinterpret_cast_no_cast_align_warning(tmp); } mHead = nullptr; @@ -431,8 +440,10 @@ class BulkPoolAllocator { // calculate number of available elements in ptr if (numBytes < ALIGNMENT + ALIGNED_SIZE) { // not enough data for at least one element. Free and return. - free(ptr); + ROBIN_HOOD_LOG("std::free") + std::free(ptr); } else { + ROBIN_HOOD_LOG("add to buffer") add(ptr, numBytes); } } @@ -473,10 +484,10 @@ class BulkPoolAllocator { mListForFree = data; // create linked list for newly allocated data - auto const headT = + auto* const headT = reinterpret_cast_no_cast_align_warning(reinterpret_cast(ptr) + ALIGNMENT); - auto const head = reinterpret_cast(headT); + auto* const head = reinterpret_cast(headT); // Visual Studio compiler automatically unrolls this loop, which is pretty cool for (size_t i = 0; i < numElements; ++i) { @@ -496,9 +507,10 @@ class BulkPoolAllocator { size_t const numElementsToAlloc = calcNumElementsToAlloc(); // alloc new memory: [prev |T, T, ... T] - // std::cout << (sizeof(T*) + ALIGNED_SIZE * numElementsToAlloc) << " bytes" << std::endl; size_t const bytes = ALIGNMENT + ALIGNED_SIZE * numElementsToAlloc; - add(assertNotNull(malloc(bytes)), bytes); + ROBIN_HOOD_LOG("std::malloc " << bytes << " = " << ALIGNMENT << " + " << ALIGNED_SIZE + << " * " << numElementsToAlloc) + add(assertNotNull(std::malloc(bytes)), bytes); return mHead; } @@ -534,30 +546,29 @@ struct NodeAllocator { // we are not using the data, so just free it. void addOrFree(void* ptr, size_t ROBIN_HOOD_UNUSED(numBytes) /*unused*/) noexcept { - free(ptr); + ROBIN_HOOD_LOG("std::free") + std::free(ptr); } }; template struct NodeAllocator : public BulkPoolAllocator {}; -// dummy hash, unsed as mixer when robin_hood::hash is already used -template -struct identity_hash { - constexpr size_t operator()(T const& obj) const noexcept { - return static_cast(obj); - } -}; - // c++14 doesn't have is_nothrow_swappable, and clang++ 6.0.1 doesn't like it either, so I'm making // my own here. namespace swappable { +#if ROBIN_HOOD(CXX) < ROBIN_HOOD(CXX17) using std::swap; template struct nothrow { static const bool value = noexcept(swap(std::declval(), std::declval())); }; - +#else +template +struct nothrow { + static const bool value = std::is_nothrow_swappable::value; +}; +#endif } // namespace swappable } // namespace detail @@ -586,44 +597,46 @@ struct pair { , second(o.second) {} // pair constructors are explicit so we don't accidentally call this ctor when we don't have to. - explicit constexpr pair(std::pair&& o) noexcept( - noexcept(T1(std::move(std::declval()))) && - noexcept(T2(std::move(std::declval())))) + explicit constexpr pair(std::pair&& o) noexcept(noexcept( + T1(std::move(std::declval()))) && noexcept(T2(std::move(std::declval())))) : first(std::move(o.first)) , second(std::move(o.second)) {} - constexpr pair(T1&& a, T2&& b) noexcept(noexcept(T1(std::move(std::declval()))) && - noexcept(T2(std::move(std::declval())))) + constexpr pair(T1&& a, T2&& b) noexcept(noexcept( + T1(std::move(std::declval()))) && noexcept(T2(std::move(std::declval())))) : first(std::move(a)) , second(std::move(b)) {} template - constexpr pair(U1&& a, U2&& b) noexcept(noexcept(T1(std::forward(std::declval()))) && - noexcept(T2(std::forward(std::declval())))) + constexpr pair(U1&& a, U2&& b) noexcept(noexcept(T1(std::forward( + std::declval()))) && noexcept(T2(std::forward(std::declval())))) : first(std::forward(a)) , second(std::forward(b)) {} template - constexpr pair( - std::piecewise_construct_t /*unused*/, std::tuple a, - std::tuple b) noexcept(noexcept(pair(std::declval&>(), - std::declval&>(), - ROBIN_HOOD_STD::index_sequence_for(), - ROBIN_HOOD_STD::index_sequence_for()))) + // MSVC 2015 produces error "C2476: ‘constexpr’ constructor does not initialize all members" + // if this constructor is constexpr +#if !ROBIN_HOOD(BROKEN_CONSTEXPR) + constexpr +#endif + pair(std::piecewise_construct_t /*unused*/, std::tuple a, + std::tuple + b) noexcept(noexcept(pair(std::declval&>(), + std::declval&>(), + ROBIN_HOOD_STD::index_sequence_for(), + ROBIN_HOOD_STD::index_sequence_for()))) : pair(a, b, ROBIN_HOOD_STD::index_sequence_for(), - ROBIN_HOOD_STD::index_sequence_for()) {} + ROBIN_HOOD_STD::index_sequence_for()) { + } // constructor called from the std::piecewise_construct_t ctor template - pair(std::tuple& a, std::tuple& b, - ROBIN_HOOD_STD::index_sequence /*unused*/, - ROBIN_HOOD_STD::index_sequence< - I2...> /*unused*/) noexcept(noexcept(T1(std:: - forward(std::get( - std::declval< - std::tuple&>()))...)) && - noexcept(T2(std::forward( - std::get(std::declval&>()))...))) + pair(std::tuple& a, std::tuple& b, ROBIN_HOOD_STD::index_sequence /*unused*/, ROBIN_HOOD_STD::index_sequence /*unused*/) noexcept( + noexcept(T1(std::forward(std::get( + std::declval&>()))...)) && noexcept(T2(std:: + forward(std::get( + std::declval&>()))...))) : first(std::forward(std::get(a))...) , second(std::forward(std::get(b))...) { // make visual studio compiler happy about warning about unused a & b. @@ -658,7 +671,9 @@ inline constexpr bool operator!=(pair const& x, pair const& y) { return !(x == y); } template -inline constexpr bool operator<(pair const& x, pair const& y) { +inline constexpr bool operator<(pair const& x, pair const& y) noexcept(noexcept( + std::declval() < std::declval()) && noexcept(std::declval() < + std::declval())) { return x.first < y.first || (!(y.first < x.first) && x.second < y.second); } template @@ -674,14 +689,12 @@ inline constexpr bool operator>=(pair const& x, pair const& y) { return !(x < y); } -// Hash an arbitrary amount of bytes. This is basically Murmur2 hash without caring about big -// endianness. TODO(martinus) add a fallback for very large strings? -static size_t hash_bytes(void const* ptr, size_t const len) noexcept { +inline size_t hash_bytes(void const* ptr, size_t len) noexcept { static constexpr uint64_t m = UINT64_C(0xc6a4a7935bd1e995); static constexpr uint64_t seed = UINT64_C(0xe17a1465); static constexpr unsigned int r = 47; - auto const data64 = static_cast(ptr); + auto const* const data64 = static_cast(ptr); uint64_t h = seed ^ (len * m); size_t const n_blocks = len / 8; @@ -696,7 +709,7 @@ static size_t hash_bytes(void const* ptr, size_t const len) noexcept { h *= m; } - auto const data8 = reinterpret_cast(data64 + n_blocks); + auto const* const data8 = reinterpret_cast(data64 + n_blocks); switch (len & 7U) { case 7: h ^= static_cast(data8[6]) << 48U; @@ -725,65 +738,87 @@ static size_t hash_bytes(void const* ptr, size_t const len) noexcept { } h ^= h >> r; - h *= m; - h ^= h >> r; + + // not doing the final step here, because this will be done by keyToIdx anyways + // h *= m; + // h ^= h >> r; return static_cast(h); } -inline size_t hash_int(uint64_t obj) noexcept { -#if ROBIN_HOOD(HAS_UMUL128) - // 167079903232 masksum, 120428523 ops best: 0xde5fb9d2630458e9 - static constexpr uint64_t k = UINT64_C(0xde5fb9d2630458e9); - uint64_t h; - uint64_t l = detail::umul128(obj, k, &h); - return h + l; -#elif ROBIN_HOOD(BITNESS) == 32 - uint64_t const r = obj * UINT64_C(0xca4bcaa75ec3f625); - auto h = static_cast(r >> 32U); - auto l = static_cast(r); - return h + l; -#else - // murmurhash 3 finalizer - uint64_t h = obj; - h ^= h >> 33; - h *= 0xff51afd7ed558ccd; - h ^= h >> 33; - h *= 0xc4ceb9fe1a85ec53; - h ^= h >> 33; - return static_cast(h); -#endif +inline size_t hash_int(uint64_t x) noexcept { + // tried lots of different hashes, let's stick with murmurhash3. It's simple, fast, well tested, + // and doesn't need any special 128bit operations. + x ^= x >> 33U; + x *= UINT64_C(0xff51afd7ed558ccd); + x ^= x >> 33U; + + // not doing the final step here, because this will be done by keyToIdx anyways + // x *= UINT64_C(0xc4ceb9fe1a85ec53); + // x ^= x >> 33U; + return static_cast(x); } // A thin wrapper around std::hash, performing an additional simple mixing step of the result. -template +template struct hash : public std::hash { size_t operator()(T const& obj) const noexcept(noexcept(std::declval>().operator()(std::declval()))) { // call base hash auto result = std::hash::operator()(obj); // return mixed of that, to be save against identity has - return hash_int(static_cast(result)); + return hash_int(static_cast(result)); } }; -template <> -struct hash { - size_t operator()(std::string const& str) const noexcept { - return hash_bytes(str.data(), str.size()); +template +struct hash> { + size_t operator()(std::basic_string const& str) const noexcept { + return hash_bytes(str.data(), sizeof(CharT) * str.size()); } }; +#if ROBIN_HOOD(CXX) >= ROBIN_HOOD(CXX17) +template +struct hash> { + size_t operator()(std::basic_string_view const& sv) const noexcept { + return hash_bytes(sv.data(), sizeof(CharT) * sv.size()); + } +}; +#endif + template struct hash { size_t operator()(T* ptr) const noexcept { - return hash_int(reinterpret_cast(ptr)); + return hash_int(reinterpret_cast(ptr)); + } +}; + +template +struct hash> { + size_t operator()(std::unique_ptr const& ptr) const noexcept { + return hash_int(reinterpret_cast(ptr.get())); + } +}; + +template +struct hash> { + size_t operator()(std::shared_ptr const& ptr) const noexcept { + return hash_int(reinterpret_cast(ptr.get())); + } +}; + +template +struct hash::value>::type> { + size_t operator()(Enum e) const noexcept { + using Underlying = typename std::underlying_type::type; + return hash{}(static_cast(e)); } }; #define ROBIN_HOOD_HASH_INT(T) \ template <> \ struct hash { \ - size_t operator()(T obj) const noexcept { \ + size_t operator()(T const& obj) const noexcept { \ return hash_int(static_cast(obj)); \ } \ } @@ -799,7 +834,9 @@ ROBIN_HOOD_HASH_INT(signed char); ROBIN_HOOD_HASH_INT(unsigned char); ROBIN_HOOD_HASH_INT(char16_t); ROBIN_HOOD_HASH_INT(char32_t); +#if ROBIN_HOOD(HAS_NATIVE_WCHART) ROBIN_HOOD_HASH_INT(wchar_t); +#endif ROBIN_HOOD_HASH_INT(short); ROBIN_HOOD_HASH_INT(unsigned short); ROBIN_HOOD_HASH_INT(int); @@ -813,8 +850,20 @@ ROBIN_HOOD_HASH_INT(unsigned long long); #endif namespace detail { -// using wrapper classes for hash and key_equal prevents the diamond problem when the same type is -// used. see https://stackoverflow.com/a/28771920/48181 +template +struct void_type { + using type = void; +}; + +template +struct has_is_transparent : public std::false_type {}; + +template +struct has_is_transparent::type> + : public std::true_type {}; + +// using wrapper classes for hash and key_equal prevents the diamond problem when the same type +// is used. see https://stackoverflow.com/a/28771920/48181 template struct WrapHash : public T { WrapHash() = default; @@ -831,8 +880,8 @@ struct WrapKeyEqual : public T { // A highly optimized hashmap implementation, using the Robin Hood algorithm. // -// In most cases, this map should be usable as a drop-in replacement for std::unordered_map, but be -// about 2x faster in most cases and require much less allocations. +// In most cases, this map should be usable as a drop-in replacement for std::unordered_map, but +// be about 2x faster in most cases and require much less allocations. // // This implementation uses the following memory layout: // @@ -840,8 +889,8 @@ struct WrapKeyEqual : public T { // // * Node: either a DataNode that directly has the std::pair as member, // or a DataNode with a pointer to std::pair. Which DataNode representation to use -// depends on how fast the swap() operation is. Heuristically, this is automatically choosen based -// on sizeof(). there are always 2^n Nodes. +// depends on how fast the swap() operation is. Heuristically, this is automatically choosen +// based on sizeof(). there are always 2^n Nodes. // // * info: Each Node in the map has a corresponding info byte, so there are 2^n info bytes. // Each byte is initialized to 0, meaning the corresponding Node is empty. Set to 1 means the @@ -849,12 +898,11 @@ struct WrapKeyEqual : public T { // actually belongs to the previous position and was pushed out because that place is already // taken. // -// * infoSentinel: Sentinel byte set to 1, so that iterator's ++ can stop at end() without the need -// for a idx -// variable. +// * infoSentinel: Sentinel byte set to 1, so that iterator's ++ can stop at end() without the +// need for a idx variable. // -// According to STL, order of templates has effect on throughput. That's why I've moved the boolean -// to the front. +// According to STL, order of templates has effect on throughput. That's why I've moved the +// boolean to the front. // https://www.reddit.com/r/cpp/comments/ahp6iu/compile_time_binary_size_reductions_and_cs_future/eeguck4/ template @@ -870,6 +918,8 @@ class Table static constexpr bool is_flat = IsFlat; static constexpr bool is_map = !std::is_void::value; static constexpr bool is_set = !is_map; + static constexpr bool is_transparent = + has_is_transparent::value && has_is_transparent::value; using key_type = Key; using mapped_type = T; @@ -894,7 +944,8 @@ class Table static constexpr size_t InitialNumElements = sizeof(uint64_t); static constexpr uint32_t InitialInfoNumBits = 5; static constexpr uint8_t InitialInfoInc = 1U << InitialInfoNumBits; - static constexpr uint8_t InitialInfoHashShift = sizeof(size_t) * 8 - InitialInfoNumBits; + static constexpr size_t InfoMask = InitialInfoInc - 1U; + static constexpr uint8_t InitialInfoHashShift = 0; using DataPool = detail::NodeAllocator; // type needs to be wider than uint8_t. @@ -903,8 +954,8 @@ class Table // DataNode //////////////////////////////////////////////////////// // Primary template for the data node. We have special implementations for small and big - // objects. For large objects it is assumed that swap() is fairly slow, so we allocate these on - // the heap so swap merely swaps a pointer. + // objects. For large objects it is assumed that swap() is fairly slow, so we allocate these + // on the heap so swap merely swaps a pointer. template class DataNode {}; @@ -953,8 +1004,8 @@ class Table template ROBIN_HOOD(NODISCARD) - typename std::enable_if::type getFirst() const - noexcept { + typename std::enable_if::type + getFirst() const noexcept { return mData.first; } template @@ -1036,8 +1087,8 @@ class Table template ROBIN_HOOD(NODISCARD) - typename std::enable_if::type getFirst() const - noexcept { + typename std::enable_if::type + getFirst() const noexcept { return mData->first; } template @@ -1069,7 +1120,7 @@ class Table using Node = DataNode; - // helpers for doInsert: extract first entry (only const required) + // helpers for insertKeyPrepareEmptySpot: extract first entry (only const required) ROBIN_HOOD(NODISCARD) key_type const& getFirstConst(Node const& n) const noexcept { return n.getFirst(); } @@ -1097,8 +1148,8 @@ class Table template struct Cloner { void operator()(M const& source, M& target) const { - auto src = reinterpret_cast(source.mKeyVals); - auto tgt = reinterpret_cast(target.mKeyVals); + auto const* const src = reinterpret_cast(source.mKeyVals); + auto* tgt = reinterpret_cast(target.mKeyVals); auto const numElementsWithBuffer = target.calcNumElementsWithBuffer(target.mMask + 1); std::copy(src, src + target.calcNumBytesTotal(numElementsWithBuffer), tgt); } @@ -1186,8 +1237,8 @@ class Table // compared to end(). Iter() = default; - // Rule of zero: nothing specified. The conversion constructor is only enabled for iterator - // to const_iterator, so it doesn't accidentally work as a copy ctor. + // Rule of zero: nothing specified. The conversion constructor is only enabled for + // iterator to const_iterator, so it doesn't accidentally work as a copy ctor. // Conversion constructor from iterator to const_iterator. template (mInfo); -#if ROBIN_HOOD(LITTLE_ENDIAN) - inc = ROBIN_HOOD_COUNT_TRAILING_ZEROES(n) / 8; + size_t n = 0; + while (0U == (n = detail::unaligned_load(mInfo))) { + mInfo += sizeof(size_t); + mKeyVals += sizeof(size_t); + } +#if defined(ROBIN_HOOD_DISABLE_INTRINSICS) + // we know for certain that within the next 8 bytes we'll find a non-zero one. + if (ROBIN_HOOD_UNLIKELY(0U == detail::unaligned_load(mInfo))) { + mInfo += 4; + mKeyVals += 4; + } + if (ROBIN_HOOD_UNLIKELY(0U == detail::unaligned_load(mInfo))) { + mInfo += 2; + mKeyVals += 2; + } + if (ROBIN_HOOD_UNLIKELY(0U == *mInfo)) { + mInfo += 1; + mKeyVals += 1; + } #else - inc = ROBIN_HOOD_COUNT_LEADING_ZEROES(n) / 8; +# if ROBIN_HOOD(LITTLE_ENDIAN) + auto inc = ROBIN_HOOD_COUNT_TRAILING_ZEROES(n) / 8; +# else + auto inc = ROBIN_HOOD_COUNT_LEADING_ZEROES(n) / 8; +# endif + mInfo += inc; + mKeyVals += inc; #endif - mInfo += inc; - mKeyVals += inc; - } while (inc == static_cast(sizeof(size_t))); } friend class Table; @@ -1270,17 +1346,17 @@ class Table // The upper 1-5 bits need to be a reasonable good hash, to save comparisons. template void keyToIdx(HashKey&& key, size_t* idx, InfoType* info) const { - // for a user-specified hash that is *not* robin_hood::hash, apply robin_hood::hash as an - // additional mixing step. This serves as a bad hash prevention, if the given data is badly - // mixed. - using Mix = - typename std::conditional, hasher>::value, - ::robin_hood::detail::identity_hash, - ::robin_hood::hash>::type; - *idx = Mix{}(WHash::operator()(key)); + // In addition to whatever hash is used, add another mul & shift so we get better hashing. + // This serves as a bad hash prevention, if the given data is + // badly mixed. + auto h = static_cast(WHash::operator()(key)); + + h *= mHashMultiplier; + h ^= h >> 33U; - *info = mInfoInc + static_cast(*idx >> mInfoHashShift); - *idx &= mMask; + // the lower InitialInfoNumBits are reserved for info. + *info = mInfoInc + static_cast((h & InfoMask) >> mInfoHashShift); + *idx = (static_cast(h) >> InitialInfoNumBits) & mMask; } // forwards the index by one, wrapping around at the end @@ -1308,7 +1384,7 @@ class Table idx = startIdx; while (idx != insertion_idx) { - ROBIN_HOOD_COUNT(shiftUp); + ROBIN_HOOD_COUNT(shiftUp) mInfo[idx] = static_cast(mInfo[idx - 1] + mInfoInc); if (ROBIN_HOOD_UNLIKELY(mInfo[idx] + mInfoInc > 0xFF)) { mMaxNumElementsAllowed = 0; @@ -1319,12 +1395,13 @@ class Table void shiftDown(size_t idx) noexcept(std::is_nothrow_move_assignable::value) { // until we find one that is either empty or has zero offset. - // TODO(martinus) we don't need to move everything, just the last one for the same bucket. + // TODO(martinus) we don't need to move everything, just the last one for the same + // bucket. mKeyVals[idx].destroy(*this); // until we find one that is either empty or has zero offset. while (mInfo[idx + 1] >= 2 * mInfoInc) { - ROBIN_HOOD_COUNT(shiftDown); + ROBIN_HOOD_COUNT(shiftDown) mInfo[idx] = static_cast(mInfo[idx + 1] - mInfoInc); mKeyVals[idx] = std::move(mKeyVals[idx + 1]); ++idx; @@ -1340,8 +1417,8 @@ class Table template ROBIN_HOOD(NODISCARD) size_t findIdx(Other const& key) const { - size_t idx; - InfoType info; + size_t idx{}; + InfoType info{}; keyToIdx(key, &idx, &info); do { @@ -1369,16 +1446,16 @@ class Table } // inserts a keyval that is guaranteed to be new, e.g. when the hashmap is resized. - // @return index where the element was created - size_t insert_move(Node&& keyval) { + // @return True on success, false if something went wrong + void insert_move(Node&& keyval) { // we don't retry, fail if overflowing // don't need to check max num elements if (0 == mMaxNumElementsAllowed && !try_increase_info()) { - throwOverflowError(); // impossible to reach LCOV_EXCL_LINE + throwOverflowError(); } - size_t idx; - InfoType info; + size_t idx{}; + InfoType info{}; keyToIdx(keyval.getFirst(), &idx, &info); // skip forward. Use <= because we are certain that the element is not there. @@ -1411,24 +1488,29 @@ class Table mInfo[insertion_idx] = insertion_info; ++mNumElements; - return insertion_idx; } public: using iterator = Iter; using const_iterator = Iter; - // Creates an empty hash map. Nothing is allocated yet, this happens at the first insert. This - // tremendously speeds up ctor & dtor of a map that never receives an element. The penalty is - // payed at the first insert, and not before. Lookup of this empty map works because everybody - // points to DummyInfoByte::b. parameter bucket_count is dictated by the standard, but we can - // ignore it. - explicit Table(size_t ROBIN_HOOD_UNUSED(bucket_count) /*unused*/ = 0, const Hash& h = Hash{}, - const KeyEqual& equal = KeyEqual{}) noexcept(noexcept(Hash(h)) && - noexcept(KeyEqual(equal))) + Table() noexcept(noexcept(Hash()) && noexcept(KeyEqual())) + : WHash() + , WKeyEqual() { + ROBIN_HOOD_TRACE(this) + } + + // Creates an empty hash map. Nothing is allocated yet, this happens at the first insert. + // This tremendously speeds up ctor & dtor of a map that never receives an element. The + // penalty is payed at the first insert, and not before. Lookup of this empty map works + // because everybody points to DummyInfoByte::b. parameter bucket_count is dictated by the + // standard, but we can ignore it. + explicit Table( + size_t ROBIN_HOOD_UNUSED(bucket_count) /*unused*/, const Hash& h = Hash{}, + const KeyEqual& equal = KeyEqual{}) noexcept(noexcept(Hash(h)) && noexcept(KeyEqual(equal))) : WHash(h) , WKeyEqual(equal) { - ROBIN_HOOD_TRACE(this); + ROBIN_HOOD_TRACE(this) } template @@ -1436,7 +1518,7 @@ class Table const Hash& h = Hash{}, const KeyEqual& equal = KeyEqual{}) : WHash(h) , WKeyEqual(equal) { - ROBIN_HOOD_TRACE(this); + ROBIN_HOOD_TRACE(this) insert(first, last); } @@ -1445,7 +1527,7 @@ class Table const KeyEqual& equal = KeyEqual{}) : WHash(h) , WKeyEqual(equal) { - ROBIN_HOOD_TRACE(this); + ROBIN_HOOD_TRACE(this) insert(initlist.begin(), initlist.end()); } @@ -1453,8 +1535,9 @@ class Table : WHash(std::move(static_cast(o))) , WKeyEqual(std::move(static_cast(o))) , DataPool(std::move(static_cast(o))) { - ROBIN_HOOD_TRACE(this); + ROBIN_HOOD_TRACE(this) if (o.mMask) { + mHashMultiplier = std::move(o.mHashMultiplier); mKeyVals = std::move(o.mKeyVals); mInfo = std::move(o.mInfo); mNumElements = std::move(o.mNumElements); @@ -1468,11 +1551,12 @@ class Table } Table& operator=(Table&& o) noexcept { - ROBIN_HOOD_TRACE(this); + ROBIN_HOOD_TRACE(this) if (&o != this) { if (o.mMask) { // only move stuff if the other map actually has some data destroy(); + mHashMultiplier = std::move(o.mHashMultiplier); mKeyVals = std::move(o.mKeyVals); mInfo = std::move(o.mInfo); mNumElements = std::move(o.mNumElements); @@ -1498,14 +1582,19 @@ class Table : WHash(static_cast(o)) , WKeyEqual(static_cast(o)) , DataPool(static_cast(o)) { - ROBIN_HOOD_TRACE(this); + ROBIN_HOOD_TRACE(this) if (!o.empty()) { // not empty: create an exact copy. it is also possible to just iterate through all // elements and insert them, but copying is probably faster. auto const numElementsWithBuffer = calcNumElementsWithBuffer(o.mMask + 1); - mKeyVals = static_cast(detail::assertNotNull( - malloc(calcNumBytesTotal(numElementsWithBuffer)))); + auto const numBytesTotal = calcNumBytesTotal(numElementsWithBuffer); + + ROBIN_HOOD_LOG("std::malloc " << numBytesTotal << " = calcNumBytesTotal(" + << numElementsWithBuffer << ")") + mHashMultiplier = o.mHashMultiplier; + mKeyVals = static_cast( + detail::assertNotNull(std::malloc(numBytesTotal))); // no need for calloc because clonData does memcpy mInfo = reinterpret_cast(mKeyVals + numElementsWithBuffer); mNumElements = o.mNumElements; @@ -1521,14 +1610,14 @@ class Table // Not sure why clang-tidy thinks this doesn't handle self assignment, it does // NOLINTNEXTLINE(bugprone-unhandled-self-assignment,cert-oop54-cpp) Table& operator=(Table const& o) { - ROBIN_HOOD_TRACE(this); + ROBIN_HOOD_TRACE(this) if (&o == this) { // prevent assigning of itself return *this; } - // we keep using the old allocator and not assign the new one, because we want to keep the - // memory available. when it is the same size. + // we keep using the old allocator and not assign the new one, because we want to keep + // the memory available. when it is the same size. if (o.empty()) { if (0 == mMask) { // nothing to do, we are empty too @@ -1553,12 +1642,16 @@ class Table // no luck: we don't have the same array size allocated, so we need to realloc. if (0 != mMask) { // only deallocate if we actually have data! - free(mKeyVals); + ROBIN_HOOD_LOG("std::free") + std::free(mKeyVals); } auto const numElementsWithBuffer = calcNumElementsWithBuffer(o.mMask + 1); - mKeyVals = static_cast(detail::assertNotNull( - malloc(calcNumBytesTotal(numElementsWithBuffer)))); + auto const numBytesTotal = calcNumBytesTotal(numElementsWithBuffer); + ROBIN_HOOD_LOG("std::malloc " << numBytesTotal << " = calcNumBytesTotal(" + << numElementsWithBuffer << ")") + mKeyVals = static_cast( + detail::assertNotNull(std::malloc(numBytesTotal))); // no need for calloc here because cloneData performs a memcpy. mInfo = reinterpret_cast(mKeyVals + numElementsWithBuffer); @@ -1567,6 +1660,7 @@ class Table WHash::operator=(static_cast(o)); WKeyEqual::operator=(static_cast(o)); DataPool::operator=(static_cast(o)); + mHashMultiplier = o.mHashMultiplier; mNumElements = o.mNumElements; mMask = o.mMask; mMaxNumElementsAllowed = o.mMaxNumElementsAllowed; @@ -1579,17 +1673,17 @@ class Table // Swaps everything between the two maps. void swap(Table& o) { - ROBIN_HOOD_TRACE(this); + ROBIN_HOOD_TRACE(this) using std::swap; swap(o, *this); } // Clears all data, without resizing. void clear() { - ROBIN_HOOD_TRACE(this); + ROBIN_HOOD_TRACE(this) if (empty()) { - // don't do anything! also important because we don't want to write to DummyInfoByte::b, - // even though we would just write 0 to it. + // don't do anything! also important because we don't want to write to + // DummyInfoByte::b, even though we would just write 0 to it. return; } @@ -1607,13 +1701,13 @@ class Table // Destroys the map and all it's contents. ~Table() { - ROBIN_HOOD_TRACE(this); + ROBIN_HOOD_TRACE(this) destroy(); } // Checks if both tables contain the same entries. Order is irrelevant. bool operator==(const Table& other) const { - ROBIN_HOOD_TRACE(this); + ROBIN_HOOD_TRACE(this) if (other.size() != size()) { return false; } @@ -1627,20 +1721,61 @@ class Table } bool operator!=(const Table& other) const { - ROBIN_HOOD_TRACE(this); + ROBIN_HOOD_TRACE(this) return !operator==(other); } template typename std::enable_if::value, Q&>::type operator[](const key_type& key) { - ROBIN_HOOD_TRACE(this); - return doCreateByKey(key); + ROBIN_HOOD_TRACE(this) + auto idxAndState = insertKeyPrepareEmptySpot(key); + switch (idxAndState.second) { + case InsertionState::key_found: + break; + + case InsertionState::new_node: + ::new (static_cast(&mKeyVals[idxAndState.first])) + Node(*this, std::piecewise_construct, std::forward_as_tuple(key), + std::forward_as_tuple()); + break; + + case InsertionState::overwrite_node: + mKeyVals[idxAndState.first] = Node(*this, std::piecewise_construct, + std::forward_as_tuple(key), std::forward_as_tuple()); + break; + + case InsertionState::overflow_error: + throwOverflowError(); + } + + return mKeyVals[idxAndState.first].getSecond(); } template typename std::enable_if::value, Q&>::type operator[](key_type&& key) { - ROBIN_HOOD_TRACE(this); - return doCreateByKey(std::move(key)); + ROBIN_HOOD_TRACE(this) + auto idxAndState = insertKeyPrepareEmptySpot(key); + switch (idxAndState.second) { + case InsertionState::key_found: + break; + + case InsertionState::new_node: + ::new (static_cast(&mKeyVals[idxAndState.first])) + Node(*this, std::piecewise_construct, std::forward_as_tuple(std::move(key)), + std::forward_as_tuple()); + break; + + case InsertionState::overwrite_node: + mKeyVals[idxAndState.first] = + Node(*this, std::piecewise_construct, std::forward_as_tuple(std::move(key)), + std::forward_as_tuple()); + break; + + case InsertionState::overflow_error: + throwOverflowError(); + } + + return mKeyVals[idxAndState.first].getSecond(); } template @@ -1651,31 +1786,123 @@ class Table } } + void insert(std::initializer_list ilist) { + for (auto&& vt : ilist) { + insert(std::move(vt)); + } + } + template std::pair emplace(Args&&... args) { - ROBIN_HOOD_TRACE(this); + ROBIN_HOOD_TRACE(this) Node n{*this, std::forward(args)...}; - auto r = doInsert(std::move(n)); - if (!r.second) { - // insertion not possible: destroy node - // NOLINTNEXTLINE(bugprone-use-after-move) + auto idxAndState = insertKeyPrepareEmptySpot(getFirstConst(n)); + switch (idxAndState.second) { + case InsertionState::key_found: + n.destroy(*this); + break; + + case InsertionState::new_node: + ::new (static_cast(&mKeyVals[idxAndState.first])) Node(*this, std::move(n)); + break; + + case InsertionState::overwrite_node: + mKeyVals[idxAndState.first] = std::move(n); + break; + + case InsertionState::overflow_error: n.destroy(*this); + throwOverflowError(); + break; } - return r; + + return std::make_pair(iterator(mKeyVals + idxAndState.first, mInfo + idxAndState.first), + InsertionState::key_found != idxAndState.second); + } + + template + iterator emplace_hint(const_iterator position, Args&&... args) { + (void)position; + return emplace(std::forward(args)...).first; + } + + template + std::pair try_emplace(const key_type& key, Args&&... args) { + return try_emplace_impl(key, std::forward(args)...); + } + + template + std::pair try_emplace(key_type&& key, Args&&... args) { + return try_emplace_impl(std::move(key), std::forward(args)...); + } + + template + iterator try_emplace(const_iterator hint, const key_type& key, Args&&... args) { + (void)hint; + return try_emplace_impl(key, std::forward(args)...).first; + } + + template + iterator try_emplace(const_iterator hint, key_type&& key, Args&&... args) { + (void)hint; + return try_emplace_impl(std::move(key), std::forward(args)...).first; + } + + template + std::pair insert_or_assign(const key_type& key, Mapped&& obj) { + return insertOrAssignImpl(key, std::forward(obj)); + } + + template + std::pair insert_or_assign(key_type&& key, Mapped&& obj) { + return insertOrAssignImpl(std::move(key), std::forward(obj)); + } + + template + iterator insert_or_assign(const_iterator hint, const key_type& key, Mapped&& obj) { + (void)hint; + return insertOrAssignImpl(key, std::forward(obj)).first; + } + + template + iterator insert_or_assign(const_iterator hint, key_type&& key, Mapped&& obj) { + (void)hint; + return insertOrAssignImpl(std::move(key), std::forward(obj)).first; } std::pair insert(const value_type& keyval) { - ROBIN_HOOD_TRACE(this); - return doInsert(keyval); + ROBIN_HOOD_TRACE(this) + return emplace(keyval); + } + + iterator insert(const_iterator hint, const value_type& keyval) { + (void)hint; + return emplace(keyval).first; } std::pair insert(value_type&& keyval) { - return doInsert(std::move(keyval)); + return emplace(std::move(keyval)); + } + + iterator insert(const_iterator hint, value_type&& keyval) { + (void)hint; + return emplace(std::move(keyval)).first; } // Returns 1 if key is found, 0 otherwise. size_t count(const key_type& key) const { // NOLINT(modernize-use-nodiscard) - ROBIN_HOOD_TRACE(this); + ROBIN_HOOD_TRACE(this) + auto kv = mKeyVals + findIdx(key); + if (kv != reinterpret_cast_no_cast_align_warning(mInfo)) { + return 1; + } + return 0; + } + + template + // NOLINTNEXTLINE(modernize-use-nodiscard) + typename std::enable_if::type count(const OtherKey& key) const { + ROBIN_HOOD_TRACE(this) auto kv = mKeyVals + findIdx(key); if (kv != reinterpret_cast_no_cast_align_warning(mInfo)) { return 1; @@ -1687,12 +1914,18 @@ class Table return 1U == count(key); } + template + // NOLINTNEXTLINE(modernize-use-nodiscard) + typename std::enable_if::type contains(const OtherKey& key) const { + return 1U == count(key); + } + // Returns a reference to the value found for key. // Throws std::out_of_range if element cannot be found template // NOLINTNEXTLINE(modernize-use-nodiscard) typename std::enable_if::value, Q&>::type at(key_type const& key) { - ROBIN_HOOD_TRACE(this); + ROBIN_HOOD_TRACE(this) auto kv = mKeyVals + findIdx(key); if (kv == reinterpret_cast_no_cast_align_warning(mInfo)) { doThrow("key not found"); @@ -1705,7 +1938,7 @@ class Table template // NOLINTNEXTLINE(modernize-use-nodiscard) typename std::enable_if::value, Q const&>::type at(key_type const& key) const { - ROBIN_HOOD_TRACE(this); + ROBIN_HOOD_TRACE(this) auto kv = mKeyVals + findIdx(key); if (kv == reinterpret_cast_no_cast_align_warning(mInfo)) { doThrow("key not found"); @@ -1714,44 +1947,60 @@ class Table } const_iterator find(const key_type& key) const { // NOLINT(modernize-use-nodiscard) - ROBIN_HOOD_TRACE(this); + ROBIN_HOOD_TRACE(this) const size_t idx = findIdx(key); return const_iterator{mKeyVals + idx, mInfo + idx}; } template const_iterator find(const OtherKey& key, is_transparent_tag /*unused*/) const { - ROBIN_HOOD_TRACE(this); + ROBIN_HOOD_TRACE(this) + const size_t idx = findIdx(key); + return const_iterator{mKeyVals + idx, mInfo + idx}; + } + + template + typename std::enable_if::type // NOLINT(modernize-use-nodiscard) + find(const OtherKey& key) const { // NOLINT(modernize-use-nodiscard) + ROBIN_HOOD_TRACE(this) const size_t idx = findIdx(key); return const_iterator{mKeyVals + idx, mInfo + idx}; } iterator find(const key_type& key) { - ROBIN_HOOD_TRACE(this); + ROBIN_HOOD_TRACE(this) const size_t idx = findIdx(key); return iterator{mKeyVals + idx, mInfo + idx}; } template iterator find(const OtherKey& key, is_transparent_tag /*unused*/) { - ROBIN_HOOD_TRACE(this); + ROBIN_HOOD_TRACE(this) + const size_t idx = findIdx(key); + return iterator{mKeyVals + idx, mInfo + idx}; + } + + template + typename std::enable_if::type find(const OtherKey& key) { + ROBIN_HOOD_TRACE(this) const size_t idx = findIdx(key); return iterator{mKeyVals + idx, mInfo + idx}; } iterator begin() { - ROBIN_HOOD_TRACE(this); + ROBIN_HOOD_TRACE(this) if (empty()) { return end(); } return iterator(mKeyVals, mInfo, fast_forward_tag{}); } const_iterator begin() const { // NOLINT(modernize-use-nodiscard) - ROBIN_HOOD_TRACE(this); + ROBIN_HOOD_TRACE(this) return cbegin(); } const_iterator cbegin() const { // NOLINT(modernize-use-nodiscard) - ROBIN_HOOD_TRACE(this); + ROBIN_HOOD_TRACE(this) if (empty()) { return cend(); } @@ -1759,22 +2008,22 @@ class Table } iterator end() { - ROBIN_HOOD_TRACE(this); + ROBIN_HOOD_TRACE(this) // no need to supply valid info pointer: end() must not be dereferenced, and only node // pointer is compared. return iterator{reinterpret_cast_no_cast_align_warning(mInfo), nullptr}; } const_iterator end() const { // NOLINT(modernize-use-nodiscard) - ROBIN_HOOD_TRACE(this); + ROBIN_HOOD_TRACE(this) return cend(); } const_iterator cend() const { // NOLINT(modernize-use-nodiscard) - ROBIN_HOOD_TRACE(this); + ROBIN_HOOD_TRACE(this) return const_iterator{reinterpret_cast_no_cast_align_warning(mInfo), nullptr}; } iterator erase(const_iterator pos) { - ROBIN_HOOD_TRACE(this); + ROBIN_HOOD_TRACE(this) // its safe to perform const cast here // NOLINTNEXTLINE(cppcoreguidelines-pro-type-const-cast) return erase(iterator{const_cast(pos.mKeyVals), const_cast(pos.mInfo)}); @@ -1782,7 +2031,7 @@ class Table // Erases element at pos, returns iterator to the next element. iterator erase(iterator pos) { - ROBIN_HOOD_TRACE(this); + ROBIN_HOOD_TRACE(this) // we assume that pos always points to a valid entry, and not end(). auto const idx = static_cast(pos.mKeyVals - mKeyVals); @@ -1799,9 +2048,9 @@ class Table } size_t erase(const key_type& key) { - ROBIN_HOOD_TRACE(this); - size_t idx; - InfoType info; + ROBIN_HOOD_TRACE(this) + size_t idx{}; + InfoType info{}; keyToIdx(key, &idx, &info); // check while info matches with the source idx @@ -1821,53 +2070,66 @@ class Table // reserves space for the specified number of elements. Makes sure the old data fits. // exactly the same as reserve(c). void rehash(size_t c) { - reserve(c); + // forces a reserve + reserve(c, true); } // reserves space for the specified number of elements. Makes sure the old data fits. - // Exactly the same as resize(c). Use resize(0) to shrink to fit. + // Exactly the same as rehash(c). Use rehash(0) to shrink to fit. void reserve(size_t c) { - ROBIN_HOOD_TRACE(this); - auto const minElementsAllowed = (std::max)(c, mNumElements); + // reserve, but don't force rehash + reserve(c, false); + } + + // If possible reallocates the map to a smaller one. This frees the underlying table. + // Does not do anything if load_factor is too large for decreasing the table's size. + void compact() { + ROBIN_HOOD_TRACE(this) auto newSize = InitialNumElements; - while (calcMaxNumElementsAllowed(newSize) < minElementsAllowed && newSize != 0) { + while (calcMaxNumElementsAllowed(newSize) < mNumElements && newSize != 0) { newSize *= 2; } if (ROBIN_HOOD_UNLIKELY(newSize == 0)) { throwOverflowError(); } - rehashPowerOfTwo(newSize); + ROBIN_HOOD_LOG("newSize > mMask + 1: " << newSize << " > " << mMask << " + 1") + + // only actually do anything when the new size is bigger than the old one. This prevents to + // continuously allocate for each reserve() call. + if (newSize < mMask + 1) { + rehashPowerOfTwo(newSize, true); + } } size_type size() const noexcept { // NOLINT(modernize-use-nodiscard) - ROBIN_HOOD_TRACE(this); + ROBIN_HOOD_TRACE(this) return mNumElements; } size_type max_size() const noexcept { // NOLINT(modernize-use-nodiscard) - ROBIN_HOOD_TRACE(this); + ROBIN_HOOD_TRACE(this) return static_cast(-1); } ROBIN_HOOD(NODISCARD) bool empty() const noexcept { - ROBIN_HOOD_TRACE(this); + ROBIN_HOOD_TRACE(this) return 0 == mNumElements; } float max_load_factor() const noexcept { // NOLINT(modernize-use-nodiscard) - ROBIN_HOOD_TRACE(this); + ROBIN_HOOD_TRACE(this) return MaxLoadFactor100 / 100.0F; } // Average number of elements per bucket. Since we allow only 1 per bucket float load_factor() const noexcept { // NOLINT(modernize-use-nodiscard) - ROBIN_HOOD_TRACE(this); + ROBIN_HOOD_TRACE(this) return static_cast(size()) / static_cast(mMask + 1); } ROBIN_HOOD(NODISCARD) size_t mask() const noexcept { - ROBIN_HOOD_TRACE(this); + ROBIN_HOOD_TRACE(this) return mMask; } @@ -1916,7 +2178,7 @@ class Table template ROBIN_HOOD(NODISCARD) typename std::enable_if::value, bool>::type has(const value_type& e) const { - ROBIN_HOOD_TRACE(this); + ROBIN_HOOD_TRACE(this) auto it = find(e.first); return it != end() && it->second == e.second; } @@ -1924,14 +2186,35 @@ class Table template ROBIN_HOOD(NODISCARD) typename std::enable_if::value, bool>::type has(const value_type& e) const { - ROBIN_HOOD_TRACE(this); + ROBIN_HOOD_TRACE(this) return find(e) != end(); } + void reserve(size_t c, bool forceRehash) { + ROBIN_HOOD_TRACE(this) + auto const minElementsAllowed = (std::max)(c, mNumElements); + auto newSize = InitialNumElements; + while (calcMaxNumElementsAllowed(newSize) < minElementsAllowed && newSize != 0) { + newSize *= 2; + } + if (ROBIN_HOOD_UNLIKELY(newSize == 0)) { + throwOverflowError(); + } + + ROBIN_HOOD_LOG("newSize > mMask + 1: " << newSize << " > " << mMask << " + 1") + + // only actually do anything when the new size is bigger than the old one. This prevents to + // continuously allocate for each reserve() call. + if (forceRehash || newSize > mMask + 1) { + rehashPowerOfTwo(newSize, false); + } + } + // reserves space for at least the specified number of elements. // only works if numBuckets if power of two - void rehashPowerOfTwo(size_t numBuckets) { - ROBIN_HOOD_TRACE(this); + // True on success, false otherwise + void rehashPowerOfTwo(size_t numBuckets, bool forceFree) { + ROBIN_HOOD_TRACE(this) Node* const oldKeyVals = mKeyVals; uint8_t const* const oldInfo = mInfo; @@ -1939,18 +2222,29 @@ class Table const size_t oldMaxElementsWithBuffer = calcNumElementsWithBuffer(mMask + 1); // resize operation: move stuff - init_data(numBuckets); + initData(numBuckets); if (oldMaxElementsWithBuffer > 1) { for (size_t i = 0; i < oldMaxElementsWithBuffer; ++i) { if (oldInfo[i] != 0) { + // might throw an exception, which is really bad since we are in the middle of + // moving stuff. insert_move(std::move(oldKeyVals[i])); // destroy the node but DON'T destroy the data. oldKeyVals[i].~Node(); } } - // don't destroy old data: put it into the pool instead - DataPool::addOrFree(oldKeyVals, calcNumBytesTotal(oldMaxElementsWithBuffer)); + // this check is not necessary as it's guarded by the previous if, but it helps + // silence g++'s overeager "attempt to free a non-heap object 'map' + // [-Werror=free-nonheap-object]" warning. + if (oldKeyVals != reinterpret_cast_no_cast_align_warning(&mMask)) { + // don't destroy old data: put it into the pool instead + if (forceFree) { + std::free(oldKeyVals); + } else { + DataPool::addOrFree(oldKeyVals, calcNumBytesTotal(oldMaxElementsWithBuffer)); + } + } } } @@ -1962,17 +2256,80 @@ class Table #endif } - void init_data(size_t max_elements) { + template + std::pair try_emplace_impl(OtherKey&& key, Args&&... args) { + ROBIN_HOOD_TRACE(this) + auto idxAndState = insertKeyPrepareEmptySpot(key); + switch (idxAndState.second) { + case InsertionState::key_found: + break; + + case InsertionState::new_node: + ::new (static_cast(&mKeyVals[idxAndState.first])) Node( + *this, std::piecewise_construct, std::forward_as_tuple(std::forward(key)), + std::forward_as_tuple(std::forward(args)...)); + break; + + case InsertionState::overwrite_node: + mKeyVals[idxAndState.first] = Node(*this, std::piecewise_construct, + std::forward_as_tuple(std::forward(key)), + std::forward_as_tuple(std::forward(args)...)); + break; + + case InsertionState::overflow_error: + throwOverflowError(); + break; + } + + return std::make_pair(iterator(mKeyVals + idxAndState.first, mInfo + idxAndState.first), + InsertionState::key_found != idxAndState.second); + } + + template + std::pair insertOrAssignImpl(OtherKey&& key, Mapped&& obj) { + ROBIN_HOOD_TRACE(this) + auto idxAndState = insertKeyPrepareEmptySpot(key); + switch (idxAndState.second) { + case InsertionState::key_found: + mKeyVals[idxAndState.first].getSecond() = std::forward(obj); + break; + + case InsertionState::new_node: + ::new (static_cast(&mKeyVals[idxAndState.first])) Node( + *this, std::piecewise_construct, std::forward_as_tuple(std::forward(key)), + std::forward_as_tuple(std::forward(obj))); + break; + + case InsertionState::overwrite_node: + mKeyVals[idxAndState.first] = Node(*this, std::piecewise_construct, + std::forward_as_tuple(std::forward(key)), + std::forward_as_tuple(std::forward(obj))); + break; + + case InsertionState::overflow_error: + throwOverflowError(); + break; + } + + return std::make_pair(iterator(mKeyVals + idxAndState.first, mInfo + idxAndState.first), + InsertionState::key_found != idxAndState.second); + } + + void initData(size_t max_elements) { mNumElements = 0; mMask = max_elements - 1; mMaxNumElementsAllowed = calcMaxNumElementsAllowed(max_elements); auto const numElementsWithBuffer = calcNumElementsWithBuffer(max_elements); - // calloc also zeroes everything - mKeyVals = reinterpret_cast(detail::assertNotNull( - calloc(1, calcNumBytesTotal(numElementsWithBuffer)))); + // malloc & zero mInfo. Faster than calloc everything. + auto const numBytesTotal = calcNumBytesTotal(numElementsWithBuffer); + ROBIN_HOOD_LOG("std::calloc " << numBytesTotal << " = calcNumBytesTotal(" + << numElementsWithBuffer << ")") + mKeyVals = reinterpret_cast( + detail::assertNotNull(std::malloc(numBytesTotal))); mInfo = reinterpret_cast(mKeyVals + numElementsWithBuffer); + std::memset(mInfo, 0, numBytesTotal - numElementsWithBuffer * sizeof(Node)); // set sentinel mInfo[numElementsWithBuffer] = 1; @@ -1981,86 +2338,34 @@ class Table mInfoHashShift = InitialInfoHashShift; } - template - typename std::enable_if::value, Q&>::type doCreateByKey(Arg&& key) { - while (true) { - size_t idx; - InfoType info; - keyToIdx(key, &idx, &info); - nextWhileLess(&info, &idx); - - // while we potentially have a match. Can't do a do-while here because when mInfo is 0 - // we don't want to skip forward - while (info == mInfo[idx]) { - if (WKeyEqual::operator()(key, mKeyVals[idx].getFirst())) { - // key already exists, do not insert. - return mKeyVals[idx].getSecond(); - } - next(&info, &idx); - } + enum class InsertionState { overflow_error, key_found, new_node, overwrite_node }; - // unlikely that this evaluates to true - if (ROBIN_HOOD_UNLIKELY(mNumElements >= mMaxNumElementsAllowed)) { - increase_size(); - continue; - } - - // key not found, so we are now exactly where we want to insert it. - auto const insertion_idx = idx; - auto const insertion_info = info; - if (ROBIN_HOOD_UNLIKELY(insertion_info + mInfoInc > 0xFF)) { - mMaxNumElementsAllowed = 0; - } - - // find an empty spot - while (0 != mInfo[idx]) { - next(&info, &idx); - } - - auto& l = mKeyVals[insertion_idx]; - if (idx == insertion_idx) { - // put at empty spot. This forwards all arguments into the node where the object is - // constructed exactly where it is needed. - ::new (static_cast(&l)) - Node(*this, std::piecewise_construct, - std::forward_as_tuple(std::forward(key)), std::forward_as_tuple()); - } else { - shiftUp(idx, insertion_idx); - l = Node(*this, std::piecewise_construct, - std::forward_as_tuple(std::forward(key)), std::forward_as_tuple()); - } - - // mKeyVals[idx].getFirst() = std::move(key); - mInfo[insertion_idx] = static_cast(insertion_info); - - ++mNumElements; - return mKeyVals[insertion_idx].getSecond(); - } - } - - // This is exactly the same code as operator[], except for the return values - template - std::pair doInsert(Arg&& keyval) { - while (true) { - size_t idx; - InfoType info; - keyToIdx(getFirstConst(keyval), &idx, &info); + // Finds key, and if not already present prepares a spot where to pot the key & value. + // This potentially shifts nodes out of the way, updates mInfo and number of inserted + // elements, so the only operation left to do is create/assign a new node at that spot. + template + std::pair insertKeyPrepareEmptySpot(OtherKey&& key) { + for (int i = 0; i < 256; ++i) { + size_t idx{}; + InfoType info{}; + keyToIdx(key, &idx, &info); nextWhileLess(&info, &idx); // while we potentially have a match while (info == mInfo[idx]) { - if (WKeyEqual::operator()(getFirstConst(keyval), mKeyVals[idx].getFirst())) { + if (WKeyEqual::operator()(key, mKeyVals[idx].getFirst())) { // key already exists, do NOT insert. // see http://en.cppreference.com/w/cpp/container/unordered_map/insert - return std::make_pair(iterator(mKeyVals + idx, mInfo + idx), - false); + return std::make_pair(idx, InsertionState::key_found); } next(&info, &idx); } // unlikely that this evaluates to true if (ROBIN_HOOD_UNLIKELY(mNumElements >= mMaxNumElementsAllowed)) { - increase_size(); + if (!increase_size()) { + return std::make_pair(size_t(0), InsertionState::overflow_error); + } continue; } @@ -2076,26 +2381,25 @@ class Table next(&info, &idx); } - auto& l = mKeyVals[insertion_idx]; - if (idx == insertion_idx) { - ::new (static_cast(&l)) Node(*this, std::forward(keyval)); - } else { + if (idx != insertion_idx) { shiftUp(idx, insertion_idx); - l = Node(*this, std::forward(keyval)); } - // put at empty spot mInfo[insertion_idx] = static_cast(insertion_info); - ++mNumElements; - return std::make_pair(iterator(mKeyVals + insertion_idx, mInfo + insertion_idx), true); + return std::make_pair(insertion_idx, idx == insertion_idx + ? InsertionState::new_node + : InsertionState::overwrite_node); } + + // enough attempts failed, so finally give up. + return std::make_pair(size_t(0), InsertionState::overflow_error); } bool try_increase_info() { ROBIN_HOOD_LOG("mInfoInc=" << mInfoInc << ", numElements=" << mNumElements << ", maxNumElementsAllowed=" - << calcMaxNumElementsAllowed(mMask + 1)); + << calcMaxNumElementsAllowed(mMask + 1)) if (mInfoInc <= 2) { // need to be > 2 so that shift works (otherwise undefined behavior!) return false; @@ -2120,28 +2424,41 @@ class Table return true; } - void increase_size() { + // True if resize was possible, false otherwise + bool increase_size() { // nothing allocated yet? just allocate InitialNumElements if (0 == mMask) { - init_data(InitialNumElements); - return; + initData(InitialNumElements); + return true; } auto const maxNumElementsAllowed = calcMaxNumElementsAllowed(mMask + 1); if (mNumElements < maxNumElementsAllowed && try_increase_info()) { - return; + return true; } ROBIN_HOOD_LOG("mNumElements=" << mNumElements << ", maxNumElementsAllowed=" << maxNumElementsAllowed << ", load=" << (static_cast(mNumElements) * 100.0 / - (static_cast(mMask) + 1))); - // it seems we have a really bad hash function! don't try to resize again + (static_cast(mMask) + 1))) + if (mNumElements * 2 < calcMaxNumElementsAllowed(mMask + 1)) { - throwOverflowError(); + // we have to resize, even though there would still be plenty of space left! + // Try to rehash instead. Delete freed memory so we don't steadyily increase mem in case + // we have to rehash a few times + nextHashMultiplier(); + rehashPowerOfTwo(mMask + 1, true); + } else { + // we've reached the capacity of the map, so the hash seems to work nice. Keep using it. + rehashPowerOfTwo((mMask + 1) * 2, false); } + return true; + } - rehashPowerOfTwo((mMask + 1) * 2); + void nextHashMultiplier() { + // adding an *even* number, so that the multiplier will always stay odd. This is necessary + // so that the hash stays a mixing function (and thus doesn't have any information loss). + mHashMultiplier += UINT64_C(0xc4ceb9fe1a85ec54); } void destroy() { @@ -2155,15 +2472,16 @@ class Table // This protection against not deleting mMask shouldn't be needed as it's sufficiently // protected with the 0==mMask check, but I have this anyways because g++ 7 otherwise - // reports a compile error: attempt to free a non-heap object ‘fm’ + // reports a compile error: attempt to free a non-heap object 'fm' // [-Werror=free-nonheap-object] - if (mKeyVals != reinterpret_cast(&mMask)) { - free(mKeyVals); + if (mKeyVals != reinterpret_cast_no_cast_align_warning(&mMask)) { + ROBIN_HOOD_LOG("std::free") + std::free(mKeyVals); } } void init() noexcept { - mKeyVals = reinterpret_cast(&mMask); + mKeyVals = reinterpret_cast_no_cast_align_warning(&mMask); mInfo = reinterpret_cast(&mMask); mNumElements = 0; mMask = 0; @@ -2173,14 +2491,15 @@ class Table } // members are sorted so no padding occurs - Node* mKeyVals = reinterpret_cast(&mMask); // 8 byte 8 - uint8_t* mInfo = reinterpret_cast(&mMask); // 8 byte 16 - size_t mNumElements = 0; // 8 byte 24 - size_t mMask = 0; // 8 byte 32 - size_t mMaxNumElementsAllowed = 0; // 8 byte 40 - InfoType mInfoInc = InitialInfoInc; // 4 byte 44 - InfoType mInfoHashShift = InitialInfoHashShift; // 4 byte 48 - // 16 byte 56 if NodeAllocator + uint64_t mHashMultiplier = UINT64_C(0xc4ceb9fe1a85ec53); // 8 byte 8 + Node* mKeyVals = reinterpret_cast_no_cast_align_warning(&mMask); // 8 byte 16 + uint8_t* mInfo = reinterpret_cast(&mMask); // 8 byte 24 + size_t mNumElements = 0; // 8 byte 32 + size_t mMask = 0; // 8 byte 40 + size_t mMaxNumElementsAllowed = 0; // 8 byte 48 + InfoType mInfoInc = InitialInfoInc; // 4 byte 52 + InfoType mInfoHashShift = InitialInfoHashShift; // 4 byte 56 + // 16 byte 56 if NodeAllocator }; } // namespace detail diff --git a/setup.py b/setup.py index b218536..d8b2a22 100644 --- a/setup.py +++ b/setup.py @@ -172,7 +172,7 @@ def get_extra_args(): url="https://github.com/kcleal/dysgu", description="Structural variant calling", license="MIT", - version='1.3.9', + version='1.3.10', python_requires='>=3.7', install_requires=[ # runtime requires 'cython',