From a6bc38741d5c0453284583cacc319be3921ecf55 Mon Sep 17 00:00:00 2001 From: mnshgl0110 Date: Thu, 20 Jan 2022 08:33:34 +0100 Subject: [PATCH] Bug fix. Removed the use of np.int (as it is now depreciated). --- syri/pyxFiles/findsv.pyx | 55 +++++++++------------ syri/pyxFiles/inversions.pyx | 95 ++++-------------------------------- syri/pyxFiles/tdfunc.pyx | 22 ++++----- 3 files changed, 44 insertions(+), 128 deletions(-) diff --git a/syri/pyxFiles/findsv.pyx b/syri/pyxFiles/findsv.pyx index 9091732..ab8632d 100644 --- a/syri/pyxFiles/findsv.pyx +++ b/syri/pyxFiles/findsv.pyx @@ -167,9 +167,9 @@ def getSV(cwdPath, allAlignments, prefix, offset): j_prop = abs(n) / (blocksAlign.iat[j, 3] - blocksAlign.iat[j, 2]) j1_prop = abs(n) / (blocksAlign.iat[j + 1, 3] - blocksAlign.iat[j + 1, 2]) sCoord = round( - blocksAlign.iat[j, 1] - j_prop * (blocksAlign.iat[j, 1] - blocksAlign.iat[j, 0])).astype(int) + blocksAlign.iat[j, 1] - j_prop * (blocksAlign.iat[j, 1] - blocksAlign.iat[j, 0])) eCoord = round(blocksAlign.iat[j + 1, 0] + j1_prop * ( - blocksAlign.iat[j + 1, 1] - blocksAlign.iat[j + 1, 0])).astype(int) + blocksAlign.iat[j + 1, 1] - blocksAlign.iat[j + 1, 0])) fout.write("\t".join(["CPL", str(sCoord), str(eCoord), @@ -181,9 +181,9 @@ def getSV(cwdPath, allAlignments, prefix, offset): j_prop = abs(n) / (blocksAlign.iat[j, 2] - blocksAlign.iat[j, 3]) j1_prop = abs(n) / (blocksAlign.iat[j + 1, 2] - blocksAlign.iat[j + 1, 3]) sCoord = round( - blocksAlign.iat[j, 1] - j_prop * (blocksAlign.iat[j, 1] - blocksAlign.iat[j, 0])).astype(int) + blocksAlign.iat[j, 1] - j_prop * (blocksAlign.iat[j, 1] - blocksAlign.iat[j, 0])) eCoord = round(blocksAlign.iat[j + 1, 0] + j1_prop * ( - blocksAlign.iat[j + 1, 1] - blocksAlign.iat[j + 1, 0])).astype(int) + blocksAlign.iat[j + 1, 1] - blocksAlign.iat[j + 1, 0])) fout.write("\t".join(["CPL", str(sCoord), str(eCoord), @@ -257,9 +257,9 @@ def getSV(cwdPath, allAlignments, prefix, offset): j_prop = abs(n) / (blocksAlign.iat[j, 3] - blocksAlign.iat[j, 2]) j1_prop = abs(n) / (blocksAlign.iat[j + 1, 3] - blocksAlign.iat[j + 1, 2]) sCoord = round( - blocksAlign.iat[j, 1] - j_prop * (blocksAlign.iat[j, 1] - blocksAlign.iat[j, 0])).astype(int) + blocksAlign.iat[j, 1] - j_prop * (blocksAlign.iat[j, 1] - blocksAlign.iat[j, 0])) eCoord = round(blocksAlign.iat[j + 1, 0] + j1_prop * ( - blocksAlign.iat[j + 1, 1] - blocksAlign.iat[j + 1, 0])).astype(int) + blocksAlign.iat[j + 1, 1] - blocksAlign.iat[j + 1, 0])) fout.write("\t".join(["CPL", str(sCoord), str(eCoord), @@ -271,9 +271,9 @@ def getSV(cwdPath, allAlignments, prefix, offset): j_prop = abs(n) / (blocksAlign.iat[j, 2] - blocksAlign.iat[j, 3]) j1_prop = abs(n) / (blocksAlign.iat[j + 1, 2] - blocksAlign.iat[j + 1, 3]) sCoord = round( - blocksAlign.iat[j, 1] - j_prop * (blocksAlign.iat[j, 1] - blocksAlign.iat[j, 0])).astype(int) + blocksAlign.iat[j, 1] - j_prop * (blocksAlign.iat[j, 1] - blocksAlign.iat[j, 0])) eCoord = round(blocksAlign.iat[j + 1, 0] + j1_prop * ( - blocksAlign.iat[j + 1, 1] - blocksAlign.iat[j + 1, 0])).astype(int) + blocksAlign.iat[j + 1, 1] - blocksAlign.iat[j + 1, 0])) fout.write("\t".join(["CPL", str(sCoord), str(eCoord), @@ -328,9 +328,9 @@ def getSV(cwdPath, allAlignments, prefix, offset): j_prop = abs(n) / (blocksAlign.iat[j, 3] - blocksAlign.iat[j, 2]) j1_prop = abs(n) / (blocksAlign.iat[j + 1, 3] - blocksAlign.iat[j + 1, 2]) sCoord = round( - blocksAlign.iat[j, 1] - j_prop * (blocksAlign.iat[j, 1] - blocksAlign.iat[j, 0])).astype(int) + blocksAlign.iat[j, 1] - j_prop * (blocksAlign.iat[j, 1] - blocksAlign.iat[j, 0])) eCoord = round(blocksAlign.iat[j + 1, 0] + j1_prop * ( - blocksAlign.iat[j + 1, 1] - blocksAlign.iat[j + 1, 0])).astype(int) + blocksAlign.iat[j + 1, 1] - blocksAlign.iat[j + 1, 0])) fout.write("\t".join(["CPL", str(sCoord), str(eCoord), @@ -341,15 +341,10 @@ def getSV(cwdPath, allAlignments, prefix, offset): else: j_prop = abs(n) / (blocksAlign.iat[j, 2] - blocksAlign.iat[j, 3]) j1_prop = abs(n) / (blocksAlign.iat[j + 1, 2] - blocksAlign.iat[j + 1, 3]) - # sCoord = round( - # blocksAlign.iat[j, 1] - j_prop * (blocksAlign.iat[j, 1] - blocksAlign.iat[j, 0])).astype( - # int) - # eCoord = round(blocksAlign.iat[j + 1, 0] + j1_prop * ( - # blocksAlign.iat[j + 1, 1] - blocksAlign.iat[j + 1, 0])).astype(int) sCoord = round( - blocksAlign.iat[j, 1] - j_prop * (blocksAlign.iat[j, 1] - blocksAlign.iat[j, 0])).astype(int) + blocksAlign.iat[j, 1] - j_prop * (blocksAlign.iat[j, 1] - blocksAlign.iat[j, 0])) eCoord = round(blocksAlign.iat[j + 1, 0] + j1_prop * ( - blocksAlign.iat[j + 1, 1] - blocksAlign.iat[j + 1, 0])).astype(int) + blocksAlign.iat[j + 1, 1] - blocksAlign.iat[j + 1, 0])) fout.write("\t".join(["CPL", str(sCoord), str(eCoord), @@ -366,9 +361,9 @@ def getSV(cwdPath, allAlignments, prefix, offset): if n >= offset: if ordered: sCoord = round( - blocksAlign.iat[j, 3] - j_prop * (blocksAlign.iat[j, 3] - blocksAlign.iat[j, 2])).astype(int) + blocksAlign.iat[j, 3] - j_prop * (blocksAlign.iat[j, 3] - blocksAlign.iat[j, 2])) eCoord = round(blocksAlign.iat[j + 1, 2] + j1_prop * ( - blocksAlign.iat[j + 1, 3] - blocksAlign.iat[j + 1, 2])).astype(int) + blocksAlign.iat[j + 1, 3] - blocksAlign.iat[j + 1, 2])) fout.write("\t".join(["CPG", str(blocksAlign.iat[j + 1, 0]), str(blocksAlign.iat[j, 1]), @@ -377,10 +372,8 @@ def getSV(cwdPath, allAlignments, prefix, offset): blocksAlign.iat[0, 5], blocksAlign.iat[0, 6]]) + "\n") else: - sCoord = round( - blocksAlign.iat[j, 3] + j_prop * (blocksAlign.iat[j, 2] - blocksAlign.iat[j, 3])).astype(int) - eCoord = round(blocksAlign.iat[j + 1, 2] - j1_prop * ( - blocksAlign.iat[j + 1, 2] - blocksAlign.iat[j + 1, 3])).astype(int) + sCoord = round(blocksAlign.iat[j, 3] + j_prop * (blocksAlign.iat[j, 2] - blocksAlign.iat[j, 3])) + eCoord = round(blocksAlign.iat[j + 1, 2] - j1_prop * (blocksAlign.iat[j + 1, 2] - blocksAlign.iat[j + 1, 3])) fout.write("\t".join(["CPG", str(blocksAlign.iat[j + 1, 0]), str(blocksAlign.iat[j, 1]), @@ -397,9 +390,9 @@ def getSV(cwdPath, allAlignments, prefix, offset): if abs(m) > abs(n): if ordered: sCoord = round(blocksAlign.iat[j, 3] - j_prop * ( - blocksAlign.iat[j, 3] - blocksAlign.iat[j, 2])).astype(int) + blocksAlign.iat[j, 3] - blocksAlign.iat[j, 2])) eCoord = round(blocksAlign.iat[j + 1, 2] + j1_prop * ( - blocksAlign.iat[j + 1, 3] - blocksAlign.iat[j + 1, 2])).astype(int) + blocksAlign.iat[j + 1, 3] - blocksAlign.iat[j + 1, 2])) fout.write("\t".join(["TDM", str(blocksAlign.iat[j + 1, 0]), str(blocksAlign.iat[j, 1]), @@ -410,9 +403,9 @@ def getSV(cwdPath, allAlignments, prefix, offset): else: sCoord = round(blocksAlign.iat[j, 3] + j_prop * ( - blocksAlign.iat[j, 2] - blocksAlign.iat[j, 3])).astype(int) + blocksAlign.iat[j, 2] - blocksAlign.iat[j, 3])) eCoord = round(blocksAlign.iat[j + 1, 2] - j1_prop * ( - blocksAlign.iat[j + 1, 2] - blocksAlign.iat[j + 1, 3])).astype(int) + blocksAlign.iat[j + 1, 2] - blocksAlign.iat[j + 1, 3])) fout.write("\t".join(["TDM", str(blocksAlign.iat[j + 1, 0]), str(blocksAlign.iat[j, 1]), @@ -425,9 +418,9 @@ def getSV(cwdPath, allAlignments, prefix, offset): k_prop = abs(n) / (blocksAlign.iat[j, 3] - blocksAlign.iat[j, 2]) k1_prop = abs(n) / (blocksAlign.iat[j + 1, 3] - blocksAlign.iat[j + 1, 2]) sCoord = round(blocksAlign.iat[j, 1] - k_prop * ( - blocksAlign.iat[j, 1] - blocksAlign.iat[j, 0])).astype(int) + blocksAlign.iat[j, 1] - blocksAlign.iat[j, 0])) eCoord = round(blocksAlign.iat[j + 1, 0] + k1_prop * ( - blocksAlign.iat[j + 1, 1] - blocksAlign.iat[j + 1, 0])).astype(int) + blocksAlign.iat[j + 1, 1] - blocksAlign.iat[j + 1, 0])) fout.write("\t".join(["TDM", str(sCoord), str(eCoord), @@ -439,9 +432,9 @@ def getSV(cwdPath, allAlignments, prefix, offset): k_prop = abs(n) / (blocksAlign.iat[j, 2] - blocksAlign.iat[j, 3]) k1_prop = abs(n) / (blocksAlign.iat[j + 1, 2] - blocksAlign.iat[j + 1, 3]) sCoord = round(blocksAlign.iat[j, 1] - k_prop * ( - blocksAlign.iat[j, 1] - blocksAlign.iat[j, 0])).astype(int) + blocksAlign.iat[j, 1] - blocksAlign.iat[j, 0])) eCoord = round(blocksAlign.iat[j + 1, 0] + k1_prop * ( - blocksAlign.iat[j + 1, 1] - blocksAlign.iat[j + 1, 0])).astype(int) + blocksAlign.iat[j + 1, 1] - blocksAlign.iat[j + 1, 0])) fout.write("\t".join(["TDM", str(sCoord), str(eCoord), diff --git a/syri/pyxFiles/inversions.pyx b/syri/pyxFiles/inversions.pyx index a730536..d5e1b64 100644 --- a/syri/pyxFiles/inversions.pyx +++ b/syri/pyxFiles/inversions.pyx @@ -66,7 +66,7 @@ cdef getProfitable(invblocks, long[:] aStart, long[:] aEnd, long[:] bStart, long float maxscore, synscore float w, revenue, cost Py_ssize_t index - long[:] n = np.array(range(len(invblocks)), dtype=np.int) + long[:] n = np.array(range(len(invblocks)), dtype=int) long[:] topo long[:] source, target long[:] pred @@ -127,7 +127,7 @@ cdef getProfitable(invblocks, long[:] aStart, long[:] aEnd, long[:] bStart, long print('Cycle found') sys.exit() - topo = np.array(toporder, dtype = np.int) + topo = np.array(toporder, dtype = int) n_topo = len(topo) # Get order in which the edges need to be transversed source = np.zeros_like(invG.es['source'], dtype=int) @@ -193,7 +193,7 @@ cdef getProfitable(invblocks, long[:] aStart, long[:] aEnd, long[:] bStart, long if i in uniend: continue nodepath.clear() - pred = np.array([-1]* len(n), dtype = np.int) + pred = np.array([-1]* len(n), dtype = int) dist = np.array([np.float32('inf')]* len(n), dtype = np.float32) dist[i] = 0 # Process vertices in topological order @@ -321,10 +321,10 @@ cdef getProfitable(invblocks, long[:] aStart, long[:] aEnd, long[:] bStart, long path.clear() lp = st.size() totscore = np.array([profit[i] for i in range( profit.size())], dtype=np.float32) - st_list = np.array([st[i] for i in range( st.size())], dtype=np.int) - end_list = np.array([end[i] for i in range( end.size())], dtype=np.int) - stb_list = np.array([stb[i] for i in range( stb.size())], dtype=np.int) - endb_list = np.array([endb[i] for i in range( endb.size())], dtype=np.int) + st_list = np.array([st[i] for i in range( st.size())], dtype=int) + end_list = np.array([end[i] for i in range( end.size())], dtype=int) + stb_list = np.array([stb[i] for i in range( stb.size())], dtype=int) + endb_list = np.array([endb[i] for i in range( endb.size())], dtype=int) profit_list = np.array([profit[i] for i in range( profit.size())], dtype=np.float32) parents = np.array([-1]*lp, dtype = 'int') @@ -338,84 +338,7 @@ cdef getProfitable(invblocks, long[:] aStart, long[:] aEnd, long[:] bStart, long parents[j] = i else: break - - """ - Attempt to speed up goodinv identification, but not working as of now - - print('starting') - parentscore = np.zeros(lp, np.float32) - for i in range(lp): - canmap[st_list[i]][stb_list[i]].push_back(i) - staenda[st_list[i]][end_list[i]].push_back(i) - print('graph generated') - i = 0 - sorted_sa = np.zeros( canmap.size(), np.int64) - canmap_rit=canmap.rbegin() - while canmap_rit != canmap.rend(): - sorted_sa[i] = deref(canmap_rit).first - i+=1 - inc(canmap_rit) - sorted_sa = np.array(sorted(sorted_sa)) - print('Pruning graph') - for i in sorted_sa: - canmap_rit2 = canmap[i].rbegin() - while canmap_rit2 != canmap[i].rend(): - minparentscore[i][deref(canmap_rit2).first] = 0 - inc(canmap_rit2) - - print(np.array(sorted_sa)) - count = 0 - changed=False - for i in sorted_sa: - if i==12592489: - print(i, str(datetime.now())) - count+=1 - staenda_it2 = staenda[i].rbegin() - while staenda_it2 != staenda[i].rend(): - if i==12592489: - print(i, deref(staenda_it2).first, deref(staenda_it2).second.size()) - for j in range( deref(staenda_it2).second.size()): - current = deref(staenda_it2).second[j] - canmap_rit = canmap.rbegin() - while canmap_rit != canmap.rend(): - if deref(canmap_rit).first > end_list[current]-threshold: - if i==12592489: - print(i, deref(staenda_it2).first, deref(staenda_it2).second.size(), deref(canmap_rit).first) - canmap_rit2 = deref(canmap_rit).second.rbegin() - while canmap_rit2 != deref(canmap_rit).second.rend(): - if i==12592489 and deref(staenda_it2).first == 12989685 and deref(canmap_rit).first == 13050974 and deref(canmap_rit2).first == 12368020: - print('gg parent set1') - print(i, deref(staenda_it2).first, deref(staenda_it2).second.size(), deref(canmap_rit).first) - if totscore[current] > minparentscore[deref(canmap_rit).first][deref(canmap_rit2).first]: - changed = False - if deref(canmap_rit2).first > endb_list[current]-threshold: - if i==12592489 and deref(staenda_it2).first == 12989685 and deref(canmap_rit).first == 13050974 and deref(canmap_rit2).first == 12368020: - print('gg parent set2') - print(i, deref(staenda_it2).first, deref(staenda_it2).second.size(), deref(canmap_rit).first) - for k in range( deref(canmap_rit2).second.size()): - if parentscore[deref(canmap_rit2).second[k]] < totscore[current]: - if i==12592489 and deref(staenda_it2).first == 12989685 and deref(canmap_rit).first == 13050974 and deref(canmap_rit2).first == 12368020: - print('gg parent set3') - print(i, deref(staenda_it2).first, deref(staenda_it2).second.size(), deref(canmap_rit).first) - changed = True - totscore[deref(canmap_rit2).second[k]] = totscore[current] + profit_list[deref(canmap_rit2).second[k]] - parentscore[deref(canmap_rit2).second[k]] = totscore[current] - parents[deref(canmap_rit2).second[k]] = current - if changed: - minvalue = np.inf - for k in range( deref(canmap_rit2).second.size()): - if parentscore[deref(canmap_rit2).second[k]] < minvalue: - minvalue = parentscore[deref(canmap_rit2).second[k]] - minparentscore[deref(canmap_rit).first][deref(canmap_rit2).first] = minvalue - else: - break - inc(canmap_rit2) - else: - break - inc(canmap_rit) - inc(staenda_it2) - """ - + maxid = -1 maxscore = -1 for i in range(lp): @@ -435,7 +358,7 @@ cdef getProfitable(invblocks, long[:] aStart, long[:] aEnd, long[:] bStart, long if i in uniend: continue nodepath.clear() - pred = np.array([-1]* len(n), dtype = np.int) + pred = np.array([-1]* len(n), dtype = int) dist = np.array([np.float32('inf')]* len(n), dtype = np.float32) dist[i] = 0 diff --git a/syri/pyxFiles/tdfunc.pyx b/syri/pyxFiles/tdfunc.pyx index 5124075..bbf5da8 100644 --- a/syri/pyxFiles/tdfunc.pyx +++ b/syri/pyxFiles/tdfunc.pyx @@ -629,7 +629,7 @@ def getCTX(coords, cwdPath, uniChromo, threshold, bRT, prefix, tUC, tUP, nCores, bGroups = {} for i in range(len(ctxTransGenomeBGroups)): bGroups[i] = ctxTransBlocks.iloc[ctxTransGenomeBGroups[i].member].sort_values(['bStart', 'bEnd']).index.values - clstrsize = np.array([len(ctxCluster[i.transClusterIndex]) for i in ctxBlocksData], np.int) + clstrsize = np.array([len(ctxCluster[i.transClusterIndex]) for i in ctxBlocksData], int) if len(ctxTransBlocks) > 0: out = getmeblocks(np.array(ctxTransBlocks.aStart), @@ -980,10 +980,10 @@ cpdef getProfitableTrans(cpp_map[long, cpp_set[long]] graph, long[:] astart, lon else: unchrs = np.unique(list(np.unique(achr)) + list(np.unique(bchr)) + list(np.unique(inachr)) + list(np.unique(inbchr))) unchrdict = {unchrs[i]:i for i in range(len(unchrs))} - achrint = np.array([unchrdict[c] for c in achr], np.int) - bchrint = np.array([unchrdict[c] for c in bchr], np.int) - inachrint = np.array([unchrdict[c] for c in inachr], np.int) - inbchrint = np.array([unchrdict[c] for c in inbchr], np.int) + achrint = np.array([unchrdict[c] for c in achr], int) + bchrint = np.array([unchrdict[c] for c in bchr], int) + inachrint = np.array([unchrdict[c] for c in inachr], int) + inbchrint = np.array([unchrdict[c] for c in inbchr], int) ## For each alignment/node calculate the number of bases which are not overlapping with the in-place blocks @@ -1036,12 +1036,12 @@ cpdef getProfitableTrans(cpp_map[long, cpp_set[long]] graph, long[:] astart, lon bst = ben+1 break bscore += ben - bst + 1 - almntdata[i] = np.array([aend[i]-astart[i]+1, bend[i]-bstart[i]+1, ascore, bscore], dtype=np.int) + almntdata[i] = np.array([aend[i]-astart[i]+1, bend[i]-bstart[i]+1, ascore, bscore], dtype=int) out = deque() count = 0 dist_bkp = np.array([-np.float32('inf')]*len(n), dtype = np.float32) - pred_bkp = np.array([-1]*len(n), dtype = np.int) + pred_bkp = np.array([-1]*len(n), dtype = int) for i in n: nodepath.clear() pred = pred_bkp.copy() @@ -1501,7 +1501,7 @@ cdef greedySubsetSelectorHeuristic(long[:] cluster, transBlocksData, long[:] see skiplist[i]=1 break - skipindexmap = np.zeros(n, np.int) ## smallest index for a tempcluster for which to check the skiplist + skipindexmap = np.zeros(n, int) ## smallest index for a tempcluster for which to check the skiplist ## For a given candidate, if all of its ME candidates have already been added to the skiplist, then that candidate will be selected as part of output for i in range(n): if tempcluster[i] ==0: @@ -1721,7 +1721,7 @@ cdef greedySubsetSelector(cluster, transBlocksData, seedblocks, iterCount = 100) outblocks[seedblocks[i]] = 1 tempcluster[seedblocks[i]] = 0 transBlocksScore = {} - for i in np.nonzero(tempcluster)[0].astype(np.int): + for i in np.nonzero(tempcluster)[0].astype(int): transBlocksScore[i] = (transBlocksData[i].aEnd - transBlocksData[i].aStart) + (transBlocksData[i].bEnd - transBlocksData[i].bStart) # Find best cluster subset @@ -1851,10 +1851,10 @@ def getBestClusterSubset(cluster, transBlocksData, bRT, tdolp, chromo='', aGroup seedBlocks = [i for i in cluster if transBlocksData[i].status == 1] if len(cluster) > 100000: logger.info('Large (>100000 candidates) TD cluster (with '+ str(len(cluster)) +' candidate TDs) identified. Using low-memory high-runtime approach. Iterative sampling disabled. Using less stringent progressive elimination.') - output = greedySubsetSelectorHeuristic(np.array(cluster, np.int), transBlocksData, np.array(seedBlocks, np.int), aGroups, bGroups, threshold, tdolp) + output = greedySubsetSelectorHeuristic(np.array(cluster, int), transBlocksData, np.array(seedBlocks, int), aGroups, bGroups, threshold, tdolp) elif len(cluster) > 10000: logger.info('Large (>10000 candidates) TD cluster (with '+ str(len(cluster)) +' candidate TDs) identified. Using low-memory high-runtime approach. Iterative sampling disabled.') - output = greedySubsetSelector2(np.array(cluster, np.int), transBlocksData, np.array(seedBlocks, np.int), aGroups, bGroups, threshold, tdolp) + output = greedySubsetSelector2(np.array(cluster, int), transBlocksData, np.array(seedBlocks, int), aGroups, bGroups, threshold, tdolp) elif len(cluster) < 50: output = bruteSubsetSelector(cluster, transBlocksData, seedBlocks, bRT) if output == "Failed":