-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
10 changed files
with
187 additions
and
52 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
66 changes: 66 additions & 0 deletions
66
src/de/mpi_cbg/revant/apps/GetAlignmentLengthThreshold.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
package de.mpi_cbg.revant.apps; | ||
|
||
import java.io.*; | ||
import de.mpi_cbg.revant.factorize.Alignments; | ||
import de.mpi_cbg.revant.util.Math; | ||
import de.mpi_cbg.revant.util.IO; | ||
|
||
/** | ||
* Prints to STDOUT the max length of an alignment of a given type. This is used for later | ||
* enumerating only k-mers that are short enough to be contained in a suffix-prefix | ||
* alignment. | ||
* | ||
* Remark: one could return instead the min L such that, say, 90% of all alignments have | ||
* length <=L. However, using such a threshold to avoid considering unique k-mers does | ||
* make the assembly graph less connected in practice. | ||
*/ | ||
public class GetAlignmentLengthThreshold { | ||
/** | ||
* @param args | ||
* 1: alignment type: 0=suffix-prefix overlap; 1=local substring; 2=full containment | ||
* or full identity; | ||
* 4: histogram of all alignment lengths. | ||
*/ | ||
public static void main(String[] args) throws IOException { | ||
final String ALIGNMENTS_FILE = args[0]; | ||
final int TYPE = Integer.parseInt(args[1]); | ||
final int AVG_READ_LENGTH = Integer.parseInt(args[2]); | ||
final String OUTPUT_HISTOGRAM = args[3]; | ||
|
||
final int QUANTUM = IO.quantum; | ||
final int N_CELLS = AVG_READ_LENGTH/QUANTUM; | ||
final int IDENTITY_THRESHOLD = QUANTUM; | ||
|
||
int i; | ||
int length, lengthA, lengthB, max; | ||
long nAlignments; | ||
String str; | ||
BufferedReader br; | ||
BufferedWriter bw; | ||
long[] histogram; | ||
|
||
histogram = new long[N_CELLS]; | ||
br = new BufferedReader(new FileReader(ALIGNMENTS_FILE)); | ||
str=br.readLine(); str=br.readLine(); // Skipping header | ||
str=br.readLine(); | ||
max=0; | ||
while (str!=null) { | ||
Alignments.readAlignmentFile(str); | ||
if (Alignments.readAlignmentFile_getType(IDENTITY_THRESHOLD,str)==TYPE) { | ||
lengthA=Alignments.endA-Alignments.startA+1; | ||
lengthB=Alignments.endB-Alignments.startB+1; | ||
length=Math.max(lengthA,lengthB); | ||
max=Math.max(max,length); | ||
histogram[Math.min(length/QUANTUM,N_CELLS-1)]++; | ||
} | ||
str=br.readLine(); | ||
} | ||
br.close(); | ||
bw = new BufferedWriter(new FileWriter(OUTPUT_HISTOGRAM)); | ||
for (i=0; i<N_CELLS; i++) bw.write((i*QUANTUM)+","+histogram[i]+"\n"); | ||
bw.write(max+",-1\n"); | ||
bw.close(); | ||
System.out.println(max+""); | ||
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.