-
Notifications
You must be signed in to change notification settings - Fork 592
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
NIO output support for ApplyBQSR #4424
Changes from all commits
2536af7
b7c0c00
9d0403b
ed3632a
077e85f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,6 +5,9 @@ | |
import com.google.common.primitives.Ints; | ||
import htsjdk.samtools.SAMFileHeader; | ||
import htsjdk.tribble.util.ParsingUtils; | ||
import java.io.FileNotFoundException; | ||
import java.nio.file.Files; | ||
import java.nio.file.Path; | ||
import org.apache.commons.io.FileUtils; | ||
import org.apache.commons.lang3.ArrayUtils; | ||
import org.apache.commons.lang3.StringUtils; | ||
|
@@ -524,12 +527,40 @@ public static String calcMD5(final byte[] bytes) { | |
/** | ||
* Calculates the MD5 for the specified file and returns it as a String | ||
* | ||
* Warning: this loads the whole file into memory, so it's not suitable | ||
* for large files. | ||
* | ||
* @param file file whose MD5 to calculate | ||
* @return file's MD5 in String form | ||
* @throws IOException if the file could not be read | ||
*/ | ||
public static String calculateFileMD5( final File file ) throws IOException{ | ||
return Utils.calcMD5(FileUtils.readFileToByteArray(file)); | ||
return calculatePathMD5(file.toPath()); | ||
} | ||
|
||
/** | ||
* Calculates the MD5 for the specified file and returns it as a String | ||
* | ||
* Warning: this loads the whole file into memory, so it's not suitable | ||
* for large files. | ||
* | ||
* @param path file whose MD5 to calculate | ||
* @return file's MD5 in String form | ||
* @throws IOException if the file could not be read | ||
*/ | ||
public static String calculatePathMD5(final Path path) throws IOException{ | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you add a warning to the javadoc for this method that it slurps the entire file into memory, and should not be used for large files? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Done! |
||
// This doesn't have as nice error messages as FileUtils, but it's close. | ||
String fname = path.toUri().toString(); | ||
if (!Files.exists(path)) { | ||
throw new FileNotFoundException("File '" + fname + "' does not exist"); | ||
} | ||
if (Files.isDirectory(path)) { | ||
throw new IOException("File '" + fname + "' exists but is a directory"); | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Should this check Files.isRegularFile() as well? It seems wrong to try to take the md5 of a pipe or something like that. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Agreed, done. |
||
if (!Files.isRegularFile(path)) { | ||
throw new IOException("File '" + fname + "' exists but is not a regular file"); | ||
} | ||
return Utils.calcMD5(Files.readAllBytes(path)); | ||
} | ||
|
||
/** | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,10 +14,8 @@ | |
import htsjdk.samtools.SAMUtils; | ||
import htsjdk.samtools.SamStreams; | ||
import htsjdk.samtools.cram.build.CramIO; | ||
import java.io.BufferedInputStream; | ||
import java.io.File; | ||
import java.io.FileInputStream; | ||
import java.io.IOException; | ||
import java.io.*; | ||
import java.nio.file.Files; | ||
import java.nio.file.OpenOption; | ||
import java.nio.file.Path; | ||
import java.util.Arrays; | ||
|
@@ -1131,19 +1129,31 @@ public static SAMFileWriter createCommonSAMWriterFromFactory( | |
* Validate that a file has CRAM contents by checking that it has a valid CRAM file header | ||
* (no matter what the extension). | ||
* | ||
* @param putativeCRAMFile File to check. | ||
* @param putativeCRAMPath File to check. | ||
* @return true if the file has a valid CRAM file header, otherwise false | ||
*/ | ||
public static boolean hasCRAMFileContents(final File putativeCRAMFile) { | ||
try (final FileInputStream fileStream = new FileInputStream(putativeCRAMFile); | ||
final BufferedInputStream bis = new BufferedInputStream(fileStream)) { | ||
return SamStreams.isCRAMFile(bis); | ||
public static boolean hasCRAMFileContents(final Path putativeCRAMPath) { | ||
try (final InputStream fileStream = Files.newInputStream(putativeCRAMPath)) { | ||
try (final BufferedInputStream bis = new BufferedInputStream(fileStream)) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think it's better to have 2 resources declared in the same `try`. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It doesn't compile that way, complains about an IOException. Suggestions welcome, though. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We tried it, and it does actually work as a single `try`. |
||
return SamStreams.isCRAMFile(bis); | ||
} | ||
} | ||
catch (IOException e) { | ||
throw new UserException.CouldNotReadInputFile(e.getMessage()); | ||
} | ||
} | ||
|
||
/** | ||
* Validate that a file has CRAM contents by checking that it has a valid CRAM file header | ||
* (no matter what the extension). | ||
* | ||
* @param putativeCRAMFile File to check. | ||
* @return true if the file has a valid CRAM file header, otherwise false | ||
*/ | ||
public static boolean hasCRAMFileContents(final File putativeCRAMFile) { | ||
return hasCRAMFileContents(putativeCRAMFile.toPath()); | ||
} | ||
|
||
public static boolean isNonPrimary(GATKRead read) { | ||
return read.isSecondaryAlignment() || read.isSupplementaryAlignment() || read.isUnmapped(); | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -61,12 +61,12 @@ public Object[][] testCRAMContentsFailData() { | |
|
||
@Test(dataProvider = "testCRAMContentsSucceed") | ||
public void testAssertCRAMContentsSucceed(File putativeCRAMFile) { | ||
SamAssertionUtils.assertCRAMContents(putativeCRAMFile); | ||
SamAssertionUtils.assertCRAMContents(putativeCRAMFile.toPath()); | ||
} | ||
|
||
@Test(dataProvider = "testCRAMContentsFail", expectedExceptions=AssertionError.class) | ||
public void testAssertCRAMContentsFail(File putativeCRAMFile) { | ||
SamAssertionUtils.assertCRAMContents(putativeCRAMFile); | ||
SamAssertionUtils.assertCRAMContents(putativeCRAMFile.toPath()); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Test the file-based overloads as well. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. There's no file-based overload of `assertCRAMContents`. |
||
} | ||
|
||
@DataProvider(name="testCRAMContentsIfCRAMSucceed") | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you add a warning to the javadoc for this method that it slurps the entire file into memory, and should not be used for large files?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done!