Skip to content

Commit

Permalink
More tests and javadoc.
Browse files Browse the repository at this point in the history
  • Loading branch information
cmnbroad committed Aug 27, 2024
1 parent 0184bb5 commit 5a10c0c
Show file tree
Hide file tree
Showing 7 changed files with 287 additions and 96 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ public Object instanceMainPostParseArgs() {
tmpDir = new GATKPath(System.getProperty("java.io.tmpdir"));
}

// Build the default headers
// Build the default headers
final ZonedDateTime startDateTime = ZonedDateTime.now();
this.defaultHeaders.add(new StringHeader(commandLine));
this.defaultHeaders.add(new StringHeader("Started on: " + Utils.getDateTimeForDisplay(startDateTime)));
Expand Down
198 changes: 129 additions & 69 deletions src/main/java/org/broadinstitute/hellbender/tools/CreateBundle.java

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
package org.broadinstitute.hellbender.engine;

import htsjdk.beta.io.IOPathUtils;
import htsjdk.beta.io.bundle.Bundle;
import htsjdk.beta.io.bundle.BundleJSON;
import htsjdk.io.IOPath;
import org.broadinstitute.hellbender.GATKBaseTest;
import org.testng.Assert;
import org.testng.annotations.Test;

import java.io.IOException;

/**
 * Integration test that guards the on-disk JSON bundle format against unintended, backward-incompatible
 * changes (for example, changes picked up from htsjdk).
 */
public class BundleSupportIntegrationTest extends GATKBaseTest {

    /**
     * Parses a checked-in serialized bundle, writes it back out to a temporary file, re-reads that
     * temporary file, and verifies that the re-read bundle is equivalent to the one originally parsed.
     *
     * @throws IOException if the temporary output path cannot be created
     */
    @Test
    public void testReadWriteSerializedReferenceBundle() throws IOException {
        // This test file contains absolute paths to files on a local dev machine, so it shouldn't really be used
        // for anything other than this test, since the absolute paths are unlikely to work on any other machine.
        // Here we just want to make sure we can consume and round-trip it without error.
        final IOPath testBundleFilePath = new GATKPath(
                "src/test/resources/org/broadinstitute/hellbender/engine/print_reads_bundle_do_not_use.json");

        // get our test bundle from the file (ensuring that we can parse it)
        final Bundle testBundle = BundleJSON.toBundle(IOPathUtils.getStringFromPath(testBundleFilePath));

        // write the parsed bundle out to a temp file
        final IOPath roundTrippedBundleFilePath = new GATKPath(
                createTempPath("testReadWriteSerializedReferenceBundle", ".json").toString());
        IOPathUtils.writeStringToPath(roundTrippedBundleFilePath, BundleJSON.toJSON(testBundle));

        // read the bundle back in from the temp file we just wrote (NOT from the original input file, which
        // would make the comparison below trivially true), and compare it with the original
        final Bundle roundTrippedBundle =
                BundleJSON.toBundle(IOPathUtils.getStringFromPath(roundTrippedBundleFilePath));
        Assert.assertTrue(Bundle.equalsIgnoreOrder(roundTrippedBundle, testBundle));
    }

}
Original file line number Diff line number Diff line change
Expand Up @@ -167,8 +167,7 @@ private static IOPath createRemoteBundleForFile(
final File index1,
final File vcf2,
final File index2) throws IOException {
//TODO: replace this path with getGCPTestStaging()
final String remotePath = BucketUtils.randomRemotePath("gs://hellbender/test/staging/remoteBundles", "remote_bundle_test", "dir");
final String remotePath = BucketUtils.randomRemotePath(getGCPTestStaging() + "remoteBundles", "remote_bundle_test", "dir");
final Path remoteDirPath = IOUtils.getPath(remotePath + "/");

Files.createDirectory(remoteDirPath);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import htsjdk.beta.io.bundle.*;
import htsjdk.beta.plugin.IOUtils;
import htsjdk.beta.plugin.variants.VariantsBundle;
import org.broadinstitute.barclay.argparser.CommandLineException;
import org.broadinstitute.hellbender.CommandLineProgramTest;
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
import org.broadinstitute.hellbender.engine.GATKPath;
Expand All @@ -13,24 +14,28 @@
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;

public class CreateBundleIntegrationTest extends CommandLineProgramTest {

// force our local paths to use absolute path names to make BundleResource and IOPath equality checks easier,
// since once a bundle is round-tripped/serialized to JSON, the resources will always contain absolute path names
// for local files

//NOTE: These variables are Strings, but they are initialized to Strings obtained by first creating a GATKPath,
// and then calling getURIString on the resulting object. This is just a shortcut to normalize them so they
// match the strings that will be embedded in the bundles created by the CreateBundle tool (i.e., to have
// full/absolute paths and protocol schemes).
private final static String LOCAL_VCF = new GATKPath(getTestDataDir() + "/count_variants_withSequenceDict.vcf").getURIString();
private final static String LOCAL_VCF_IDX = new GATKPath(getTestDataDir() + "/count_variants_withSequenceDict.vcf.idx").getURIString();
private final static String LOCAL_VCF_GZIP = new GATKPath("src/test/resources/large/NA24385.vcf.gz").getURIString();
private final static String LOCAL_VCF_TBI = new GATKPath("src/test/resources/large/NA24385.vcf.gz.tbi").getURIString();
private final static String LOCAL_VCF_WITH_NO_INDEX = new GATKPath("src/test/resources/org/broadinstitute/hellbender/tools/count_variants_withSequenceDict_noIndex.vcf").getURIString();
private final static String CLOUD_VCF = "gs://hellbender/test/resources/large/1000G.phase3.broad.withGenotypes.chr20.10100000.vcf";
private final static String CLOUD_VCF_IDX = "gs://hellbender/test/resources/large/1000G.phase3.broad.withGenotypes.chr20.10100000.vcf.idx";
private final static String CLOUD_VCF = GCS_GATK_TEST_RESOURCES + "large/1000G.phase3.broad.withGenotypes.chr20.10100000.vcf";
private final static String CLOUD_VCF_IDX = GCS_GATK_TEST_RESOURCES + "large/1000G.phase3.broad.withGenotypes.chr20.10100000.vcf.idx";

private final static String LOCAL_FASTA = "src/test/resources/large/Homo_sapiens_assembly38.20.21.fasta";
private final static String LOCAL_FASTA_INDEX = "src/test/resources/large/Homo_sapiens_assembly38.20.21.fasta.fai";
private final static String LOCAL_FASTA_DICT = "src/test/resources/large/Homo_sapiens_assembly38.20.21.fasta.dict";
private final static String LOCAL_FASTA = new GATKPath("src/test/resources/large/Homo_sapiens_assembly38.20.21.fasta").getURIString();
private final static String LOCAL_FASTA_INDEX = new GATKPath("src/test/resources/large/Homo_sapiens_assembly38.20.21.fasta.fai").getURIString();
private final static String LOCAL_FASTA_DICT = new GATKPath("src/test/resources/large/Homo_sapiens_assembly38.20.21.dict").getURIString();

private final static String CUSTOM_PRIMARY_CT = "primary_ct";
private final static String CUSTOM_SECONDARY_CT = "secondary_ct";
Expand All @@ -41,22 +46,21 @@ public Object[][] bundleCases() {
return new Object[][] {
// primary, primary tag, secondary, secondary tag, other(s), other tag(s), suppressResourceResolution, expectedBundle

// VCF bundle cases, with AUTOMATIC secondary resolution, and INFERRED content types
// VCF bundle cases, with AUTOMATIC secondary resolution, and INFERRED primary content types
{LOCAL_VCF, null, null, null, null, null, false, new VariantsBundle(new GATKPath(LOCAL_VCF), new GATKPath(LOCAL_VCF_IDX))},
{LOCAL_VCF, null, LOCAL_VCF_IDX, null, null, null, false, new VariantsBundle(new GATKPath(LOCAL_VCF), new GATKPath(LOCAL_VCF_IDX))},
{LOCAL_VCF, null, LOCAL_VCF_IDX, BundleResourceType.CT_VARIANTS_INDEX, null, null, false, new VariantsBundle(new GATKPath(LOCAL_VCF), new GATKPath(LOCAL_VCF_IDX))},
{LOCAL_VCF_GZIP, null, null, null, null, null, false, new VariantsBundle(new GATKPath(LOCAL_VCF_GZIP), new GATKPath(LOCAL_VCF_TBI))},
{LOCAL_VCF_GZIP, null, LOCAL_VCF_TBI, BundleResourceType.CT_VARIANTS_INDEX, null, null, true, new VariantsBundle(new GATKPath(LOCAL_VCF_GZIP), new GATKPath(LOCAL_VCF_TBI))},
{CLOUD_VCF, null, null, null, null, null, false, new VariantsBundle(new GATKPath(CLOUD_VCF), new GATKPath(CLOUD_VCF_IDX))},
{CLOUD_VCF, null, CLOUD_VCF_IDX, null, null, null, false, new VariantsBundle(new GATKPath(CLOUD_VCF), new GATKPath(CLOUD_VCF_IDX))},
{CLOUD_VCF, null, CLOUD_VCF_IDX, BundleResourceType.CT_VARIANTS_INDEX, null, null, false, new VariantsBundle(new GATKPath(CLOUD_VCF), new GATKPath(CLOUD_VCF_IDX))},

// VCF bundle cases, with SUPPRESSED secondary resolution, and INFERRED content types
// VCF bundle cases, with SUPPRESSED secondary resolution, and INFERRED primary content types
{LOCAL_VCF, null, null, null, null, null, true, new VariantsBundle(new GATKPath(LOCAL_VCF))},
{LOCAL_VCF, null, LOCAL_VCF_IDX, null, null, null, true, new VariantsBundle(new GATKPath(LOCAL_VCF), new GATKPath(LOCAL_VCF_IDX))},
// local vcf that has no index, but since suppressSecondaryResourceResolution is true, we don't throw since we don't try to infer the index
{LOCAL_VCF_WITH_NO_INDEX, null, null, null, null, null, true, new VariantsBundle(new GATKPath(LOCAL_VCF_WITH_NO_INDEX))},
{LOCAL_VCF_GZIP, null, LOCAL_VCF_TBI, null, null, null, true, new VariantsBundle(new GATKPath(LOCAL_VCF_GZIP), new GATKPath(LOCAL_VCF_TBI))},
{CLOUD_VCF, null, null, null, null, null, true, new VariantsBundle(new GATKPath(CLOUD_VCF))},
{CLOUD_VCF, null, CLOUD_VCF_IDX, null, null, null, true, new VariantsBundle(new GATKPath(CLOUD_VCF), new GATKPath(CLOUD_VCF_IDX))},

// VCF bundle cases, with AUTOMATIC secondary resolution, and EXPLICIT content types
// VCF bundle cases, with AUTOMATIC secondary resolution, and EXPLICIT primary content types
{LOCAL_VCF, BundleResourceType.CT_VARIANT_CONTEXTS, null, null, null, null, false, new VariantsBundle(new GATKPath(LOCAL_VCF), new GATKPath(LOCAL_VCF_IDX))},
{LOCAL_VCF, BundleResourceType.CT_VARIANT_CONTEXTS, LOCAL_VCF_IDX, BundleResourceType.CT_VARIANTS_INDEX, null, null, false, new VariantsBundle(new GATKPath(LOCAL_VCF), new GATKPath(LOCAL_VCF_IDX))},

Expand All @@ -70,9 +74,24 @@ public Object[][] bundleCases() {
.addPrimary(new IOPathResource(new GATKPath(LOCAL_VCF), BundleResourceType.CT_VARIANT_CONTEXTS))
.addSecondary(new IOPathResource(new GATKPath(LOCAL_VCF_IDX), BundleResourceType.CT_VARIANTS_INDEX))
.addSecondary(new IOPathResource(new GATKPath(new GATKPath("someVariantsCompanion.txt").getURIString()), "someVariantsCT"))
.build()},
.build()
},

// reference bundles
{ LOCAL_FASTA, null, null, null, null, null, false,
new BundleBuilder()
.addPrimary(new IOPathResource(new GATKPath(LOCAL_FASTA), BundleResourceType.CT_HAPLOID_REFERENCE))
.addSecondary(new IOPathResource(new GATKPath(LOCAL_FASTA_INDEX), BundleResourceType.CT_REFERENCE_INDEX))
.addSecondary(new IOPathResource(new GATKPath(LOCAL_FASTA_DICT), BundleResourceType.CT_REFERENCE_DICTIONARY))
.build()
},
{ LOCAL_FASTA, BundleResourceType.CT_HAPLOID_REFERENCE, LOCAL_FASTA_INDEX, BundleResourceType.CT_REFERENCE_INDEX, Arrays.asList(LOCAL_FASTA_DICT), Arrays.asList(BundleResourceType.CT_REFERENCE_DICTIONARY), false,
new BundleBuilder()
.addPrimary(new IOPathResource(new GATKPath(LOCAL_FASTA), BundleResourceType.CT_HAPLOID_REFERENCE))
.addSecondary(new IOPathResource(new GATKPath(LOCAL_FASTA_INDEX), BundleResourceType.CT_REFERENCE_INDEX))
.addSecondary(new IOPathResource(new GATKPath(LOCAL_FASTA_DICT), BundleResourceType.CT_REFERENCE_DICTIONARY))
.build()
},

// "custom" bundles
{
Expand Down Expand Up @@ -105,17 +124,26 @@ public Object[][] negativeBundleCases() {
return new Object[][] {
// primary, primary tag, secondary, secondary tag, other(s), other tag(s), suppressIndexResolution, expectedBundle

// no index file can be inferred
// no vcf index file can be inferred
{LOCAL_VCF_WITH_NO_INDEX, null, null, null, null, null, false, new VariantsBundle(new GATKPath(LOCAL_VCF_WITH_NO_INDEX))},
// vcf bundle with secondary/other content type not explicitly provided
{LOCAL_VCF, BundleResourceType.CT_VARIANT_CONTEXTS, null, null, Arrays.asList("other.txt"), null, false, null},
{LOCAL_VCF, null, LOCAL_VCF_IDX, null, null, null, false, new VariantsBundle(new GATKPath(LOCAL_VCF), new GATKPath(LOCAL_VCF_IDX))},
{LOCAL_VCF, null, LOCAL_VCF_IDX, null, null, null, true, new VariantsBundle(new GATKPath(LOCAL_VCF), new GATKPath(LOCAL_VCF_IDX))},
{LOCAL_VCF_GZIP, null, LOCAL_VCF_TBI, null, null, null, true, new VariantsBundle(new GATKPath(LOCAL_VCF_GZIP), new GATKPath(LOCAL_VCF_TBI))},
{CLOUD_VCF, null, CLOUD_VCF_IDX, null, null, null, false, new VariantsBundle(new GATKPath(CLOUD_VCF), new GATKPath(CLOUD_VCF_IDX))},
{CLOUD_VCF, null, CLOUD_VCF_IDX, null, null, null, true, new VariantsBundle(new GATKPath(CLOUD_VCF), new GATKPath(CLOUD_VCF_IDX))},
// primary content type not provided, and cannot be inferred from the extension
{"primaryFile.ext", null, null, null, null, null, false, null},
// secondary content type not provided
{"primaryFile.ext", CUSTOM_PRIMARY_CT, "secondaryFile.ext", null, null, null, false, null},

// reference input with unknown content type specified
{ LOCAL_FASTA, null, LOCAL_FASTA_INDEX, "unknown", null, null, false, null},

// other bundle with other content type not provided
{"primaryFile.ext", CUSTOM_PRIMARY_CT, "secondaryFile.ext", CUSTOM_SECONDARY_CT, Arrays.asList("other.txt"), null, false, null},
// vcf bundle with other content type not provided
{LOCAL_VCF, BundleResourceType.CT_VARIANT_CONTEXTS, null, null, Arrays.asList("other.txt"), null, false, null},

};
}

Expand Down Expand Up @@ -145,6 +173,17 @@ public void testNegativeBundleCases(
doCreateBundleTest (primaryInput, primaryInputTag, secondaryInput, secondaryInputTag, otherInputs, otherInputTags, suppressResourceResolution, expectedBundle);
}

/**
 * Runs the tool with a primary resource and an output file whose name ends in ".bundle.BOGUS", and
 * expects the run to fail with a {@link CommandLineException}.
 */
// NOTE(review): presumably the failure is triggered by the output not ending in a recognized bundle
// extension; the check itself lives in the tool, which is not visible here -- confirm.
@Test(expectedExceptions={CommandLineException.class})
public void testRequireBundleExtension() {
    final String bogusOutputName = createTempFile("test", ".bundle.BOGUS").getAbsolutePath();
    final GATKPath bogusOutput = new GATKPath(bogusOutputName);

    final List<String> commandLineArgs = new ArrayList<>(Arrays.asList(
            "--" + StandardArgumentDefinitions.PRIMARY_RESOURCE_LONG_NAME,
            LOCAL_FASTA,
            "--" + StandardArgumentDefinitions.OUTPUT_LONG_NAME,
            bogusOutput.toString()));

    runCommandLine(commandLineArgs);
}

private void doCreateBundleTest(
final String primaryInput,
final String primaryInputTag,
Expand Down Expand Up @@ -176,10 +215,6 @@ private void doCreateBundleTest(
args.add("--" + StandardArgumentDefinitions.OUTPUT_LONG_NAME);
args.add(outputPath.toString());

System.out.println();
System.out.println(args.stream().collect(Collectors.joining("\n ")));
System.out.println();

runCommandLine(args);

final Bundle actualBundle = BundleJSON.toBundle(IOUtils.getStringFromPath(outputPath), GATKPath::new);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,22 +1,29 @@
package org.broadinstitute.hellbender.tools;

import htsjdk.samtools.SamReaderFactory;
import htsjdk.samtools.ValidationStringency;
import htsjdk.beta.io.IOPathUtils;
import htsjdk.beta.io.bundle.*;
import htsjdk.io.IOPath;
import htsjdk.samtools.*;
import htsjdk.samtools.cram.ref.ReferenceSource;
import org.broadinstitute.hellbender.GATKBaseTest;
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
import org.broadinstitute.hellbender.engine.GATKPath;
import org.broadinstitute.hellbender.engine.ReadsDataSource;
import org.broadinstitute.hellbender.engine.ReadsPathDataSource;
import org.broadinstitute.hellbender.testutils.ArgumentsBuilder;
import org.broadinstitute.hellbender.testutils.SamAssertionUtils;
import org.broadinstitute.hellbender.utils.SimpleInterval;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.gcs.BucketUtils;
import org.broadinstitute.hellbender.utils.io.IOUtils;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;

import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Arrays;
import java.util.List;

Expand Down Expand Up @@ -104,4 +111,50 @@ public void testHttpPaths(String reads, String index, String nonHttpReads, Strin
SamAssertionUtils.assertEqualBamFiles(out, out2, false, ValidationStringency.DEFAULT_STRINGENCY);
}

/**
 * Verifies that reading and writing a CRAM both work when the reference is supplied as a JSON bundle
 * whose fasta, fasta index, and sequence dictionary are each stored in a separate location under one
 * randomly named remote staging directory.
 *
 * @throws IOException if copying the reference files to the remote staging directory fails
 */
@Test(groups = {"cloud"})
public void testPrintReadsWithReferenceBundle() throws IOException {
    // local copies of the reference and its companion files
    final IOPath localFasta = new GATKPath(getTestDataDir() + "/print_reads.fasta");
    final IOPath localFastaIndex = new GATKPath(getTestDataDir() + "/print_reads.fasta.fai");
    final IOPath localFastaDict = new GATKPath(getTestDataDir() + "/print_reads.dict");

    // random remote staging directory, registered for deletion on exit
    final String remoteDirName =
            BucketUtils.randomRemotePath(getGCPTestStaging(), "testPrintReadsWithReferenceBundle", "") + "/";
    final IOPath remoteDir = new GATKPath(remoteDirName);
    IOUtils.deleteOnExit(remoteDir.toPath());

    // stage the fasta at the top level, and the index and dictionary each in their own subdirectory, so
    // that the companion files can't be located by sibling-path inference
    final Path stagedFasta = Files.copy(
            localFasta.toPath(),
            new GATKPath(remoteDirName + "print_reads.fasta").toPath());
    final IOPath remoteIndexTarget = new GATKPath(remoteDirName + "refindex/print_reads.fasta.fai");
    final Path stagedFastaIndex = Files.copy(localFastaIndex.toPath(), remoteIndexTarget.toPath());
    final IOPath remoteDictTarget = new GATKPath(remoteDirName + "refdict/print_reads.dict");
    final Path stagedFastaDict = Files.copy(localFastaDict.toPath(), remoteDictTarget.toPath());

    // describe the three staged files in a bundle, and serialize the bundle next to the staged fasta
    final IOPathResource fastaResource = new IOPathResource(
            new GATKPath(stagedFasta.toUri().toString()), BundleResourceType.CT_HAPLOID_REFERENCE);
    final IOPathResource indexResource = new IOPathResource(
            new GATKPath(stagedFastaIndex.toUri().toString()), BundleResourceType.CT_REFERENCE_INDEX);
    final IOPathResource dictResource = new IOPathResource(
            new GATKPath(stagedFastaDict.toUri().toString()), BundleResourceType.CT_REFERENCE_DICTIONARY);
    final Bundle referenceBundle = new BundleBuilder()
            .addPrimary(fastaResource)
            .addSecondary(indexResource)
            .addSecondary(dictResource)
            .build();
    final IOPath bundleFilePath = new GATKPath(remoteDirName + "refBundle.json");
    IOPathUtils.writeStringToPath(bundleFilePath, BundleJSON.toJSON(referenceBundle));

    // run the tool on a local CRAM, using the bundle as the reference, writing a local CRAM
    final IOPath outputCram = new GATKPath(
            IOUtils.createTempFile("testReferenceSequenceForNioBundle", ".cram").getAbsolutePath());
    final ArgumentsBuilder args = new ArgumentsBuilder();
    args.addInput(getTestDataDir() + "/print_reads.cram");
    args.addReference(bundleFilePath.toString());
    args.addOutput(outputCram.toString());
    runCommandLine(args);

    // NOTE(review): the reference source below is constructed from the bundle JSON path rather than from
    // the staged fasta -- confirm that ReferenceSource accepts a bundle path here.
    final SamReaderFactory readerFactory = SamReaderFactory.makeDefault()
            .validationStringency(ValidationStringency.SILENT)
            .referenceSource(new ReferenceSource(bundleFilePath.toPath()));

    // count the records in the output CRAM
    int recordCount = 0;
    try (final SamReader outputReader = readerFactory.open(outputCram.toPath())) {
        for (@SuppressWarnings("unused") final SAMRecord ignored : outputReader) {
            recordCount++;
        }
    }
    Assert.assertEquals(recordCount, 8);
}

}
Loading

0 comments on commit 5a10c0c

Please sign in to comment.