-
Notifications
You must be signed in to change notification settings - Fork 34
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
16 changed files
with
228 additions
and
42 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
97 changes: 97 additions & 0 deletions
97
...opencb/biodata/tools/variant/converters/avro/VCFHeaderToVariantFileMetadataConverter.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
package org.opencb.biodata.tools.variant.converters.avro; | ||
|
||
import htsjdk.variant.vcf.VCFConstants; | ||
import htsjdk.variant.vcf.VCFHeader; | ||
import htsjdk.variant.vcf.VCFHeaderLine; | ||
import htsjdk.variant.vcf.VCFSampleHeaderLine; | ||
import org.apache.commons.lang3.StringUtils; | ||
import org.opencb.biodata.models.variant.VariantFileMetadata; | ||
import org.opencb.biodata.models.variant.metadata.VariantFileHeader; | ||
import org.opencb.biodata.models.variant.metadata.VariantFileHeaderComplexLine; | ||
|
||
import java.util.*; | ||
import java.util.stream.Collectors; | ||
|
||
public class VCFHeaderToVariantFileMetadataConverter { | ||
|
||
public VariantFileMetadata convert(VCFHeader header, String id, String path) { | ||
return convert(header, new VariantFileMetadata(id, path)); | ||
} | ||
|
||
public VariantFileMetadata convert(VCFHeader header, VariantFileMetadata variantFileMetadata) { | ||
VariantFileHeader variantFileHeader = new VCFHeaderToVariantFileHeaderConverter().convert(header); | ||
Map<String, String> sampleMapping = getSampleMapping(variantFileHeader); | ||
List<String> samples = getSamples(header, sampleMapping); | ||
|
||
// Create converters and fill VariantSource | ||
variantFileMetadata.setHeader(variantFileHeader); | ||
variantFileMetadata.setSampleIds(samples); | ||
if (variantFileMetadata.getAttributes() == null) { | ||
variantFileMetadata.setAttributes(new HashMap<>()); | ||
} | ||
variantFileMetadata.getAttributes().put("originalSamples", String.join(",", header.getGenotypeSamples())); | ||
|
||
return variantFileMetadata; | ||
} | ||
|
||
public List<String> getSamples(VCFHeader header, Map<String, String> sampleNameMapping) { | ||
if (sampleNameMapping == null) { | ||
sampleNameMapping = Collections.emptyMap(); | ||
} | ||
|
||
List<String> samplesInOriginalOrder = header.getGenotypeSamples(); | ||
List<String> renamedSamples = new ArrayList<>(samplesInOriginalOrder.size()); | ||
for (String sample : samplesInOriginalOrder) { | ||
renamedSamples.add(sampleNameMapping.getOrDefault(sample, sample)); | ||
} | ||
|
||
return renamedSamples; | ||
} | ||
|
||
public Map<String, String> getSampleMapping(VCFHeader header) { | ||
Map<String, String> sampleNameMapping = new HashMap<>(); | ||
for (VCFHeaderLine line : header.getMetaDataInInputOrder()) { | ||
if (line instanceof VCFSampleHeaderLine) { | ||
VCFSampleHeaderLine sampleHeaderLine = (VCFSampleHeaderLine) line; | ||
getActualSampleName(sampleHeaderLine.getID(), sampleHeaderLine.getGenericFields(), sampleNameMapping); | ||
} | ||
} | ||
return sampleNameMapping; | ||
} | ||
|
||
public Map<String, String> getSampleMapping(VariantFileHeader header) { | ||
Map<String, String> sampleNameMapping = new HashMap<>(); | ||
for (VariantFileHeaderComplexLine line : header.getComplexLines()) { | ||
if (line.getKey().equals(VCFConstants.SAMPLE_HEADER_KEY)) { | ||
getActualSampleName(line.getId(), line.getGenericFields(), sampleNameMapping); | ||
} | ||
} | ||
return sampleNameMapping; | ||
} | ||
|
||
private void getActualSampleName(String id, Map<String, String> genericFields, Map<String, String> sampleNameMapping) { | ||
String sampleName = getValueIgnoreCase(genericFields, "SampleName"); | ||
if (sampleName == null) { | ||
sampleName = getValueIgnoreCase(genericFields, "SampleId"); | ||
} | ||
if (sampleName == null) { | ||
sampleName = getValueIgnoreCase(genericFields, "Name"); | ||
} | ||
if (sampleName != null) { | ||
sampleNameMapping.put(id, sampleName); | ||
} | ||
} | ||
|
||
private String getValueIgnoreCase(Map<String, String> map, String key) { | ||
for (Map.Entry<String, String> entry : map.entrySet()) { | ||
if (entry.getKey().equalsIgnoreCase(key)) { | ||
String value = entry.getValue(); | ||
if (StringUtils.isNotEmpty(value)) { | ||
return value; | ||
} | ||
} | ||
} | ||
return null; | ||
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
2 changes: 1 addition & 1 deletion
2
...s/variant/VariantMetadataManagerTest.java → .../metadata/VariantMetadataManagerTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
34 changes: 34 additions & 0 deletions
34
...ols/src/test/java/org/opencb/biodata/tools/variant/metadata/VariantMetadataUtilsTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
package org.opencb.biodata.tools.variant.metadata; | ||
|
||
import org.junit.Test; | ||
import org.opencb.biodata.models.variant.Variant; | ||
import org.opencb.biodata.models.variant.VariantFileMetadata; | ||
import org.opencb.biodata.tools.variant.VariantVcfHtsjdkReader; | ||
|
||
import java.io.IOException; | ||
import java.io.InputStream; | ||
import java.nio.file.Paths; | ||
import java.util.Arrays; | ||
|
||
import static org.junit.Assert.*; | ||
|
||
public class VariantMetadataUtilsTest { | ||
|
||
@Test | ||
public void mapSamples() throws IOException { | ||
VariantFileMetadata variantFileMetadata; | ||
try (InputStream is = getClass().getResourceAsStream("/sampleMappingName.vcf")) { | ||
variantFileMetadata = VariantMetadataUtils | ||
.readVariantFileMetadata(is, new VariantFileMetadata("1", "")); | ||
assertEquals(Arrays.asList("sample_tumor", "sample_normal", "sample_other"), variantFileMetadata.getSampleIds()); | ||
} | ||
try (InputStream is = getClass().getResourceAsStream("/sampleMappingName.vcf")) { | ||
for (Variant variant : new VariantVcfHtsjdkReader(is, variantFileMetadata.toVariantStudyMetadata("s"))) { | ||
assertEquals("TUMOR", variant.getStudies().get(0).getSamples().get(0).getData().get(1)); | ||
assertEquals("NORMAL", variant.getStudies().get(0).getSamples().get(1).getData().get(1)); | ||
assertEquals("OTHER", variant.getStudies().get(0).getSamples().get(2).getData().get(1)); | ||
assertEquals(Arrays.asList("sample_tumor", "sample_normal", "sample_other"), variant.getStudies().get(0).getOrderedSamplesName()); | ||
} | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
##fileformat=VCFv4.2 | ||
##SAMPLE=<ID=TUMOR,SampleName=sample_tumor,Description="This is the tumor sample"> | ||
##SAMPLE=<ID=NORMAL,SampleId=sample_normal,Description="This is the normal sample"> | ||
##SAMPLE=<ID=OTHER,Name=sample_other,Description="This is the another sample"> | ||
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT TUMOR NORMAL OTHER | ||
chr1 28494 . T C 100 PASS . GT:NAME 0|0:TUMOR 1|0:NORMAL 1|0:OTHER | ||
chr1 99166 . C T 100 PASS . GT:NAME 0|1:TUMOR 0|1:NORMAL 0|1:OTHER | ||
chr1 99580 . T C 100 PASS . GT:NAME 0|1:TUMOR 0|0:NORMAL 1|1:OTHER |
Oops, something went wrong.