Skip to content

Commit

Permalink
Merge branch 'release-2.4.x'
Browse files Browse the repository at this point in the history
  • Loading branch information
imedina committed Sep 7, 2022
2 parents a946b26 + 7b1bb05 commit 2926a2b
Show file tree
Hide file tree
Showing 16 changed files with 228 additions and 42 deletions.
11 changes: 10 additions & 1 deletion .github/workflows/develop.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,16 @@ jobs:
- name: Install Samtools
run: sudo apt-get install samtools
- name: Run tests with Maven
run: mvn -T 2 clean install
# run: mvn -T 2 clean install
run: mvn surefire-report:report
- name: Deploy tests web recursively to remote
uses: garygrossgarten/github-action-scp@release
with:
local: biodata-tools/target
remote: /mnt/data/opencb/biodata/tests/${{ needs.build.outputs.version }}/${{ github.ref_name }}/${{ github.sha }}
host: ${{ secrets.SSH_SWDEV_IP }}
username: ${{ secrets.SSH_SWDEV_USER }}
password: ${{ secrets.SSH_SWDEV_PASSWORD }}

deploy-maven:
uses: opencb/java-common-libs/.github/workflows/deploy-maven-repository-workflow.yml@develop
Expand Down
13 changes: 11 additions & 2 deletions .github/workflows/task.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,5 +33,14 @@ jobs:
cache: 'maven'
- name: Install Samtools
run: sudo apt-get install samtools
- name: Run tests with Maven
run: mvn -T 2 clean install
- name: Run tests and create report with Maven
# run: mvn -T 2 clean install
run: mvn surefire-report:report
- name: Deploy tests web recursively to remote
uses: garygrossgarten/github-action-scp@release
with:
local: target/site
remote: /mnt/resources/opencb/biodata/tests/${{ github.ref_name }}/${{ github.sha }}
host: 128.232.224.128
username: ${{ secrets.SCP_SITE_USER }}
password: ${{ secrets.SCP_SITE_PASSWORD }}
2 changes: 1 addition & 1 deletion biodata-external/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
<parent>
<artifactId>biodata</artifactId>
<groupId>org.opencb.biodata</groupId>
<version>2.4.2</version>
<version>2.4.3</version>
<relativePath>../pom.xml</relativePath>
</parent>

Expand Down
2 changes: 1 addition & 1 deletion biodata-formats/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
<parent>
<groupId>org.opencb.biodata</groupId>
<artifactId>biodata</artifactId>
<version>2.4.2</version>
<version>2.4.3</version>
<relativePath>../pom.xml</relativePath>
</parent>

Expand Down
2 changes: 1 addition & 1 deletion biodata-models/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
<parent>
<groupId>org.opencb.biodata</groupId>
<artifactId>biodata</artifactId>
<version>2.4.2</version>
<version>2.4.3</version>
<relativePath>../pom.xml</relativePath>
</parent>

Expand Down
4 changes: 2 additions & 2 deletions biodata-tools/pom.xml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
<?xml version="1.0"?>
<?xml version="1.0" encoding="UTF-8"?>
<!--
~ Copyright 2015-2017 OpenCB
~
Expand All @@ -22,7 +22,7 @@
<parent>
<groupId>org.opencb.biodata</groupId>
<artifactId>biodata</artifactId>
<version>2.4.2</version>
<version>2.4.3</version>
<relativePath>../pom.xml</relativePath>
</parent>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
import org.opencb.biodata.models.variant.avro.FileEntry;
import org.opencb.biodata.models.variant.metadata.VariantStudyMetadata;
import org.opencb.biodata.tools.variant.converters.avro.VCFHeaderToVariantFileHeaderConverter;
import org.opencb.biodata.tools.variant.converters.avro.VCFHeaderToVariantFileMetadataConverter;
import org.opencb.biodata.tools.variant.converters.avro.VariantContextToVariantConverter;
import org.opencb.biodata.tools.variant.metadata.VariantMetadataManager;
import org.opencb.commons.utils.FileUtils;
Expand Down Expand Up @@ -178,23 +179,20 @@ public String readLine() throws IOException {
@Override public void close() {}
}));

// htsjdk automatically and inevitably sorts sample data in alphabetical order. Need to recover the original
// order in the VCF from the header and initialise the converter with the original order so that the order
// of samplesdata in CellBase output is exactly the same as in the original VCF
List<String> samplesInOriginalOrder = Arrays.asList(new String[header.getSampleNameToOffset().size()]);
for (Map.Entry<String, Integer> entry : header.getSampleNameToOffset().entrySet()) {
samplesInOriginalOrder.set(entry.getValue(), entry.getKey());
}
VCFHeaderToVariantFileMetadataConverter fileMetadataConverter = new VCFHeaderToVariantFileMetadataConverter();
fileMetadataConverter.convert(header, fileMetadata);
List<String> samples = fileMetadata.getSampleIds();

header = new VCFHeader(header.getMetaDataInInputOrder(), samples);
codec.setVCFHeader(header, codec.getVCFHeaderVersion());

// Create converters and fill VariantSource
converter = new VariantContextToVariantConverter(metadata.getId(), fileMetadata.getId(), samplesInOriginalOrder);
fileMetadata.setHeader(new VCFHeaderToVariantFileHeaderConverter().convert(header));
fileMetadata.setSampleIds(samplesInOriginalOrder);
// Create converters
converter = new VariantContextToVariantConverter(metadata.getId(), fileMetadata.getId(), samples);
if (metadata.getIndividuals() == null) {
metadata.setIndividuals(new ArrayList<>(samplesInOriginalOrder.size()));
metadata.setIndividuals(new ArrayList<>(samples.size()));
}
VariantMetadataManager metadataManager = new VariantMetadataManager(metadata);
for (String sample : samplesInOriginalOrder) {
for (String sample : samples) {
metadataManager.addIndividual(sample, sample, metadata.getId());
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@
import org.opencb.biodata.models.variant.metadata.VariantFileHeaderSimpleLine;
import org.opencb.biodata.tools.commons.Converter;

import java.util.*;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.function.Function;

/**
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
package org.opencb.biodata.tools.variant.converters.avro;

import htsjdk.variant.vcf.VCFConstants;
import htsjdk.variant.vcf.VCFHeader;
import htsjdk.variant.vcf.VCFHeaderLine;
import htsjdk.variant.vcf.VCFSampleHeaderLine;
import org.apache.commons.lang3.StringUtils;
import org.opencb.biodata.models.variant.VariantFileMetadata;
import org.opencb.biodata.models.variant.metadata.VariantFileHeader;
import org.opencb.biodata.models.variant.metadata.VariantFileHeaderComplexLine;

import java.util.*;
import java.util.stream.Collectors;

public class VCFHeaderToVariantFileMetadataConverter {

public VariantFileMetadata convert(VCFHeader header, String id, String path) {
return convert(header, new VariantFileMetadata(id, path));
}

public VariantFileMetadata convert(VCFHeader header, VariantFileMetadata variantFileMetadata) {
VariantFileHeader variantFileHeader = new VCFHeaderToVariantFileHeaderConverter().convert(header);
Map<String, String> sampleMapping = getSampleMapping(variantFileHeader);
List<String> samples = getSamples(header, sampleMapping);

// Create converters and fill VariantSource
variantFileMetadata.setHeader(variantFileHeader);
variantFileMetadata.setSampleIds(samples);
if (variantFileMetadata.getAttributes() == null) {
variantFileMetadata.setAttributes(new HashMap<>());
}
variantFileMetadata.getAttributes().put("originalSamples", String.join(",", header.getGenotypeSamples()));

return variantFileMetadata;
}

public List<String> getSamples(VCFHeader header, Map<String, String> sampleNameMapping) {
if (sampleNameMapping == null) {
sampleNameMapping = Collections.emptyMap();
}

List<String> samplesInOriginalOrder = header.getGenotypeSamples();
List<String> renamedSamples = new ArrayList<>(samplesInOriginalOrder.size());
for (String sample : samplesInOriginalOrder) {
renamedSamples.add(sampleNameMapping.getOrDefault(sample, sample));
}

return renamedSamples;
}

public Map<String, String> getSampleMapping(VCFHeader header) {
Map<String, String> sampleNameMapping = new HashMap<>();
for (VCFHeaderLine line : header.getMetaDataInInputOrder()) {
if (line instanceof VCFSampleHeaderLine) {
VCFSampleHeaderLine sampleHeaderLine = (VCFSampleHeaderLine) line;
getActualSampleName(sampleHeaderLine.getID(), sampleHeaderLine.getGenericFields(), sampleNameMapping);
}
}
return sampleNameMapping;
}

public Map<String, String> getSampleMapping(VariantFileHeader header) {
Map<String, String> sampleNameMapping = new HashMap<>();
for (VariantFileHeaderComplexLine line : header.getComplexLines()) {
if (line.getKey().equals(VCFConstants.SAMPLE_HEADER_KEY)) {
getActualSampleName(line.getId(), line.getGenericFields(), sampleNameMapping);
}
}
return sampleNameMapping;
}

private void getActualSampleName(String id, Map<String, String> genericFields, Map<String, String> sampleNameMapping) {
String sampleName = getValueIgnoreCase(genericFields, "SampleName");
if (sampleName == null) {
sampleName = getValueIgnoreCase(genericFields, "SampleId");
}
if (sampleName == null) {
sampleName = getValueIgnoreCase(genericFields, "Name");
}
if (sampleName != null) {
sampleNameMapping.put(id, sampleName);
}
}

private String getValueIgnoreCase(Map<String, String> map, String key) {
for (Map.Entry<String, String> entry : map.entrySet()) {
if (entry.getKey().equalsIgnoreCase(key)) {
String value = entry.getValue();
if (StringUtils.isNotEmpty(value)) {
return value;
}
}
}
return null;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@ public class VariantContextToVariantConverter implements Converter<VariantContex
private final String studyId;
private final String fileId;
private LinkedHashMap<String, Integer> samplesPosition;
private List<String> consequenceTypeFields;

protected Logger logger = LoggerFactory.getLogger(this.getClass().toString());

Expand All @@ -65,9 +64,6 @@ public VariantContextToVariantConverter(String studyId, String fileId, List<Stri
this.studyId = studyId;
this.fileId = fileId;

// TODO this must be parsed from VCF header
consequenceTypeFields = Arrays.asList();

samplesPosition = createSamplesPositionMap(samples);

}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
import org.opencb.biodata.models.variant.metadata.VariantMetadata;
import org.opencb.biodata.models.variant.metadata.VariantStudyMetadata;
import org.opencb.biodata.tools.variant.converters.avro.VCFHeaderToVariantFileHeaderConverter;
import org.opencb.biodata.tools.variant.converters.avro.VCFHeaderToVariantFileMetadataConverter;
import org.opencb.commons.datastore.core.Query;
import org.opencb.commons.utils.FileUtils;
import org.slf4j.Logger;
Expand Down Expand Up @@ -295,11 +296,8 @@ public VariantFileMetadata addFile(String filename, VCFHeader vcfHeader, String
return null;
}

VCFHeaderToVariantFileHeaderConverter headerConverter = new VCFHeaderToVariantFileHeaderConverter();
VariantFileMetadata variantFileMetadata = new VariantFileMetadata();
variantFileMetadata.setId(filename);
variantFileMetadata.setSampleIds(vcfHeader.getSampleNamesInOrder());
variantFileMetadata.setHeader(headerConverter.convert(vcfHeader));
VariantFileMetadata variantFileMetadata = new VCFHeaderToVariantFileMetadataConverter()
.convert(vcfHeader, filename, null).getImpl();
addFile(variantFileMetadata, studyId);
return variantFileMetadata;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,21 +19,19 @@

package org.opencb.biodata.tools.variant.metadata;

import org.apache.commons.compress.utils.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.opencb.biodata.formats.variant.io.VariantReader;
import org.opencb.biodata.formats.variant.vcf4.io.VariantVcfReader;
import org.opencb.biodata.models.metadata.Sample;
import org.opencb.biodata.models.variant.VariantFileMetadata;
import org.opencb.biodata.models.variant.metadata.VariantStudyMetadata;
import org.opencb.biodata.tools.variant.VariantVcfHtsjdkReader;
import org.opencb.commons.utils.FileUtils;

import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Objects;

Expand All @@ -51,7 +49,7 @@ public class VariantMetadataUtils {
public static final String VARIANT_FILE_HEADER = "variantFileHeader";

/**
* Reads the VariantSource from a Vcf file given a file Path
* Reads the VariantFileMetadata from a Vcf file given a file Path
*
* @param path Path to the Vcf file
* @param fileMetadata Optional fileMetadata to fill up
Expand All @@ -61,10 +59,23 @@ public class VariantMetadataUtils {
public static VariantFileMetadata readVariantFileMetadata(Path path, VariantFileMetadata fileMetadata) throws IOException {
Objects.requireNonNull(path);
try (InputStream is = FileUtils.newInputStream(path)) {
return readVariantFileMetadata(new VariantVcfHtsjdkReader(is, fileMetadata.toVariantStudyMetadata("")), fileMetadata);
return readVariantFileMetadata(is, fileMetadata);
}
}

/**
 * Reads the VariantFileMetadata from a Vcf file given an input stream.
 *
 * @param is Vcf input stream. Must not be null
 * @param fileMetadata Optional fileMetadata to fill up. If null, an empty one is created
 * @return The read variant fileMetadata
 * @throws IOException if an I/O error occurs
 */
public static VariantFileMetadata readVariantFileMetadata(InputStream is, VariantFileMetadata fileMetadata) throws IOException {
    Objects.requireNonNull(is);
    // The file metadata is documented as optional, but it is needed to build the reader.
    // Fall back to an empty one instead of failing with a NullPointerException.
    VariantFileMetadata target = fileMetadata == null ? new VariantFileMetadata("", "") : fileMetadata;
    return readVariantFileMetadata(new VariantVcfHtsjdkReader(is, target.toVariantStudyMetadata("")), target);
}

/**
* Reads the VariantFileMetadata from a Variant file given an initialized VariantReader
*
Expand All @@ -82,10 +93,18 @@ public static VariantFileMetadata readVariantFileMetadata(VariantReader reader,
try {
reader.open();
reader.pre();

metadata.setHeader(reader.getVariantFileMetadata().getHeader());
metadata.setSampleIds(reader.getVariantFileMetadata().getSampleIds());
metadata.setStats(reader.getVariantFileMetadata().getStats());
if (reader.getVariantFileMetadata() != metadata) {
metadata.setHeader(reader.getVariantFileMetadata().getHeader());
metadata.setSampleIds(reader.getVariantFileMetadata().getSampleIds());
metadata.setStats(reader.getVariantFileMetadata().getStats());
if (reader.getVariantFileMetadata().getAttributes() != null) {
if (metadata.getAttributes() == null) {
metadata.setAttributes(reader.getVariantFileMetadata().getAttributes());
} else {
metadata.getAttributes().putAll(reader.getVariantFileMetadata().getAttributes());
}
}
}

reader.post();
} finally {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package org.opencb.biodata.tools.variant;
package org.opencb.biodata.tools.variant.metadata;

import org.apache.commons.collections.map.HashedMap;
import org.junit.Before;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
package org.opencb.biodata.tools.variant.metadata;

import org.junit.Test;
import org.opencb.biodata.models.variant.Variant;
import org.opencb.biodata.models.variant.VariantFileMetadata;
import org.opencb.biodata.tools.variant.VariantVcfHtsjdkReader;

import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Paths;
import java.util.Arrays;

import static org.junit.Assert.*;

/**
 * Tests for {@link VariantMetadataUtils}.
 */
public class VariantMetadataUtilsTest {

    /**
     * Reads {@code /sampleMappingName.vcf} twice: first only its metadata, then its variants.
     * In both cases the sample ids must be the names declared in the SAMPLE header lines, while
     * the second field of each sample's data must still hold the original column name.
     */
    @Test
    public void mapSamples() throws IOException {
        final String vcfResource = "/sampleMappingName.vcf";
        final java.util.List<String> expectedSampleIds = Arrays.asList("sample_tumor", "sample_normal", "sample_other");
        final String[] expectedColumnNames = {"TUMOR", "NORMAL", "OTHER"};

        // First pass: metadata only
        VariantFileMetadata fileMetadata;
        try (InputStream metadataStream = getClass().getResourceAsStream(vcfResource)) {
            fileMetadata = VariantMetadataUtils.readVariantFileMetadata(metadataStream, new VariantFileMetadata("1", ""));
            assertEquals(expectedSampleIds, fileMetadata.getSampleIds());
        }

        // Second pass: every variant must expose the samples in the same order with the renamed ids
        try (InputStream variantsStream = getClass().getResourceAsStream(vcfResource)) {
            VariantVcfHtsjdkReader reader = new VariantVcfHtsjdkReader(variantsStream, fileMetadata.toVariantStudyMetadata("s"));
            for (Variant variant : reader) {
                for (int sampleIdx = 0; sampleIdx < expectedColumnNames.length; sampleIdx++) {
                    assertEquals(expectedColumnNames[sampleIdx],
                            variant.getStudies().get(0).getSamples().get(sampleIdx).getData().get(1));
                }
                assertEquals(expectedSampleIds, variant.getStudies().get(0).getOrderedSamplesName());
            }
        }
    }
}
8 changes: 8 additions & 0 deletions biodata-tools/src/test/resources/sampleMappingName.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
##fileformat=VCFv4.2
##SAMPLE=<ID=TUMOR,SampleName=sample_tumor,Description="This is the tumor sample">
##SAMPLE=<ID=NORMAL,SampleId=sample_normal,Description="This is the normal sample">
##SAMPLE=<ID=OTHER,Name=sample_other,Description="This is the another sample">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT TUMOR NORMAL OTHER
chr1 28494 . T C 100 PASS . GT:NAME 0|0:TUMOR 1|0:NORMAL 1|0:OTHER
chr1 99166 . C T 100 PASS . GT:NAME 0|1:TUMOR 0|1:NORMAL 0|1:OTHER
chr1 99580 . T C 100 PASS . GT:NAME 0|1:TUMOR 0|0:NORMAL 1|1:OTHER
Loading

0 comments on commit 2926a2b

Please sign in to comment.