Skip to content

Commit

Permalink
Merge pull request #2427 from opencb/TASK-5878
Browse files Browse the repository at this point in the history
TASK-5878 - Unable to filter by dbSNP (rsIds) and HGVS id in variant browser
  • Loading branch information
j-coll authored Apr 22, 2024
2 parents de9378c + 86f68e6 commit dc01b16
Show file tree
Hide file tree
Showing 22 changed files with 594 additions and 133 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -343,6 +343,7 @@ public List<Variant> getVariants(List<String> variantsStr) {
List<Variant> variants = new ArrayList<>(variantsStr.size());
List<CellBaseDataResult<Variant>> response = null;
try {
// FIXME: This method should call genomic/variant/snp/search
response = checkNulls(cellBaseClient.getVariantClient().get(variantsStr,
new QueryOptions(QueryOptions.INCLUDE,
VariantField.CHROMOSOME.fieldName() + ","
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1192,8 +1192,36 @@ public VariantQueryResult<Variant> getCompoundHeterozygous(String study, String
return get(query, options);
}

public DataResult<Variant> getSampleData(String variant, String study, QueryOptions options) throws StorageEngineException {
return new VariantSampleDataManager(getDBAdaptor()).getSampleData(variant, study, options);
/**
 * Fetches the sample data of one variant within a study.
 * <p>
 * The variant string is first resolved with {@link #getVariant(String)}; the resolved
 * variant is then handed to the manager returned by {@link #getVariantSampleDataManager()}.
 *
 * @param variantStr Variant given as string, in any form accepted by {@link #getVariant(String)}
 * @param study      Study to read the sample data from
 * @param options    Query options forwarded to the sample data manager
 * @return Result produced by the sample data manager
 * @throws StorageEngineException if the sample data manager can not be obtained
 */
public DataResult<Variant> getSampleData(String variantStr, String study, QueryOptions options) throws StorageEngineException {
    final Variant resolvedVariant = getVariant(variantStr);
    final VariantSampleDataManager sampleDataManager = getVariantSampleDataManager();
    return sampleDataManager.getSampleData(resolvedVariant, study, options);
}

/**
 * Resolves a variant from its string form.
 * <p>
 * A value recognised as a variant id is used to build the variant directly. A value recognised
 * as a variant accession is searched through the xref query parameter and must match exactly
 * one variant. Any other value is rejected. In every successful case the id of the returned
 * variant is overwritten with its own {@code toString()} value.
 *
 * @param variantStr Variant id or variant accession
 * @return The resolved variant
 * @throws VariantQueryException if the value is not supported, if the accession matches more
 *                               than one variant, or if it matches none
 */
public Variant getVariant(String variantStr) {
    final Variant resolved;
    if (VariantQueryUtils.isVariantId(variantStr)) {
        resolved = new Variant(variantStr);
    } else {
        // Anything that is neither a variant id nor a variant accession is not supported.
        if (!VariantQueryUtils.isVariantAccession(variantStr)) {
            throw new VariantQueryException("Variant not valid. Variant = '" + variantStr + "'. Supported values:"
                    + " {chr}:{start}:{end}:{ref}:{alt}, rs{id}");
        }

        // Only one result is fetched, but the count is requested so that ambiguous accessions can be detected.
        final Query xrefQuery = new Query(VariantQueryParam.ANNOT_XREF.key(), variantStr);
        final VariantQueryResult<Variant> matches = get(xrefQuery,
                new QueryOptions(QueryOptions.INCLUDE, VariantField.ID).append(QueryOptions.LIMIT, 1).append(QueryOptions.COUNT, true));

        if (matches.getNumMatches() > 1) {
            throw new VariantQueryException("Not unique variant identifier '" + variantStr + "'."
                    + " Found " + matches.getNumMatches() + " results");
        }
        if (matches.getNumResults() != 1) {
            throw VariantQueryException.variantNotFound(variantStr);
        }
        resolved = matches.first();
    }
    resolved.setId(resolved.toString());
    return resolved;
}

/**
 * Builds a new sample data manager on top of the adaptor returned by {@code getDBAdaptor()}.
 * A new instance is created on every call.
 *
 * @return New sample data manager
 * @throws StorageEngineException declared by this method; presumably raised while obtaining the adaptor
 */
protected VariantSampleDataManager getVariantSampleDataManager() throws StorageEngineException {
    final VariantSampleDataManager sampleDataManager = new VariantSampleDataManager(getDBAdaptor());
    return sampleDataManager;
}

public VariantQueryResult<Variant> get(Query query, QueryOptions options) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,15 @@ public VariantQuery sample(String value) {
return sample(VariantQueryUtils.QueryOperation.OR, value);
}

/**
 * Sets the sample filter from a list of values, combined with the OR operation.
 *
 * @param value Sample filter values
 * @return The value returned by the operation-aware overload
 */
public VariantQuery sample(List<String> value) {
    final VariantQueryUtils.QueryOperation defaultOperation = VariantQueryUtils.QueryOperation.OR;
    return sample(defaultOperation, value);
}

/**
 * Sets the sample filter from a list of values, joined with the separator of the given operation.
 *
 * @param operation Operation whose separator is placed between the values
 * @param value     Sample filter values
 * @return This query
 */
public VariantQuery sample(VariantQueryUtils.QueryOperation operation, List<String> value) {
    final String joinedSamples = String.join(operation.separator(), value);
    put(VariantQueryParam.SAMPLE.key(), joinedSamples);
    return this;
}

/**
 * Sets the sample filter from a variable number of values, combined with the OR operation.
 *
 * @param value Sample filter values
 * @return The value returned by the operation-aware overload
 */
public VariantQuery sample(String... value) {
    final VariantQueryUtils.QueryOperation defaultOperation = VariantQueryUtils.QueryOperation.OR;
    return sample(defaultOperation, value);
}
Expand Down Expand Up @@ -178,8 +187,9 @@ public VariantQuery includeSampleId(boolean value) {
put(VariantQueryParam.INCLUDE_SAMPLE_ID.key(), value);
return this;
}
public String includeSampleId() {
return getString(VariantQueryParam.INCLUDE_SAMPLE_ID.key());

/**
 * Reads the includeSampleId parameter of this query as a boolean.
 *
 * @return Value obtained from {@code getBoolean} for the includeSampleId key
 */
public boolean includeSampleId() {
    final String includeSampleIdKey = VariantQueryParam.INCLUDE_SAMPLE_ID.key();
    return getBoolean(includeSampleIdKey);
}

public VariantQuery sampleMetadata(boolean value) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ public final class VariantQueryParam implements QueryParam {
private static final String ACCEPTS_AND_OR = "Accepts AND (" + AND + ") and OR (" + OR + ") operators.";

public static final String ID_DESCR
= "List of IDs, these can be rs IDs (dbSNP) or variants in the format chrom:start:ref:alt, e.g. rs116600158,19:7177679:C:T";
= "List of variant IDs in the format chrom:start:ref:alt, e.g. 19:7177679:C:T";
public static final VariantQueryParam ID = new VariantQueryParam("id", TEXT_ARRAY, ID_DESCR);

public static final String REGION_DESCR
Expand Down Expand Up @@ -121,7 +121,7 @@ public final class VariantQueryParam implements QueryParam {

public static final String INCLUDE_SAMPLE_ID_DESCR
= "Include sampleId on each result";
public static final VariantQueryParam INCLUDE_SAMPLE_ID = new VariantQueryParam("includeSampleId", TEXT_ARRAY, INCLUDE_SAMPLE_ID_DESCR);
public static final VariantQueryParam INCLUDE_SAMPLE_ID = new VariantQueryParam("includeSampleId", BOOLEAN, INCLUDE_SAMPLE_ID_DESCR);

public static final String SAMPLE_METADATA_DESCR
= "Return the samples metadata group by study. Sample names will appear in the same order as their corresponding genotypes.";
Expand Down Expand Up @@ -215,7 +215,7 @@ public final class VariantQueryParam implements QueryParam {

public static final String ANNOT_XREF_DESCR
= "List of any external reference, these can be genes, proteins or variants. "
+ "Accepted IDs include HGNC, Ensembl genes, dbSNP, ClinVar, HPO, Cosmic, ...";
+ "Accepted IDs include HGNC, Ensembl genes, dbSNP, ClinVar, HPO, Cosmic, HGVS ...";
public static final VariantQueryParam ANNOT_XREF = new VariantQueryParam("xref", TEXT_ARRAY, ANNOT_XREF_DESCR);

public static final String GENE_DESCR
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,13 +40,13 @@ public VariantSampleDataManager(VariantDBAdaptor dbAdaptor) {

}

public final DataResult<Variant> getSampleData(String variant, String study, QueryOptions options) {
/**
 * Fetches the sample data of a variant, reading the sample batch size from the options.
 * <p>
 * The batch size is taken from the {@code SAMPLE_BATCH_SIZE} option, falling back to
 * {@code SAMPLE_BATCH_SIZE_DEFAULT} when it is not set.
 *
 * @param variant Variant to fetch the sample data for
 * @param study   Study to read from
 * @param options Query options. A null value is replaced by an empty set of options
 * @return Result of the overload that takes an explicit sample limit
 */
public final DataResult<Variant> getSampleData(Variant variant, String study, QueryOptions options) {
    if (options == null) {
        options = new QueryOptions();
    }
    final int batchSize = options.getInt(SAMPLE_BATCH_SIZE, SAMPLE_BATCH_SIZE_DEFAULT);
    return getSampleData(variant, study, options, batchSize);
}

public final DataResult<Variant> getSampleData(String variant, String study, QueryOptions options, int sampleLimit) {
public final DataResult<Variant> getSampleData(Variant variant, String study, QueryOptions options, int sampleLimit) {
options = options == null ? new QueryOptions() : options;


Expand Down Expand Up @@ -77,7 +77,7 @@ public final DataResult<Variant> getSampleData(String variant, String study, Que
}

protected DataResult<Variant> getSampleData(
String variantStr, String study, QueryOptions options, List<String> includeSamples, Set<String> genotypes,
Variant variant, String study, QueryOptions options, List<String> includeSamples, Set<String> genotypes,
int sampleLimit) {
options = options == null ? new QueryOptions() : options;
Set<VariantField> includeFields = VariantField.getIncludeFields(options);
Expand All @@ -98,7 +98,7 @@ protected DataResult<Variant> getSampleData(
int queries = 0;
while (true) {
queries++;
Query query = new Query(VariantQueryParam.ID.key(), variantStr)
Query query = new Query(VariantQueryParam.ID.key(), variant.toString())
.append(VariantQueryParam.STUDY.key(), study)
.append(VariantQueryParam.INCLUDE_GENOTYPE.key(), options.get(VariantQueryParam.INCLUDE_GENOTYPE.key()))
.append(VariantQueryParam.INCLUDE_SAMPLE_DATA.key(), options.get(VariantQueryParam.INCLUDE_SAMPLE_DATA.key()))
Expand Down Expand Up @@ -130,7 +130,7 @@ protected DataResult<Variant> getSampleData(

DataResult<Variant> result = dbAdaptor.get(query, variantQueryOptions);
if (result.getNumResults() == 0) {
throw VariantQueryException.variantNotFound(variantStr);
throw VariantQueryException.variantNotFound(variant.toString());
}
dbTime += result.getTime();
Variant partialVariant = result.first();
Expand Down Expand Up @@ -199,7 +199,7 @@ protected DataResult<Variant> getSampleData(
}
}

Variant variant = new Variant(variantStr);
variant = new Variant(variant.toString());
variant.setAnnotation(annotation);
StudyEntry studyEntry = new StudyEntry(study);
variant.addStudyEntry(studyEntry);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
package org.opencb.opencga.storage.core.variant.annotation.converters;

import org.apache.commons.collections4.CollectionUtils;
import org.opencb.biodata.models.variant.avro.*;

import java.util.HashSet;
import java.util.List;
import java.util.Set;

/**
 * Helper to collect the cross-reference identifiers contained in a variant annotation.
 */
public class VariantAnnotationModelUtils {

    /**
     * Extracts all the XRefs from a VariantAnnotation object.
     * Includes:
     * - annotation.id
     * - annotation.xrefs.id
     * - annotation.hgvs
     * - annotation.consequenceTypes.geneName
     * - annotation.consequenceTypes.geneId
     * - annotation.consequenceTypes.ensemblGeneId
     * - annotation.consequenceTypes.transcriptId
     * - annotation.consequenceTypes.ensemblTranscriptId
     * - annotation.consequenceTypes.hgvs
     * - annotation.consequenceTypes.proteinVariantAnnotation.uniprotAccession
     * - annotation.consequenceTypes.proteinVariantAnnotation.uniprotName
     * - annotation.consequenceTypes.proteinVariantAnnotation.uniprotVariantId
     * - annotation.consequenceTypes.proteinVariantAnnotation.features.id
     * - annotation.traitAssociation.id
     * - annotation.geneTraitAssociation.hpo
     * - annotation.geneTraitAssociation.id
     * <p>
     * Null lists and null elements inside the lists are skipped. Null and empty identifiers
     * are never part of the result.
     *
     * @param variantAnnotation VariantAnnotation object. May be null
     * @return Set of XRefs. Empty if the annotation is null
     */
    public Set<String> extractXRefs(VariantAnnotation variantAnnotation) {
        Set<String> xrefs = new HashSet<>();

        if (variantAnnotation == null) {
            return xrefs;
        }

        // Null or empty ids may be added along the way; they are removed once at the end.
        xrefs.add(variantAnnotation.getId());

        if (variantAnnotation.getXrefs() != null) {
            for (Xref xref : variantAnnotation.getXrefs()) {
                if (xref != null) {
                    xrefs.add(xref.getId());
                }
            }
        }

        if (variantAnnotation.getHgvs() != null) {
            xrefs.addAll(variantAnnotation.getHgvs());
        }

        List<ConsequenceType> consequenceTypes = variantAnnotation.getConsequenceTypes();
        if (consequenceTypes != null) {
            for (ConsequenceType conseqType : consequenceTypes) {
                if (conseqType == null) {
                    continue;
                }
                xrefs.add(conseqType.getGeneName());
                xrefs.add(conseqType.getGeneId());
                xrefs.add(conseqType.getEnsemblGeneId());
                xrefs.add(conseqType.getTranscriptId());
                xrefs.add(conseqType.getEnsemblTranscriptId());

                if (conseqType.getHgvs() != null) {
                    xrefs.addAll(conseqType.getHgvs());
                }

                addProteinXRefs(conseqType.getProteinVariantAnnotation(), xrefs);
            }
        }

        if (CollectionUtils.isNotEmpty(variantAnnotation.getTraitAssociation())) {
            for (EvidenceEntry evidenceEntry : variantAnnotation.getTraitAssociation()) {
                if (evidenceEntry != null) {
                    xrefs.add(evidenceEntry.getId());
                }
            }
        }

        if (variantAnnotation.getGeneTraitAssociation() != null) {
            for (GeneTraitAssociation geneTrait : variantAnnotation.getGeneTraitAssociation()) {
                if (geneTrait != null) {
                    xrefs.add(geneTrait.getHpo());
                    xrefs.add(geneTrait.getId());
                }
            }
        }

        // Remove empty strings and nulls
        xrefs.remove("");
        xrefs.remove(null);

        return xrefs;
    }

    /**
     * Adds the uniprot identifiers and the protein feature ids of a protein annotation to the given set.
     *
     * @param protVarAnnotation Protein variant annotation. May be null, in which case nothing is added
     * @param xrefs             Set where the identifiers are added
     */
    private static void addProteinXRefs(ProteinVariantAnnotation protVarAnnotation, Set<String> xrefs) {
        if (protVarAnnotation == null) {
            return;
        }

        xrefs.add(protVarAnnotation.getUniprotAccession());
        xrefs.add(protVarAnnotation.getUniprotName());
        xrefs.add(protVarAnnotation.getUniprotVariantId());

        if (protVarAnnotation.getFeatures() != null) {
            for (ProteinFeature proteinFeature : protVarAnnotation.getFeatures()) {
                if (proteinFeature != null) {
                    xrefs.add(proteinFeature.getId());
                }
            }
        }
    }

}
Original file line number Diff line number Diff line change
Expand Up @@ -226,5 +226,16 @@ public List<String> getIDsAndXrefs() {
public boolean isEmpty() {
return genes.isEmpty() && variants.isEmpty() && ids.isEmpty() && otherXrefs.isEmpty();
}

/**
 * Builds a textual representation listing the genes, variants, ids and other xrefs.
 *
 * @return String in the form {@code VariantQueryXref{genes=..., variants=..., ids=..., otherXrefs=...}}
 */
@Override
public String toString() {
    return "VariantQueryXref{"
            + "genes=" + genes
            + ", variants=" + variants
            + ", ids=" + ids
            + ", otherXrefs=" + otherXrefs
            + '}';
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -851,7 +851,11 @@ public static ParsedVariantQuery.VariantQueryXref parseXrefs(Query query) {
if (variant != null) {
xrefs.getVariants().add(variant);
} else {
if (isVariantAccession(value) || isClinicalAccession(value) || isGeneAccession(value)) {
if (isVariantAccession(value)
|| isClinicalAccession(value)
|| isGeneAccession(value)
|| isHGVS(value)
|| isProteinFeatureId(value)) {
xrefs.getOtherXrefs().add(value);
} else {
genes.add(value);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -466,6 +466,20 @@ public static boolean isVariantAccession(String value) {
return value.startsWith("rs") || value.startsWith("VAR_");
}

/**
 * Determines if the given value looks like a HGVS expression.
 * <p>
 * A value is accepted when it contains any of the markers ':c.', ':n.', ':p.' or ':g.'
 *
 * @param value Value to check
 * @return true if the value contains one of the HGVS markers
 */
public static boolean isHGVS(String value) {
    // Plain substring search equivalent to the pattern ':[cnpg]\.'
    // HGVS examples :
    //  - "1:g.65325832G>A"
    //  - "1:g.65325832_65325833insA"
    final String[] hgvsMarkers = {":c.", ":n.", ":p.", ":g."};
    for (String marker : hgvsMarkers) {
        if (value.contains(marker)) {
            return true;
        }
    }
    return false;
}

/**
* Determines if the given value is a known clinical accession or not.
* <p>
Expand Down Expand Up @@ -494,6 +508,18 @@ public static boolean isGeneAccession(String value) {
return isHpo(value) || value.startsWith("OMIM:") || value.startsWith("umls:");
}

/**
 * Determines if the given value is a valid protein feature id.
 * <p>
 * Protein feature id starts with 'PRO_', 'VAR_' or 'VSP_'
 *
 * @param value Value to check
 * @return true if the value starts with one of the protein feature prefixes
 */
public static boolean isProteinFeatureId(String value) {
    // Conditional OR: stop at the first matching prefix instead of always evaluating all three.
    return value.startsWith("PRO_") || value.startsWith("VAR_") || value.startsWith("VSP_");
}

/**
* Determines if the given value is a HPO term or not.
* <p>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ protected boolean shouldGetApproximateCount(QueryOptions options, boolean iterat
// }

protected int getLimit(QueryOptions options) {
return options.getInt(QueryOptions.LIMIT);
return options.getInt(QueryOptions.LIMIT, -1);
}

protected int getSkip(QueryOptions options) {
Expand Down
Loading

0 comments on commit dc01b16

Please sign in to comment.