diff --git a/persistence/persistence-api/src/main/java/org/cbioportal/persistence/MolecularDataRepository.java b/persistence/persistence-api/src/main/java/org/cbioportal/persistence/MolecularDataRepository.java index 61508b03fe7..8696a292413 100644 --- a/persistence/persistence-api/src/main/java/org/cbioportal/persistence/MolecularDataRepository.java +++ b/persistence/persistence-api/src/main/java/org/cbioportal/persistence/MolecularDataRepository.java @@ -14,6 +14,9 @@ public interface MolecularDataRepository { List getGeneMolecularAlterations(String molecularProfileId, List entrezGeneIds, String projection); + Iterable getGeneMolecularAlterationsIterable(String molecularProfileId, List entrezGeneIds, + String projection); + List getGeneMolecularAlterationsInMultipleMolecularProfiles(List molecularProfileIds, List entrezGeneIds, String projection); diff --git a/persistence/persistence-mybatis/src/main/java/org/cbioportal/persistence/mybatis/MolecularDataMapper.java b/persistence/persistence-mybatis/src/main/java/org/cbioportal/persistence/mybatis/MolecularDataMapper.java index c351310d7f9..1473ff45997 100644 --- a/persistence/persistence-mybatis/src/main/java/org/cbioportal/persistence/mybatis/MolecularDataMapper.java +++ b/persistence/persistence-mybatis/src/main/java/org/cbioportal/persistence/mybatis/MolecularDataMapper.java @@ -4,13 +4,14 @@ import org.cbioportal.model.GenesetMolecularAlteration; import java.util.List; +import org.apache.ibatis.cursor.Cursor; public interface MolecularDataMapper { List getCommaSeparatedSampleIdsOfMolecularProfiles(List molecularProfileIds); - List getGeneMolecularAlterations(String molecularProfileId, List entrezGeneIds, - String projection); + Cursor getGeneMolecularAlterations(String molecularProfileId, List entrezGeneIds, + String projection); List getGeneMolecularAlterationsInMultipleMolecularProfiles(List molecularProfileIds, List entrezGeneIds, diff --git 
a/persistence/persistence-mybatis/src/main/java/org/cbioportal/persistence/mybatis/MolecularDataMyBatisRepository.java b/persistence/persistence-mybatis/src/main/java/org/cbioportal/persistence/mybatis/MolecularDataMyBatisRepository.java index e2b8b36ad00..2f3ba89417b 100644 --- a/persistence/persistence-mybatis/src/main/java/org/cbioportal/persistence/mybatis/MolecularDataMyBatisRepository.java +++ b/persistence/persistence-mybatis/src/main/java/org/cbioportal/persistence/mybatis/MolecularDataMyBatisRepository.java @@ -4,9 +4,9 @@ import org.cbioportal.model.GenesetMolecularAlteration; import org.cbioportal.persistence.MolecularDataRepository; import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.transaction.annotation.*; import org.springframework.stereotype.Repository; -import java.util.Arrays; -import java.util.List; +import java.util.*; @Repository public class MolecularDataMyBatisRepository implements MolecularDataRepository { @@ -27,9 +27,27 @@ public List getCommaSeparatedSampleIdsOfMolecularProfiles(List m } @Override + // cursor processing requires a transaction + @Transactional(readOnly=true, propagation=Propagation.NESTED) public List getGeneMolecularAlterations(String molecularProfileId, List entrezGeneIds, String projection) { + List toReturn = new ArrayList(); + Iterable gmasItr = + molecularDataMapper.getGeneMolecularAlterations(molecularProfileId, entrezGeneIds, projection); + for (GeneMolecularAlteration gma : gmasItr) { + toReturn.add(gma); + } + return toReturn; + } + + @Override + // In order to return a cursor/iterator to the service layer, we need a transaction setup in the service + // layer. Currently, the bottom stackframe is CoExpressionService:getCoExpressions. It is there where + // you will find the transaction created. 
+ public Iterable getGeneMolecularAlterationsIterable(String molecularProfileId, + List entrezGeneIds, String projection) { + return molecularDataMapper.getGeneMolecularAlterations(molecularProfileId, entrezGeneIds, projection); } diff --git a/persistence/persistence-mybatis/src/test/java/org/cbioportal/persistence/mybatis/MolecularDataMyBatisRepositoryTest.java b/persistence/persistence-mybatis/src/test/java/org/cbioportal/persistence/mybatis/MolecularDataMyBatisRepositoryTest.java index cb8c681ee54..d246f4d8d73 100644 --- a/persistence/persistence-mybatis/src/test/java/org/cbioportal/persistence/mybatis/MolecularDataMyBatisRepositoryTest.java +++ b/persistence/persistence-mybatis/src/test/java/org/cbioportal/persistence/mybatis/MolecularDataMyBatisRepositoryTest.java @@ -7,6 +7,7 @@ import org.junit.runner.RunWith; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Configurable; +import org.springframework.transaction.annotation.Transactional; import org.springframework.test.context.ContextConfiguration; import org.springframework.test.context.junit4.SpringJUnit4ClassRunner; @@ -43,6 +44,7 @@ public void getCommaSeparatedSampleIdsOfMolecularProfiles() throws Exception { } @Test + @Transactional(readOnly=true) public void getGeneMolecularAlterations() throws Exception { List entrezGeneIds = new ArrayList<>(); diff --git a/service/src/main/java/org/cbioportal/service/MolecularDataService.java b/service/src/main/java/org/cbioportal/service/MolecularDataService.java index 66d79f8c42e..18043f1908c 100644 --- a/service/src/main/java/org/cbioportal/service/MolecularDataService.java +++ b/service/src/main/java/org/cbioportal/service/MolecularDataService.java @@ -23,8 +23,8 @@ List fetchMolecularData(String molecularProfileId, List sampleIds, List entrezGeneIds) throws MolecularProfileNotFoundException; - List getMolecularAlterations(String molecularProfileId, List entrezGeneIds, - String projection) throws 
MolecularProfileNotFoundException; + Iterable getMolecularAlterations(String molecularProfileId, List entrezGeneIds, + String projection) throws MolecularProfileNotFoundException; Integer getNumberOfSamplesInMolecularProfile(String molecularProfileId); diff --git a/service/src/main/java/org/cbioportal/service/impl/CoExpressionServiceImpl.java b/service/src/main/java/org/cbioportal/service/impl/CoExpressionServiceImpl.java index a7c3b0c719d..4c2e001cfe8 100644 --- a/service/src/main/java/org/cbioportal/service/impl/CoExpressionServiceImpl.java +++ b/service/src/main/java/org/cbioportal/service/impl/CoExpressionServiceImpl.java @@ -6,6 +6,8 @@ import org.apache.commons.math3.stat.correlation.SpearmansCorrelation; import org.cbioportal.model.Gene; import org.cbioportal.model.MolecularAlteration; +import org.cbioportal.model.GeneMolecularAlteration; +import org.cbioportal.model.GenesetMolecularAlteration; import org.cbioportal.model.Geneset; import org.cbioportal.model.MolecularData; import org.cbioportal.model.MolecularProfile; @@ -26,6 +28,7 @@ import org.cbioportal.service.exception.SampleListNotFoundException; import org.cbioportal.service.CoExpressionService; import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.transaction.annotation.Transactional; import org.springframework.stereotype.Service; import java.math.BigDecimal; @@ -61,19 +64,8 @@ public class CoExpressionServiceImpl implements CoExpressionService { private SampleService sampleService; @Override - public List getCoExpressions(String molecularProfileId, String sampleListId, String geneticEntityId, - CoExpression.GeneticEntityType geneticEntityType, Double threshold) - throws MolecularProfileNotFoundException, GenesetNotFoundException, GeneNotFoundException { - - List sampleIds = sampleListRepository.getAllSampleIdsInSampleList(sampleListId); - if (sampleIds.isEmpty()) { - return Collections.emptyList(); - } - - return fetchCoExpressions(molecularProfileId, sampleIds, 
geneticEntityId, geneticEntityType, threshold); - } - - @Override + // transaction needs to be set up here in order to return Iterable from molecularDataService in fetchCoExpressions + @Transactional(readOnly=true) public List getCoExpressions(String geneticEntityId, CoExpression.GeneticEntityType geneticEntityType, String sampleListId, String molecularProfileIdA, String molecularProfileIdB, Double threshold) throws MolecularProfileNotFoundException, SampleListNotFoundException, GenesetNotFoundException, @@ -117,31 +109,49 @@ public List getCoExpressions(String geneticEntityId, CoExpression. return computedCoExpressions; } + @Override + public List getCoExpressions(String molecularProfileId, String sampleListId, String geneticEntityId, + CoExpression.GeneticEntityType geneticEntityType, Double threshold) + throws MolecularProfileNotFoundException, GenesetNotFoundException, GeneNotFoundException { + + List sampleIds = sampleListRepository.getAllSampleIdsInSampleList(sampleListId); + if (sampleIds.isEmpty()) { + return Collections.emptyList(); + } + + return fetchCoExpressions(molecularProfileId, sampleIds, geneticEntityId, geneticEntityType, threshold); + } + @Override public List fetchCoExpressions(String molecularProfileId, List sampleIds, String queryGeneticEntityId, CoExpression.GeneticEntityType geneticEntityType, Double threshold) throws MolecularProfileNotFoundException, GenesetNotFoundException, GeneNotFoundException { - - List molecularAlterations = null; + + // For the purpose of the CoExpression computation, we separate the MolecularAlteration + // (genetic_alteration table record) for the query gene/geneset from the MolecularAlteration(s) + // for the remaining genes/geneset in the profile.
+ MolecularAlteration queryMolecularDataList = null; + Iterable maItr = null; if (geneticEntityType.equals(GeneticEntityType.GENE)) { - molecularAlterations = molecularDataService.getMolecularAlterations( - molecularProfileId, null, "SUMMARY"); + List queryGeneticEntityIds = Arrays.asList(Integer.valueOf(queryGeneticEntityId)); + maItr = molecularDataService.getMolecularAlterations(molecularProfileId, queryGeneticEntityIds, "SUMMARY"); } else if (geneticEntityType.equals(GeneticEntityType.GENESET)) { - molecularAlterations = genesetDataService.getGenesetAlterations( - molecularProfileId, null); + List queryGeneticEntityIds = Arrays.asList(queryGeneticEntityId); + maItr = genesetDataService.getGenesetAlterations(molecularProfileId, queryGeneticEntityIds); + } + for (MolecularAlteration ma : maItr) { + queryMolecularDataList = ma; } - - Map molecularDataMap = molecularAlterations.stream() - .collect(Collectors.toMap(MolecularAlteration::getStableId, Function.identity())); - MolecularAlteration queryMolecularDataList = molecularDataMap.remove(queryGeneticEntityId); - - List coExpressionList = new ArrayList<>(); - if (queryMolecularDataList == null) { - return coExpressionList; + return Collections.emptyList(); } + // These next few lines are used to build a map of internal sample ids to + // indices into the genetic_alteration.VALUES column. Recall this column + // of the genetic_alteration table is a comma separated list of scalar values. + // Each value in this list is associated with a sample at the same position found in + // the genetic_profile_samples.ORDERED_SAMPLE_LIST column. 
String commaSeparatedSampleIdsOfMolecularProfile = molecularDataRepository .getCommaSeparatedSampleIdsOfMolecularProfile(molecularProfileId); List internalSampleIds = Arrays.stream(commaSeparatedSampleIdsOfMolecularProfile.split(",")) @@ -151,6 +161,9 @@ public List fetchCoExpressions(String molecularProfileId, List studyIds = new ArrayList<>(); sampleIds.forEach(s -> studyIds.add(molecularProfile.getCancerStudyIdentifier())); @@ -160,29 +173,52 @@ public List fetchCoExpressions(String molecularProfileId, List includedIndexes = new HashSet<>(); for (Integer internalSampleId : internalSampleIds) { if (selectedSampleIdsMap.containsKey(internalSampleId)) { includedIndexes.add(internalSampleIdToIndexMap.get(internalSampleId)); } } - + Boolean isMolecularProfileBOfGenesetType = molecularProfile.getMolecularAlterationType() - .equals(MolecularProfile.MolecularAlterationType.GENESET_SCORE); + .equals(MolecularProfile.MolecularAlterationType.GENESET_SCORE); + + + // These next few lines filter out genetic_alteration values from the query gene/geneset + // genetic_alteration.VALUES column by considering only the indices of the samples in the user query. 
List queryValues = Arrays.asList(queryMolecularDataList.getSplitValues()); List includedQueryValues = includedIndexes.stream().map(index -> queryValues.get(index)) - .collect(Collectors.toList()); + .collect(Collectors.toList()); - Map> values = new HashMap>(); - for (String entityId : molecularDataMap.keySet()) { - List internalValues = new ArrayList<>( - Arrays.asList(molecularDataMap.get(entityId).getSplitValues())); - List includedInternalValues = includedIndexes.stream().map(index -> internalValues.get(index)) - .collect(Collectors.toList()); - values.put(entityId, includedInternalValues); + // Get an iterator to all the MolecularAlteration (genetic_alteration table records) in the profile + if (geneticEntityType.equals(GeneticEntityType.GENE)) { + maItr = molecularDataService.getMolecularAlterations(molecularProfileId, null, "SUMMARY"); + } else if (geneticEntityType.equals(GeneticEntityType.GENESET)) { + maItr = genesetDataService.getGenesetAlterations(molecularProfileId, null); } - coExpressionList = computeCoExpressions(values, includedQueryValues, isMolecularProfileBOfGenesetType, threshold); - return coExpressionList; + + // For each MolecularAlteration in the profile, compute a CoExpression to return. + // If the MolecularAlteration is for the query gene/geneset, skip it. Otherwise, + // filter out genetic_alteration values from genetic_alteration.VALUES + // by considering only the indices of the samples in the user query.
+ List toReturn = new ArrayList<>(); + for (MolecularAlteration ma : maItr) { + String entityId = ma.getStableId(); + if (entityId.equals(queryGeneticEntityId)) { + continue; + } + List internalValues = new ArrayList<>(Arrays.asList(ma.getSplitValues())); + List values = includedIndexes.stream().map(index -> internalValues.get(index)).collect(Collectors.toList()); + CoExpression ce = computeCoExpressions(entityId, values, includedQueryValues, isMolecularProfileBOfGenesetType, threshold); + if (ce != null) { + toReturn.add(ce); + } + } + + return toReturn; } @Override @@ -222,100 +258,92 @@ public List fetchCoExpressions(String geneticEntityId, private List computeCoExpressionsFromMolecularData(List molecularDataListB, Boolean isMolecularProfileBOfGenesetType, List molecularDataListA, - String queryGeneticEntityId, Double threshold) throws GenesetNotFoundException, GeneNotFoundException - { - + String queryGeneticEntityId, Double threshold) throws GenesetNotFoundException, GeneNotFoundException { + Map> molecularDataMapA = molecularDataListA.stream() .collect(Collectors.groupingBy(MolecularData::getStableId)); Map> molecularDataMapB = molecularDataListB.stream() .collect(Collectors.groupingBy(MolecularData::getStableId)); - - List coExpressionList = new ArrayList<>(); - + if (!molecularDataMapA.keySet().contains(queryGeneticEntityId)) { - return coExpressionList; + return Collections.emptyList(); } List finalMolecularDataListA = (List)molecularDataMapA.remove(queryGeneticEntityId); if (molecularDataMapB.get(queryGeneticEntityId) != null) { List finalMolecularDataListB = (List)molecularDataMapB.remove(queryGeneticEntityId); if (finalMolecularDataListB == null) { - return coExpressionList; + return Collections.emptyList(); } } - Map> values = new HashMap>(); + List coExpressionList = new ArrayList<>(); + List valuesB = finalMolecularDataListA.stream().map(g -> g.getValue()).collect(Collectors.toList()); for (String entityId : molecularDataMapB.keySet()) { List 
internalValues = molecularDataMapB.get(entityId).stream().map(g -> g.getValue()) .collect(Collectors.toList()); - values.put(entityId, internalValues); + CoExpression co = computeCoExpressions(entityId, internalValues, valuesB, isMolecularProfileBOfGenesetType, threshold); + if (co != null) { + coExpressionList.add(co); + } } - List valuesB = finalMolecularDataListA.stream().map(g -> g.getValue()).collect(Collectors.toList()); - coExpressionList = computeCoExpressions(values, valuesB, isMolecularProfileBOfGenesetType, threshold); return coExpressionList; } - private List computeCoExpressions(Map> valuesA, List valuesB, + private CoExpression computeCoExpressions(String entityId, List valuesA, List valuesB, Boolean isMolecularProfileBOfGenesetType, Double threshold) throws GenesetNotFoundException, GeneNotFoundException { - - - List coExpressionList = new ArrayList<>(); - for (String entityId : valuesA.keySet()) { - List values = valuesA.get(entityId); - List valuesBCopy = new ArrayList<>(valuesB); - - List valuesToRemove = new ArrayList<>(); - for (int i = 0; i < valuesBCopy.size(); i++) { - if (!NumberUtils.isNumber(valuesBCopy.get(i)) || !NumberUtils.isNumber(values.get(i))) { - valuesToRemove.add(i); - } - } - for (int i = 0; i < valuesToRemove.size(); i++) { - int valueToRemove = valuesToRemove.get(i) - i; - valuesBCopy.remove(valueToRemove); - values.remove(valueToRemove); - } - - CoExpression coExpression = new CoExpression(); - coExpression.setGeneticEntityId(entityId); - if (isMolecularProfileBOfGenesetType) { - Geneset geneset = genesetService.getGeneset(entityId); - coExpression.setCytoband("-"); - coExpression.setGeneticEntityName(geneset.getName()); - } else { - Gene gene = geneService.getGene(entityId); - coExpression.setCytoband(gene.getCytoband()); - coExpression.setGeneticEntityName(gene.getHugoGeneSymbol()); + List valuesACopy = new ArrayList<>(valuesA); + List valuesBCopy = new ArrayList<>(valuesB); + + List valuesToRemove = new ArrayList<>(); + 
for (int i = 0; i < valuesBCopy.size(); i++) { + if (!NumberUtils.isNumber(valuesBCopy.get(i)) || !NumberUtils.isNumber(valuesACopy.get(i))) { + valuesToRemove.add(i); } - + } - double[] valuesBNumber = valuesBCopy.stream().mapToDouble(Double::parseDouble).toArray(); - double[] valuesNumber = values.stream().mapToDouble(Double::parseDouble).toArray(); + for (int i = 0; i < valuesToRemove.size(); i++) { + int valueToRemove = valuesToRemove.get(i) - i; + valuesBCopy.remove(valueToRemove); + valuesACopy.remove(valueToRemove); + } - if (valuesNumber.length <= 2) { - continue; - } - - double[][] arrays = new double[2][valuesNumber.length]; - arrays[0] = valuesBNumber; - arrays[1] = valuesNumber; - SpearmansCorrelation spearmansCorrelation = new SpearmansCorrelation((new Array2DRowRealMatrix(arrays, false)).transpose()); - - double spearmansValue = spearmansCorrelation.correlation(valuesBNumber, valuesNumber); - if (Double.isNaN(spearmansValue) || Math.abs(spearmansValue) < threshold) { - continue; - } - coExpression.setSpearmansCorrelation(BigDecimal.valueOf(spearmansValue)); + CoExpression coExpression = new CoExpression(); + coExpression.setGeneticEntityId(entityId); + if (isMolecularProfileBOfGenesetType) { + Geneset geneset = genesetService.getGeneset(entityId); + coExpression.setCytoband("-"); + coExpression.setGeneticEntityName(geneset.getName()); + } else { + Gene gene = geneService.getGene(entityId); + coExpression.setCytoband(gene.getCytoband()); + coExpression.setGeneticEntityName(gene.getHugoGeneSymbol()); + } + + double[] valuesBNumber = valuesBCopy.stream().mapToDouble(Double::parseDouble).toArray(); + double[] valuesANumber = valuesACopy.stream().mapToDouble(Double::parseDouble).toArray(); - RealMatrix resultMatrix = spearmansCorrelation.getRankCorrelation().getCorrelationPValues(); - coExpression.setpValue(BigDecimal.valueOf(resultMatrix.getEntry(0, 1))); - - coExpressionList.add(coExpression); + if (valuesANumber.length <= 2) { + return null; } - - 
return coExpressionList; + + double[][] arrays = new double[2][valuesANumber.length]; + arrays[0] = valuesBNumber; + arrays[1] = valuesANumber; + SpearmansCorrelation spearmansCorrelation = new SpearmansCorrelation((new Array2DRowRealMatrix(arrays, false)).transpose()); + + double spearmansValue = spearmansCorrelation.correlation(valuesBNumber, valuesANumber); + if (Double.isNaN(spearmansValue) || Math.abs(spearmansValue) < threshold) { + return null; + } + coExpression.setSpearmansCorrelation(BigDecimal.valueOf(spearmansValue)); + + RealMatrix resultMatrix = spearmansCorrelation.getRankCorrelation().getCorrelationPValues(); + coExpression.setpValue(BigDecimal.valueOf(resultMatrix.getEntry(0, 1))); + + return coExpression; } } diff --git a/service/src/main/java/org/cbioportal/service/impl/MolecularDataServiceImpl.java b/service/src/main/java/org/cbioportal/service/impl/MolecularDataServiceImpl.java index 793ad1f28bb..f3486fb1d31 100644 --- a/service/src/main/java/org/cbioportal/service/impl/MolecularDataServiceImpl.java +++ b/service/src/main/java/org/cbioportal/service/impl/MolecularDataServiceImpl.java @@ -16,12 +16,7 @@ import org.springframework.security.access.prepost.PreAuthorize; import org.springframework.stereotype.Service; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; +import java.util.*; import java.util.function.Function; import java.util.stream.Collectors; @@ -122,12 +117,12 @@ public BaseMeta fetchMetaMolecularData(String molecularProfileId, List s } @Override - public List getMolecularAlterations(String molecularProfileId, - List entrezGeneIds, String projection) + public Iterable getMolecularAlterations(String molecularProfileId, + List entrezGeneIds, String projection) throws MolecularProfileNotFoundException { validateMolecularProfile(molecularProfileId); - return molecularDataRepository.getGeneMolecularAlterations(molecularProfileId, 
entrezGeneIds, projection); + return molecularDataRepository.getGeneMolecularAlterationsIterable(molecularProfileId, entrezGeneIds, projection); } @Override