diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/rga/GeneRgaConverter.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/rga/GeneRgaConverter.java index bfc61a2fab2..84a0203a155 100644 --- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/rga/GeneRgaConverter.java +++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/rga/GeneRgaConverter.java @@ -19,7 +19,7 @@ public class GeneRgaConverter extends AbstractRgaConverter { static { CONVERTER_MAP = new HashMap<>(); // We always include individual id in the response because we always want to return the numIndividuals populated - CONVERTER_MAP.put("id", Arrays.asList(RgaDataModel.GENE_ID, RgaDataModel.INDIVIDUAL_ID)); + CONVERTER_MAP.put("id", Arrays.asList(RgaDataModel.GENE_ID, RgaDataModel.INDIVIDUAL_ID, RgaDataModel.CH_PAIRS)); CONVERTER_MAP.put("name", Arrays.asList(RgaDataModel.GENE_ID, RgaDataModel.GENE_NAME, RgaDataModel.INDIVIDUAL_ID)); CONVERTER_MAP.put("chromosome", Arrays.asList(RgaDataModel.GENE_ID, RgaDataModel.CHROMOSOME, RgaDataModel.INDIVIDUAL_ID)); CONVERTER_MAP.put("start", Arrays.asList(RgaDataModel.GENE_ID, RgaDataModel.START, RgaDataModel.INDIVIDUAL_ID)); diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/rga/RgaEngine.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/rga/RgaEngine.java index d623dcf3dc5..1251874eb1e 100644 --- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/rga/RgaEngine.java +++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/rga/RgaEngine.java @@ -192,7 +192,7 @@ private void fixIndividualOptions(QueryOptions queryOptions, Query query, SolrQu public RgaIterator geneQuery(String collection, Query query, QueryOptions queryOptions) throws RgaException { SolrQuery solrQuery = parser.parseQuery(query); fixGeneOptions(queryOptions, query, solrQuery); - solrQuery.setRows(Integer.MAX_VALUE); + solrQuery.setRows(queryOptions.getInt(QueryOptions.LIMIT, Integer.MAX_VALUE)); try { return new RgaIterator(solrManager.getSolrClient(), collection, solrQuery); } catch (SolrServerException e) { @@ -283,13 +283,15 @@ public long count(String collection, Query query) throws RgaException, IOExcepti public DataResult joinFacetQuery(String collection, String externalCollection, Query query, Query externalQuery, QueryOptions queryOptions) throws RgaException, IOException { SolrQuery mainSolrQuery = parser.parseAuxQuery(query); - SolrQuery externalSolrQuery = parser.parseQuery(externalQuery); - - if (externalSolrQuery.getFilterQueries() != null && externalSolrQuery.getFilterQueries().length > 0) { - String externalQueryStr = StringUtils.join(externalSolrQuery.getFilterQueries(), " AND "); - mainSolrQuery.set("v1", externalQueryStr); - mainSolrQuery.addFilterQuery("{!join from=" + RgaDataModel.VARIANTS + " to=" + AuxiliarRgaDataModel.ID - + " fromIndex=" + externalCollection + " v=$v1}"); + if (!externalQuery.isEmpty()) { + SolrQuery externalSolrQuery = parser.parseQuery(externalQuery); + + if (externalSolrQuery.getFilterQueries() != null && externalSolrQuery.getFilterQueries().length > 0) { + String externalQueryStr = StringUtils.join(externalSolrQuery.getFilterQueries(), " AND "); + mainSolrQuery.set("v1", externalQueryStr); + mainSolrQuery.addFilterQuery("{!join from=" + RgaDataModel.VARIANTS + " to=" + AuxiliarRgaDataModel.ID + + " fromIndex=" + externalCollection + " v=$v1}"); + } } return facetedQuery(collection, mainSolrQuery, queryOptions); diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/rga/RgaManager.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/rga/RgaManager.java index c534e03d9ec..848aca3796a 100644 --- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/rga/RgaManager.java +++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/rga/RgaManager.java @@ -2,6 +2,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectReader; +import org.apache.commons.codec.digest.DigestUtils; import org.apache.commons.collections4.CollectionUtils; import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.time.StopWatch; @@ -67,18 +68,25 @@ public class RgaManager implements AutoCloseable { private final IndividualRgaConverter individualRgaConverter; private final GeneRgaConverter geneConverter; + private final VariantRgaConverter variantConverter; + private static final RgaQueryParams.CompHetQueryMode COMP_HET_QUERY_MODE = RgaQueryParams.CompHetQueryMode.PAIR; + private final Logger logger; private static final int KNOCKOUT_INSERT_BATCH_SIZE = 25; + private ConcurrentHashMap> cacheMap; + private final int CACHE_SIZE; + private static final int DEFAULT_CACHE_SIZE = 1000; + public RgaManager(CatalogManager catalogManager, VariantStorageManager variantStorageManager) { this.catalogManager = catalogManager; this.storageConfiguration = variantStorageManager.getStorageConfiguration(); // TODO: Add CompHetQueryMode to configuration file in v2.5.0 - this.rgaEngine = new RgaEngine(this.storageConfiguration, RgaQueryParams.CompHetQueryMode.PAIR); + this.rgaEngine = new RgaEngine(this.storageConfiguration, COMP_HET_QUERY_MODE); this.variantStorageManager = variantStorageManager; this.individualRgaConverter = new IndividualRgaConverter(); @@ -86,6 +94,11 @@ public RgaManager(CatalogManager catalogManager, VariantStorageManager variantSt this.variantConverter = new VariantRgaConverter(); this.logger = LoggerFactory.getLogger(getClass()); + + this.cacheMap = new ConcurrentHashMap<>(); + this.CACHE_SIZE = storageConfiguration.getRga().getCacheSize() > 0 + ? storageConfiguration.getRga().getCacheSize() + : DEFAULT_CACHE_SIZE; } // Visible for testing @@ -100,10 +113,14 @@ public RgaManager(CatalogManager catalogManager, VariantStorageManager variantSt this.variantConverter = new VariantRgaConverter(); this.logger = LoggerFactory.getLogger(getClass()); + + this.cacheMap = new ConcurrentHashMap<>(); + this.CACHE_SIZE = storageConfiguration.getRga().getCacheSize() > 0 + ? storageConfiguration.getRga().getCacheSize() + : DEFAULT_CACHE_SIZE; } // Data load - public void index(String studyStr, String fileStr, String token) throws CatalogException, RgaException, IOException { File file = catalogManager.getFileManager().get(studyStr, fileStr, FileManager.INCLUDE_FILE_URI_PATH, token).first(); Path filePath = Paths.get(file.getUri()); @@ -182,7 +199,7 @@ private void load(String study, Path file, String token) throws RgaException { writer, ParallelTaskRunner.Config.builder() .setBatchSize(1) - .setNumTasks(2) // Write is definitely slower than process. More threads won't help much. + .setNumTasks(1) // Write is definitely slower than process. More threads won't help much. .build() ); @@ -505,14 +522,20 @@ public OpenCGAResult updateRgaInternalIndexStatus(String studyStr, String public OpenCGAResult individualQuery(String studyStr, Query query, QueryOptions options, String token) throws CatalogException, IOException, RgaException { + StopWatch stopWatch = StopWatch.createStarted(); + OpenCGAResult cacheResults = getCacheResults("individualQuery", studyStr, query, options, stopWatch); + if (cacheResults != null) { + return cacheResults; + } + Study study = catalogManager.getStudyManager().get(studyStr, QueryOptions.empty(), token).first(); String collection = getMainCollectionName(study.getFqn()); - StopWatch stopWatch = new StopWatch(); - stopWatch.start(); + Query finalQuery = parseQuery(query); + Preprocess preprocess; try { - preprocess = individualQueryPreprocess(study, query, options, token); + preprocess = individualQueryPreprocess(study, finalQuery, options, token); } catch (RgaException e) { if (RgaException.NO_RESULTS_FOUND.equals(e.getMessage())) { stopWatch.stop(); @@ -524,7 +547,7 @@ public OpenCGAResult individualQuery(String studyStr, Quer } VariantDBIterator variantDBIterator = VariantDBIterator.EMPTY_ITERATOR; - if (query.containsKey(RgaQueryParams.VARIANTS.key())) { + if (finalQuery.containsKey(RgaQueryParams.VARIANTS.key())) { try { variantDBIterator = variantStorageQuery(studyStr, preprocess.getQuery().getAsStringList(RgaQueryParams.SAMPLE_ID.key()), preprocess.getQuery(), QueryOptions.empty(), token); @@ -578,11 +601,18 @@ public OpenCGAResult individualQuery(String studyStr, Quer result.setEvents(Collections.singletonList(preprocess.getEvent())); } + cacheResults("individualQuery", studyStr, query, options, stopWatch, result); return result; } public OpenCGAResult geneQuery(String studyStr, Query query, QueryOptions options, String token) throws CatalogException, IOException, RgaException { + StopWatch stopWatch = StopWatch.createStarted(); + OpenCGAResult cacheResults = getCacheResults("geneQuery", studyStr, query, options, stopWatch); + if (cacheResults != null) { + return cacheResults; + } + Study study = catalogManager.getStudyManager().get(studyStr, QueryOptions.empty(), token).first(); String userId = catalogManager.getUserManager().getUserId(token); String collection = getMainCollectionName(study.getFqn()); @@ -590,16 +620,13 @@ public OpenCGAResult geneQuery(String studyStr, Query query, throw new RgaException("Missing RGA indexes for study '" + study.getFqn() + "' or solr server not alive"); } - StopWatch stopWatch = new StopWatch(); - stopWatch.start(); - ExecutorService executor = Executors.newFixedThreadPool(4); QueryOptions queryOptions = setDefaultLimit(options); List includeIndividuals = queryOptions.getAsStringList(RgaQueryParams.INCLUDE_INDIVIDUAL); Boolean isOwnerOrAdmin = catalogManager.getAuthorizationManager().isOwnerOrAdmin(study.getUid(), userId); - Query auxQuery = query != null ? new Query(query) : new Query(); + Query finalQuery = parseQuery(query); // Get number of matches Future numMatchesFuture = null; @@ -607,7 +634,7 @@ public OpenCGAResult geneQuery(String studyStr, Query query, numMatchesFuture = executor.submit(() -> { QueryOptions facetOptions = new QueryOptions(QueryOptions.FACET, "unique(" + RgaQueryParams.GENE_ID.key() + ")"); try { - DataResult result = rgaEngine.facetedQuery(collection, auxQuery, facetOptions); + DataResult result = rgaEngine.facetedQuery(collection, finalQuery, facetOptions); return ((Number) result.first().getAggregationValues().get(0)).intValue(); } catch (Exception e) { logger.error("Could not obtain the count: {}", e.getMessage(), e); @@ -618,8 +645,8 @@ public OpenCGAResult geneQuery(String studyStr, Query query, List geneIds; try { - geneIds = getGeneIds(collection, auxQuery, queryOptions); - auxQuery.put(RgaQueryParams.GENE_ID.key(), geneIds); + geneIds = getGeneIds(collection, finalQuery, queryOptions); + finalQuery.put(RgaQueryParams.GENE_ID.key(), geneIds); } catch (RgaException e) { if (RgaException.NO_RESULTS_FOUND.equals(e.getMessage())) { return OpenCGAResult.empty(RgaKnockoutByGene.class, (int) stopWatch.getTime(TimeUnit.MILLISECONDS)); @@ -641,7 +668,7 @@ public OpenCGAResult geneQuery(String studyStr, Query query, includeSampleIds = new HashSet<>((List) authorisedSampleIdResult.getResults()); } else { // 2. Check permissions - DataResult result = rgaEngine.facetedQuery(collection, auxQuery, + DataResult result = rgaEngine.facetedQuery(collection, finalQuery, new QueryOptions(QueryOptions.FACET, RgaDataModel.SAMPLE_ID).append(QueryOptions.LIMIT, -1)); if (result.getNumResults() == 0) { stopWatch.stop(); @@ -674,7 +701,7 @@ public OpenCGAResult geneQuery(String studyStr, Query query, includeSampleIds = new HashSet<>((List) sampleResult.getResults()); } - RgaIterator rgaIterator = rgaEngine.geneQuery(collection, auxQuery, queryOptions); + RgaIterator rgaIterator = rgaEngine.geneQuery(collection, finalQuery, queryOptions); int skipIndividuals = queryOptions.getInt(RgaQueryParams.SKIP_INDIVIDUAL); int limitIndividuals = queryOptions.getInt(RgaQueryParams.LIMIT_INDIVIDUAL, RgaQueryParams.DEFAULT_INDIVIDUAL_LIMIT); @@ -693,6 +720,7 @@ public OpenCGAResult geneQuery(String studyStr, Query query, knockoutResult.setNumMatches(-1); } if (isOwnerOrAdmin && includeSampleIds.isEmpty()) { + cacheResults("geneQuery", studyStr, query, options, stopWatch, knockoutResult); return knockoutResult; } else { // 5. Filter out individual or samples for which user does not have permissions @@ -706,12 +734,19 @@ public OpenCGAResult geneQuery(String studyStr, Query query, knockout.setIndividuals(individualList); } + cacheResults("geneQuery", studyStr, query, options, stopWatch, knockoutResult); return knockoutResult; } } public OpenCGAResult variantQuery(String studyStr, Query query, QueryOptions options, String token) throws CatalogException, IOException, RgaException { + StopWatch stopWatch = StopWatch.createStarted(); + OpenCGAResult cacheResults = getCacheResults("variantQuery", studyStr, query, options, stopWatch); + if (cacheResults != null) { + return cacheResults; + } + Study study = catalogManager.getStudyManager().get(studyStr, QueryOptions.empty(), token).first(); String userId = catalogManager.getUserManager().getUserId(token); String collection = getMainCollectionName(study.getFqn()); @@ -723,22 +758,17 @@ public OpenCGAResult variantQuery(String studyStr, Query quer throw new RgaException("Missing auxiliar RGA collection for study '" + study.getFqn() + "'"); } - StopWatch stopWatch = new StopWatch(); - stopWatch.start(); - ExecutorService executor = Executors.newFixedThreadPool(4); - QueryOptions queryOptions = setDefaultLimit(options); - List includeIndividuals = queryOptions.getAsStringList(RgaQueryParams.INCLUDE_INDIVIDUAL); Boolean isOwnerOrAdmin = catalogManager.getAuthorizationManager().isOwnerOrAdmin(study.getUid(), userId); - Query auxQuery = query != null ? new Query(query) : new Query(); + Query finalQuery = parseQuery(query); ResourceIds resourceIds; try { - resourceIds = getVariantIds(collection, auxCollection, auxQuery, queryOptions, executor); - auxQuery.put(RgaDataModel.VARIANTS, resourceIds.getIds()); + resourceIds = getVariantIds(collection, auxCollection, finalQuery, queryOptions, executor); + finalQuery.put(RgaDataModel.VARIANTS, resourceIds.getIds()); } catch (RgaException e) { if (RgaException.NO_RESULTS_FOUND.equals(e.getMessage())) { return OpenCGAResult.empty(KnockoutByVariant.class, (int) stopWatch.getTime(TimeUnit.MILLISECONDS)); @@ -759,7 +789,7 @@ public OpenCGAResult variantQuery(String studyStr, Query quer includeSampleIds = new HashSet<>((List) authorisedSampleIdResult.getResults()); } else { // 2. Check permissions - DataResult result = rgaEngine.facetedQuery(collection, auxQuery, + DataResult result = rgaEngine.facetedQuery(collection, finalQuery, new QueryOptions(QueryOptions.FACET, RgaDataModel.SAMPLE_ID).append(QueryOptions.LIMIT, -1)); if (result.getNumResults() == 0) { stopWatch.stop(); @@ -794,10 +824,10 @@ public OpenCGAResult variantQuery(String studyStr, Query quer } Future variantFuture = executor.submit( - () -> variantStorageQuery(study.getFqn(), new ArrayList<>(includeSampleIds), auxQuery, options, token) + () -> variantStorageQuery(study.getFqn(), new ArrayList<>(includeSampleIds), finalQuery, options, token) ); - Future rgaIteratorFuture = executor.submit(() -> rgaEngine.variantQuery(collection, auxQuery, queryOptions)); + Future rgaIteratorFuture = executor.submit(() -> rgaEngine.variantQuery(collection, finalQuery, queryOptions)); VariantDBIterator variantDBIterator; try { @@ -818,7 +848,7 @@ public OpenCGAResult variantQuery(String studyStr, Query quer // 4. Solr gene query List knockoutResultList = variantConverter.convertToDataModelType(rgaIterator, variantDBIterator, - auxQuery.getAsStringList(RgaQueryParams.VARIANTS.key()), includeIndividuals, skipIndividuals, limitIndividuals); + finalQuery.getAsStringList(RgaQueryParams.VARIANTS.key()), includeIndividuals, skipIndividuals, limitIndividuals); int time = (int) stopWatch.getTime(TimeUnit.MILLISECONDS); OpenCGAResult knockoutResult = new OpenCGAResult<>(time, Collections.emptyList(), knockoutResultList.size(), @@ -832,6 +862,7 @@ public OpenCGAResult variantQuery(String studyStr, Query quer knockoutResult.setNumMatches(-1); } if (isOwnerOrAdmin && includeSampleIds.isEmpty()) { + cacheResults("variantQuery", studyStr, query, options, stopWatch, knockoutResult); return knockoutResult; } else { // 5. Filter out individual or samples for which user does not have permissions @@ -845,6 +876,7 @@ public OpenCGAResult variantQuery(String studyStr, Query quer knockout.setIndividuals(individualList); } + cacheResults("variantQuery", studyStr, query, options, stopWatch, knockoutResult); return knockoutResult; } } @@ -852,34 +884,42 @@ public OpenCGAResult variantQuery(String studyStr, Query quer // Added to improve performance issues. Need to be addressed properly and add this information in study internal.rga.stats field @Deprecated private Integer getTotalIndividuals(Study study) { - // In the future, this will need to be fetched from study internal. - // Atm, it will be fetched from study.attributes.rga.stats.totalIndividuals - if (study.getAttributes() == null) { - return null; - } - Object rga = study.getAttributes().get("RGA"); - if (rga == null) { - return null; - } - Object stats = ((Map) rga).get("stats"); - if (stats == null) { - return null; - } - Object totalIndividuals = ((Map) stats).get("totalIndividuals"); - if (totalIndividuals != null) { - return Integer.parseInt(String.valueOf(totalIndividuals)); - } else { - return null; - } + return null; +// // In the future, this will need to be fetched from study internal. +// // Atm, it will be fetched from study.attributes.rga.stats.totalIndividuals +// if (study.getAttributes() == null) { +// return null; +// } +// Object rga = study.getAttributes().get("RGA"); +// if (rga == null) { +// return null; +// } +// Object stats = ((Map) rga).get("stats"); +// if (stats == null) { +// return null; +// } +// Object totalIndividuals = ((Map) stats).get("totalIndividuals"); +// if (totalIndividuals != null) { +// return Integer.parseInt(String.valueOf(totalIndividuals)); +// } else { +// return null; +// } } public OpenCGAResult individualSummary(String studyStr, Query query, QueryOptions options, String token) throws RgaException, CatalogException, IOException { StopWatch stopWatch = StopWatch.createStarted(); + OpenCGAResult cacheResults = getCacheResults("individualSummary", studyStr, query, options, stopWatch); + if (cacheResults != null) { + return cacheResults; + } + Study study = catalogManager.getStudyManager().get(studyStr, QueryOptions.empty(), token).first(); String collection = getMainCollectionName(study.getFqn()); + Query finalQuery = parseQuery(query); + ExecutorService executor = Executors.newFixedThreadPool(4); // Check number of individuals matching query without checking their permissions @@ -892,7 +932,7 @@ public OpenCGAResult individualSummary(String study } else { totalIndividualsFuture = executor.submit(() -> { QueryOptions facetOptions = new QueryOptions(QueryOptions.FACET, "unique(" + RgaDataModel.INDIVIDUAL_ID + ")"); - DataResult result = rgaEngine.facetedQuery(collection, query, facetOptions); + DataResult result = rgaEngine.facetedQuery(collection, finalQuery, facetOptions); return ((Number) result.first().getAggregationValues().get(0)).intValue(); }); } @@ -900,7 +940,7 @@ public OpenCGAResult individualSummary(String study Preprocess preprocess; try { - preprocess = individualQueryPreprocess(study, query, options, token); + preprocess = individualQueryPreprocess(study, finalQuery, options, token); } catch (RgaException e) { if (RgaException.NO_RESULTS_FOUND.equals(e.getMessage())) { stopWatch.stop(); @@ -991,12 +1031,19 @@ public OpenCGAResult individualSummary(String study result.setEvents(Collections.singletonList(preprocess.getEvent())); } + cacheResults("individualSummary", studyStr, query, options, stopWatch, result); return result; } public OpenCGAResult geneSummary(String studyStr, Query query, QueryOptions options, String token) throws CatalogException, IOException, RgaException { StopWatch stopWatch = StopWatch.createStarted(); + + OpenCGAResult cacheResults = getCacheResults("geneSummary", studyStr, query, options, stopWatch); + if (cacheResults != null) { + return cacheResults; + } + Study study = catalogManager.getStudyManager().get(studyStr, QueryOptions.empty(), token).first(); String userId = catalogManager.getUserManager().getUserId(token); String collection = getMainCollectionName(study.getFqn()); @@ -1010,7 +1057,7 @@ public OpenCGAResult geneSummary(String studyStr, Query q ExecutorService executor = Executors.newFixedThreadPool(4); QueryOptions queryOptions = setDefaultLimit(options); - Query auxQuery = query != null ? new Query(query) : new Query(); + Query finalQuery = parseQuery(query); // Get number of matches Future numMatchesFuture = null; @@ -1018,7 +1065,7 @@ public OpenCGAResult geneSummary(String studyStr, Query q numMatchesFuture = executor.submit(() -> { QueryOptions facetOptions = new QueryOptions(QueryOptions.FACET, "unique(" + RgaQueryParams.GENE_ID.key() + ")"); try { - DataResult result = rgaEngine.facetedQuery(collection, auxQuery, facetOptions); + DataResult result = rgaEngine.facetedQuery(collection, finalQuery, facetOptions); return ((Number) result.first().getAggregationValues().get(0)).intValue(); } catch (Exception e) { logger.error("Could not obtain the count: {}", e.getMessage(), e); @@ -1029,8 +1076,8 @@ public OpenCGAResult geneSummary(String studyStr, Query q List geneIds; try { - geneIds = getGeneIds(collection, auxQuery, queryOptions); - auxQuery.remove(RgaQueryParams.GENE_ID.key()); + geneIds = getGeneIds(collection, finalQuery, queryOptions); + finalQuery.remove(RgaQueryParams.GENE_ID.key()); } catch (RgaException e) { if (RgaException.NO_RESULTS_FOUND.equals(e.getMessage())) { return OpenCGAResult.empty(KnockoutByGeneSummary.class, (int) stopWatch.getTime(TimeUnit.MILLISECONDS)); @@ -1040,7 +1087,7 @@ public OpenCGAResult geneSummary(String studyStr, Query q List> geneSummaryFutureList = new ArrayList<>(geneIds.size()); for (String geneId : geneIds) { - geneSummaryFutureList.add(executor.submit(() -> calculateGeneSummary(collection, auxQuery, geneId))); + geneSummaryFutureList.add(executor.submit(() -> calculateGeneSummary(collection, finalQuery, geneId))); } List knockoutByGeneSummaryList = new ArrayList<>(geneIds.size()); @@ -1066,13 +1113,21 @@ public OpenCGAResult geneSummary(String studyStr, Query q } int time = (int) stopWatch.getTime(TimeUnit.MILLISECONDS); - return new OpenCGAResult<>(time, Collections.emptyList(), knockoutByGeneSummaryList.size(), knockoutByGeneSummaryList, numMatches); + OpenCGAResult result = new OpenCGAResult<>(time, Collections.emptyList(), knockoutByGeneSummaryList.size(), + knockoutByGeneSummaryList, numMatches); + cacheResults("geneSummary", studyStr, query, options, stopWatch, result); + return result; } public OpenCGAResult variantSummary(String studyStr, Query query, QueryOptions options, String token) throws CatalogException, IOException, RgaException { StopWatch stopWatch = StopWatch.createStarted(); + OpenCGAResult cacheResults = getCacheResults("variantSummary", studyStr, query, options, stopWatch); + if (cacheResults != null) { + return cacheResults; + } + Study study = catalogManager.getStudyManager().get(studyStr, QueryOptions.empty(), token).first(); String userId = catalogManager.getUserManager().getUserId(token); String collection = getMainCollectionName(study.getFqn()); @@ -1090,12 +1145,12 @@ public OpenCGAResult variantSummary(String studyStr, Q ExecutorService executor = Executors.newFixedThreadPool(4); QueryOptions queryOptions = setDefaultLimit(options); - Query auxQuery = query != null ? new Query(query) : new Query(); + Query finalQuery = parseQuery(query); ResourceIds resourceIds; try { - resourceIds = getVariantIds(collection, auxCollection, auxQuery, queryOptions, executor); - auxQuery.put(RgaDataModel.VARIANTS, resourceIds.getIds()); + resourceIds = getVariantIds(collection, auxCollection, finalQuery, queryOptions, executor); + finalQuery.put(RgaDataModel.VARIANTS, resourceIds.getIds()); } catch (RgaException e) { if (RgaException.NO_RESULTS_FOUND.equals(e.getMessage())) { return OpenCGAResult.empty(KnockoutByVariantSummary.class, (int) stopWatch.getTime(TimeUnit.MILLISECONDS)); @@ -1104,12 +1159,12 @@ public OpenCGAResult variantSummary(String studyStr, Q } Future variantFuture = executor.submit( - () -> variantStorageQuery(study.getFqn(), Collections.emptyList(), auxQuery, QueryOptions.empty(), token) + () -> variantStorageQuery(study.getFqn(), Collections.emptyList(), finalQuery, QueryOptions.empty(), token) ); List> variantSummaryList = new ArrayList<>(resourceIds.getIds().size()); for (String variantId : resourceIds.getIds()) { - variantSummaryList.add(executor.submit(() -> calculatePartialSolrVariantSummary(collection, auxQuery, variantId))); + variantSummaryList.add(executor.submit(() -> calculatePartialSolrVariantSummary(collection, finalQuery, variantId))); } Map variantSummaryMap = new HashMap<>(); @@ -1172,9 +1227,31 @@ public OpenCGAResult variantSummary(String studyStr, Q if (CollectionUtils.isNotEmpty(resourceIds.getEvents())) { result.setEvents(resourceIds.getEvents()); } + + cacheResults("variantSummary", studyStr, query, options, stopWatch, result); return result; } + private Query parseQuery(Query query) { + Query myQuery = query != null ? new Query(query) : new Query(); + // That's the condition we would need to apply to change these filters. + // TODO: + // Because we are also adding some special filters for the DELETION_OVERLAP variants, we ALWAYS need to ensure that the query + // filters by knockout type. In the future, we should fix the DELETION_OVERLAP issue and then we will be able to uncomment + // the condition below. +// if (COMP_HET_QUERY_MODE.equals(RgaQueryParams.CompHetQueryMode.PAIR) && !myQuery.containsKey(RgaQueryParams.KNOCKOUT.key())) { + // Fill with all knockout types to ensure comp_het queries are performed as pairs + if (!myQuery.containsKey(RgaQueryParams.KNOCKOUT.key())) { + List knockoutValues = EnumSet.allOf(KnockoutVariant.KnockoutType.class) + .stream() + .map(Enum::name) + .collect(Collectors.toList()); + myQuery.append(RgaQueryParams.KNOCKOUT.key(), knockoutValues); + } +// } + return myQuery; + } + public OpenCGAResult aggregationStats(String studyStr, Query query, QueryOptions options, String fields, String token) throws CatalogException, IOException, RgaException { Study study = catalogManager.getStudyManager().get(studyStr, QueryOptions.empty(), token).first(); @@ -1254,7 +1331,7 @@ private ResourceIds getVariantIdsFromMainCollection(String mainCollection, Query List eventList = new ArrayList<>(); Future numMatchesFuture = null; - KnockoutTypeCount knockoutTypeCount = new KnockoutTypeCount(query); + VariantKnockoutTypeCount knockoutTypeCount = new VariantKnockoutTypeCount(query, COMP_HET_QUERY_MODE); Set ids = new HashSet<>(); Set skippedIds = new HashSet<>(); List buckets = facetFieldDataResult.first().getBuckets(); @@ -1302,8 +1379,8 @@ private ResourceIds getVariantIdsJoiningCollections(String mainCollection, Strin ExecutorService executor) throws RgaException, IOException { Future numMatchesFuture = null; List ids; - Query mainCollQuery = generateQuery(query, AuxiliarRgaDataModel.MAIN_TO_AUXILIAR_DATA_MODEL_MAP.keySet(), true); - Query auxCollQuery = generateQuery(query, AuxiliarRgaDataModel.MAIN_TO_AUXILIAR_DATA_MODEL_MAP.keySet(), false); + Query mainCollQuery = new Query(query); // Everything is used for the main collection + Query auxCollQuery = generateQuery(query, AuxiliarRgaDataModel.MAIN_TO_AUXILIAR_DATA_MODEL_MAP.keySet()); // Make a join with the main collection to get all the data we need !! @@ -1345,15 +1422,14 @@ private boolean isQueryingByIndividualFields(Query query) { /** * Generate a new query based on the original query. * - * @param query Original query from where it will be generated the new query. - * @param fields Fields to be added in the new query (unless inverse is true). - * @param inverse Flag indicating to generate a new query with the fields passed or absent. + * @param query Original query from where it will be generated the new query. + * @param fields Fields to be added in the new query. * @return a new query object. */ - private Query generateQuery(Query query, Set fields, boolean inverse) { + private Query generateQuery(Query query, Set fields) { Query newQuery = new Query(); for (Map.Entry entry : query.entrySet()) { - if ((fields.contains(entry.getKey()) && !inverse) || (!fields.contains(entry.getKey()) && inverse)) { + if (fields.contains(entry.getKey())) { newQuery.put(entry.getKey(), entry.getValue()); } } @@ -1429,13 +1505,13 @@ private KnockoutByVariantSummary calculatePartialSolrVariantSummary(String colle .append(QueryOptions.LIMIT, -1) .append(QueryOptions.FACET, RgaDataModel.INDIVIDUAL_SUMMARY); facetFieldDataResult = rgaEngine.facetedQuery(collection, auxQuery, geneFacet); - KnockoutTypeCount noParentsCount = new KnockoutTypeCount(auxQuery); - KnockoutTypeCount singleParentCount = new KnockoutTypeCount(auxQuery); - KnockoutTypeCount bothParentsCount = new KnockoutTypeCount(auxQuery); + IndividualKnockoutTypeCount noParentsCount = new IndividualKnockoutTypeCount(auxQuery); + IndividualKnockoutTypeCount singleParentCount = new IndividualKnockoutTypeCount(auxQuery); + IndividualKnockoutTypeCount bothParentsCount = new IndividualKnockoutTypeCount(auxQuery); for (FacetField.Bucket bucket : facetFieldDataResult.first().getBuckets()) { CodedIndividual codedIndividual = CodedIndividual.parseEncodedId(bucket.getValue()); - KnockoutTypeCount auxKnockoutType; + IndividualKnockoutTypeCount auxKnockoutType; switch (codedIndividual.getNumParents()) { case 0: auxKnockoutType = noParentsCount; @@ -1452,18 +1528,22 @@ private KnockoutByVariantSummary calculatePartialSolrVariantSummary(String colle auxKnockoutType.processFeature(codedIndividual); } + noParentsCount.calculateStats(); + singleParentCount.calculateStats(); + bothParentsCount.calculateStats(); + IndividualKnockoutStats noParentIndividualStats = new IndividualKnockoutStats(noParentsCount.getNumIds(), - noParentsCount.getNumHomIds(), noParentsCount.getNumCompHetIds(), noParentsCount.getNumHetIds(), + noParentsCount.getNumHomAltIds(), noParentsCount.getNumCompHetIds(), noParentsCount.getNumHetIds(), noParentsCount.getNumDelOverlapIds(), noParentsCount.getNumHomAltCompHetIds(), noParentsCount.getNumCompHetDelOverlapIds() ); IndividualKnockoutStats singleParentIndividualStats = new IndividualKnockoutStats(singleParentCount.getNumIds(), - singleParentCount.getNumHomIds(), singleParentCount.getNumCompHetIds(), singleParentCount.getNumHetIds(), + singleParentCount.getNumHomAltIds(), singleParentCount.getNumCompHetIds(), singleParentCount.getNumHetIds(), singleParentCount.getNumDelOverlapIds(), singleParentCount.getNumHomAltCompHetIds(), singleParentCount.getNumCompHetDelOverlapIds() ); IndividualKnockoutStats bothParentIndividualStats = new IndividualKnockoutStats(bothParentsCount.getNumIds(), - bothParentsCount.getNumHomIds(), bothParentsCount.getNumCompHetIds(), bothParentsCount.getNumHetIds(), + bothParentsCount.getNumHomAltIds(), bothParentsCount.getNumCompHetIds(), bothParentsCount.getNumHetIds(), bothParentsCount.getNumDelOverlapIds(), bothParentsCount.getNumHomAltCompHetIds(), bothParentsCount.getNumCompHetDelOverlapIds() ); @@ -1486,7 +1566,7 @@ private KnockoutByVariantSummary calculatePartialSolrVariantSummary(String colle Query knockoutTypeQuery = new Query(query); knockoutTypeQuery.remove(RgaQueryParams.VARIANTS.key()); knockoutTypeQuery.remove(RgaQueryParams.DB_SNPS.key()); - KnockoutTypeCount knockoutTypeCount = new KnockoutTypeCount(knockoutTypeQuery); + VariantKnockoutTypeCount knockoutTypeCount = new VariantKnockoutTypeCount(knockoutTypeQuery, COMP_HET_QUERY_MODE); for (FacetField.Bucket bucket : facetFieldDataResult.first().getBuckets()) { CodedVariant codedVariant = CodedVariant.parseEncodedId(bucket.getValue()); @@ -1505,6 +1585,7 @@ private KnockoutByVariantSummary calculatePartialSolrVariantSummary(String colle otherVariantSet.add(auxKnockoutVariant); } } + knockoutTypeCount.calculateStats(); List sequenceOntologyTermList = new ArrayList<>(sequenceOntologyTerms.size()); for (String ct : sequenceOntologyTerms) { String ctName = decode(ct); @@ -1530,55 +1611,75 @@ private KnockoutByGeneSummary calculateGeneSummary(String collection, Query quer Query auxQuery = new Query(query); auxQuery.put(RgaQueryParams.GENE_ID.key(), geneId); + StopWatch stopWatch = StopWatch.createStarted(); // 1. Get KnockoutByGene information - Query individualQuery = new Query(RgaQueryParams.GENE_ID.key(), geneId); + Query geneQuery = new Query(RgaQueryParams.GENE_ID.key(), geneId); QueryOptions options = new QueryOptions() - .append(QueryOptions.LIMIT, 1) - .append(QueryOptions.EXCLUDE, "individuals"); - RgaIterator rgaIterator = rgaEngine.geneQuery(collection, individualQuery, options); + .append(QueryOptions.EXCLUDE, "individuals") + .append(QueryOptions.LIMIT, 1); + RgaIterator rgaIterator = rgaEngine.geneQuery(collection, geneQuery, options); + logger.debug("Gene query: {} ms", stopWatch.getTime(TimeUnit.MILLISECONDS)); if (!rgaIterator.hasNext()) { throw RgaException.noResultsMatching(); } + + VariantKnockoutTypeCount knockoutTypeCount = new VariantKnockoutTypeCount(auxQuery, COMP_HET_QUERY_MODE); RgaDataModel rgaDataModel = rgaIterator.next(); + + stopWatch.reset(); + stopWatch.start(); + QueryOptions variantFacet = new QueryOptions() + .append(QueryOptions.LIMIT, -1) + .append(QueryOptions.FACET, RgaDataModel.CH_PAIRS); + DataResult facetFieldDataResult = rgaEngine.facetedQuery(collection, auxQuery, variantFacet); + logger.debug("Gene CH pairs facet: {} ms", stopWatch.getTime(TimeUnit.MILLISECONDS)); + for (FacetField.Bucket variantBucket : facetFieldDataResult.first().getBuckets()) { + CodedChPairVariants codedChPairVariants = CodedChPairVariants.parseEncodedId(variantBucket.getValue()); + knockoutTypeCount.processChPairFeature(codedChPairVariants); + } + logger.debug("Gene CH pairs facet and process: {} ms", stopWatch.getTime(TimeUnit.MILLISECONDS)); + + // To get the basic gene information, we can use any document from RgaDataModel. In this case, we use the last document KnockoutByGeneSummary geneSummary = new KnockoutByGeneSummary(rgaDataModel.getGeneId(), rgaDataModel.getGeneName(), rgaDataModel.getChromosome(), rgaDataModel.getStart(), rgaDataModel.getEnd(), rgaDataModel.getStrand(), rgaDataModel.getGeneBiotype(), null, null); + stopWatch.reset(); + stopWatch.start(); // 2. Get KnockoutType counts QueryOptions knockoutTypeFacet = new QueryOptions() .append(QueryOptions.LIMIT, -1) .append(QueryOptions.FACET, RgaDataModel.VARIANT_SUMMARY); - DataResult facetFieldDataResult = rgaEngine.facetedQuery(collection, auxQuery, knockoutTypeFacet); - KnockoutTypeCount knockoutTypeCount = new KnockoutTypeCount(auxQuery); - if (CollectionUtils.isNotEmpty(rgaDataModel.getChPairs())) { - for (String chPair : rgaDataModel.getChPairs()) { - CodedChPairVariants codedChPairVariants = CodedChPairVariants.parseEncodedId(chPair); - knockoutTypeCount.processChPairFeature(codedChPairVariants); - } - } + facetFieldDataResult = rgaEngine.facetedQuery(collection, auxQuery, knockoutTypeFacet); + logger.debug("Gene VariantSummary facet: {} ms", stopWatch.getTime(TimeUnit.MILLISECONDS)); for (FacetField.Bucket variantBucket : facetFieldDataResult.first().getBuckets()) { CodedVariant codedFeature = CodedVariant.parseEncodedId(variantBucket.getValue()); knockoutTypeCount.processFeature(codedFeature); } - VariantKnockoutStats variantStats = new VariantKnockoutStats(knockoutTypeCount.getNumIds(), knockoutTypeCount.getNumHomIds(), + knockoutTypeCount.calculateStats(); + VariantKnockoutStats variantStats = new VariantKnockoutStats(knockoutTypeCount.getNumIds(), knockoutTypeCount.getNumHomAltIds(), knockoutTypeCount.getNumCompHetIds(), knockoutTypeCount.getNumPairedCompHetIds(), knockoutTypeCount.getNumPairedDelOverlapIds(), knockoutTypeCount.getNumHetIds(), knockoutTypeCount.getNumDelOverlapIds()); geneSummary.setVariantStats(variantStats); + logger.debug("Gene VariantSummary facet and process: {} ms", stopWatch.getTime(TimeUnit.MILLISECONDS)); + stopWatch.reset(); + stopWatch.start(); // 3. Get individual knockout type counts QueryOptions geneFacet = new QueryOptions() .append(QueryOptions.LIMIT, -1) .append(QueryOptions.FACET, RgaDataModel.INDIVIDUAL_SUMMARY); facetFieldDataResult = rgaEngine.facetedQuery(collection, auxQuery, geneFacet); - KnockoutTypeCount noParentsCount = new KnockoutTypeCount(auxQuery); - KnockoutTypeCount singleParentCount = new KnockoutTypeCount(auxQuery); - KnockoutTypeCount bothParentsCount = new KnockoutTypeCount(auxQuery); + logger.debug("Gene IndividualSummary facet: {} ms", stopWatch.getTime(TimeUnit.MILLISECONDS)); + IndividualKnockoutTypeCount noParentsCount = new IndividualKnockoutTypeCount(auxQuery); + IndividualKnockoutTypeCount singleParentCount = new IndividualKnockoutTypeCount(auxQuery); + IndividualKnockoutTypeCount bothParentsCount = new IndividualKnockoutTypeCount(auxQuery); for (FacetField.Bucket bucket : facetFieldDataResult.first().getBuckets()) { CodedIndividual codedIndividual = CodedIndividual.parseEncodedId(bucket.getValue()); - KnockoutTypeCount auxKnockoutType; + IndividualKnockoutTypeCount auxKnockoutType; switch (codedIndividual.getNumParents()) { case 0: auxKnockoutType = noParentsCount; @@ -1595,20 +1696,25 @@ private KnockoutByGeneSummary calculateGeneSummary(String collection, Query quer auxKnockoutType.processFeature(codedIndividual); } + noParentsCount.calculateStats(); + singleParentCount.calculateStats(); + bothParentsCount.calculateStats(); + IndividualKnockoutStats noParentIndividualStats = new IndividualKnockoutStats(noParentsCount.getNumIds(), - noParentsCount.getNumHomIds(), noParentsCount.getNumCompHetIds(), noParentsCount.getNumHetIds(), + noParentsCount.getNumHomAltIds(), noParentsCount.getNumCompHetIds(), noParentsCount.getNumHetIds(), noParentsCount.getNumDelOverlapIds(), noParentsCount.getNumHomAltCompHetIds(), noParentsCount.getNumCompHetDelOverlapIds() ); IndividualKnockoutStats singleParentIndividualStats = new IndividualKnockoutStats(singleParentCount.getNumIds(), - singleParentCount.getNumHomIds(), singleParentCount.getNumCompHetIds(), singleParentCount.getNumHetIds(), + singleParentCount.getNumHomAltIds(), singleParentCount.getNumCompHetIds(), singleParentCount.getNumHetIds(), singleParentCount.getNumDelOverlapIds(), singleParentCount.getNumHomAltCompHetIds(), singleParentCount.getNumCompHetDelOverlapIds() ); IndividualKnockoutStats bothParentIndividualStats = new IndividualKnockoutStats(bothParentsCount.getNumIds(), - bothParentsCount.getNumHomIds(), bothParentsCount.getNumCompHetIds(), bothParentsCount.getNumHetIds(), + bothParentsCount.getNumHomAltIds(), bothParentsCount.getNumCompHetIds(), bothParentsCount.getNumHetIds(), bothParentsCount.getNumDelOverlapIds(), bothParentsCount.getNumHomAltCompHetIds(), bothParentsCount.getNumCompHetDelOverlapIds() ); + logger.debug("Gene IndividualSummary facet and process: {} ms", stopWatch.getTime(TimeUnit.MILLISECONDS)); geneSummary.setIndividualStats(new GlobalIndividualKnockoutStats(noParentIndividualStats, singleParentIndividualStats, bothParentIndividualStats)); @@ -1621,42 +1727,56 @@ private KnockoutByIndividualSummary calculateIndividualSummary(String collection Query auxQuery = new Query(query); auxQuery.put(RgaQueryParams.SAMPLE_ID.key(), sampleId); + StopWatch stopWatch = StopWatch.createStarted(); // 1. Get KnockoutByIndividual information QueryOptions options = new QueryOptions() - .append(QueryOptions.LIMIT, 1) - .append(QueryOptions.EXCLUDE, "genes"); + .append(QueryOptions.EXCLUDE, "genes") + .append(QueryOptions.LIMIT, 1); RgaIterator rgaIterator = rgaEngine.individualQuery(collection, auxQuery, options); + logger.debug("Individual query: {} ms", stopWatch.getTime(TimeUnit.MILLISECONDS)); if (!rgaIterator.hasNext()) { throw RgaException.noResultsMatching(); } - KnockoutTypeCount knockoutTypeCount = new KnockoutTypeCount(auxQuery); + VariantKnockoutTypeCount knockoutTypeCount = new VariantKnockoutTypeCount(auxQuery, COMP_HET_QUERY_MODE); RgaDataModel rgaDataModel = rgaIterator.next(); - if (CollectionUtils.isNotEmpty(rgaDataModel.getChPairs())) { - for (String chPair : rgaDataModel.getChPairs()) { - CodedChPairVariants codedChPairVariants = CodedChPairVariants.parseEncodedId(chPair); - knockoutTypeCount.processChPairFeature(codedChPairVariants); - } - } + stopWatch.reset(); + stopWatch.start(); + QueryOptions variantFacet = new QueryOptions() + .append(QueryOptions.LIMIT, -1) + .append(QueryOptions.FACET, RgaDataModel.CH_PAIRS); + DataResult facetFieldDataResult = rgaEngine.facetedQuery(collection, auxQuery, variantFacet); + logger.debug("Individual CH pairs facet: {} ms", stopWatch.getTime(TimeUnit.MILLISECONDS)); + for (FacetField.Bucket variantBucket : facetFieldDataResult.first().getBuckets()) { + CodedChPairVariants codedChPairVariants = CodedChPairVariants.parseEncodedId(variantBucket.getValue()); + knockoutTypeCount.processChPairFeature(codedChPairVariants); + } + logger.debug("Individual CH pairs facet and process: {} ms", stopWatch.getTime(TimeUnit.MILLISECONDS)); KnockoutByIndividual knockoutByIndividual = AbstractRgaConverter.fillIndividualInfo(rgaDataModel); KnockoutByIndividualSummary knockoutByIndividualSummary = new KnockoutByIndividualSummary(knockoutByIndividual); + stopWatch.reset(); + stopWatch.start(); // 2. Get KnockoutType counts QueryOptions knockoutTypeFacet = new QueryOptions() .append(QueryOptions.LIMIT, -1) .append(QueryOptions.FACET, RgaDataModel.VARIANT_SUMMARY); - DataResult facetFieldDataResult = rgaEngine.facetedQuery(collection, auxQuery, knockoutTypeFacet); + facetFieldDataResult = rgaEngine.facetedQuery(collection, auxQuery, knockoutTypeFacet); + logger.debug("Individual VariantSummary facet: {} ms", stopWatch.getTime(TimeUnit.MILLISECONDS)); + for (FacetField.Bucket variantBucket : facetFieldDataResult.first().getBuckets()) { CodedVariant codedFeature = CodedVariant.parseEncodedId(variantBucket.getValue()); knockoutTypeCount.processFeature(codedFeature); } - VariantKnockoutStats variantStats = new VariantKnockoutStats(knockoutTypeCount.getNumIds(), knockoutTypeCount.getNumHomIds(), + knockoutTypeCount.calculateStats(); + VariantKnockoutStats variantStats = new VariantKnockoutStats(knockoutTypeCount.getNumIds(), knockoutTypeCount.getNumHomAltIds(), knockoutTypeCount.getNumCompHetIds(), knockoutTypeCount.getNumPairedCompHetIds(), knockoutTypeCount.getNumPairedDelOverlapIds(), knockoutTypeCount.getNumHetIds(), knockoutTypeCount.getNumDelOverlapIds()); knockoutByIndividualSummary.setVariantStats(variantStats); + logger.debug("Individual VariantSummary facet and process: {} ms", stopWatch.getTime(TimeUnit.MILLISECONDS)); // Use list of variants filtered matching all criteria if the number of variants is lower than 100. Otherwise, variants will not be // used to get the list of genes. If we don't apply this limit, the url may be too long and fail. @@ -1664,16 +1784,20 @@ private KnockoutByIndividualSummary calculateIndividualSummary(String collection auxQuery.put(RgaQueryParams.VARIANTS.key(), new ArrayList<>(knockoutTypeCount.getIds())); } + stopWatch.reset(); + stopWatch.start(); // 3. Get gene name list QueryOptions geneFacet = new QueryOptions() .append(QueryOptions.LIMIT, -1) .append(QueryOptions.FACET, RgaDataModel.GENE_NAME); facetFieldDataResult = rgaEngine.facetedQuery(collection, auxQuery, geneFacet); + logger.debug("Individual GeneName facet: {} ms", stopWatch.getTime(TimeUnit.MILLISECONDS)); List geneIds = facetFieldDataResult.first().getBuckets() .stream() .map(FacetField.Bucket::getValue) .collect(Collectors.toList()); knockoutByIndividualSummary.setGenes(geneIds); + logger.debug("Individual GeneName facet and process: {} ms", stopWatch.getTime(TimeUnit.MILLISECONDS)); return knockoutByIndividualSummary; } @@ -1838,11 +1962,13 @@ public void testConnection() throws StorageEngineException { } private String getMainCollectionName(String study) { - return catalogManager.getConfiguration().getDatabasePrefix() + "-rga-" + study.replace("@", "_").replace(":", "_"); + return catalogManager.getConfiguration().getDatabasePrefix() + "-rga-" + study.replace("@", "_").replace(":", "_") + + (storageConfiguration.getRga().getSuffix() != null ? storageConfiguration.getRga().getSuffix() : ""); } private String getAuxCollectionName(String study) { - return catalogManager.getConfiguration().getDatabasePrefix() + "-rga-aux-" + study.replace("@", "_").replace(":", "_"); + return catalogManager.getConfiguration().getDatabasePrefix() + "-rga-aux-" + study.replace("@", "_").replace(":", "_") + + (storageConfiguration.getRga().getSuffix() != null ? storageConfiguration.getRga().getSuffix() : ""); } @Override @@ -1984,4 +2110,62 @@ public Preprocess setEvent(Event event) { return this; } } + + /* + CACHE METHODS + */ + private String generateCacheKey(String method, String studyStr, Query query, QueryOptions options) { + ObjectMap map = new ObjectMap() + .append("method", method) + .append("study", studyStr); + if (query != null) { + map.putAll(query); + } + if (options != null) { + map.putAll(options); + } + // Sort the keys + List sortedKeys = map.keySet().stream().sorted().collect(Collectors.toList()); + List queryList = new ArrayList<>(map.size()); + for (String key : sortedKeys) { + queryList.add(key + "=" + map.get(key)); + } + return DigestUtils.sha256Hex(StringUtils.join(queryList, ";")); + } + + private void cacheResults(String method, String studyStr, Query query, QueryOptions options, StopWatch stopWatch, + OpenCGAResult result) { + if (!storageConfiguration.getRga().isCache()) { + // Cache is disabled + return; + } + + if (cacheMap.size() > CACHE_SIZE) { + // Cache is already full + logger.warn("Query not cached. Cache is already full (size: {}).", CACHE_SIZE); + return; + } + + if (stopWatch.getTime(TimeUnit.SECONDS) < 4) { + logger.debug("Query not cached. It took less than 4 seconds: {} ms.", stopWatch.getTime(TimeUnit.MILLISECONDS)); + } + + String cacheKey = generateCacheKey(method, studyStr, query, options); + cacheMap.putIfAbsent(cacheKey, result); + } + + private OpenCGAResult getCacheResults(String method, String studyStr, Query query, QueryOptions options, StopWatch stopWatch) { + if (!storageConfiguration.getRga().isCache()) { + // Cache is disabled + return null; + } + String cacheKey = generateCacheKey(method, studyStr, query, options); + OpenCGAResult result = cacheMap.get(cacheKey); + if (result != null) { + result.addEvent(new Event(Event.Type.INFO, "Results obtained from cache")); + result.setTime((int) stopWatch.getTime(TimeUnit.MILLISECONDS)); + return (OpenCGAResult) result; + } + return null; + } } diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/rga/RgaQueryParser.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/rga/RgaQueryParser.java index a4f27825c1d..644f70f8cf2 100644 --- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/rga/RgaQueryParser.java +++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/rga/RgaQueryParser.java @@ -15,6 +15,7 @@ import java.util.*; import java.util.function.Predicate; import java.util.regex.Pattern; +import java.util.stream.Collectors; import static org.opencb.opencga.analysis.rga.RgaQueryParams.*; import static org.opencb.opencga.core.models.analysis.knockout.KnockoutVariant.KnockoutType.*; @@ -28,6 +29,26 @@ public class RgaQueryParser { protected static Logger logger = LoggerFactory.getLogger(RgaQueryParser.class); + private static final List ALL_CONSEQUENCE_TYPES; + private static final List ALL_PAIRED_CONSEQUENCE_TYPES; + private static final List INCLUDED_DEL_OVERLAP_CONSEQUENCE_TYPES; + private static final List INCLUDED_DEL_OVERLAP_PAIR_CTS; + + static { + List excludedDelOverlapCts = getEncodedConsequenceTypes(Arrays.asList("missense_variant", "frameshift_variant", + "incomplete_terminal_codon_variant", "start_lost", "stop_gained", "stop_lost", "splice_acceptor_variant", + "splice_donor_variant", "splice_region_variant")); + + // Exclude DELETION_OVERLAP variants with consequence types: missense_variant + ALL_CONSEQUENCE_TYPES = getEncodedConsequenceTypes(RgaUtils.CONSEQUENCE_TYPE_LIST); + ALL_PAIRED_CONSEQUENCE_TYPES = generateSortedCombinations(ALL_CONSEQUENCE_TYPES); + INCLUDED_DEL_OVERLAP_CONSEQUENCE_TYPES = ALL_CONSEQUENCE_TYPES + .stream() + .filter(ct -> !excludedDelOverlapCts.contains(ct)) + .collect(Collectors.toList()); + INCLUDED_DEL_OVERLAP_PAIR_CTS = generateSortedCombinations(INCLUDED_DEL_OVERLAP_CONSEQUENCE_TYPES); + } + public RgaQueryParser() { this(CompHetQueryMode.SINGLE); } @@ -151,7 +172,9 @@ private void parseMainCollCompoundFilters(Query query, List filterList) count += ctValues.isEmpty() ? 0 : 1; count += popFreqValues.isEmpty() ? 0 : 1; - if (count == 1) { + boolean simpleFilter = !knockoutValues.contains(COMP_HET.name()) && !knockoutValues.contains(DELETION_OVERLAP.name()) && count == 1; + + if (simpleFilter) { // Simple filter parseStringValue(query, KNOCKOUT, RgaDataModel.KNOCKOUT_TYPES, filterList); parseStringValue(query, FILTER, RgaDataModel.FILTERS, filterList); @@ -164,7 +187,7 @@ private void parseMainCollCompoundFilters(Query query, List filterList) parseStringValue(entry.getValue(), RgaDataModel.POPULATION_FREQUENCIES.replace("*", entry.getKey()), filterList, "||"); } } - } else if (count > 1) { + } else { buildComplexQueryFilter(filterList, knockoutValues, filterValue, ctValues, popFreqValues); } } @@ -181,9 +204,7 @@ private void parseAuxCollCompoundFilters(Query query, List filterList) t count += ctValues.isEmpty() ? 0 : 1; count += popFreqValues.isEmpty() ? 0 : 1; - // In this case, we may need to use both filters if users are filtering by COMP_HET and another ko type + (ct | pf) - boolean simpleFilter = !knockoutValues.contains(COMP_HET.name()) || count == 1; - boolean complexFilter = knockoutValues.contains(COMP_HET.name()) && count > 1; + boolean simpleFilter = !knockoutValues.contains(COMP_HET.name()) && !knockoutValues.contains(DELETION_OVERLAP.name()) && count == 1; if (simpleFilter) { // Simple filters @@ -201,8 +222,7 @@ private void parseAuxCollCompoundFilters(Query query, List filterList) t AuxiliarRgaDataModel.POPULATION_FREQUENCIES.replace("*", entry.getKey()), filterList, "||"); } } - } - if (complexFilter) { + } else { buildComplexQueryFilter(filterList, knockoutValues, "", ctValues, popFreqValues); } } @@ -265,7 +285,7 @@ private void buildComplexQueryFilter(List filterList, List knock buildComplexQuery(koValues, filterValues, ctValues, popFreqQueryList, filterList); } - private List getEncodedConsequenceTypes(List originalCtList) { + private static List getEncodedConsequenceTypes(List originalCtList) { if (CollectionUtils.isEmpty(originalCtList)) { return Collections.emptyList(); } @@ -280,6 +300,7 @@ private List getEncodedConsequenceTypes(List originalCtList) { private void buildComplexQuery(List koValues, List filterValues, List ctValues, Map> popFreqQueryList, List filterList) throws RgaException { String encodedChString = RgaUtils.encode(COMP_HET.name()); + String delOverlap = RgaUtils.encode(DELETION_OVERLAP.name()); List chFilterValues = filterValues; List chCtValues = ctValues; @@ -287,52 +308,124 @@ private void buildComplexQuery(List koValues, List filterValues, // To generate pairs to query for complete COMP_HET variants chFilterValues = generateSortedCombinations(filterValues); chCtValues = generateSortedCombinations(ctValues); + if (popFreqQueryList.size() == 1) { + // Add the missing pair so queries are done properly + if (popFreqQueryList.keySet().contains(RgaUtils.GNOMAD_GENOMES_STUDY)) { + List missingPopFreq = Collections.singletonList(RgaUtils.THOUSAND_GENOMES_STUDY + ":ALL>=0"); + Map> tmpMap = RgaUtils.parsePopulationFrequencyQuery(missingPopFreq); + popFreqQueryList.putAll(tmpMap); + } else if (popFreqQueryList.keySet().contains(RgaUtils.THOUSAND_GENOMES_STUDY)) { + List missingPopFreq = Collections.singletonList(RgaUtils.GNOMAD_GENOMES_STUDY + ":ALL>=0"); + Map> tmpMap = RgaUtils.parsePopulationFrequencyQuery(missingPopFreq); + popFreqQueryList.putAll(tmpMap); + } + } } if (ctValues.isEmpty() && popFreqQueryList.isEmpty()) { // KT + FILTER List orFilterList = new LinkedList<>(); for (String koValue : koValues) { - List finalFilterValues = koValue.equals(encodedChString) ? chFilterValues : filterValues; - for (String filterVal : finalFilterValues) { - orFilterList.add(koValue + SEPARATOR + filterVal); + if (compHetQueryMode.equals(CompHetQueryMode.PAIR) && koValue.equals(encodedChString)) { + for (String filterVal : chFilterValues) { + orFilterList.add(koValue + SEPARATOR + filterVal); + } + } else { + for (String filterVal : filterValues) { + if (koValue.equals(delOverlap)) { + for (String ctValue : INCLUDED_DEL_OVERLAP_CONSEQUENCE_TYPES) { + orFilterList.add(koValue + SEPARATOR + filterVal + SEPARATOR + ctValue); + } + } else { + orFilterList.add(koValue + SEPARATOR + filterVal); + } + } } } parseStringValue(orFilterList, RgaDataModel.COMPOUND_FILTERS, filterList, "||"); } else if (!ctValues.isEmpty() && !popFreqQueryList.isEmpty()) { // KT + FILTER + CT + POP_FREQ - List andQueryList = new ArrayList<>(popFreqQueryList.size()); + List andQueryList = new LinkedList<>(); if (popFreqQueryList.size() == 2) { - ArrayList popFreqKeys = new ArrayList<>(popFreqQueryList.keySet()); - List> sortedPopFreqs = RgaUtils.generateSortedCombinations(popFreqQueryList.get(popFreqKeys.get(0)), - popFreqQueryList.get(popFreqKeys.get(1))); - for (List sortedPopFreq : sortedPopFreqs) { - List orQueryList = new LinkedList<>(); - for (String koValue : koValues) { - List finalFilterValues = koValue.equals(encodedChString) ? chFilterValues : filterValues; - List finalCtValues = koValue.equals(encodedChString) ? chCtValues : ctValues; - for (String filterVal : finalFilterValues) { - for (String ctValue : finalCtValues) { - orQueryList.add(koValue + SEPARATOR + filterVal + SEPARATOR + ctValue + SEPARATOR + sortedPopFreq.get(0) - + SEPARATOR + sortedPopFreq.get(1)); + List koQueryList = new LinkedList<>(); + for (String koValue : koValues) { + if (compHetQueryMode.equals(CompHetQueryMode.PAIR) && koValue.equals(encodedChString)) { + ArrayList popFreqKeys = new ArrayList<>(popFreqQueryList.keySet()); + List> sortedPopFreqs = RgaUtils.generateSortedCombinations(popFreqQueryList.get(popFreqKeys.get(0)), + popFreqQueryList.get(popFreqKeys.get(1))); + List popFreqAndQueryList = new LinkedList<>(); + List tmpOrQueryList = new LinkedList<>(); + for (List sortedPopFreq : sortedPopFreqs) { + for (String filterVal : chFilterValues) { + for (String ctValue : chCtValues) { + // CH__P__P__1583__1583__P1-1__P2-2 + tmpOrQueryList.add(koValue + SEPARATOR + filterVal + SEPARATOR + ctValue + SEPARATOR + + sortedPopFreq.get(0) + SEPARATOR + sortedPopFreq.get(1)); + } + } + } + parseStringValue(tmpOrQueryList, "", popFreqAndQueryList, "||"); + + List filterValuesOrList = new LinkedList<>(); + for (String filterVal : chFilterValues) { + List tmpAndList = new LinkedList<>(); + for (List popFreqList : popFreqQueryList.values()) { + List popFreqOrQueryList = new LinkedList<>(); + for (String popFreq : popFreqList) { + popFreqOrQueryList.add(koValue + SEPARATOR + filterVal + SEPARATOR + popFreq); + } + parseStringValue(popFreqOrQueryList, "", tmpAndList, "||"); + } + parseStringValue(tmpAndList, "", filterValuesOrList, "&&"); + } + parseStringValue(filterValuesOrList, "", popFreqAndQueryList, "||"); + parseStringValue(popFreqAndQueryList, "", koQueryList, "&&"); + } else { + List orQueryList = new LinkedList<>(); + for (String ctValue : ctValues) { + if (koValue.equals(delOverlap) && !INCLUDED_DEL_OVERLAP_CONSEQUENCE_TYPES.contains(ctValue)) { + // Don't process this filter + continue; + } + for (String filterValue : filterValues) { + List tmpAndQueryList = new ArrayList<>(popFreqQueryList.size()); + for (List popFreqs : popFreqQueryList.values()) { + List tmpOrQueryList = new ArrayList<>(popFreqs.size()); + for (String popFreq : popFreqs) { + tmpOrQueryList.add(koValue + SEPARATOR + filterValue + SEPARATOR + ctValue + SEPARATOR + popFreq); + } + parseStringValue(tmpOrQueryList, "", tmpAndQueryList, "||"); + } + parseStringValue(tmpAndQueryList, "", orQueryList, "&&"); } } + parseStringValue(orQueryList, "", koQueryList, "||"); } - parseStringValue(orQueryList, "", andQueryList, "||"); } + parseStringValue(koQueryList, "", andQueryList, "||"); } else { for (List tmpPopFreqList : popFreqQueryList.values()) { List orQueryList = new LinkedList<>(); - for (String popFreq : tmpPopFreqList) { - for (String koValue : koValues) { - List finalFilterValues = koValue.equals(encodedChString) ? chFilterValues : filterValues; - List finalCtValues = koValue.equals(encodedChString) ? chCtValues : ctValues; - for (String filterVal : finalFilterValues) { - for (String ctValue : finalCtValues) { - if (compHetQueryMode.equals(CompHetQueryMode.PAIR) && koValue.equals(encodedChString)) { - orQueryList.add(koValue + SEPARATOR + filterVal + SEPARATOR + ctValue + SEPARATOR + popFreq - + SEPARATOR + popFreq); - } else { + for (String koValue : koValues) { + List finalFilterValues = koValue.equals(encodedChString) ? chFilterValues : filterValues; + List finalCtValues = koValue.equals(encodedChString) ? chCtValues : ctValues; + for (String filterVal : finalFilterValues) { + for (String ctValue : finalCtValues) { + if (compHetQueryMode.equals(CompHetQueryMode.PAIR) && koValue.equals(encodedChString)) { + if (tmpPopFreqList.size() == 1) { + // Replicate the same value so it filters as a pair + tmpPopFreqList.add(tmpPopFreqList.get(0)); + } + List sortedCombinations = generateSortedCombinations(tmpPopFreqList); + for (String popFreqPair : sortedCombinations) { + orQueryList.add(koValue + SEPARATOR + filterVal + SEPARATOR + ctValue + SEPARATOR + popFreqPair); + } + } else { + if (koValue.equals(delOverlap) && !INCLUDED_DEL_OVERLAP_CONSEQUENCE_TYPES.contains(ctValue)) { + // Don't process this filter + continue; + } + for (String popFreq : tmpPopFreqList) { orQueryList.add(koValue + SEPARATOR + filterVal + SEPARATOR + ctValue + SEPARATOR + popFreq); } } @@ -351,6 +444,10 @@ private void buildComplexQuery(List koValues, List filterValues, List finalCtValues = koValue.equals(encodedChString) ? chCtValues : ctValues; for (String filterVal : finalFilterValues) { for (String ctValue : finalCtValues) { + if (koValue.equals(delOverlap) && !INCLUDED_DEL_OVERLAP_CONSEQUENCE_TYPES.contains(ctValue)) { + // Don't process this filter + continue; + } orFilterList.add(koValue + SEPARATOR + filterVal + SEPARATOR + ctValue); } } @@ -359,7 +456,7 @@ private void buildComplexQuery(List koValues, List filterValues, } else { // POP_FREQ not empty // KT + FILTER + POP_FREQ List andQueryList = new ArrayList<>(popFreqQueryList.size()); - if (popFreqQueryList.size() == 2) { + if (popFreqQueryList.size() == 2) { // + 2x POP FREQ ArrayList popFreqKeys = new ArrayList<>(popFreqQueryList.keySet()); List> sortedPopFreqs = RgaUtils.generateSortedCombinations(popFreqQueryList.get(popFreqKeys.get(0)), popFreqQueryList.get(popFreqKeys.get(1))); @@ -367,22 +464,58 @@ private void buildComplexQuery(List koValues, List filterValues, for (List sortedPopFreq : sortedPopFreqs) { for (String koValue : koValues) { List finalFilterValues = koValue.equals(encodedChString) ? chFilterValues : filterValues; + List ctList = koValue.equals(delOverlap) ? INCLUDED_DEL_OVERLAP_PAIR_CTS : ALL_PAIRED_CONSEQUENCE_TYPES; for (String filterVal : finalFilterValues) { - orQueryList.add(koValue + SEPARATOR + filterVal + SEPARATOR + sortedPopFreq.get(0) + SEPARATOR - + sortedPopFreq.get(1)); - + // This is how it should be filtered +// orQueryList.add(koValue + SEPARATOR + filterVal + SEPARATOR + sortedPopFreq.get(0) + SEPARATOR +// + sortedPopFreq.get(1)); + if (koValue.equals(delOverlap)) { + for (String ctValue : ctList) { + List tmpAndQueryList = new ArrayList<>(2); + tmpAndQueryList.add(koValue + SEPARATOR + filterVal + SEPARATOR + ctValue + SEPARATOR + + sortedPopFreq.get(0)); + tmpAndQueryList.add(koValue + SEPARATOR + filterVal + SEPARATOR + ctValue + SEPARATOR + + sortedPopFreq.get(1)); + parseStringValue(tmpAndQueryList, "", orQueryList, "&&"); + } +// } else if (koValue.equals(encodedChString)) { +// orQueryList.add(koValue + SEPARATOR + filterVal + SEPARATOR + sortedPopFreq.get(0) + SEPARATOR +// + sortedPopFreq.get(1)); +// } else { +// List tmpAndQueryList = new ArrayList<>(2); +// tmpAndQueryList.add(koValue + SEPARATOR + filterVal + SEPARATOR + sortedPopFreq.get(0)); +// tmpAndQueryList.add(koValue + SEPARATOR + filterVal + SEPARATOR + sortedPopFreq.get(1)); +// parseStringValue(tmpAndQueryList, "", orQueryList, "&&"); +// } + } else { + List tmpAndQueryList = new ArrayList<>(2); + tmpAndQueryList.add(koValue + SEPARATOR + filterVal + SEPARATOR + sortedPopFreq.get(0)); + tmpAndQueryList.add(koValue + SEPARATOR + filterVal + SEPARATOR + sortedPopFreq.get(1)); + parseStringValue(tmpAndQueryList, "", orQueryList, "&&"); + + if (koValue.equals(encodedChString)) { + orQueryList.add(koValue + SEPARATOR + filterVal + SEPARATOR + sortedPopFreq.get(0) + SEPARATOR + + sortedPopFreq.get(1)); + } + } } } } parseStringValue(orQueryList, "", andQueryList, "||"); - } else { + } else { // + 1x POP FREQ for (List tmpPopFreqList : popFreqQueryList.values()) { List orQueryList = new LinkedList<>(); for (String popFreq : tmpPopFreqList) { for (String koValue : koValues) { + List ctList = koValue.equals(delOverlap) ? INCLUDED_DEL_OVERLAP_CONSEQUENCE_TYPES + : ALL_CONSEQUENCE_TYPES; List finalFilterValues = koValue.equals(encodedChString) ? chFilterValues : filterValues; for (String filterVal : finalFilterValues) { - orQueryList.add(koValue + SEPARATOR + filterVal + SEPARATOR + popFreq); + // This is how it should be filtered +// orQueryList.add(koValue + SEPARATOR + filterVal + SEPARATOR + popFreq); + for (String ctValue : ctList) { + orQueryList.add(koValue + SEPARATOR + filterVal + SEPARATOR + ctValue + SEPARATOR + popFreq); + } } } } @@ -391,6 +524,7 @@ private void buildComplexQuery(List koValues, List filterValues, } parseStringValue(andQueryList, RgaDataModel.COMPOUND_FILTERS, filterList, "&&"); } + } public static List generateSortedCombinations(List list) { diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/rga/RgaUtils.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/rga/RgaUtils.java index 10bec244387..a506c98c34b 100644 --- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/rga/RgaUtils.java +++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/rga/RgaUtils.java @@ -43,6 +43,21 @@ class RgaUtils { public static final Set ALL_PARAMS; public static final Map> PARAM_TYPES; + // CONSEQUENCE TYPE + public static final List CONSEQUENCE_TYPE_LIST = Arrays.asList("start_retained_variant", "upstream_variant", + "3_prime_UTR_variant", "splice_acceptor_variant", "transcript_amplification", "upstream_gene_variant", + "RNA_polymerase_promoter", "non_coding_transcript_exon_variant", "non_coding_transcript_variant", "inframe_variant", + "transcript_ablation", "splice_donor_variant", "synonymous_variant", "feature_elongation", "feature_truncation", + "miRNA_target_site", "exon_variant", "downstream_gene_variant", "stop_retained_variant", "TF_binding_site_variant", + "initiator_codon_variant", "coding_sequence_variant", "protein_altering_variant", "intergenic_variant", + "terminator_codon_variant", "frameshift_variant", "DNAseI_hypersensitive_site", "feature_variant", "2KB_downstream_variant", + "intron_variant", "splice_region_variant", "5_prime_UTR_variant", "SNP", "stop_gained", "regulatory_region_amplification", + "2KB_upstream_variant", "miRNA", "lincRNA", "start_lost", "SNV", "CpG_island", "downstream_variant", + "NMD_transcript_variant", "2KB_downstream_gene_variant", "TFBS_amplification", "missense_variant", + "regulatory_region_ablation", "mature_miRNA_variant", "stop_lost", "structural_variant", "regulatory_region_variant", + "TFBS_ablation", "copy_number_change", "2KB_upstream_gene_variant", "polypeptide_variation_site", "inframe_deletion", + "inframe_insertion", "incomplete_terminal_codon_variant"); + private static final Logger logger; static { @@ -60,21 +75,7 @@ class RgaUtils { ENCODE_MAP.put(PASS, "P"); ENCODE_MAP.put(NOT_PASS, "NP"); - // CONSEQUENCE TYPE - List consequenceTypeList = Arrays.asList("start_retained_variant", "upstream_variant", "3_prime_UTR_variant", - "splice_acceptor_variant", "transcript_amplification", "upstream_gene_variant", "RNA_polymerase_promoter", - "non_coding_transcript_exon_variant", "non_coding_transcript_variant", "inframe_variant", "transcript_ablation", - "splice_donor_variant", "synonymous_variant", "feature_elongation", "feature_truncation", "miRNA_target_site", - "exon_variant", "downstream_gene_variant", "stop_retained_variant", "TF_binding_site_variant", "initiator_codon_variant", - "coding_sequence_variant", "protein_altering_variant", "intergenic_variant", "terminator_codon_variant", - "frameshift_variant", "DNAseI_hypersensitive_site", "feature_variant", "2KB_downstream_variant", "intron_variant", - "splice_region_variant", "5_prime_UTR_variant", "SNP", "stop_gained", "regulatory_region_amplification", - "2KB_upstream_variant", "miRNA", "lincRNA", "start_lost", "SNV", "CpG_island", "downstream_variant", - "NMD_transcript_variant", "2KB_downstream_gene_variant", "TFBS_amplification", "missense_variant", - "regulatory_region_ablation", "mature_miRNA_variant", "stop_lost", "structural_variant", "regulatory_region_variant", - "TFBS_ablation", "copy_number_change", "2KB_upstream_gene_variant", "polypeptide_variation_site", "inframe_deletion", - "inframe_insertion", "incomplete_terminal_codon_variant"); - for (String consequenceType : consequenceTypeList) { + for (String consequenceType : CONSEQUENCE_TYPE_LIST) { ENCODE_MAP.put(consequenceType, String.valueOf(VariantQueryUtils.parseConsequenceType(consequenceType))); } @@ -323,16 +324,16 @@ public static Set generateCompoundHeterozygousPairCombination(List> previousIteration = result; - for (int i = 1; i < 4; i++) { + for (int i = 1; i < 3; i++) { // The list will contain all Filter, CT or PF combinations between variant1 and variant2 in a sorted manner to reduce the // number of terms List> sortedCombinations = generateSortedCombinations(variant1.get(i), variant2.get(i)); @@ -348,16 +349,6 @@ public static Set generateCompoundHeterozygousPairCombination(List> sortedPfCombinations = generateSortedCombinations(variant1.get(3), variant2.get(3)); - for (List previousValues : previousIteration) { - for (List values : sortedPfCombinations) { - List newValues = new ArrayList<>(previousValues); - newValues.addAll(values); - result.add(newValues); - } - } } result.addAll(newResults); previousIteration = newResults; @@ -365,6 +356,7 @@ public static Set generateCompoundHeterozygousPairCombination(List> sortedFilterList = generateSortedCombinations(variant1.get(1), variant2.get(1)); + List> sortedCtList = generateSortedCombinations(variant1.get(2), variant2.get(2)); List simplifiedPopFreqList = generateSimplifiedPopulationFrequencyList(variant1.get(3), variant2.get(3)); for (List filterList : sortedFilterList) { for (String popFreq : simplifiedPopFreqList) { @@ -374,6 +366,23 @@ public static Set generateCompoundHeterozygousPairCombination(List terms = new LinkedList<>(); + terms.add(knockout); + terms.addAll(filterList); + terms.addAll(simplifiedPopFreqList); + result.add(terms); + + // And: KO - F1 - F2 - CT1 - CT2 - PF1' - PF2' ; where PF' is equivalent to the highest PF of both variants + for (List ctList : sortedCtList) { + terms = new LinkedList<>(); + terms.add(knockout); + terms.addAll(filterList); + terms.addAll(ctList); + terms.addAll(simplifiedPopFreqList); + result.add(terms); + } } Set combinations = new HashSet<>(); @@ -797,23 +806,28 @@ private static CodedVariant decodeEncodedVariantId(String encodedVariant) throws } - public static class KnockoutTypeCount { - private Set variantIdQuery; - private Set dbSnpQuery; - private Set typeQuery; - private Set knockoutTypeQuery; - private Set clinicalSignificanceQuery; - private Set consequenceTypeQuery; - private List> popFreqQuery; - - // Valid CH pair variants - private Map> validChPairVariants; + public abstract static class KnockoutTypeCount { + private final Set variantIdQuery; + private final Set dbSnpQuery; + private final Set typeQuery; + private final Set knockoutTypeQuery; + private final Set clinicalSignificanceQuery; + private final Set consequenceTypeQuery; + private final List> popFreqQuery; private Set ids; - private Map> transcriptCompHetIdsMap; - private Map> transcriptDelOverlapIdsMap; - private Set homIds; - private Set hetIds; + protected Map> transcriptCompHetIdsMap; + protected Map> transcriptDelOverlapIdsMap; + protected Set compHetIds; + protected Set deletionOverlapIds; + protected Set homIds; + protected Set hetIds; + + private int numIds; + private int numHomAltIds; + private int numHetIds; + private int numCompHetIds; + private int numDelOverlapIds; public KnockoutTypeCount(Query query) throws RgaException { variantIdQuery = new HashSet<>(); @@ -823,10 +837,11 @@ public KnockoutTypeCount(Query query) throws RgaException { clinicalSignificanceQuery = new HashSet<>(); typeQuery = new HashSet<>(); consequenceTypeQuery = new HashSet<>(); - validChPairVariants = new HashMap<>(); ids = new HashSet<>(); transcriptCompHetIdsMap = new HashMap<>(); transcriptDelOverlapIdsMap = new HashMap<>(); + compHetIds = new HashSet<>(); + deletionOverlapIds = new HashSet<>(); homIds = new HashSet<>(); hetIds = new HashSet<>(); @@ -848,6 +863,12 @@ public KnockoutTypeCount(Query query) throws RgaException { popFreqQuery.add(new HashSet<>(values)); } } + + numIds = 0; + numHomAltIds = 0; + numHetIds = 0; + numCompHetIds = 0; + numDelOverlapIds = 0; } public boolean passesFilter(RgaUtils.CodedFeature codedFeature) { @@ -892,7 +913,7 @@ public void processFeature(RgaUtils.CodedFeature codedFeature) { return; } - ids.add(codedFeature.getId()); +// ids.add(codedFeature.getId()); KnockoutVariant.KnockoutType knockoutType = KnockoutVariant.KnockoutType.valueOf(codedFeature.getKnockoutType()); switch (knockoutType) { case HOM_ALT: @@ -918,6 +939,114 @@ public void processFeature(RgaUtils.CodedFeature codedFeature) { } } + protected void calculateStats() { + numCompHetIds = compHetIds.size(); + numDelOverlapIds = deletionOverlapIds.size(); + numHomAltIds = homIds.size(); + numHetIds = hetIds.size(); + + ids.addAll(homIds); + ids.addAll(hetIds); + ids.addAll(compHetIds); + ids.addAll(deletionOverlapIds); + numIds = ids.size(); + } + + public Set getIds() { + return ids; + } + + public int getNumIds() { + return numIds; + } + + public int getNumCompHetIds() { + return numCompHetIds; + } + + public int getNumHomAltIds() { + return numHomAltIds; + } + + public int getNumHetIds() { + return numHetIds; + } + + public int getNumDelOverlapIds() { + return numDelOverlapIds; + } + + public Map> getTranscriptCompHetIdsMap() { + Map> compHetMap = new HashMap<>(); + for (Map.Entry> entry : transcriptCompHetIdsMap.entrySet()) { + if (entry.getValue().size() > 1) { + compHetMap.put(entry.getKey(), new ArrayList<>(entry.getValue())); + } + } + return compHetMap; + } + } + + public static class IndividualKnockoutTypeCount extends KnockoutTypeCount { + + private int numHomAltCompHetIds; + private int numCompHetDelOverlapIds; + + public IndividualKnockoutTypeCount(Query query) throws RgaException { + super(query); + } + + @Override + public void calculateStats() { + compHetIds = transcriptCompHetIdsMap.values() + .stream() + .flatMap(Set::stream) + .collect(Collectors.toSet()); + + deletionOverlapIds = transcriptDelOverlapIdsMap.values() + .stream() + .flatMap(Set::stream) + .collect(Collectors.toSet()); + + Set homAltCompHetIds = new HashSet<>(homIds); + homAltCompHetIds.addAll(compHetIds); + numHomAltCompHetIds = homAltCompHetIds.size(); + + Set compHetDelOverlapIds = new HashSet<>(compHetIds); + compHetDelOverlapIds.addAll(deletionOverlapIds); + numCompHetDelOverlapIds = compHetDelOverlapIds.size(); + + super.calculateStats(); + } + + public int getNumHomAltCompHetIds() { + return numHomAltCompHetIds; + } + + public int getNumCompHetDelOverlapIds() { + return numCompHetDelOverlapIds; + } + } + + public static class VariantKnockoutTypeCount extends KnockoutTypeCount { + + // Valid CH pair variants + private Map> validPairedChPairVariants; + private Set validChPairVariants; + + private final RgaQueryParams.CompHetQueryMode compHetQueryMode; + + private int numPairedCompHetIds; + private int numPairedDelOverlapIds; + + public VariantKnockoutTypeCount(Query query, RgaQueryParams.CompHetQueryMode compHetQueryMode) throws RgaException { + super(query); + this.compHetQueryMode = compHetQueryMode; + + this.validPairedChPairVariants = new HashMap<>(); + this.validChPairVariants = new HashSet<>(); + } + public void processChPairFeature(RgaUtils.CodedChPairVariants codedFeature) { String leftVariant = codedFeature.getMaternalCodedVariant().getId(); String rightVariant = codedFeature.getPaternalCodedVariant().getId(); @@ -928,37 +1057,23 @@ public void processChPairFeature(RgaUtils.CodedChPairVariants codedFeature) { } // Keys are always lexicographically less than variants as values - if (!validChPairVariants.containsKey(leftVariant)) { - validChPairVariants.put(leftVariant, new HashSet<>()); + if (!validPairedChPairVariants.containsKey(leftVariant)) { + validPairedChPairVariants.put(leftVariant, new HashSet<>()); } - validChPairVariants.get(leftVariant).add(rightVariant); + validPairedChPairVariants.get(leftVariant).add(rightVariant); + validChPairVariants.add(leftVariant); + validChPairVariants.add(rightVariant); } - public Set getIds() { - return ids; - } - public int getNumIds() { - return ids.size(); - } - - public int getNumCompHetIds() { - return (int) transcriptCompHetIdsMap.values().stream().flatMap(Set::stream).distinct().count(); - } - - public int getNumPairedCompHetIds() { - int threshold = 250; + @Override + public void calculateStats() { + // Calculate number of comp_het pairs Set chPairs = new HashSet<>(); + Set pairedChPairs = new HashSet<>(); for (Map.Entry> entry : transcriptCompHetIdsMap.entrySet()) { Set chSet = entry.getValue(); if (chSet.size() > 1) { - if (chSet.size() > threshold) { - logger.warn("Showing a -1 value for the numPairedCompHet stats. More than {} COMP_HET variants found in" - + " transcript {}", threshold, entry.getKey()); - // Don't calculate this if the number of possible pairs is too big - return -1; - } - // Sort variants lexicographically so we just need to check once List sortedVariants = chSet.stream().sorted(String::compareTo).collect(Collectors.toList()); for (int i = 0; i < sortedVariants.size() - 1; i++) { @@ -966,70 +1081,67 @@ public int getNumPairedCompHetIds() { for (int j = i + 1; j < sortedVariants.size(); j++) { String rightVariant = sortedVariants.get(j); - if (validChPairVariants.containsKey(leftVariant) - && validChPairVariants.get(leftVariant).contains(rightVariant)) { - chPairs.add(leftVariant + "-" + rightVariant); + if (validPairedChPairVariants.containsKey(leftVariant) + && validPairedChPairVariants.get(leftVariant).contains(rightVariant)) { + pairedChPairs.add(leftVariant + "-" + rightVariant); + chPairs.add(leftVariant); + chPairs.add(rightVariant); } } } } } - return chPairs.size(); - } + numPairedCompHetIds = pairedChPairs.size(); + + // If we are searching by pairs, we should only count those that actually formed a pair + if (compHetQueryMode.equals(RgaQueryParams.CompHetQueryMode.PAIR)) { + compHetIds = transcriptCompHetIdsMap.values() + .stream() + .flatMap(Set::stream) + .filter(chPairs::contains) + .collect(Collectors.toSet()); + } else { + compHetIds = transcriptCompHetIdsMap.values() + .stream() + .flatMap(Set::stream) + .collect(Collectors.toSet()); + } - public int getNumPairedDelOverlapIds() { - int numPairedDelOverlap = 0; + // Process deletion overlap pairs + Set delOverlapPairs = new HashSet<>(); for (Map.Entry> entry : transcriptDelOverlapIdsMap.entrySet()) { Set chSet = entry.getValue(); if (chSet.size() > 1) { List variantList = chSet.stream().map(Variant::new).collect(Collectors.toList()); for (int i = 0; i < variantList.size() - 1; i++) { for (int j = i + 1; j < variantList.size(); j++) { + deletionOverlapIds.add(variantList.get(i).toString()); + deletionOverlapIds.add(variantList.get(j).toString()); + // We simply check if two variants overlap. If they do, they are a valid pair if (variantList.get(i).overlapWith(variantList.get(j), true)) { - numPairedDelOverlap++; + String pair = concatSortedVariants(variantList.get(i).toString(), variantList.get(j).toString()); + delOverlapPairs.add(pair); } } } } } - return numPairedDelOverlap; - } + numPairedDelOverlapIds = delOverlapPairs.size(); - public int getNumHomIds() { - return homIds.size(); + super.calculateStats(); } - public int getNumHetIds() { - return hetIds.size(); + private String concatSortedVariants(String v1, String v2) { + return StringUtils.compare(v1, v2) <= 0 ? v1 + "__" + v2 : v2 + "__" + v1; } - public int getNumDelOverlapIds() { - return (int) transcriptDelOverlapIdsMap.values().stream().flatMap(Set::stream).distinct().count(); - } - - public int getNumHomAltCompHetIds() { - Set ids = new HashSet<>(homIds); - ids.addAll(transcriptCompHetIdsMap.values().stream().flatMap(Set::stream).collect(Collectors.toSet())); - return ids.size(); - } - - public int getNumCompHetDelOverlapIds() { - Set ids = new HashSet<>(); - ids.addAll(transcriptDelOverlapIdsMap.values().stream().flatMap(Set::stream).collect(Collectors.toSet())); - ids.addAll(transcriptCompHetIdsMap.values().stream().flatMap(Set::stream).collect(Collectors.toSet())); - return ids.size(); + public int getNumPairedCompHetIds() { + return numPairedCompHetIds; } - public Map> getTranscriptCompHetIdsMap() { - Map> compHetMap = new HashMap<>(); - for (Map.Entry> entry : transcriptCompHetIdsMap.entrySet()) { - if (entry.getValue().size() > 1) { - compHetMap.put(entry.getKey(), new ArrayList<>(entry.getValue())); - } - } - return compHetMap; + public int getNumPairedDelOverlapIds() { + return numPairedDelOverlapIds; } } - } diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/rga/iterators/SolrNativeIterator.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/rga/iterators/SolrNativeIterator.java index 10fe15491d0..0ccc66a5603 100644 --- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/rga/iterators/SolrNativeIterator.java +++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/rga/iterators/SolrNativeIterator.java @@ -68,6 +68,9 @@ public SolrNativeIterator(SolrClient solrClient, String collection, SolrQuery so @Override public boolean hasNext() { + if (listBuffer.isEmpty()) { + fetchNextBatch(); + } return !listBuffer.isEmpty(); } diff --git a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/manager/VariantCatalogQueryUtils.java b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/manager/VariantCatalogQueryUtils.java index 1cdba30f1f3..74114c5e1f2 100644 --- a/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/manager/VariantCatalogQueryUtils.java +++ b/opencga-analysis/src/main/java/org/opencb/opencga/analysis/variant/manager/VariantCatalogQueryUtils.java @@ -58,6 +58,7 @@ import org.opencb.opencga.core.response.OpenCGAResult; import org.opencb.opencga.storage.core.exceptions.StorageEngineException; import org.opencb.opencga.storage.core.metadata.VariantStorageMetadataManager; +import org.opencb.opencga.storage.core.metadata.models.Trio; import org.opencb.opencga.storage.core.utils.CellBaseUtils; import org.opencb.opencga.storage.core.variant.adaptors.VariantField; import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryException; @@ -549,7 +550,7 @@ public Query parseQuery(Query query, QueryOptions queryOptions, CellBaseUtils ce "Require at least one parent to get compound heterozygous"); } - query.append(SAMPLE_COMPOUND_HETEROZYGOUS.key(), Arrays.asList(childId, fatherId, motherId)); + query.append(SAMPLE_COMPOUND_HETEROZYGOUS.key(), new Trio(fatherId, motherId, childId)); } else { if (family.getDisorders().isEmpty()) { throw VariantQueryException.malformedParam(FAMILY, familyId, "Family doesn't have disorders"); @@ -1024,7 +1025,7 @@ private void processSampleFilter(Query query, String defaultStudyStr, String tok String fatherId = member.getFather() != null ? member.getFather().getId() : MISSING_SAMPLE; String motherId = member.getMother() != null ? member.getMother().getId() : MISSING_SAMPLE; - query.put(SAMPLE_COMPOUND_HETEROZYGOUS.key(), Arrays.asList(member.getId(), fatherId, motherId)); + query.put(SAMPLE_COMPOUND_HETEROZYGOUS.key(), new Trio(fatherId, motherId, member.getId())); query.remove(SAMPLE.key()); } else if (moi == ClinicalProperty.ModeOfInheritance.DE_NOVO) { query.put(SAMPLE_DE_NOVO.key(), member.getId()); diff --git a/opencga-core/src/main/java/org/opencb/opencga/core/config/RgaSearchConfiguration.java b/opencga-core/src/main/java/org/opencb/opencga/core/config/RgaSearchConfiguration.java new file mode 100644 index 00000000000..fc89f23420d --- /dev/null +++ b/opencga-core/src/main/java/org/opencb/opencga/core/config/RgaSearchConfiguration.java @@ -0,0 +1,58 @@ +package org.opencb.opencga.core.config; + +import java.util.List; + +public class RgaSearchConfiguration extends SearchConfiguration { + + private boolean cache; + private int cacheSize; + private String suffix; + + public RgaSearchConfiguration() { + } + + public RgaSearchConfiguration(List hosts, String configSet, String mode, String user, String password, String manager, + boolean active, int timeout, int insertBatchSize, boolean cache, int cacheSize, String suffix) { + super(hosts, configSet, mode, user, password, manager, active, timeout, insertBatchSize); + this.cache = cache; + this.cacheSize = cacheSize; + this.suffix = suffix; + } + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder("RgaSearchConfiguration{"); + sb.append("cache=").append(cache); + sb.append(", cacheSize=").append(cacheSize); + sb.append(", suffix='").append(suffix).append('\''); + sb.append('}'); + return sb.toString(); + } + + public boolean isCache() { + return cache; + } + + public RgaSearchConfiguration setCache(boolean cache) { + this.cache = cache; + return this; + } + + public int getCacheSize() { + return cacheSize; + } + + public RgaSearchConfiguration setCacheSize(int cacheSize) { + this.cacheSize = cacheSize; + return this; + } + + public String getSuffix() { + return suffix; + } + + public RgaSearchConfiguration setSuffix(String suffix) { + this.suffix = suffix; + return this; + } +} diff --git a/opencga-core/src/main/java/org/opencb/opencga/core/config/storage/StorageConfiguration.java b/opencga-core/src/main/java/org/opencb/opencga/core/config/storage/StorageConfiguration.java index 89157650f4f..45c5e6091d7 100644 --- a/opencga-core/src/main/java/org/opencb/opencga/core/config/storage/StorageConfiguration.java +++ b/opencga-core/src/main/java/org/opencb/opencga/core/config/storage/StorageConfiguration.java @@ -23,6 +23,7 @@ import com.fasterxml.jackson.dataformat.yaml.YAMLFactory; import org.apache.commons.lang3.StringUtils; import org.opencb.commons.datastore.core.ObjectMap; +import org.opencb.opencga.core.config.RgaSearchConfiguration; import org.opencb.opencga.core.config.SearchConfiguration; import org.opencb.opencga.core.config.ServerConfiguration; import org.slf4j.Logger; @@ -43,7 +44,7 @@ public class StorageConfiguration { private CacheConfiguration cache; private SearchConfiguration search; private SearchConfiguration clinical; - private SearchConfiguration rga; + private RgaSearchConfiguration rga; private ObjectMap alignment; private StorageEnginesConfiguration variant; private IOConfiguration io; @@ -61,7 +62,7 @@ public StorageConfiguration() { this.cache = new CacheConfiguration(); this.search = new SearchConfiguration(); this.clinical = new SearchConfiguration(); - this.rga = new SearchConfiguration(); + this.rga = new RgaSearchConfiguration(); } @@ -192,11 +193,11 @@ public StorageConfiguration setClinical(SearchConfiguration clinical) { return this; } - public SearchConfiguration getRga() { + public RgaSearchConfiguration getRga() { return rga; } - public StorageConfiguration setRga(SearchConfiguration rga) { + public StorageConfiguration setRga(RgaSearchConfiguration rga) { this.rga = rga; return this; } diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/metadata/models/Trio.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/metadata/models/Trio.java index 58e0e6cff7f..2c12a0021e0 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/metadata/models/Trio.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/metadata/models/Trio.java @@ -1,7 +1,10 @@ package org.opencb.opencga.storage.core.metadata.models; +import org.apache.logging.log4j.util.Strings; + import java.util.ArrayList; import java.util.List; +import java.util.Objects; public class Trio { private final String id; @@ -9,6 +12,21 @@ public class Trio { private final String mother; private final String child; + public Trio(List trio) { + this(null, trio); + } + + public Trio(String id, List trio) { + this.id = id; + this.father = trio.get(1); + this.mother = trio.get(2); + this.child = trio.get(0); + } + + public Trio(String father, String mother, String child) { + this(null, father, mother, child); + } + public Trio(String id, String father, String mother, String child) { this.id = id; this.father = father; @@ -43,4 +61,29 @@ public List toList() { } return list; } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + Trio trio = (Trio) o; + return Objects.equals(id, trio.id) + && Objects.equals(father, trio.father) + && Objects.equals(mother, trio.mother) + && Objects.equals(child, trio.child); + } + + @Override + public int hashCode() { + return Objects.hash(id, father, mother, child); + } + + @Override + public String toString() { + return Strings.join(toList(), ','); + } } diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageEngine.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageEngine.java index 5e2558339de..f1ae7490d50 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageEngine.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/VariantStorageEngine.java @@ -1073,7 +1073,7 @@ public VariantQueryResult getCompoundHeterozygous(String study, String father = StringUtils.isEmpty(father) ? CompoundHeterozygousQueryExecutor.MISSING_SAMPLE : father; mother = StringUtils.isEmpty(mother) ? CompoundHeterozygousQueryExecutor.MISSING_SAMPLE : mother; query = new Query(query) - .append(VariantQueryUtils.SAMPLE_COMPOUND_HETEROZYGOUS.key(), Arrays.asList(child, father, mother)) + .append(VariantQueryUtils.SAMPLE_COMPOUND_HETEROZYGOUS.key(), new Trio(father, mother, child)) .append(VariantQueryParam.STUDY.key(), study); return get(query, options); diff --git a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/CompoundHeterozygousQueryExecutor.java b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/CompoundHeterozygousQueryExecutor.java index ece9915aa76..db0815b5135 100644 --- a/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/CompoundHeterozygousQueryExecutor.java +++ b/opencga-storage/opencga-storage-core/src/main/java/org/opencb/opencga/storage/core/variant/query/executors/CompoundHeterozygousQueryExecutor.java @@ -210,15 +210,15 @@ protected List getAndCheckIncludeSample(Query query, String proband, Str // Check it has all required members if (!includeSamples.contains(proband)) { throw VariantQueryException.malformedParam(VariantQueryParam.INCLUDE_SAMPLE, includeSamples.toString(), - "Can not compute CompoundHeterozygous not including the proband in the query"); + "Can not compute CompoundHeterozygous not including the proband '" + proband + "' in the query"); } if (!mother.equals(MISSING_SAMPLE) && !includeSamples.contains(mother)) { throw VariantQueryException.malformedParam(VariantQueryParam.INCLUDE_SAMPLE, includeSamples.toString(), - "Can not compute CompoundHeterozygous not including the mother in the query"); + "Can not compute CompoundHeterozygous not including the mother '" + mother + "' in the query"); } if (!father.equals(MISSING_SAMPLE) && !includeSamples.contains(father)) { throw VariantQueryException.malformedParam(VariantQueryParam.INCLUDE_SAMPLE, includeSamples.toString(), - "Can not compute CompoundHeterozygous not including the father in the query"); + "Can not compute CompoundHeterozygous not including the father '" + father + "' in the query"); } } else { if (father.equals(MISSING_SAMPLE)) { @@ -265,9 +265,13 @@ protected VariantDBIterator getRawIterator(String proband, String father, String } protected Trio getCompHetTrio(Query query) { + Object o = query.get(SAMPLE_COMPOUND_HETEROZYGOUS.key()); + if (o instanceof Trio) { + return ((Trio) o); + } List samples = query.getAsStringList(VariantQueryUtils.SAMPLE_COMPOUND_HETEROZYGOUS.key()); if (samples.size() == 3) { - return new Trio(null, samples.get(2), samples.get(0), samples.get(1)); + return new Trio(samples); } else if (samples.size() == 1) { int studyId = metadataManager.getStudyId(query.getString(VariantQueryParam.STUDY.key())); String sample = samples.get(0); diff --git a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/query/executors/CompoundHeterozygousQueryExecutorTest.java b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/query/executors/CompoundHeterozygousQueryExecutorTest.java index 23144754972..d0f10800f9b 100644 --- a/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/query/executors/CompoundHeterozygousQueryExecutorTest.java +++ b/opencga-storage/opencga-storage-core/src/test/java/org/opencb/opencga/storage/core/variant/query/executors/CompoundHeterozygousQueryExecutorTest.java @@ -5,6 +5,7 @@ import org.mockito.Mockito; import org.opencb.commons.datastore.core.Query; import org.opencb.commons.datastore.core.QueryOptions; +import org.opencb.opencga.storage.core.metadata.models.Trio; import org.opencb.opencga.storage.core.variant.adaptors.VariantField; import org.opencb.opencga.storage.core.variant.adaptors.VariantIterable; import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryException; @@ -19,6 +20,7 @@ import static org.junit.Assert.assertFalse; import static org.opencb.opencga.storage.core.variant.adaptors.VariantField.*; import static org.opencb.opencga.storage.core.variant.query.VariantQueryUtils.ALL; +import static org.opencb.opencga.storage.core.variant.query.VariantQueryUtils.SAMPLE_COMPOUND_HETEROZYGOUS; /** * Created on 09/04/19. @@ -65,6 +67,19 @@ public void testBuildQueryOptions() { STUDIES, STUDIES_SAMPLES)), includeFields); } + @Test + public void getCompHetTrio() { + Trio expected = new Trio("F", "M", "C"); + Trio actual = ch.getCompHetTrio(new Query(SAMPLE_COMPOUND_HETEROZYGOUS.key(), expected.toList())); + assertEquals(expected, actual); + + actual = ch.getCompHetTrio(new Query(SAMPLE_COMPOUND_HETEROZYGOUS.key(), expected.toString())); + assertEquals(expected, actual); + + actual = ch.getCompHetTrio(new Query(SAMPLE_COMPOUND_HETEROZYGOUS.key(), expected)); + assertEquals(expected, actual); + } + @Test public void testGetAndCheckIncludeSample() {