From 6844735a4513fb747780b346441f8da5107d0fe5 Mon Sep 17 00:00:00 2001 From: Antonio Rivero <38889534+Antonio-RiveroMartnez@users.noreply.github.com> Date: Fri, 12 Apr 2024 00:54:21 +0200 Subject: [PATCH] fix(time_offset): improved LIMIT-handling in advanced analytics (#27934) --- superset/common/query_context_processor.py | 7 ++ .../integration_tests/query_context_tests.py | 67 ++++++++++++++++++- 2 files changed, 73 insertions(+), 1 deletion(-) diff --git a/superset/common/query_context_processor.py b/superset/common/query_context_processor.py index f003f8ed30c2e..55c80386a316e 100644 --- a/superset/common/query_context_processor.py +++ b/superset/common/query_context_processor.py @@ -455,6 +455,13 @@ def processing_time_offsets( # pylint: disable=too-many-locals,too-many-stateme for metric in metric_names } + # When the original query has limit or offset we won't apply those + # to the subquery so we prevent data inconsistency due to missing records + # in the dataframes when performing the join + if query_object.row_limit or query_object.row_offset: + query_object_clone_dct["row_limit"] = config["ROW_LIMIT"] + query_object_clone_dct["row_offset"] = 0 + if isinstance(self._qc_datasource, Query): result = self._qc_datasource.exc_query(query_object_clone_dct) else: diff --git a/tests/integration_tests/query_context_tests.py b/tests/integration_tests/query_context_tests.py index dc400de5ef68e..d36b6c85c14c2 100644 --- a/tests/integration_tests/query_context_tests.py +++ b/tests/integration_tests/query_context_tests.py @@ -17,13 +17,14 @@ import re import time from typing import Any +from unittest.mock import Mock, patch import numpy as np import pandas as pd import pytest from pandas import DateOffset -from superset import db +from superset import app, db from superset.charts.schemas import ChartDataQueryContextSchema from superset.common.chart_data import ChartDataResultFormat, ChartDataResultType from superset.common.query_context import QueryContext @@ -674,6 
+675,70 @@ def test_time_offsets_accuracy(self): == df_3_years_later.loc[index]["sum__num"] ) + @pytest.mark.usefixtures("load_birth_names_dashboard_with_slices") + @patch("superset.common.query_context.QueryContext.get_query_result") + def test_time_offsets_in_query_object_no_limit(self, query_result_mock): + """ + Ensure that time_offsets can generate the correct queries and + it doesn't use the row_limit or row_offset from the original + query object + """ + payload = get_query_context("birth_names") + payload["queries"][0]["columns"] = [ + { + "timeGrain": "P1D", + "columnType": "BASE_AXIS", + "sqlExpression": "ds", + "label": "ds", + "expressionType": "SQL", + } + ] + payload["queries"][0]["metrics"] = ["sum__num"] + payload["queries"][0]["groupby"] = ["name"] + payload["queries"][0]["is_timeseries"] = True + payload["queries"][0]["row_limit"] = 100 + payload["queries"][0]["row_offset"] = 10 + payload["queries"][0]["time_range"] = "1990 : 1991" + + initial_data = { + "__timestamp": ["1990-01-01", "1990-01-01"], + "name": ["zban", "ahwb"], + "sum__num": [43571, 27225], + } + initial_df = pd.DataFrame(initial_data) + + mock_query_result = Mock() + mock_query_result.df = initial_df + side_effects = [mock_query_result] + query_result_mock.side_effect = side_effects + # Proceed with the test as before + query_context = ChartDataQueryContextSchema().load(payload) + query_object = query_context.queries[0] + # First call to get_query_result, should return initial_df + query_result = query_context.get_query_result(query_object) + df = query_result.df + # Setup the payload for time offsets + payload["queries"][0]["time_offsets"] = ["1 year ago", "1 year later"] + query_context = ChartDataQueryContextSchema().load(payload) + query_object = query_context.queries[0] + time_offsets_obj = query_context.processing_time_offsets(df, query_object) + sqls = time_offsets_obj["queries"] + row_limit_value = app.config["ROW_LIMIT"] + row_limit_pattern_with_config_value = r"LIMIT " + 
re.escape( + str(row_limit_value) + ) + self.assertEqual(len(sqls), 2) + # 1 year ago + assert re.search(r"1989-01-01.+1990-01-01", sqls[0], re.S) + assert not re.search(r"LIMIT 100", sqls[0], re.S) + assert not re.search(r"OFFSET 10", sqls[0], re.S) + assert re.search(row_limit_pattern_with_config_value, sqls[0], re.S) + # 1 year later + assert re.search(r"1991-01-01.+1992-01-01", sqls[1], re.S) + assert not re.search(r"LIMIT 100", sqls[1], re.S) + assert not re.search(r"OFFSET 10", sqls[1], re.S) + assert re.search(row_limit_pattern_with_config_value, sqls[1], re.S) + def test_get_label_map(app_context, virtual_dataset_comma_in_column_value): qc = QueryContextFactory().create(