From 6844735a4513fb747780b346441f8da5107d0fe5 Mon Sep 17 00:00:00 2001 From: Antonio Rivero <38889534+Antonio-RiveroMartnez@users.noreply.github.com> Date: Fri, 12 Apr 2024 00:54:21 +0200 Subject: [PATCH] fix(time_offset): improved LIMIT-handling in advanced analytics (#27934) --- superset/common/query_context_processor.py | 7 ++ .../integration_tests/query_context_tests.py | 67 ++++++++++++++++++- 2 files changed, 73 insertions(+), 1 deletion(-) diff --git a/superset/common/query_context_processor.py b/superset/common/query_context_processor.py index f003f8ed30c2e..55c80386a316e 100644 --- a/superset/common/query_context_processor.py +++ b/superset/common/query_context_processor.py @@ -455,6 +455,13 @@ def processing_time_offsets( # pylint: disable=too-many-locals,too-many-stateme for metric in metric_names } + # When the original query has limit or offset we won't apply those + # to the subquery so we prevent data inconsistency due to missing records + # in the dataframes when performing the join + if query_object.row_limit or query_object.row_offset: + query_object_clone_dct["row_limit"] = config["ROW_LIMIT"] + query_object_clone_dct["row_offset"] = 0 + if isinstance(self._qc_datasource, Query): result = self._qc_datasource.exc_query(query_object_clone_dct) else: diff --git a/tests/integration_tests/query_context_tests.py b/tests/integration_tests/query_context_tests.py index dc400de5ef68e..d36b6c85c14c2 100644 --- a/tests/integration_tests/query_context_tests.py +++ b/tests/integration_tests/query_context_tests.py @@ -17,13 +17,14 @@ import re import time from typing import Any +from unittest.mock import Mock, patch import numpy as np import pandas as pd import pytest from pandas import DateOffset -from superset import db +from superset import app, db from superset.charts.schemas import ChartDataQueryContextSchema from superset.common.chart_data import ChartDataResultFormat, ChartDataResultType from superset.common.query_context import QueryContext @@ -674,6 
+675,70 @@ def test_time_offsets_accuracy(self): == df_3_years_later.loc[index]["sum__num"] ) + @pytest.mark.usefixtures("load_birth_names_dashboard_with_slices") + @patch("superset.common.query_context.QueryContext.get_query_result") + def test_time_offsets_in_query_object_no_limit(self, query_result_mock): + """ + Ensure that time_offsets can generate the correct queries and + it doesn't use the row_limit or row_offset from the original + query object + """ + payload = get_query_context("birth_names") + payload["queries"][0]["columns"] = [ + { + "timeGrain": "P1D", + "columnType": "BASE_AXIS", + "sqlExpression": "ds", + "label": "ds", + "expressionType": "SQL", + } + ] + payload["queries"][0]["metrics"] = ["sum__num"] + payload["queries"][0]["groupby"] = ["name"] + payload["queries"][0]["is_timeseries"] = True + payload["queries"][0]["row_limit"] = 100 + payload["queries"][0]["row_offset"] = 10 + payload["queries"][0]["time_range"] = "1990 : 1991" + + initial_data = { + "__timestamp": ["1990-01-01", "1990-01-01"], + "name": ["zban", "ahwb"], + "sum__num": [43571, 27225], + } + initial_df = pd.DataFrame(initial_data) + + mock_query_result = Mock() + mock_query_result.df = initial_df + side_effects = [mock_query_result] + query_result_mock.side_effect = side_effects + # Proceed with the test as before + query_context = ChartDataQueryContextSchema().load(payload) + query_object = query_context.queries[0] + # First call to get_query_result, should return initial_df + query_result = query_context.get_query_result(query_object) + df = query_result.df + # Setup the payload for time offsets + payload["queries"][0]["time_offsets"] = ["1 year ago", "1 year later"] + query_context = ChartDataQueryContextSchema().load(payload) + query_object = query_context.queries[0] + time_offsets_obj = query_context.processing_time_offsets(df, query_object) + sqls = time_offsets_obj["queries"] + row_limit_value = app.config["ROW_LIMIT"] + row_limit_pattern_with_config_value = r"LIMIT " + 
re.escape( + str(row_limit_value) + ) + self.assertEqual(len(sqls), 2) + # 1 year ago + assert re.search(r"1989-01-01.+1990-01-01", sqls[0], re.S) + assert not re.search(r"LIMIT 100", sqls[0], re.S) + assert not re.search(r"OFFSET 10", sqls[0], re.S) + assert re.search(row_limit_pattern_with_config_value, sqls[0], re.S) + # 1 year later + assert re.search(r"1991-01-01.+1992-01-01", sqls[1], re.S) + assert not re.search(r"LIMIT 100", sqls[1], re.S) + assert not re.search(r"OFFSET 10", sqls[1], re.S) + assert re.search(row_limit_pattern_with_config_value, sqls[1], re.S) + def test_get_label_map(app_context, virtual_dataset_comma_in_column_value): qc = QueryContextFactory().create(