From cedf33c8917401a8f2295edfc1c3c09526521d88 Mon Sep 17 00:00:00 2001 From: Eric Duong Date: Tue, 20 Jul 2021 13:36:22 -0400 Subject: [PATCH] Add median time for funnel conversion data (#5203) * add median time for conversion steps * patch tests * fix time to convert funnel query * fix strict and unordered * add median conversion time to base * fix * more tests --- ee/clickhouse/queries/funnels/base.py | 30 +++++++++++++++---- ee/clickhouse/queries/funnels/funnel.py | 13 +++++--- .../queries/funnels/funnel_strict.py | 4 +-- .../queries/funnels/funnel_time_to_convert.py | 4 +-- .../queries/funnels/funnel_unordered.py | 8 ++--- .../queries/funnels/test/breakdown_cases.py | 26 ++++++++++++++++ .../queries/funnels/test/test_funnel.py | 5 ++++ .../funnels/test/test_funnel_strict.py | 4 +++ .../funnels/test/test_funnel_unordered.py | 4 +++ 9 files changed, 80 insertions(+), 18 deletions(-) diff --git a/ee/clickhouse/queries/funnels/base.py b/ee/clickhouse/queries/funnels/base.py index 1c46292698e1a..cfe7ddb9becea 100644 --- a/ee/clickhouse/queries/funnels/base.py +++ b/ee/clickhouse/queries/funnels/base.py @@ -61,10 +61,13 @@ def _format_single_funnel(self, results, with_breakdown=False): serialized_result = self._serialize_step(step, total_people, []) if step.order > 0: serialized_result.update( - {"average_conversion_time": results[step.order + len(self._filter.entities) - 1]} + { + "average_conversion_time": results[step.order + len(self._filter.entities) - 1], + "median_conversion_time": results[step.order + len(self._filter.entities) * 2 - 2], + } ) else: - serialized_result.update({"average_conversion_time": None}) + serialized_result.update({"average_conversion_time": None, "median_conversion_time": None}) if with_breakdown: serialized_result.update({"breakdown": results[-1]}) @@ -113,14 +116,13 @@ def _exec_query(self) -> List[Tuple]: self._filter = self._filter.with_data(data) query = self.get_query() - return sync_execute(query, self.params) def _get_step_times(self, max_steps: int): conditions: List[str] = [] for i in range(1, max_steps): conditions.append( - f"if(isNotNull(latest_{i}), dateDiff('second', toDateTime(latest_{i - 1}), toDateTime(latest_{i})), NULL) step_{i}_average_conversion_time" + f"if(isNotNull(latest_{i}), dateDiff('second', toDateTime(latest_{i - 1}), toDateTime(latest_{i})), NULL) step_{i}_conversion_time" ) formatted = ", ".join(conditions) @@ -316,10 +318,26 @@ def _get_count_columns(self, max_steps: int): return ", ".join(cols) - def _get_step_time_avgs(self, max_steps: int): + def _get_step_time_avgs(self, max_steps: int, inner_query: bool = False): conditions: List[str] = [] for i in range(1, max_steps): - conditions.append(f"avg(step_{i}_average_conversion_time) step_{i}_average_conversion_time") + conditions.append( + f"avg(step_{i}_conversion_time) step_{i}_average_conversion_time_inner" + if inner_query + else f"avg(step_{i}_average_conversion_time_inner) step_{i}_average_conversion_time" + ) + + formatted = ", ".join(conditions) + return f", {formatted}" if formatted else "" + + def _get_step_time_median(self, max_steps: int, inner_query: bool = False): + conditions: List[str] = [] + for i in range(1, max_steps): + conditions.append( + f"median(step_{i}_conversion_time) step_{i}_median_conversion_time_inner" + if inner_query + else f"median(step_{i}_median_conversion_time_inner) step_{i}_median_conversion_time" + ) formatted = ", ".join(conditions) return f", {formatted}" if formatted else "" diff --git a/ee/clickhouse/queries/funnels/funnel.py b/ee/clickhouse/queries/funnels/funnel.py index 1188adab7987c..3874ac98a29d7 100644 --- a/ee/clickhouse/queries/funnels/funnel.py +++ b/ee/clickhouse/queries/funnels/funnel.py @@ -33,7 +33,7 @@ def get_query(self): breakdown_clause = self._get_breakdown_prop() return f""" - SELECT {self._get_count_columns(max_steps)} {self._get_people_columns(max_steps)} {self._get_step_time_avgs(max_steps)} {breakdown_clause} FROM ( + SELECT {self._get_count_columns(max_steps)} {self._get_people_columns(max_steps)} {self._get_step_time_avgs(max_steps)} {self._get_step_time_median(max_steps)} {breakdown_clause} FROM ( {self.get_step_counts_query()} ) {'GROUP BY prop' if breakdown_clause != '' else ''} SETTINGS allow_experimental_window_functions = 1 """ @@ -52,7 +52,7 @@ def get_step_counts_query(self): max_steps = len(self._filter.entities) breakdown_clause = self._get_breakdown_prop() - return f"""SELECT person_id, max(steps) AS steps {self._get_step_time_avgs(max_steps)} {breakdown_clause} FROM ( + return f"""SELECT person_id, max(steps) AS steps {self._get_step_time_avgs(max_steps, inner_query=True)} {self._get_step_time_median(max_steps, inner_query=True)} {breakdown_clause} FROM ( {steps_per_person_query} ) GROUP BY person_id {breakdown_clause} """ @@ -82,9 +82,14 @@ def _format_single_funnel(self, result, with_breakdown=False): serialized_result = self._serialize_step(step, total_people, relevant_people[0:100]) if step.order > 0: - serialized_result.update({"average_conversion_time": result[step.order + num_entities * 2 - 1]}) + serialized_result.update( + { + "average_conversion_time": result[step.order + num_entities * 2 - 1], + "median_conversion_time": result[step.order + num_entities * 3 - 2], + } + ) else: - serialized_result.update({"average_conversion_time": None}) + serialized_result.update({"average_conversion_time": None, "median_conversion_time": None}) if with_breakdown: serialized_result.update( diff --git a/ee/clickhouse/queries/funnels/funnel_strict.py b/ee/clickhouse/queries/funnels/funnel_strict.py index 2868104bcb306..ca354c103c366 100644 --- a/ee/clickhouse/queries/funnels/funnel_strict.py +++ b/ee/clickhouse/queries/funnels/funnel_strict.py @@ -10,7 +10,7 @@ def get_query(self): breakdown_clause = self._get_breakdown_prop() return f""" - SELECT {self._get_count_columns(max_steps)} {self._get_step_time_avgs(max_steps)} {breakdown_clause} FROM ( + SELECT {self._get_count_columns(max_steps)} {self._get_step_time_avgs(max_steps)} {self._get_step_time_median(max_steps)} {breakdown_clause} FROM ( {self.get_step_counts_query()} ) {'GROUP BY prop' if breakdown_clause != '' else ''} SETTINGS allow_experimental_window_functions = 1 """ @@ -23,7 +23,7 @@ def get_step_counts_query(self): breakdown_clause = self._get_breakdown_prop() return f""" - SELECT person_id, max(steps) AS steps {self._get_step_time_avgs(max_steps)} {breakdown_clause} FROM ( + SELECT person_id, max(steps) AS steps {self._get_step_time_avgs(max_steps, inner_query=True)} {self._get_step_time_median(max_steps, inner_query=True)} {breakdown_clause} FROM ( {formatted_query} ) GROUP BY person_id {breakdown_clause} """ diff --git a/ee/clickhouse/queries/funnels/funnel_time_to_convert.py b/ee/clickhouse/queries/funnels/funnel_time_to_convert.py index a08d5f93dc079..42e61e2690605 100644 --- a/ee/clickhouse/queries/funnels/funnel_time_to_convert.py +++ b/ee/clickhouse/queries/funnels/funnel_time_to_convert.py @@ -56,7 +56,7 @@ def get_query(self) -> str: ) steps_average_conversion_time_identifiers = [ - f"step_{step+1}_average_conversion_time" for step in range(from_step, to_step) + f"step_{step+1}_average_conversion_time_inner" for step in range(from_step, to_step) ] steps_average_conversion_time_expression_sum = " + ".join(steps_average_conversion_time_identifiers) @@ -99,7 +99,7 @@ def get_query(self) -> str: count() AS person_count FROM step_runs -- We only need to check step to_step here, because it depends on all the other ones being NOT NULL too - WHERE step_{to_step}_average_conversion_time IS NOT NULL + WHERE step_{to_step}_average_conversion_time_inner IS NOT NULL GROUP BY bin_from_seconds ) results FULL OUTER JOIN ( diff --git a/ee/clickhouse/queries/funnels/funnel_unordered.py b/ee/clickhouse/queries/funnels/funnel_unordered.py index dddc7910ba0bb..e18558dfbb5a2 100644 --- a/ee/clickhouse/queries/funnels/funnel_unordered.py +++ b/ee/clickhouse/queries/funnels/funnel_unordered.py @@ -30,7 +30,7 @@ def get_query(self): breakdown_clause = self._get_breakdown_prop() return f""" - SELECT {self._get_count_columns(max_steps)} {self._get_step_time_avgs(max_steps)} {breakdown_clause} FROM ( + SELECT {self._get_count_columns(max_steps)} {self._get_step_time_avgs(max_steps)} {self._get_step_time_median(max_steps)} {breakdown_clause} FROM ( {self.get_step_counts_query()} ) {'GROUP BY prop' if breakdown_clause != '' else ''} SETTINGS allow_experimental_window_functions = 1 """ @@ -43,7 +43,7 @@ def get_step_counts_query(self): breakdown_clause = self._get_breakdown_prop() return f""" - SELECT person_id, steps {self._get_step_time_avgs(max_steps)} {breakdown_clause} FROM ( + SELECT person_id, steps {self._get_step_time_avgs(max_steps, inner_query=True)} {self._get_step_time_median(max_steps, inner_query=True)} {breakdown_clause} FROM ( SELECT person_id, steps, max(steps) over (PARTITION BY person_id {breakdown_clause}) as max_steps {self._get_step_time_names(max_steps)} {breakdown_clause} FROM ( {union_query} ) @@ -84,7 +84,7 @@ def get_step_counts_without_aggregation_query(self): def _get_step_time_names(self, max_steps: int): names = [] for i in range(1, max_steps): - names.append(f"step_{i}_average_conversion_time") + names.append(f"step_{i}_conversion_time") formatted = ",".join(names) return f", {formatted}" if formatted else "" @@ -100,7 +100,7 @@ def _get_step_times(self, max_steps: int): for i in range(1, max_steps): conditions.append( - f"if(isNotNull(conversion_times[{i+1}]), dateDiff('second', conversion_times[{i}], conversion_times[{i+1}]), NULL) step_{i}_average_conversion_time" + f"if(isNotNull(conversion_times[{i+1}]), dateDiff('second', conversion_times[{i}], conversion_times[{i+1}]), NULL) step_{i}_conversion_time" ) # array indices in ClickHouse are 1-based :shrug: diff --git a/ee/clickhouse/queries/funnels/test/breakdown_cases.py b/ee/clickhouse/queries/funnels/test/breakdown_cases.py index 85b03ffcc7e1e..0c78b033f46d2 100644 --- a/ee/clickhouse/queries/funnels/test/breakdown_cases.py +++ b/ee/clickhouse/queries/funnels/test/breakdown_cases.py @@ -88,6 +88,7 @@ def test_funnel_step_breakdown_event(self): "count": 1, "type": "events", "average_conversion_time": None, + "median_conversion_time": None, "breakdown": "Chrome", }, { @@ -98,6 +99,7 @@ def test_funnel_step_breakdown_event(self): "count": 1, "type": "events", "average_conversion_time": 3600.0, + "median_conversion_time": 3600.0, "breakdown": "Chrome", }, { @@ -108,6 +110,7 @@ def test_funnel_step_breakdown_event(self): "count": 1, "type": "events", "average_conversion_time": 7200.0, + "median_conversion_time": 7200.0, "breakdown": "Chrome", }, ], @@ -127,6 +130,7 @@ def test_funnel_step_breakdown_event(self): "count": 2, "type": "events", "average_conversion_time": None, + "median_conversion_time": None, "breakdown": "Safari", }, { @@ -137,6 +141,7 @@ def test_funnel_step_breakdown_event(self): "count": 1, "type": "events", "average_conversion_time": 7200.0, + "median_conversion_time": 7200.0, "breakdown": "Safari", }, { @@ -147,6 +152,7 @@ def test_funnel_step_breakdown_event(self): "count": 0, "type": "events", "average_conversion_time": None, + "median_conversion_time": None, "breakdown": "Safari", }, ], @@ -230,6 +236,7 @@ def test_funnel_step_breakdown_event_no_type(self): "count": 1, "type": "events", "average_conversion_time": None, + "median_conversion_time": None, "breakdown": "Chrome", }, { @@ -240,6 +247,7 @@ def test_funnel_step_breakdown_event_no_type(self): "count": 1, "type": "events", "average_conversion_time": 3600.0, + "median_conversion_time": 3600.0, "breakdown": "Chrome", }, { @@ -250,6 +258,7 @@ def test_funnel_step_breakdown_event_no_type(self): "count": 1, "type": "events", "average_conversion_time": 7200.0, + "median_conversion_time": 7200.0, "breakdown": "Chrome", }, ], @@ -269,6 +278,7 @@ def test_funnel_step_breakdown_event_no_type(self): "count": 2, "type": "events", "average_conversion_time": None, + "median_conversion_time": None, "breakdown": "Safari", }, { @@ -279,6 +289,7 @@ def test_funnel_step_breakdown_event_no_type(self): "count": 1, "type": "events", "average_conversion_time": 7200.0, + "median_conversion_time": 7200.0, "breakdown": "Safari", }, { @@ -289,6 +300,7 @@ def test_funnel_step_breakdown_event_no_type(self): "count": 0, "type": "events", "average_conversion_time": None, + "median_conversion_time": None, "breakdown": "Safari", }, ], @@ -364,6 +376,7 @@ def test_funnel_step_breakdown_person(self): "count": 1, "type": "events", "average_conversion_time": None, + "median_conversion_time": None, "breakdown": "Chrome", }, { @@ -374,6 +387,7 @@ def test_funnel_step_breakdown_person(self): "count": 1, "type": "events", "average_conversion_time": 3600.0, + "median_conversion_time": 3600.0, "breakdown": "Chrome", }, { @@ -384,6 +398,7 @@ def test_funnel_step_breakdown_person(self): "count": 1, "type": "events", "average_conversion_time": 7200.0, + "median_conversion_time": 7200.0, "breakdown": "Chrome", }, ], @@ -402,6 +417,7 @@ def test_funnel_step_breakdown_person(self): "count": 1, "type": "events", "average_conversion_time": None, + "median_conversion_time": None, "breakdown": "Safari", }, { @@ -412,6 +428,7 @@ def test_funnel_step_breakdown_person(self): "count": 1, "type": "events", "average_conversion_time": 7200.0, + "median_conversion_time": 7200.0, "breakdown": "Safari", }, { @@ -422,6 +439,7 @@ def test_funnel_step_breakdown_person(self): "count": 0, "type": "events", "average_conversion_time": None, + "median_conversion_time": None, "breakdown": "Safari", }, ], @@ -535,6 +553,7 @@ def test_funnel_step_breakdown_event_single_person_multiple_breakdowns(self): "count": 1, "type": "events", "average_conversion_time": None, + "median_conversion_time": None, "breakdown": "0", }, ], @@ -552,6 +571,7 @@ def test_funnel_step_breakdown_event_single_person_multiple_breakdowns(self): "count": 1, "type": "events", "average_conversion_time": None, + "median_conversion_time": None, "breakdown": "Chrome", }, ], @@ -569,6 +589,7 @@ def test_funnel_step_breakdown_event_single_person_multiple_breakdowns(self): "count": 1, "type": "events", "average_conversion_time": None, + "median_conversion_time": None, "breakdown": "Mac", }, ], @@ -586,6 +607,7 @@ def test_funnel_step_breakdown_event_single_person_multiple_breakdowns(self): "count": 1, "type": "events", "average_conversion_time": None, + "median_conversion_time": None, "breakdown": "Safari", }, ], @@ -650,6 +672,7 @@ def test_funnel_step_breakdown_event_single_person_events_with_multiple_properti "count": 1, "type": "events", "average_conversion_time": None, + "median_conversion_time": None, "breakdown": "Chrome", }, { @@ -660,6 +683,7 @@ def test_funnel_step_breakdown_event_single_person_events_with_multiple_properti "count": 0, "type": "events", "average_conversion_time": None, + "median_conversion_time": None, "breakdown": "Chrome", }, ], @@ -678,6 +702,7 @@ def test_funnel_step_breakdown_event_single_person_events_with_multiple_properti "count": 1, "type": "events", "average_conversion_time": None, + "median_conversion_time": None, "breakdown": "Safari", }, { @@ -688,6 +713,7 @@ def test_funnel_step_breakdown_event_single_person_events_with_multiple_properti "count": 1, "type": "events", "average_conversion_time": 3600, + "median_conversion_time": 3600, "breakdown": "Safari", }, ], diff --git a/ee/clickhouse/queries/funnels/test/test_funnel.py b/ee/clickhouse/queries/funnels/test/test_funnel.py index fe6e449a4f6f6..c20f30fea25b3 100644 --- a/ee/clickhouse/queries/funnels/test/test_funnel.py +++ b/ee/clickhouse/queries/funnels/test/test_funnel.py @@ -930,10 +930,15 @@ def test_funnel_step_conversion_times(self): ) result = funnel.run() + self.assertEqual(result[0]["average_conversion_time"], None) self.assertEqual(result[1]["average_conversion_time"], 6000) self.assertEqual(result[2]["average_conversion_time"], 5400) + self.assertEqual(result[0]["median_conversion_time"], None) + self.assertEqual(result[1]["median_conversion_time"], 7200) + self.assertEqual(result[2]["median_conversion_time"], 5400) + def test_funnel_exclusions_invalid_params(self): filters = { "events": [ diff --git a/ee/clickhouse/queries/funnels/test/test_funnel_strict.py b/ee/clickhouse/queries/funnels/test/test_funnel_strict.py index d2dacb249b21c..8cee464c2fc67 100644 --- a/ee/clickhouse/queries/funnels/test/test_funnel_strict.py +++ b/ee/clickhouse/queries/funnels/test/test_funnel_strict.py @@ -107,6 +107,7 @@ def test_strict_breakdown_events_with_multiple_properties(self): "count": 1, "type": "events", "average_conversion_time": None, + "median_conversion_time": None, "breakdown": "Chrome", }, { @@ -117,6 +118,7 @@ def test_strict_breakdown_events_with_multiple_properties(self): "count": 0, "type": "events", "average_conversion_time": None, + "median_conversion_time": None, "breakdown": "Chrome", }, ], @@ -135,6 +137,7 @@ def test_strict_breakdown_events_with_multiple_properties(self): "count": 1, "type": "events", "average_conversion_time": None, + "median_conversion_time": None, "breakdown": "Safari", }, { @@ -145,6 +148,7 @@ def test_strict_breakdown_events_with_multiple_properties(self): "count": 1, "type": "events", "average_conversion_time": 3600, + "median_conversion_time": 3600, "breakdown": "Safari", }, ], diff --git a/ee/clickhouse/queries/funnels/test/test_funnel_unordered.py b/ee/clickhouse/queries/funnels/test/test_funnel_unordered.py index 43f83b351f47e..817438bbe9866 100644 --- a/ee/clickhouse/queries/funnels/test/test_funnel_unordered.py +++ b/ee/clickhouse/queries/funnels/test/test_funnel_unordered.py @@ -78,6 +78,7 @@ def test_funnel_step_breakdown_event_single_person_events_with_multiple_properti "count": 1, "type": "events", "average_conversion_time": None, + "median_conversion_time": None, "breakdown": "Chrome", }, { @@ -88,6 +89,7 @@ def test_funnel_step_breakdown_event_single_person_events_with_multiple_properti "count": 0, "type": "events", "average_conversion_time": None, + "median_conversion_time": None, "breakdown": "Chrome", }, ], @@ -106,6 +108,7 @@ def test_funnel_step_breakdown_event_single_person_events_with_multiple_properti "count": 1, "type": "events", "average_conversion_time": None, + "median_conversion_time": None, "breakdown": "Safari", }, { @@ -116,6 +119,7 @@ def test_funnel_step_breakdown_event_single_person_events_with_multiple_properti "count": 1, "type": "events", "average_conversion_time": 3600, + "median_conversion_time": 3600, "breakdown": "Safari", }, ],