From 833e4c380d5cc86de04cf38563fbaf14fd0ae719 Mon Sep 17 00:00:00 2001 From: Neil Kakkar Date: Wed, 7 Jul 2021 16:07:41 +0100 Subject: [PATCH 1/6] first pass mix and match everything --- ee/clickhouse/queries/funnels/__init__.py | 1 + ee/clickhouse/queries/funnels/base.py | 60 +------ ee/clickhouse/queries/funnels/funnel.py | 54 +++++- .../queries/funnels/funnel_strict.py | 21 ++- .../queries/funnels/funnel_time_to_convert.py | 16 +- .../queries/funnels/funnel_trends.py | 16 +- .../queries/funnels/funnel_unordered.py | 21 ++- .../test/test_funnel_time_to_convert.py | 121 ++++++++++++- .../funnels/test/test_funnel_trends.py | 167 ++++++++++++++++-- ee/clickhouse/views/insights.py | 30 +++- .../views/test/test_clickhouse_insights.py | 64 +++++++ posthog/exceptions.py | 1 + posthog/models/filters/mixins/funnel.py | 2 +- 13 files changed, 474 insertions(+), 100 deletions(-) diff --git a/ee/clickhouse/queries/funnels/__init__.py b/ee/clickhouse/queries/funnels/__init__.py index 35c0785d46bbf..63dee45fc3f00 100644 --- a/ee/clickhouse/queries/funnels/__init__.py +++ b/ee/clickhouse/queries/funnels/__init__.py @@ -1,5 +1,6 @@ from .funnel import ClickhouseFunnel, ClickhouseFunnelNew from .funnel_persons import ClickhouseFunnelPersons +from .funnel_strict import ClickhouseFunnelStrict from .funnel_trends import ClickhouseFunnelTrends from .funnel_trends_persons import ClickhouseFunnelTrendsPersons from .funnel_unordered import ClickhouseFunnelUnordered diff --git a/ee/clickhouse/queries/funnels/base.py b/ee/clickhouse/queries/funnels/base.py index 5e5f1c683b35f..c16edd9c35306 100644 --- a/ee/clickhouse/queries/funnels/base.py +++ b/ee/clickhouse/queries/funnels/base.py @@ -100,20 +100,6 @@ def _exec_query(self) -> List[Tuple]: return sync_execute(query, self.params) - def _get_steps_per_person_query(self): - formatted_query = "" - max_steps = len(self._filter.entities) - if max_steps >= 2: - formatted_query = self.build_step_subquery(2, max_steps) - else: - formatted_query = self._get_inner_event_query() - - return f""" - SELECT *, {self._get_sorting_condition(max_steps, max_steps)} AS steps {self._get_step_times(max_steps)} {self._get_breakdown_prop()} FROM ( - {formatted_query} - ) WHERE step_0 = 1 - """ - def _get_step_times(self, max_steps: int): conditions: List[str] = [] for i in range(1, max_steps): @@ -124,34 +110,7 @@ def _get_step_times(self, max_steps: int): formatted = ", ".join(conditions) return f", {formatted}" if formatted else "" - def build_step_subquery(self, level_index: int, max_steps: int): - if level_index >= max_steps: - return f""" - SELECT - person_id, - timestamp, - {self.get_partition_cols(1, max_steps)} - {self._get_breakdown_prop()} - FROM ({self._get_inner_event_query()}) - """ - else: - return f""" - SELECT - person_id, - timestamp, - {self.get_partition_cols(level_index, max_steps)} - {self._get_breakdown_prop()} - FROM ( - SELECT - person_id, - timestamp, - {self.get_comparison_cols(level_index, max_steps)} - {self._get_breakdown_prop()} - FROM ({self.build_step_subquery(level_index + 1, max_steps)}) - ) - """ - - def get_partition_cols(self, level_index: int, max_steps: int): + def _get_partition_cols(self, level_index: int, max_steps: int): cols: List[str] = [] for i in range(0, max_steps): cols.append(f"step_{i}") @@ -166,17 +125,6 @@ def get_partition_cols(self, level_index: int, max_steps: int): ) return ", ".join(cols) - def get_comparison_cols(self, level_index: int, max_steps: int): - cols: List[str] = [] - for i in range(0, max_steps): - cols.append(f"step_{i}") - if i < level_index: - cols.append(f"latest_{i}") - else: - comparison = self._get_comparison_at_step(i, level_index) - cols.append(f"if({comparison}, NULL, latest_{i}) as latest_{i}") - return ", ".join(cols) - def _get_comparison_at_step(self, index: int, level_index: int): or_statements: List[str] = [] @@ -311,6 +259,12 @@ def _get_step_time_avgs(self, max_steps: int): def get_query(self, format_properties): pass + def get_step_counts_query(self): + pass + + def get_step_counts_without_aggregation_query(self): + pass + def _get_breakdown_select_prop(self) -> str: if self._filter.breakdown: self.params.update({"breakdown": self._filter.breakdown}) diff --git a/ee/clickhouse/queries/funnels/funnel.py b/ee/clickhouse/queries/funnels/funnel.py index 86aa284d4eb44..a917061d16a59 100644 --- a/ee/clickhouse/queries/funnels/funnel.py +++ b/ee/clickhouse/queries/funnels/funnel.py @@ -24,7 +24,7 @@ def get_query(self, format_properties): """ def get_step_counts_query(self): - steps_per_person_query = self._get_steps_per_person_query() + steps_per_person_query = self.get_step_counts_without_aggregation_query() max_steps = len(self._filter.entities) breakdown_clause = self._get_breakdown_prop() @@ -66,3 +66,55 @@ def _format_single_funnel(self, result, with_breakdown=False): steps.append(serialized_result) return steps[::-1] #  reverse + + def get_step_counts_without_aggregation_query(self): + formatted_query = "" + max_steps = len(self._filter.entities) + if max_steps >= 2: + formatted_query = self.build_step_subquery(2, max_steps) + else: + formatted_query = self._get_inner_event_query() + + return f""" + SELECT *, {self._get_sorting_condition(max_steps, max_steps)} AS steps {self._get_step_times(max_steps)} {self._get_breakdown_prop()} FROM ( + {formatted_query} + ) WHERE step_0 = 1 + """ + + def get_comparison_cols(self, level_index: int, max_steps: int): + cols: List[str] = [] + for i in range(0, max_steps): + cols.append(f"step_{i}") + if i < level_index: + cols.append(f"latest_{i}") + else: + comparison = self._get_comparison_at_step(i, level_index) + cols.append(f"if({comparison}, NULL, latest_{i}) as latest_{i}") + return ", ".join(cols) + + def build_step_subquery(self, level_index: int, max_steps: int): + if level_index >= max_steps: + return f""" + SELECT + person_id, + timestamp, + {self._get_partition_cols(1, max_steps)} + {self._get_breakdown_prop()} + FROM ({self._get_inner_event_query()}) + """ + else: + return f""" + SELECT + person_id, + timestamp, + {self._get_partition_cols(level_index, max_steps)} + {self._get_breakdown_prop()} + FROM ( + SELECT + person_id, + timestamp, + {self.get_comparison_cols(level_index, max_steps)} + {self._get_breakdown_prop()} + FROM ({self.build_step_subquery(level_index + 1, max_steps)}) + ) + """ diff --git a/ee/clickhouse/queries/funnels/funnel_strict.py b/ee/clickhouse/queries/funnels/funnel_strict.py index bec31e91fd675..6159aa4d386c7 100644 --- a/ee/clickhouse/queries/funnels/funnel_strict.py +++ b/ee/clickhouse/queries/funnels/funnel_strict.py @@ -16,7 +16,18 @@ def get_step_counts_query(self): max_steps = len(self._filter.entities) - partition_select = self.get_partition_cols(1, max_steps) + formatted_query = self.get_step_counts_without_aggregation_query() + + return f""" + SELECT person_id, max(steps) AS steps {self._get_step_time_avgs(max_steps)} FROM ( + {formatted_query} + ) GROUP BY person_id + """ + + def get_step_counts_without_aggregation_query(self): + max_steps = len(self._filter.entities) + + partition_select = self._get_partition_cols(1, max_steps) sorting_condition = self._get_sorting_condition(max_steps, max_steps) inner_query = f""" @@ -32,13 +43,9 @@ def get_step_counts_query(self): {inner_query} ) WHERE step_0 = 1""" - return f""" - SELECT person_id, max(steps) AS steps {self._get_step_time_avgs(max_steps)} FROM ( - {formatted_query} - ) GROUP BY person_id - """ + return formatted_query - def get_partition_cols(self, level_index: int, max_steps: int): + def _get_partition_cols(self, level_index: int, max_steps: int): cols: List[str] = [] for i in range(0, max_steps): cols.append(f"step_{i}") diff --git a/ee/clickhouse/queries/funnels/funnel_time_to_convert.py b/ee/clickhouse/queries/funnels/funnel_time_to_convert.py index deddba1ed15c4..3c4112b97bd23 100644 --- a/ee/clickhouse/queries/funnels/funnel_time_to_convert.py +++ b/ee/clickhouse/queries/funnels/funnel_time_to_convert.py @@ -1,12 +1,22 @@ -from ee.clickhouse.queries.funnels.funnel import ClickhouseFunnelNew +from typing import Type +from ee.clickhouse.queries.funnels.base import ClickhouseFunnelBase +from posthog.models.filters.filter import Filter +from posthog.models.team import Team + + +class ClickhouseFunnelTimeToConvert(ClickhouseFunnelBase): + def __init__(self, filter: Filter, team: Team, funnel_order_class: Type[ClickhouseFunnelBase]) -> None: + super().__init__(filter, team) + self.funnel_order = funnel_order_class(filter, team) -class ClickhouseFunnelTimeToConvert(ClickhouseFunnelNew): def _format_results(self, results: list) -> list: return results def get_query(self, format_properties) -> str: - steps_per_person_query = self._get_steps_per_person_query() + steps_per_person_query = self.funnel_order.get_step_counts_query() + self.params.update(self.funnel_order.params) + # expects 1 person per row, whatever their max step is, and the step conversion times for this person # Conversion to which step (from the immediately preceding one) should be calculated to_step = self._filter.funnel_to_step diff --git a/ee/clickhouse/queries/funnels/funnel_trends.py b/ee/clickhouse/queries/funnels/funnel_trends.py index 9ad7a275ecd3b..f216b0eb4c252 100644 --- a/ee/clickhouse/queries/funnels/funnel_trends.py +++ b/ee/clickhouse/queries/funnels/funnel_trends.py @@ -1,7 +1,10 @@ from datetime import date, datetime, timedelta -from typing import Union +from typing import Type, Union from ee.clickhouse.queries.funnels.base import ClickhouseFunnelBase +from ee.clickhouse.queries.funnels.funnel import ClickhouseFunnelNew +from ee.clickhouse.queries.funnels.funnel_strict import ClickhouseFunnelStrict +from ee.clickhouse.queries.funnels.funnel_unordered import ClickhouseFunnelUnordered from ee.clickhouse.queries.util import get_time_diff, get_trunc_func_ch from posthog.constants import BREAKDOWN from posthog.models.filters.filter import Filter @@ -45,12 +48,15 @@ class ClickhouseFunnelTrends(ClickhouseFunnelBase): If no people have reached step {from_step} in the period, {conversion_rate} is zero. """ - def __init__(self, filter: Filter, team: Team) -> None: + def __init__(self, filter: Filter, team: Team, funnel_order_class: Type[ClickhouseFunnelBase]) -> None: # TODO: allow breakdown if BREAKDOWN in filter._data: del filter._data[BREAKDOWN] + super().__init__(filter, team) + self.funnel_order = funnel_order_class(filter, team) + def run(self, *args, **kwargs): if len(self._filter.entities) == 0: return [] @@ -61,7 +67,11 @@ def perform_query(self): return self._summarize_data(self._exec_query()) def get_query(self, format_properties) -> str: - steps_per_person_query = self._get_steps_per_person_query() + + steps_per_person_query = self.funnel_order.get_step_counts_without_aggregation_query() + # expects multiple rows for same person, first event time, steps taken. + self.params.update(self.funnel_order.params) + num_intervals, seconds_in_interval, _ = get_time_diff( self._filter.interval or "day", self._filter.date_from, self._filter.date_to, team_id=self._team.pk ) diff --git a/ee/clickhouse/queries/funnels/funnel_unordered.py b/ee/clickhouse/queries/funnels/funnel_unordered.py index 09a3eff4fee2c..d07a2ae9f8fce 100644 --- a/ee/clickhouse/queries/funnels/funnel_unordered.py +++ b/ee/clickhouse/queries/funnels/funnel_unordered.py @@ -36,10 +36,21 @@ def get_query(self, format_properties): def get_step_counts_query(self): max_steps = len(self._filter.entities) + + union_query = self.get_step_counts_without_aggregation_query() + + return f""" + SELECT person_id, max(steps) AS steps {self._get_step_time_avgs(max_steps)} FROM ( + {union_query} + ) GROUP BY person_id + """ + + def get_step_counts_without_aggregation_query(self): + max_steps = len(self._filter.entities) union_queries = [] entities_to_use = list(self._filter.entities) - partition_select = self.get_partition_cols(1, max_steps) + partition_select = self._get_partition_cols(1, max_steps) sorting_condition = self.get_sorting_condition(max_steps) for i in range(max_steps): @@ -60,13 +71,7 @@ def get_step_counts_query(self): entities_to_use.append(entities_to_use.pop(0)) union_queries.append(formatted_query) - union_formatted_query = " UNION ALL ".join(union_queries) - - return f""" - SELECT person_id, max(steps) AS steps {self._get_step_time_avgs(max_steps)} FROM ( - {union_formatted_query} - ) GROUP BY person_id - """ + return " UNION ALL ".join(union_queries) def get_sorting_condition(self, max_steps: int): diff --git a/ee/clickhouse/queries/funnels/test/test_funnel_time_to_convert.py b/ee/clickhouse/queries/funnels/test/test_funnel_time_to_convert.py index 4f28ac669ac86..b11aef6ea1ba6 100644 --- a/ee/clickhouse/queries/funnels/test/test_funnel_time_to_convert.py +++ b/ee/clickhouse/queries/funnels/test/test_funnel_time_to_convert.py @@ -4,8 +4,8 @@ import pytz from ee.clickhouse.models.event import create_event +from ee.clickhouse.queries.funnels import ClickhouseFunnelNew, ClickhouseFunnelStrict, ClickhouseFunnelUnordered from ee.clickhouse.queries.funnels.funnel_time_to_convert import ClickhouseFunnelTimeToConvert -from ee.clickhouse.queries.funnels.funnel_trends import ClickhouseFunnelTrends from ee.clickhouse.util import ClickhouseTestMixin from posthog.constants import INSIGHT_FUNNELS, TRENDS_LINEAR from posthog.models.filters import Filter @@ -61,7 +61,124 @@ def test_basic(self): } ) - funnel_trends = ClickhouseFunnelTimeToConvert(filter, self.team) + funnel_trends = ClickhouseFunnelTimeToConvert(filter, self.team, ClickhouseFunnelNew) + results = funnel_trends.run() + + # Autobinned using the maximum time to convert + # Since the maximum time to step 1 from 0 is user C's 23 h (82_800 s) and the number of bins is 10, + # each bin is an interval of 8280 s, starting with 0, left-inclusive + self.assertEqual( + results, + [ + (8280.0, 2), # Reached step 1 from step 0 in less than 8280 s - users A and B fall into this bin + (16560.0, 0), # Reached step 1 from step 0 in at least 8280 s but less than 16_560 s - no users + (24840.0, 0), # And so on + (33120.0, 0), + (41400.0, 0), + (49680.0, 0), + (57960.0, 0), + (66240.0, 0), + (74520.0, 0), + (82800.0, 0), + (91080.0, 1), # >= 82_800 && < 91_080 - user C falls into this bin, with a time of exactly 82_800 s + ], + ) + + def test_basic_unordered(self): + _create_person(distinct_ids=["user a"], team=self.team) + _create_person(distinct_ids=["user b"], team=self.team) + _create_person(distinct_ids=["user c"], team=self.team) + + _create_event(event="step three", distinct_id="user a", team=self.team, timestamp="2021-06-08 18:00:00") + _create_event(event="step one", distinct_id="user a", team=self.team, timestamp="2021-06-08 19:00:00") + _create_event(event="step two", distinct_id="user a", team=self.team, timestamp="2021-06-08 21:00:00") + + _create_event(event="step one", distinct_id="user b", team=self.team, timestamp="2021-06-09 13:00:00") + _create_event(event="step two", distinct_id="user b", team=self.team, timestamp="2021-06-09 13:37:00") + + _create_event(event="step two", distinct_id="user c", team=self.team, timestamp="2021-06-11 07:00:00") + _create_event(event="step one", distinct_id="user c", team=self.team, timestamp="2021-06-12 06:00:00") + + filter = Filter( + data={ + "insight": INSIGHT_FUNNELS, + "display": TRENDS_LINEAR, + "interval": "day", + "date_from": "2021-06-07 00:00:00", + "date_to": "2021-06-13 23:59:59", + "funnel_to_step": 1, + "funnel_window_days": 7, + "events": [ + {"id": "step one", "order": 0}, + {"id": "step two", "order": 1}, + {"id": "step three", "order": 2}, + ], + } + ) + + funnel_trends = ClickhouseFunnelTimeToConvert(filter, self.team, ClickhouseFunnelUnordered) + results = funnel_trends.run() + + # Autobinned using the maximum time to convert + # Since the maximum time to step 1 from 0 is user C's 23 h (82_800 s) and the number of bins is 10, + # each bin is an interval of 8280 s, starting with 0, left-inclusive + self.assertEqual( + results, + [ + (8280.0, 2), # Reached step 1 from step 0 in less than 8280 s - users A and B fall into this bin + (16560.0, 0), # Reached step 1 from step 0 in at least 8280 s but less than 16_560 s - no users + (24840.0, 0), # And so on + (33120.0, 0), + (41400.0, 0), + (49680.0, 0), + (57960.0, 0), + (66240.0, 0), + (74520.0, 0), + (82800.0, 0), + (91080.0, 1), # >= 82_800 && < 91_080 - user C falls into this bin, with a time of exactly 82_800 s + ], + ) + + def test_basic_strict(self): + _create_person(distinct_ids=["user a"], team=self.team) + _create_person(distinct_ids=["user b"], team=self.team) + _create_person(distinct_ids=["user c"], team=self.team) + _create_person(distinct_ids=["user d"], team=self.team) + + _create_event(event="step one", distinct_id="user a", team=self.team, timestamp="2021-06-08 18:00:00") + _create_event(event="step two", distinct_id="user a", team=self.team, timestamp="2021-06-08 19:00:00") + _create_event(event="step three", distinct_id="user a", team=self.team, timestamp="2021-06-08 21:00:00") + + _create_event(event="step one", distinct_id="user b", team=self.team, timestamp="2021-06-09 13:00:00") + _create_event(event="step two", distinct_id="user b", team=self.team, timestamp="2021-06-09 13:37:00") + _create_event(event="blah", distinct_id="user b", team=self.team, timestamp="2021-06-09 13:38:00") + _create_event(event="step three", distinct_id="user b", team=self.team, timestamp="2021-06-09 13:39:00") + + _create_event(event="step one", distinct_id="user c", team=self.team, timestamp="2021-06-11 07:00:00") + _create_event(event="step two", distinct_id="user c", team=self.team, timestamp="2021-06-12 06:00:00") + + _create_event(event="step one", distinct_id="user d", team=self.team, timestamp="2021-06-11 07:00:00") + _create_event(event="blah", distinct_id="user d", team=self.team, timestamp="2021-06-12 07:00:00") + _create_event(event="step two", distinct_id="user d", team=self.team, timestamp="2021-06-12 09:00:00") + + filter = Filter( + data={ + "insight": INSIGHT_FUNNELS, + "display": TRENDS_LINEAR, + "interval": "day", + "date_from": "2021-06-07 00:00:00", + "date_to": "2021-06-13 23:59:59", + "funnel_to_step": 1, + "funnel_window_days": 7, + "events": [ + {"id": "step one", "order": 0}, + {"id": "step two", "order": 1}, + {"id": "step three", "order": 2}, + ], + } + ) + + funnel_trends = ClickhouseFunnelTimeToConvert(filter, self.team, ClickhouseFunnelStrict) results = funnel_trends.run() # Autobinned using the maximum time to convert diff --git a/ee/clickhouse/queries/funnels/test/test_funnel_trends.py b/ee/clickhouse/queries/funnels/test/test_funnel_trends.py index 6f8fa0f36deaf..6acde4b562dec 100644 --- a/ee/clickhouse/queries/funnels/test/test_funnel_trends.py +++ b/ee/clickhouse/queries/funnels/test/test_funnel_trends.py @@ -4,6 +4,7 @@ import pytz from ee.clickhouse.models.event import create_event +from ee.clickhouse.queries.funnels import ClickhouseFunnelNew, ClickhouseFunnelStrict, ClickhouseFunnelUnordered from ee.clickhouse.queries.funnels.funnel_trends import ClickhouseFunnelTrends from ee.clickhouse.util import ClickhouseTestMixin from posthog.constants import INSIGHT_FUNNELS, TRENDS_LINEAR @@ -89,7 +90,7 @@ def test_no_event_in_period(self): } ) - funnel_trends = ClickhouseFunnelTrends(filter, self.team) + funnel_trends = ClickhouseFunnelTrends(filter, self.team, ClickhouseFunnelNew) results = funnel_trends.perform_query() self.assertEqual(len(results), 7) @@ -114,7 +115,7 @@ def test_only_one_user_reached_one_step(self): } ) - funnel_trends = ClickhouseFunnelTrends(filter, self.team) + funnel_trends = ClickhouseFunnelTrends(filter, self.team, ClickhouseFunnelNew) results = funnel_trends.perform_query() self.assertEqual(len(results), 7) @@ -204,7 +205,7 @@ def test_hour_interval(self): ], } ) - results = ClickhouseFunnelTrends(filter, self.team).perform_query() + results = ClickhouseFunnelTrends(filter, self.team, ClickhouseFunnelNew).perform_query() self.assertEqual(len(results), 145) def test_day_interval(self): @@ -223,7 +224,7 @@ def test_day_interval(self): ], } ) - results = ClickhouseFunnelTrends(filter, self.team).perform_query() + results = ClickhouseFunnelTrends(filter, self.team, ClickhouseFunnelNew).perform_query() self.assertEqual(len(results), 7) def test_week_interval(self): @@ -242,7 +243,7 @@ def test_week_interval(self): ], } ) - results = ClickhouseFunnelTrends(filter, self.team).perform_query() + results = ClickhouseFunnelTrends(filter, self.team, ClickhouseFunnelNew).perform_query() self.assertEqual(2, len(results)) def test_month_interval(self): @@ -261,7 +262,7 @@ def test_month_interval(self): ], } ) - results = ClickhouseFunnelTrends(filter, self.team).perform_query() + results = ClickhouseFunnelTrends(filter, self.team, ClickhouseFunnelNew).perform_query() self.assertEqual(len(results), 1) def test_all_results_for_day_interval(self): @@ -282,7 +283,7 @@ def test_all_results_for_day_interval(self): ], } ) - results = ClickhouseFunnelTrends(filter, self.team).perform_query() + results = ClickhouseFunnelTrends(filter, self.team, ClickhouseFunnelNew).perform_query() saturday = results[0] # 5/1 self.assertEqual(3, saturday["reached_to_step_count"]) @@ -344,7 +345,7 @@ def test_window_size_one_day(self): ], } ) - results = ClickhouseFunnelTrends(filter, self.team).perform_query() + results = ClickhouseFunnelTrends(filter, self.team, ClickhouseFunnelNew).perform_query() saturday = results[0] # 5/1 self.assertEqual(1, saturday["reached_to_step_count"]) @@ -421,7 +422,7 @@ def test_period_not_final(self): ], } ) - results = ClickhouseFunnelTrends(filter, self.team).perform_query() + results = ClickhouseFunnelTrends(filter, self.team, ClickhouseFunnelNew).perform_query() self.assertEqual(len(results), 2) @@ -472,7 +473,7 @@ def test_two_runs_by_single_user_in_one_period(self): ], } ) - results = ClickhouseFunnelTrends(filter, self.team).perform_query() + results = ClickhouseFunnelTrends(filter, self.team, ClickhouseFunnelNew).perform_query() self.assertEqual(len(results), 1) @@ -504,7 +505,7 @@ def test_steps_performed_in_period_but_in_reverse(self): ], } ) - results = ClickhouseFunnelTrends(filter, self.team).perform_query() + results = ClickhouseFunnelTrends(filter, self.team, ClickhouseFunnelNew).perform_query() self.assertEqual(len(results), 1) @@ -550,7 +551,7 @@ def test_one_person_in_multiple_periods_and_windows(self): ], } ) - results = ClickhouseFunnelTrends(filter, self.team).perform_query() + results = ClickhouseFunnelTrends(filter, self.team, ClickhouseFunnelNew).perform_query() self.assertEqual(len(results), 4) @@ -616,7 +617,7 @@ def test_from_second_step(self): ], } ) - results = ClickhouseFunnelTrends(filter, self.team).perform_query() + results = ClickhouseFunnelTrends(filter, self.team, ClickhouseFunnelNew).perform_query() self.assertEqual(len(results), 2) @@ -670,7 +671,7 @@ def test_to_second_step(self): ], } ) - results = ClickhouseFunnelTrends(filter, self.team).perform_query() + results = ClickhouseFunnelTrends(filter, self.team, ClickhouseFunnelNew).perform_query() self.assertEqual(len(results), 2) @@ -704,7 +705,7 @@ def test_window_size_one_day_not_broken_by_breakdown(self): ], } ) - results = ClickhouseFunnelTrends(filter, self.team).perform_query() + results = ClickhouseFunnelTrends(filter, self.team, ClickhouseFunnelNew).perform_query() filter_breakdown = Filter( data={ @@ -723,6 +724,140 @@ def test_window_size_one_day_not_broken_by_breakdown(self): ], } ) - results_breakdown = ClickhouseFunnelTrends(filter_breakdown, self.team).perform_query() + results_breakdown = ClickhouseFunnelTrends(filter_breakdown, self.team, ClickhouseFunnelNew).perform_query() self.assertEqual(results_breakdown, results) + + def test_one_person_in_multiple_periods_and_windows_in_unordered_funnel(self): + _create_person(distinct_ids=["user_one"], team=self.team) + _create_person(distinct_ids=["user_two"], team=self.team) + + # 1st user's 1st complete run + _create_event(event="step one", distinct_id="user_one", team=self.team, timestamp="2021-05-01 01:00:00") + _create_event(event="step three", distinct_id="user_one", team=self.team, timestamp="2021-05-01 02:00:00") + _create_event(event="step two", distinct_id="user_one", team=self.team, timestamp="2021-05-01 03:00:00") + + # 1st user's incomplete run + _create_event(event="step two", distinct_id="user_one", team=self.team, timestamp="2021-05-03 01:00:00") + _create_event(event="step one", distinct_id="user_one", team=self.team, timestamp="2021-05-03 02:00:00") + + # 2nd user's incomplete run + _create_event(event="step one", distinct_id="user_two", team=self.team, timestamp="2021-05-04 18:00:00") + + # 1st user's 2nd complete run + _create_event(event="step three", distinct_id="user_one", team=self.team, timestamp="2021-05-04 11:00:00") + _create_event(event="step one", distinct_id="user_one", team=self.team, timestamp="2021-05-04 12:00:00") + _create_event(event="step two", distinct_id="user_one", team=self.team, timestamp="2021-05-04 13:00:00") + + filter = Filter( + data={ + "insight": INSIGHT_FUNNELS, + "display": TRENDS_LINEAR, + "interval": "day", + "date_from": "2021-05-01 00:00:00", + "date_to": "2021-05-04 23:59:59", + "funnel_window_days": 1, + "events": [ + {"id": "step one", "order": 0}, + {"id": "step two", "order": 1}, + {"id": "step three", "order": 2}, + ], + } + ) + results = ClickhouseFunnelTrends(filter, self.team, ClickhouseFunnelUnordered).perform_query() + + self.assertEqual(len(results), 4) + + day_1 = results[0] # 2021-05-01 + self.assertEqual(day_1["reached_from_step_count"], 1) + self.assertEqual(day_1["reached_to_step_count"], 1) + self.assertEqual(day_1["conversion_rate"], 100) + self.assertEqual(day_1["is_period_final"], True) + + day_2 = results[1] # 2021-05-02 + self.assertEqual(day_2["reached_from_step_count"], 0) + self.assertEqual(day_2["reached_to_step_count"], 0) + self.assertEqual(day_2["conversion_rate"], 0) + self.assertEqual(day_2["is_period_final"], True) + + day_3 = results[2] # 2021-05-03 + self.assertEqual(day_3["reached_from_step_count"], 1) + self.assertEqual(day_3["reached_to_step_count"], 0) + self.assertEqual(day_3["conversion_rate"], 0) + self.assertEqual(day_3["is_period_final"], True) + + day_4 = results[3] # 2021-05-04 + self.assertEqual(day_4["reached_from_step_count"], 2) + self.assertEqual(day_4["reached_to_step_count"], 1) + self.assertEqual(day_4["conversion_rate"], 50) + self.assertEqual(day_4["is_period_final"], True) + + def test_one_person_in_multiple_periods_and_windows_in_strict_funnel(self): + _create_person(distinct_ids=["user_one"], team=self.team) + _create_person(distinct_ids=["user_two"], team=self.team) + + # 1st user's 1st complete run + _create_event(event="step one", distinct_id="user_one", team=self.team, timestamp="2021-05-01 01:00:00") + _create_event(event="step two", distinct_id="user_one", team=self.team, timestamp="2021-05-01 02:00:00") + _create_event(event="step three", distinct_id="user_one", team=self.team, timestamp="2021-05-01 03:00:00") + + # 1st user's incomplete run + _create_event(event="step one", distinct_id="user_one", team=self.team, timestamp="2021-05-03 01:00:00") + _create_event(event="step two", distinct_id="user_one", team=self.team, timestamp="2021-05-03 02:00:00") + # broken because strict + _create_event(event="blah", distinct_id="user_one", team=self.team, timestamp="2021-05-03 02:30:00") + _create_event(event="step three", distinct_id="user_one", team=self.team, timestamp="2021-05-03 03:00:00") + + # 2nd user's incomplete run + _create_event(event="step one", distinct_id="user_two", team=self.team, timestamp="2021-05-04 18:00:00") + # broken because strict + _create_event(event="blah", distinct_id="user_two", team=self.team, timestamp="2021-05-04 18:20:00") + _create_event(event="step two", distinct_id="user_two", team=self.team, timestamp="2021-05-04 19:00:00") + + # 1st user's 2nd complete run + _create_event(event="step one", distinct_id="user_one", team=self.team, timestamp="2021-05-04 11:00:00") + _create_event(event="step two", distinct_id="user_one", team=self.team, timestamp="2021-05-04 12:00:00") + _create_event(event="step three", distinct_id="user_one", team=self.team, timestamp="2021-05-04 13:00:00") + + filter = Filter( + data={ + "insight": INSIGHT_FUNNELS, + "display": TRENDS_LINEAR, + "interval": "day", + "date_from": "2021-05-01 00:00:00", + "date_to": "2021-05-04 23:59:59", + "funnel_window_days": 1, + "events": [ + {"id": "step one", "order": 0}, + {"id": "step two", "order": 1}, + {"id": "step three", "order": 2}, + ], + } + ) + results = ClickhouseFunnelTrends(filter, self.team, ClickhouseFunnelStrict).perform_query() + + self.assertEqual(len(results), 4) + + day_1 = results[0] # 2021-05-01 + self.assertEqual(day_1["reached_from_step_count"], 1) + self.assertEqual(day_1["reached_to_step_count"], 1) + self.assertEqual(day_1["conversion_rate"], 100) + self.assertEqual(day_1["is_period_final"], True) + + day_2 = results[1] # 2021-05-02 + self.assertEqual(day_2["reached_from_step_count"], 0) + self.assertEqual(day_2["reached_to_step_count"], 0) + self.assertEqual(day_2["conversion_rate"], 0) + self.assertEqual(day_2["is_period_final"], True) + + day_3 = results[2] # 2021-05-03 + self.assertEqual(day_3["reached_from_step_count"], 1) + self.assertEqual(day_3["reached_to_step_count"], 0) + self.assertEqual(day_3["conversion_rate"], 0) + self.assertEqual(day_3["is_period_final"], True) + + day_4 = results[3] # 2021-05-04 + self.assertEqual(day_4["reached_from_step_count"], 2) + self.assertEqual(day_4["reached_to_step_count"], 1) + self.assertEqual(day_4["conversion_rate"], 50) + self.assertEqual(day_4["is_period_final"], True) diff --git a/ee/clickhouse/views/insights.py b/ee/clickhouse/views/insights.py index 3e3b07256638f..270c305a26b67 100644 --- a/ee/clickhouse/views/insights.py +++ b/ee/clickhouse/views/insights.py @@ -7,7 +7,13 @@ from ee.clickhouse.queries.clickhouse_paths import ClickhousePaths from ee.clickhouse.queries.clickhouse_retention import ClickhouseRetention from ee.clickhouse.queries.clickhouse_stickiness import ClickhouseStickiness -from ee.clickhouse.queries.funnels import ClickhouseFunnel, ClickhouseFunnelTrends, ClickhouseFunnelUnordered +from ee.clickhouse.queries.funnels import ( + ClickhouseFunnel, + ClickhouseFunnelStrict, + ClickhouseFunnelTrends, + ClickhouseFunnelUnordered, +) +from ee.clickhouse.queries.funnels.funnel_time_to_convert import ClickhouseFunnelTimeToConvert from ee.clickhouse.queries.funnels.funnel_trends import ClickhouseFunnelTrends from ee.clickhouse.queries.sessions.clickhouse_sessions import ClickhouseSessions from ee.clickhouse.queries.trends.clickhouse_trends import ClickhouseTrends @@ -74,12 +80,24 @@ def calculate_funnel(self, request: Request) -> Dict[str, Any]: team = self.team filter = Filter(request=request, data={"insight": INSIGHT_FUNNELS}) - if filter.funnel_viz_type == FunnelVizType.TRENDS or filter.display == TRENDS_LINEAR: - return {"result": ClickhouseFunnelTrends(team=team, filter=filter).run()} - elif filter.funnel_order_type == FunnelOrderType.UNORDERED: - return {"result": ClickhouseFunnelUnordered(team=team, filter=filter).run()} + funnel_order_class = ClickhouseFunnel + if filter.funnel_order_type == FunnelOrderType.UNORDERED: + funnel_order_class = ClickhouseFunnelUnordered + elif filter.funnel_order_type == FunnelOrderType.STRICT: + funnel_order_class = ClickhouseFunnelStrict + + if filter.funnel_viz_type == FunnelVizType.TRENDS: + return { + "result": ClickhouseFunnelTrends(team=team, filter=filter, funnel_order_class=funnel_order_class).run() + } + elif filter.funnel_viz_type == FunnelVizType.TIME_TO_CONVERT: + return { + "result": ClickhouseFunnelTimeToConvert( + team=team, filter=filter, funnel_order_class=funnel_order_class + ).run() + } else: - return {"result": ClickhouseFunnel(team=team, filter=filter).run()} + return {"result": funnel_order_class(team=team, filter=filter).run()} @cached_function() def calculate_retention(self, request: Request) -> Dict[str, Any]: diff --git a/ee/clickhouse/views/test/test_clickhouse_insights.py b/ee/clickhouse/views/test/test_clickhouse_insights.py index f95c71f275981..662ed3a86be67 100644 --- a/ee/clickhouse/views/test/test_clickhouse_insights.py +++ b/ee/clickhouse/views/test/test_clickhouse_insights.py @@ -44,6 +44,7 @@ def test_funnel_unordered_basic_post(self): ], "funnel_window_days": 14, "funnel_order_type": "unordered", + "insight": INSIGHT_FUNNELS, }, ).json() @@ -53,6 +54,35 @@ def test_funnel_unordered_basic_post(self): self.assertEqual(response["result"][0]["count"], 2) self.assertEqual(response["result"][1]["count"], 2) + def test_funnel_strict_basic_post(self): + _create_person(distinct_ids=["1"], team=self.team) + _create_event(team=self.team, event="step one", distinct_id="1") + _create_event(team=self.team, event="step two", distinct_id="1") + + _create_person(distinct_ids=["2"], team=self.team) + _create_event(team=self.team, event="step one", distinct_id="2") + _create_event(team=self.team, event="blahh", distinct_id="2") + _create_event(team=self.team, event="step two", distinct_id="2") + + response = self.client.post( + "/api/insight/funnel/", + { + "events": [ + {"id": "step one", "type": "events", "order": 0}, + {"id": "step two", "type": "events", "order": 1}, + ], + "funnel_window_days": 14, + "funnel_order_type": "strict", + "insight": INSIGHT_FUNNELS, + }, + ).json() + + self.assertEqual(len(response["result"]), 2) + self.assertEqual(response["result"][0]["name"], "step one") + self.assertEqual(response["result"][1]["name"], "step two") + self.assertEqual(response["result"][0]["count"], 2) + self.assertEqual(response["result"][1]["count"], 1) + def test_funnel_trends_basic_post(self): _create_person(distinct_ids=["user_one"], team=self.team) _create_person(distinct_ids=["user_two"], team=self.team) @@ -84,3 +114,37 @@ def test_funnel_trends_basic_post(self): self.assertEqual(len(response["result"]), 1) self.assertEqual(response["result"][0]["count"], 7) self.assertEqual(response["result"][0]["data"], [100, 100, 0, 0, 0, 0, 0]) + + def test_funnel_trends_unordered_basic_post(self): + _create_person(distinct_ids=["user_one"], team=self.team) + _create_person(distinct_ids=["user_two"], team=self.team) + # user_one, funnel steps: one, two three + _create_event(event="step one", distinct_id="user_one", team=self.team, timestamp="2021-05-01 01:00:00") + _create_event(event="step two", distinct_id="user_one", team=self.team, timestamp="2021-05-03 00:00:00") + _create_event(event="step three", distinct_id="user_one", team=self.team, timestamp="2021-05-05 00:00:00") + + # user_two, funnel steps: one, two + _create_event(event="step one", distinct_id="user_two", team=self.team, timestamp="2021-05-02 00:00:00") + _create_event(event="step two", distinct_id="user_two", team=self.team, timestamp="2021-05-04 00:00:00") + _create_event(event="step three", distinct_id="user_two", team=self.team, timestamp="2021-05-05 00:00:00") + + response = self.client.post( + "/api/insight/funnel/", + { + "events": [ + {"id": "step one", "type": "events", "order": 0}, + {"id": "step two", "type": "events", "order": 1}, + {"id": "step three", "type": "events", "order": 2}, + ], + "funnel_window_days": 7, + "date_from": "2021-05-01 00:00:00", + "date_to": "2021-05-07 23:59:59", + "funnel_viz_type": "trends", + }, + ).json() + + self.assertEqual(len(response["result"]), 1) + self.assertEqual(response["result"][0]["count"], 7) + self.assertEqual(response["result"][0]["data"], [100, 100, 0, 0, 0, 0, 0]) + + # TODO: Funnel time to convert basic API test diff --git a/posthog/exceptions.py b/posthog/exceptions.py index 597022e46fc01..446b190d97363 100644 --- a/posthog/exceptions.py +++ b/posthog/exceptions.py @@ -25,6 +25,7 @@ def exception_reporting(exception: Exception, *args, **kwargs) -> None: Determines which exceptions to report and sends them to Sentry. Used through drf-exceptions-hog """ + print(exception) if not isinstance(exception, APIException): capture_exception(exception) diff --git a/posthog/models/filters/mixins/funnel.py b/posthog/models/filters/mixins/funnel.py index 1a10cb6620d5e..dfc735567f810 100644 --- a/posthog/models/filters/mixins/funnel.py +++ b/posthog/models/filters/mixins/funnel.py @@ -89,7 +89,7 @@ def funnel_order_type(self) -> Optional[FunnelOrderType]: def funnel_viz_type(self) -> Optional[FunnelVizType]: funnel_viz_type = self._data.get(FUNNEL_VIZ_TYPE) if ( - not funnel_viz_type is None + funnel_viz_type is None and self._data.get(INSIGHT) == INSIGHT_FUNNELS and self._data.get(DISPLAY) == TRENDS_LINEAR ): From 396cd8e45b0d243c793466814ba2c9a5f330e682 Mon Sep 17 00:00:00 2001 From: Neil Kakkar Date: Wed, 7 Jul 2021 16:37:33 +0100 Subject: [PATCH 2/6] cleanup --- ee/clickhouse/queries/funnels/__init__.py | 1 + ee/clickhouse/queries/funnels/funnel_unordered.py | 6 +++--- ee/clickhouse/views/insights.py | 5 +++-- posthog/exceptions.py | 1 - 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/ee/clickhouse/queries/funnels/__init__.py b/ee/clickhouse/queries/funnels/__init__.py index 63dee45fc3f00..fc298ee9d2fe7 100644 --- a/ee/clickhouse/queries/funnels/__init__.py +++ b/ee/clickhouse/queries/funnels/__init__.py @@ -1,3 +1,4 @@ +from .base import ClickhouseFunnelBase from .funnel import ClickhouseFunnel, ClickhouseFunnelNew from .funnel_persons import ClickhouseFunnelPersons from .funnel_strict import ClickhouseFunnelStrict diff --git a/ee/clickhouse/queries/funnels/funnel_unordered.py b/ee/clickhouse/queries/funnels/funnel_unordered.py index bc64d25a8df99..d66f8eeb08d7b 100644 --- a/ee/clickhouse/queries/funnels/funnel_unordered.py +++ b/ee/clickhouse/queries/funnels/funnel_unordered.py @@ -40,8 +40,8 @@ def get_step_counts_query(self): union_query = self.get_step_counts_without_aggregation_query() return f""" - SELECT person_id, steps_initial as steps {self._get_step_time_avgs(max_steps)} FROM ( - SELECT person_id, steps_initial, max(steps_initial) over (PARTITION BY person_id) as max_steps {self._get_step_time_names(max_steps)} FROM ( + SELECT person_id, steps {self._get_step_time_avgs(max_steps)} FROM ( + SELECT person_id, steps, max(steps) over (PARTITION BY person_id) as max_steps {self._get_step_time_names(max_steps)} FROM ( {union_query} ) ) GROUP BY person_id, steps @@ -66,7 +66,7 @@ def get_step_counts_without_aggregation_query(self): """ formatted_query = f""" - SELECT *, {sorting_condition} AS steps_initial {self._get_step_times(max_steps)} FROM ( + SELECT *, {sorting_condition} AS steps {self._get_step_times(max_steps)} FROM ( {inner_query} ) WHERE step_0 = 1""" diff --git a/ee/clickhouse/views/insights.py b/ee/clickhouse/views/insights.py index d5f2ed59bec6b..699b5e2f50599 100644 --- a/ee/clickhouse/views/insights.py +++ b/ee/clickhouse/views/insights.py @@ -1,4 +1,4 @@ -from typing import Any, Dict +from typing import Any, Dict, Type from rest_framework.decorators import action from rest_framework.request import Request @@ -9,6 +9,7 @@ from ee.clickhouse.queries.clickhouse_stickiness import ClickhouseStickiness from ee.clickhouse.queries.funnels import ( ClickhouseFunnel, + ClickhouseFunnelBase, ClickhouseFunnelStrict, ClickhouseFunnelTrends, ClickhouseFunnelUnordered, @@ -81,7 +82,7 @@ def calculate_funnel(self, request: Request) -> Dict[str, Any]: team = self.team filter = Filter(request=request, data={"insight": INSIGHT_FUNNELS}) - funnel_order_class = ClickhouseFunnel + funnel_order_class: Type[ClickhouseFunnelBase] = ClickhouseFunnel if filter.funnel_order_type == FunnelOrderType.UNORDERED: funnel_order_class = ClickhouseFunnelUnordered elif filter.funnel_order_type == FunnelOrderType.STRICT: diff --git a/posthog/exceptions.py b/posthog/exceptions.py index 446b190d97363..597022e46fc01 100644 --- a/posthog/exceptions.py +++ b/posthog/exceptions.py @@ -25,7 +25,6 @@ def exception_reporting(exception: Exception, *args, **kwargs) -> None: Determines which exceptions to report and sends them to Sentry. Used through drf-exceptions-hog """ - print(exception) if not isinstance(exception, APIException): capture_exception(exception) From 5c8ae7b9ca2ed76544bb8f9805fd42be829acabc Mon Sep 17 00:00:00 2001 From: Neil Kakkar Date: Thu, 8 Jul 2021 11:32:20 +0100 Subject: [PATCH 3/6] small refactoring of get_query --- ee/clickhouse/queries/funnels/base.py | 42 +++++++------------ ee/clickhouse/queries/funnels/funnel.py | 2 +- .../queries/funnels/funnel_persons.py | 6 +-- .../queries/funnels/funnel_strict.py | 24 +---------- .../queries/funnels/funnel_strict_persons.py | 4 +- .../queries/funnels/funnel_time_to_convert.py | 2 +- .../queries/funnels/funnel_trends.py | 2 +- .../queries/funnels/funnel_trends_persons.py | 2 +- .../queries/funnels/funnel_unordered.py | 24 +---------- .../funnels/funnel_unordered_persons.py | 6 +-- 10 files changed, 29 insertions(+), 85 deletions(-) diff --git a/ee/clickhouse/queries/funnels/base.py b/ee/clickhouse/queries/funnels/base.py index c16edd9c35306..f1be01128c22a 100644 --- a/ee/clickhouse/queries/funnels/base.py +++ b/ee/clickhouse/queries/funnels/base.py @@ -46,16 +46,21 @@ def run(self, *args, **kwargs): def _format_results(self, results): # Format of this is [step order, person count (that reached that step), array of person uuids] steps = [] - relevant_people = [] total_people = 0 for step in reversed(self._filter.entities): - # Clickhouse step order starts at one, hence the +1 - result_step = [x for x in results if step.order + 1 == x[0]] - if len(result_step) > 0: - total_people += result_step[0][1] - relevant_people += result_step[0][2] - steps.append(self._serialize_step(step, total_people, relevant_people[0:100])) + + if results[0] and len(results[0]) > 0: + total_people += results[0][step.order] + + serialized_result = self._serialize_step(step, total_people, []) + if step.order > 0: + serialized_result.update( + {"average_conversion_time": results[0][step.order + len(self._filter.entities) - 1]} + ) + else: + serialized_result.update({"average_conversion_time": None}) + steps.append(serialized_result) return steps[::-1] #  reverse @@ -67,6 +72,7 @@ def _exec_query(self) -> List[Tuple]: allow_denormalized_props=True, filter_test_accounts=self._filter.filter_test_accounts, ) + # TODO: can I remove this? - doesn't seem like it's used appropriately in new funnels? # format default dates data = {} @@ -76,27 +82,9 @@ def _exec_query(self) -> List[Tuple]: data.update({"date_to": timezone.now()}) self._filter = self._filter.with_data(data) - parsed_date_from, parsed_date_to, _ = parse_timestamps( - filter=self._filter, table="events.", team_id=self._team.pk - ) self.params.update(prop_filter_params) - steps = [self._build_step_query(entity, index, "events") for index, entity in enumerate(self._filter.entities)] - - format_properties = { - "team_id": self._team.id, - "steps": ", ".join(steps), - "filters": prop_filters.replace("uuid IN", "events.uuid IN", 1), - "parsed_date_from": parsed_date_from, - "parsed_date_to": parsed_date_to, - "top_level_groupby": "", - "extra_select": "", - "extra_groupby": "", - "within_time": FunnelWindowDaysMixin.microseconds_from_days(self._filter.funnel_window_days), - "latest_distinct_id_sql": GET_LATEST_PERSON_DISTINCT_ID_SQL, - "offset": self._filter.offset, - } - query = self.get_query(format_properties) + query = self.get_query() return sync_execute(query, self.params) @@ -256,7 +244,7 @@ def _get_step_time_avgs(self, max_steps: int): return f", {formatted}" if formatted else "" @abstractmethod - def get_query(self, format_properties): + def get_query(self): pass def get_step_counts_query(self): diff --git a/ee/clickhouse/queries/funnels/funnel.py b/ee/clickhouse/queries/funnels/funnel.py index 64b3f57601e29..abdcb13dbeb10 100644 --- a/ee/clickhouse/queries/funnels/funnel.py +++ b/ee/clickhouse/queries/funnels/funnel.py @@ -4,7 +4,7 @@ class ClickhouseFunnel(ClickhouseFunnelBase): - def get_query(self, format_properties): + def get_query(self): steps_per_person_query = self.get_step_counts_query() max_steps = len(self._filter.entities) diff --git a/ee/clickhouse/queries/funnels/funnel_persons.py b/ee/clickhouse/queries/funnels/funnel_persons.py index e3f0ff2cf002e..19d8f67f35651 100644 --- a/ee/clickhouse/queries/funnels/funnel_persons.py +++ b/ee/clickhouse/queries/funnels/funnel_persons.py @@ -4,11 +4,11 @@ class ClickhouseFunnelPersons(ClickhouseFunnel): - def get_query(self, format_properties): + def get_query(self): return FUNNEL_PERSONS_BY_STEP_SQL.format( - **format_properties, + offset=self._filter.offset, steps_per_person_query=self.get_step_counts_query(), - persons_steps=self._get_funnel_person_step_condition() + persons_steps=self._get_funnel_person_step_condition(), ) def _format_results(self, results): diff --git a/ee/clickhouse/queries/funnels/funnel_strict.py b/ee/clickhouse/queries/funnels/funnel_strict.py index 6159aa4d386c7..f34d9991d7b65 100644 --- a/ee/clickhouse/queries/funnels/funnel_strict.py +++ b/ee/clickhouse/queries/funnels/funnel_strict.py @@ -4,7 +4,7 @@ class ClickhouseFunnelStrict(ClickhouseFunnelBase): - def get_query(self, format_properties): + def get_query(self): max_steps = len(self._filter.entities) return f""" SELECT {self._get_count_columns(max_steps)} {self._get_step_time_avgs(max_steps)} FROM ( @@ -56,25 +56,3 @@ def _get_partition_cols(self, level_index: int, max_steps: int): f"min(latest_{i}) over (PARTITION by person_id ORDER BY timestamp DESC ROWS BETWEEN {i} PRECEDING AND {i} PRECEDING) latest_{i}" ) return ", ".join(cols) - - # TODO: copied from funnel.py. Once the new funnel query replaces old one, the base format_results function can use this - def _format_results(self, results): - # Format of this is [step order, person count (that reached that step), array of person uuids] - steps = [] - total_people = 0 - - for step in reversed(self._filter.entities): - - if results[0] and len(results[0]) > 0: - total_people += results[0][step.order] - - serialized_result = self._serialize_step(step, total_people, []) - if step.order > 0: - serialized_result.update( - {"average_conversion_time": results[0][step.order + len(self._filter.entities) - 1]} - ) - else: - serialized_result.update({"average_conversion_time": None}) - steps.append(serialized_result) - - return steps[::-1] #  reverse diff --git a/ee/clickhouse/queries/funnels/funnel_strict_persons.py b/ee/clickhouse/queries/funnels/funnel_strict_persons.py index 473acec53d278..78670cc131ffa 100644 --- a/ee/clickhouse/queries/funnels/funnel_strict_persons.py +++ b/ee/clickhouse/queries/funnels/funnel_strict_persons.py @@ -6,9 +6,9 @@ class ClickhouseFunnelStrictPersons(ClickhouseFunnelStrict): def get_query(self, format_properties): return FUNNEL_PERSONS_BY_STEP_SQL.format( - **format_properties, + offset=self._filter.offset, steps_per_person_query=self.get_step_counts_query(), - persons_steps=self._get_funnel_person_step_condition() + persons_steps=self._get_funnel_person_step_condition(), ) def _format_results(self, results): diff --git a/ee/clickhouse/queries/funnels/funnel_time_to_convert.py b/ee/clickhouse/queries/funnels/funnel_time_to_convert.py index 3c4112b97bd23..b3945ab89222b 100644 --- a/ee/clickhouse/queries/funnels/funnel_time_to_convert.py +++ b/ee/clickhouse/queries/funnels/funnel_time_to_convert.py @@ -13,7 +13,7 @@ def __init__(self, filter: Filter, team: Team, funnel_order_class: Type[Clickhou def _format_results(self, results: list) -> list: return results - def get_query(self, format_properties) -> str: + def get_query(self) -> str: steps_per_person_query = self.funnel_order.get_step_counts_query() self.params.update(self.funnel_order.params) # expects 1 person per row, whatever their max step is, and the step conversion times for this person diff --git a/ee/clickhouse/queries/funnels/funnel_trends.py b/ee/clickhouse/queries/funnels/funnel_trends.py index fc1e1199d5058..5fb43a54bfd59 100644 --- a/ee/clickhouse/queries/funnels/funnel_trends.py +++ b/ee/clickhouse/queries/funnels/funnel_trends.py @@ -63,7 +63,7 @@ def run(self, *args, **kwargs): def perform_query(self): return self._summarize_data(self._exec_query()) - def get_query(self, format_properties) -> str: + def get_query(self) -> str: steps_per_person_query = self.funnel_order.get_step_counts_without_aggregation_query() # expects multiple rows for same person, first event time, steps taken. diff --git a/ee/clickhouse/queries/funnels/funnel_trends_persons.py b/ee/clickhouse/queries/funnels/funnel_trends_persons.py index 1690eb056a55e..b029d257d9c58 100644 --- a/ee/clickhouse/queries/funnels/funnel_trends_persons.py +++ b/ee/clickhouse/queries/funnels/funnel_trends_persons.py @@ -2,5 +2,5 @@ class ClickhouseFunnelTrendsPersons(ClickhouseFunnelBase): - def get_query(self, format_properties): + def get_query(self): pass diff --git a/ee/clickhouse/queries/funnels/funnel_unordered.py b/ee/clickhouse/queries/funnels/funnel_unordered.py index d66f8eeb08d7b..40a4937d48d97 100644 --- a/ee/clickhouse/queries/funnels/funnel_unordered.py +++ b/ee/clickhouse/queries/funnels/funnel_unordered.py @@ -23,7 +23,7 @@ class ClickhouseFunnelUnordered(ClickhouseFunnelBase): Here, `step_i` (0 indexed) signifies the number of people that did at least `i+1` steps. """ - def get_query(self, format_properties): + def get_query(self): max_steps = len(self._filter.entities) @@ -114,25 +114,3 @@ def get_sorting_condition(self, max_steps: int): return f"arraySum([{','.join(basic_conditions)}, 1])" else: return "1" - - # TODO: copied from funnel.py. Once the new funnel query replaces old one, the base format_results function can use this - def _format_results(self, results): - # Format of this is [step order, person count (that reached that step), array of person uuids] - steps = [] - total_people = 0 - - for step in reversed(self._filter.entities): - - if results[0] and len(results[0]) > 0: - total_people += results[0][step.order] - - serialized_result = self._serialize_step(step, total_people, []) - if step.order > 0: - serialized_result.update( - {"average_conversion_time": results[0][step.order + len(self._filter.entities) - 1]} - ) - else: - serialized_result.update({"average_conversion_time": None}) - steps.append(serialized_result) - - return steps[::-1] #  reverse diff --git a/ee/clickhouse/queries/funnels/funnel_unordered_persons.py b/ee/clickhouse/queries/funnels/funnel_unordered_persons.py index bd2b7bdb119b0..b3279fc2f93e6 100644 --- a/ee/clickhouse/queries/funnels/funnel_unordered_persons.py +++ b/ee/clickhouse/queries/funnels/funnel_unordered_persons.py @@ -4,11 +4,11 @@ class ClickhouseFunnelUnorderedPersons(ClickhouseFunnelUnordered): - def get_query(self, format_properties): + def get_query(self): return FUNNEL_PERSONS_BY_STEP_SQL.format( - **format_properties, + offset=self._filter.offset, steps_per_person_query=self.get_step_counts_query(), - persons_steps=self._get_funnel_person_step_condition() + persons_steps=self._get_funnel_person_step_condition(), ) def _format_results(self, results): From 524bfdca927610b205ce8516796d3f27f048457f Mon Sep 17 00:00:00 2001 From: Neil Kakkar Date: Thu, 8 Jul 2021 11:38:39 +0100 Subject: [PATCH 4/6] remove top level filter props --- ee/clickhouse/queries/funnels/base.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/ee/clickhouse/queries/funnels/base.py b/ee/clickhouse/queries/funnels/base.py index f1be01128c22a..95251894c3854 100644 --- a/ee/clickhouse/queries/funnels/base.py +++ b/ee/clickhouse/queries/funnels/base.py @@ -65,14 +65,6 @@ def _format_results(self, results): return steps[::-1] #  reverse def _exec_query(self) -> List[Tuple]: - prop_filters, prop_filter_params = parse_prop_clauses( - self._filter.properties, - self._team.pk, - prepend="global", - allow_denormalized_props=True, - filter_test_accounts=self._filter.filter_test_accounts, - ) - # TODO: can I remove this? - doesn't seem like it's used appropriately in new funnels? # format default dates data = {} @@ -82,8 +74,6 @@ def _exec_query(self) -> List[Tuple]: data.update({"date_to": timezone.now()}) self._filter = self._filter.with_data(data) - self.params.update(prop_filter_params) - query = self.get_query() return sync_execute(query, self.params) From 69f5372c1673f70ccd8bd9685c009a2b32cedc70 Mon Sep 17 00:00:00 2001 From: Neil Kakkar Date: Thu, 8 Jul 2021 11:51:57 +0100 Subject: [PATCH 5/6] save the file --- ee/clickhouse/queries/funnels/funnel_strict_persons.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ee/clickhouse/queries/funnels/funnel_strict_persons.py b/ee/clickhouse/queries/funnels/funnel_strict_persons.py index 78670cc131ffa..487abb6606011 100644 --- a/ee/clickhouse/queries/funnels/funnel_strict_persons.py +++ b/ee/clickhouse/queries/funnels/funnel_strict_persons.py @@ -4,7 +4,7 @@ class ClickhouseFunnelStrictPersons(ClickhouseFunnelStrict): - def get_query(self, format_properties): + def get_query(self): return FUNNEL_PERSONS_BY_STEP_SQL.format( offset=self._filter.offset, steps_per_person_query=self.get_step_counts_query(), From e6d83a52ee8bb98b729cf3d863a07cc743699970 Mon Sep 17 00:00:00 2001 From: Neil Kakkar Date: Thu, 8 Jul 2021 18:16:17 +0100 Subject: [PATCH 6/6] rm dupes --- ee/clickhouse/queries/funnels/base.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/ee/clickhouse/queries/funnels/base.py b/ee/clickhouse/queries/funnels/base.py index 4ce5ee6a7aa7f..95251894c3854 100644 --- a/ee/clickhouse/queries/funnels/base.py +++ b/ee/clickhouse/queries/funnels/base.py @@ -243,12 +243,6 @@ def get_step_counts_query(self): def get_step_counts_without_aggregation_query(self): pass - def get_step_counts_query(self): - pass - - def get_step_counts_without_aggregation_query(self): - pass - def _get_breakdown_select_prop(self) -> str: if self._filter.breakdown: self.params.update({"breakdown": self._filter.breakdown})