Skip to content

Commit

Permalink
Refactor some base Funnel query components (#5037)
Browse files Browse the repository at this point in the history
  • Loading branch information
neilkakkar authored Jul 8, 2021
1 parent 6394988 commit 7954477
Show file tree
Hide file tree
Showing 10 changed files with 29 additions and 95 deletions.
50 changes: 14 additions & 36 deletions ee/clickhouse/queries/funnels/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,27 +46,25 @@ def run(self, *args, **kwargs):
def _format_results(self, results):
# Format of this is [step order, person count (that reached that step), array of person uuids]
steps = []
relevant_people = []
total_people = 0

for step in reversed(self._filter.entities):
# Clickhouse step order starts at one, hence the +1
result_step = [x for x in results if step.order + 1 == x[0]]
if len(result_step) > 0:
total_people += result_step[0][1]
relevant_people += result_step[0][2]
steps.append(self._serialize_step(step, total_people, relevant_people[0:100]))

if results[0] and len(results[0]) > 0:
total_people += results[0][step.order]

serialized_result = self._serialize_step(step, total_people, [])
if step.order > 0:
serialized_result.update(
{"average_conversion_time": results[0][step.order + len(self._filter.entities) - 1]}
)
else:
serialized_result.update({"average_conversion_time": None})
steps.append(serialized_result)

return steps[::-1] #  reverse

def _exec_query(self) -> List[Tuple]:
prop_filters, prop_filter_params = parse_prop_clauses(
self._filter.properties,
self._team.pk,
prepend="global",
allow_denormalized_props=True,
filter_test_accounts=self._filter.filter_test_accounts,
)

# format default dates
data = {}
Expand All @@ -76,27 +74,7 @@ def _exec_query(self) -> List[Tuple]:
data.update({"date_to": timezone.now()})
self._filter = self._filter.with_data(data)

parsed_date_from, parsed_date_to, _ = parse_timestamps(
filter=self._filter, table="events.", team_id=self._team.pk
)
self.params.update(prop_filter_params)
steps = [self._build_step_query(entity, index, "events") for index, entity in enumerate(self._filter.entities)]

format_properties = {
"team_id": self._team.id,
"steps": ", ".join(steps),
"filters": prop_filters.replace("uuid IN", "events.uuid IN", 1),
"parsed_date_from": parsed_date_from,
"parsed_date_to": parsed_date_to,
"top_level_groupby": "",
"extra_select": "",
"extra_groupby": "",
"within_time": FunnelWindowDaysMixin.microseconds_from_days(self._filter.funnel_window_days),
"latest_distinct_id_sql": GET_LATEST_PERSON_DISTINCT_ID_SQL,
"offset": self._filter.offset,
}

query = self.get_query(format_properties)
query = self.get_query()

return sync_execute(query, self.params)

Expand Down Expand Up @@ -256,7 +234,7 @@ def _get_step_time_avgs(self, max_steps: int):
return f", {formatted}" if formatted else ""

@abstractmethod
def get_query(self, format_properties):
def get_query(self):
pass

def get_step_counts_query(self):
Expand Down
2 changes: 1 addition & 1 deletion ee/clickhouse/queries/funnels/funnel.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@


class ClickhouseFunnel(ClickhouseFunnelBase):
def get_query(self, format_properties):
def get_query(self):

steps_per_person_query = self.get_step_counts_query()
max_steps = len(self._filter.entities)
Expand Down
6 changes: 3 additions & 3 deletions ee/clickhouse/queries/funnels/funnel_persons.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@


class ClickhouseFunnelPersons(ClickhouseFunnel):
def get_query(self, format_properties):
def get_query(self):
return FUNNEL_PERSONS_BY_STEP_SQL.format(
**format_properties,
offset=self._filter.offset,
steps_per_person_query=self.get_step_counts_query(),
persons_steps=self._get_funnel_person_step_condition()
persons_steps=self._get_funnel_person_step_condition(),
)

def _format_results(self, results):
Expand Down
24 changes: 1 addition & 23 deletions ee/clickhouse/queries/funnels/funnel_strict.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@


class ClickhouseFunnelStrict(ClickhouseFunnelBase):
def get_query(self, format_properties):
def get_query(self):
max_steps = len(self._filter.entities)
return f"""
SELECT {self._get_count_columns(max_steps)} {self._get_step_time_avgs(max_steps)} FROM (
Expand Down Expand Up @@ -56,25 +56,3 @@ def _get_partition_cols(self, level_index: int, max_steps: int):
f"min(latest_{i}) over (PARTITION by person_id ORDER BY timestamp DESC ROWS BETWEEN {i} PRECEDING AND {i} PRECEDING) latest_{i}"
)
return ", ".join(cols)

# TODO: copied from funnel.py. Once the new funnel query replaces old one, the base format_results function can use this
def _format_results(self, results):
# Format of this is [step order, person count (that reached that step), array of person uuids]
steps = []
total_people = 0

for step in reversed(self._filter.entities):

if results[0] and len(results[0]) > 0:
total_people += results[0][step.order]

serialized_result = self._serialize_step(step, total_people, [])
if step.order > 0:
serialized_result.update(
{"average_conversion_time": results[0][step.order + len(self._filter.entities) - 1]}
)
else:
serialized_result.update({"average_conversion_time": None})
steps.append(serialized_result)

return steps[::-1] #  reverse
6 changes: 3 additions & 3 deletions ee/clickhouse/queries/funnels/funnel_strict_persons.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@


class ClickhouseFunnelStrictPersons(ClickhouseFunnelStrict):
def get_query(self, format_properties):
def get_query(self):
return FUNNEL_PERSONS_BY_STEP_SQL.format(
**format_properties,
offset=self._filter.offset,
steps_per_person_query=self.get_step_counts_query(),
persons_steps=self._get_funnel_person_step_condition()
persons_steps=self._get_funnel_person_step_condition(),
)

def _format_results(self, results):
Expand Down
2 changes: 1 addition & 1 deletion ee/clickhouse/queries/funnels/funnel_time_to_convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ def __init__(self, filter: Filter, team: Team, funnel_order_class: Type[Clickhou
def _format_results(self, results: list) -> list:
return results

def get_query(self, format_properties) -> str:
def get_query(self) -> str:
steps_per_person_query = self.funnel_order.get_step_counts_query()
self.params.update(self.funnel_order.params)
# expects 1 person per row, whatever their max step is, and the step conversion times for this person
Expand Down
2 changes: 1 addition & 1 deletion ee/clickhouse/queries/funnels/funnel_trends.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def run(self, *args, **kwargs):
def perform_query(self):
return self._summarize_data(self._exec_query())

def get_query(self, format_properties) -> str:
def get_query(self) -> str:

steps_per_person_query = self.funnel_order.get_step_counts_without_aggregation_query()
# expects multiple rows for same person, first event time, steps taken.
Expand Down
2 changes: 1 addition & 1 deletion ee/clickhouse/queries/funnels/funnel_trends_persons.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@


class ClickhouseFunnelTrendsPersons(ClickhouseFunnelBase):
def get_query(self, format_properties):
def get_query(self):
pass
24 changes: 1 addition & 23 deletions ee/clickhouse/queries/funnels/funnel_unordered.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ class ClickhouseFunnelUnordered(ClickhouseFunnelBase):
Here, `step_i` (0 indexed) signifies the number of people that did at least `i+1` steps.
"""

def get_query(self, format_properties):
def get_query(self):

max_steps = len(self._filter.entities)

Expand Down Expand Up @@ -114,25 +114,3 @@ def get_sorting_condition(self, max_steps: int):
return f"arraySum([{','.join(basic_conditions)}, 1])"
else:
return "1"

# TODO: copied from funnel.py. Once the new funnel query replaces old one, the base format_results function can use this
def _format_results(self, results):
# Format of this is [step order, person count (that reached that step), array of person uuids]
steps = []
total_people = 0

for step in reversed(self._filter.entities):

if results[0] and len(results[0]) > 0:
total_people += results[0][step.order]

serialized_result = self._serialize_step(step, total_people, [])
if step.order > 0:
serialized_result.update(
{"average_conversion_time": results[0][step.order + len(self._filter.entities) - 1]}
)
else:
serialized_result.update({"average_conversion_time": None})
steps.append(serialized_result)

return steps[::-1] #  reverse
6 changes: 3 additions & 3 deletions ee/clickhouse/queries/funnels/funnel_unordered_persons.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@


class ClickhouseFunnelUnorderedPersons(ClickhouseFunnelUnordered):
def get_query(self, format_properties):
def get_query(self):
return FUNNEL_PERSONS_BY_STEP_SQL.format(
**format_properties,
offset=self._filter.offset,
steps_per_person_query=self.get_step_counts_query(),
persons_steps=self._get_funnel_person_step_condition()
persons_steps=self._get_funnel_person_step_condition(),
)

def _format_results(self, results):
Expand Down

0 comments on commit 7954477

Please sign in to comment.