Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor some base Funnel query components #5037

Merged
merged 9 commits into from
Jul 8, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 14 additions & 36 deletions ee/clickhouse/queries/funnels/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,27 +46,25 @@ def run(self, *args, **kwargs):
def _format_results(self, results):
# Format of this is [step order, person count (that reached that step), array of person uuids]
steps = []
relevant_people = []
total_people = 0

for step in reversed(self._filter.entities):
# Clickhouse step order starts at one, hence the +1
result_step = [x for x in results if step.order + 1 == x[0]]
if len(result_step) > 0:
total_people += result_step[0][1]
relevant_people += result_step[0][2]
steps.append(self._serialize_step(step, total_people, relevant_people[0:100]))

if results[0] and len(results[0]) > 0:
total_people += results[0][step.order]

serialized_result = self._serialize_step(step, total_people, [])
if step.order > 0:
serialized_result.update(
{"average_conversion_time": results[0][step.order + len(self._filter.entities) - 1]}
)
else:
serialized_result.update({"average_conversion_time": None})
steps.append(serialized_result)

return steps[::-1] #  reverse

def _exec_query(self) -> List[Tuple]:
prop_filters, prop_filter_params = parse_prop_clauses(
self._filter.properties,
self._team.pk,
prepend="global",
allow_denormalized_props=True,
filter_test_accounts=self._filter.filter_test_accounts,
)

# format default dates
data = {}
Expand All @@ -76,27 +74,7 @@ def _exec_query(self) -> List[Tuple]:
data.update({"date_to": timezone.now()})
self._filter = self._filter.with_data(data)

parsed_date_from, parsed_date_to, _ = parse_timestamps(
filter=self._filter, table="events.", team_id=self._team.pk
)
self.params.update(prop_filter_params)
steps = [self._build_step_query(entity, index, "events") for index, entity in enumerate(self._filter.entities)]

format_properties = {
"team_id": self._team.id,
"steps": ", ".join(steps),
"filters": prop_filters.replace("uuid IN", "events.uuid IN", 1),
"parsed_date_from": parsed_date_from,
"parsed_date_to": parsed_date_to,
"top_level_groupby": "",
"extra_select": "",
"extra_groupby": "",
"within_time": FunnelWindowDaysMixin.microseconds_from_days(self._filter.funnel_window_days),
"latest_distinct_id_sql": GET_LATEST_PERSON_DISTINCT_ID_SQL,
"offset": self._filter.offset,
}

query = self.get_query(format_properties)
query = self.get_query()

return sync_execute(query, self.params)

Expand Down Expand Up @@ -256,7 +234,7 @@ def _get_step_time_avgs(self, max_steps: int):
return f", {formatted}" if formatted else ""

@abstractmethod
def get_query(self, format_properties):
def get_query(self):
pass

def get_step_counts_query(self):
Expand Down
2 changes: 1 addition & 1 deletion ee/clickhouse/queries/funnels/funnel.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@


class ClickhouseFunnel(ClickhouseFunnelBase):
def get_query(self, format_properties):
def get_query(self):

steps_per_person_query = self.get_step_counts_query()
max_steps = len(self._filter.entities)
Expand Down
6 changes: 3 additions & 3 deletions ee/clickhouse/queries/funnels/funnel_persons.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@


class ClickhouseFunnelPersons(ClickhouseFunnel):
def get_query(self, format_properties):
def get_query(self):
return FUNNEL_PERSONS_BY_STEP_SQL.format(
**format_properties,
offset=self._filter.offset,
steps_per_person_query=self.get_step_counts_query(),
persons_steps=self._get_funnel_person_step_condition()
persons_steps=self._get_funnel_person_step_condition(),
)

def _format_results(self, results):
Expand Down
24 changes: 1 addition & 23 deletions ee/clickhouse/queries/funnels/funnel_strict.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@


class ClickhouseFunnelStrict(ClickhouseFunnelBase):
def get_query(self, format_properties):
def get_query(self):
max_steps = len(self._filter.entities)
return f"""
SELECT {self._get_count_columns(max_steps)} {self._get_step_time_avgs(max_steps)} FROM (
Expand Down Expand Up @@ -56,25 +56,3 @@ def _get_partition_cols(self, level_index: int, max_steps: int):
f"min(latest_{i}) over (PARTITION by person_id ORDER BY timestamp DESC ROWS BETWEEN {i} PRECEDING AND {i} PRECEDING) latest_{i}"
)
return ", ".join(cols)

# TODO: copied from funnel.py. Once the new funnel query replaces old one, the base format_results function can use this
def _format_results(self, results):
# Format of this is [step order, person count (that reached that step), array of person uuids]
steps = []
total_people = 0

for step in reversed(self._filter.entities):

if results[0] and len(results[0]) > 0:
total_people += results[0][step.order]

serialized_result = self._serialize_step(step, total_people, [])
if step.order > 0:
serialized_result.update(
{"average_conversion_time": results[0][step.order + len(self._filter.entities) - 1]}
)
else:
serialized_result.update({"average_conversion_time": None})
steps.append(serialized_result)

return steps[::-1] #  reverse
6 changes: 3 additions & 3 deletions ee/clickhouse/queries/funnels/funnel_strict_persons.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@


class ClickhouseFunnelStrictPersons(ClickhouseFunnelStrict):
def get_query(self, format_properties):
def get_query(self):
return FUNNEL_PERSONS_BY_STEP_SQL.format(
**format_properties,
offset=self._filter.offset,
steps_per_person_query=self.get_step_counts_query(),
persons_steps=self._get_funnel_person_step_condition()
persons_steps=self._get_funnel_person_step_condition(),
)

def _format_results(self, results):
Expand Down
2 changes: 1 addition & 1 deletion ee/clickhouse/queries/funnels/funnel_time_to_convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ def __init__(self, filter: Filter, team: Team, funnel_order_class: Type[Clickhou
def _format_results(self, results: list) -> list:
return results

def get_query(self, format_properties) -> str:
def get_query(self) -> str:
steps_per_person_query = self.funnel_order.get_step_counts_query()
self.params.update(self.funnel_order.params)
# expects 1 person per row, whatever their max step is, and the step conversion times for this person
Expand Down
2 changes: 1 addition & 1 deletion ee/clickhouse/queries/funnels/funnel_trends.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def run(self, *args, **kwargs):
def perform_query(self):
return self._summarize_data(self._exec_query())

def get_query(self, format_properties) -> str:
def get_query(self) -> str:

steps_per_person_query = self.funnel_order.get_step_counts_without_aggregation_query()
# expects multiple rows for same person, first event time, steps taken.
Expand Down
2 changes: 1 addition & 1 deletion ee/clickhouse/queries/funnels/funnel_trends_persons.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@


class ClickhouseFunnelTrendsPersons(ClickhouseFunnelBase):
def get_query(self, format_properties):
def get_query(self):
pass
24 changes: 1 addition & 23 deletions ee/clickhouse/queries/funnels/funnel_unordered.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ class ClickhouseFunnelUnordered(ClickhouseFunnelBase):
Here, `step_i` (0 indexed) signifies the number of people that did at least `i+1` steps.
"""

def get_query(self, format_properties):
def get_query(self):

max_steps = len(self._filter.entities)

Expand Down Expand Up @@ -114,25 +114,3 @@ def get_sorting_condition(self, max_steps: int):
return f"arraySum([{','.join(basic_conditions)}, 1])"
else:
return "1"

# TODO: copied from funnel.py. Once the new funnel query replaces old one, the base format_results function can use this
def _format_results(self, results):
# Format of this is [step order, person count (that reached that step), array of person uuids]
steps = []
total_people = 0

for step in reversed(self._filter.entities):

if results[0] and len(results[0]) > 0:
total_people += results[0][step.order]

serialized_result = self._serialize_step(step, total_people, [])
if step.order > 0:
serialized_result.update(
{"average_conversion_time": results[0][step.order + len(self._filter.entities) - 1]}
)
else:
serialized_result.update({"average_conversion_time": None})
steps.append(serialized_result)

return steps[::-1] #  reverse
6 changes: 3 additions & 3 deletions ee/clickhouse/queries/funnels/funnel_unordered_persons.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@


class ClickhouseFunnelUnorderedPersons(ClickhouseFunnelUnordered):
def get_query(self, format_properties):
def get_query(self):
return FUNNEL_PERSONS_BY_STEP_SQL.format(
**format_properties,
offset=self._filter.offset,
steps_per_person_query=self.get_step_counts_query(),
persons_steps=self._get_funnel_person_step_condition()
persons_steps=self._get_funnel_person_step_condition(),
)

def _format_results(self, results):
Expand Down