Skip to content

Commit

Permalink
Add median time for funnel conversion data (#5203)
Browse files Browse the repository at this point in the history
* add median time for conversion steps

* patch tests

* fix time to convert funnel query

* fix strict and unordered

* add median conversion time to base

* fix

* more tests
  • Loading branch information
EDsCODE committed Jul 20, 2021
1 parent 13cbf9f commit cedf33c
Show file tree
Hide file tree
Showing 9 changed files with 80 additions and 18 deletions.
30 changes: 24 additions & 6 deletions ee/clickhouse/queries/funnels/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,10 +61,13 @@ def _format_single_funnel(self, results, with_breakdown=False):
serialized_result = self._serialize_step(step, total_people, [])
if step.order > 0:
serialized_result.update(
{"average_conversion_time": results[step.order + len(self._filter.entities) - 1]}
{
"average_conversion_time": results[step.order + len(self._filter.entities) - 1],
"median_conversion_time": results[step.order + len(self._filter.entities) * 2 - 2],
}
)
else:
serialized_result.update({"average_conversion_time": None})
serialized_result.update({"average_conversion_time": None, "median_conversion_time": None})

if with_breakdown:
serialized_result.update({"breakdown": results[-1]})
Expand Down Expand Up @@ -113,14 +116,13 @@ def _exec_query(self) -> List[Tuple]:
self._filter = self._filter.with_data(data)

query = self.get_query()

return sync_execute(query, self.params)

def _get_step_times(self, max_steps: int):
conditions: List[str] = []
for i in range(1, max_steps):
conditions.append(
f"if(isNotNull(latest_{i}), dateDiff('second', toDateTime(latest_{i - 1}), toDateTime(latest_{i})), NULL) step_{i}_average_conversion_time"
f"if(isNotNull(latest_{i}), dateDiff('second', toDateTime(latest_{i - 1}), toDateTime(latest_{i})), NULL) step_{i}_conversion_time"
)

formatted = ", ".join(conditions)
Expand Down Expand Up @@ -316,10 +318,26 @@ def _get_count_columns(self, max_steps: int):

return ", ".join(cols)

def _get_step_time_avgs(self, max_steps: int):
def _get_step_time_avgs(self, max_steps: int, inner_query: bool = False):
conditions: List[str] = []
for i in range(1, max_steps):
conditions.append(f"avg(step_{i}_average_conversion_time) step_{i}_average_conversion_time")
conditions.append(
f"avg(step_{i}_conversion_time) step_{i}_average_conversion_time_inner"
if inner_query
else f"avg(step_{i}_average_conversion_time_inner) step_{i}_average_conversion_time"
)

formatted = ", ".join(conditions)
return f", {formatted}" if formatted else ""

def _get_step_time_median(self, max_steps: int, inner_query: bool = False):
conditions: List[str] = []
for i in range(1, max_steps):
conditions.append(
f"median(step_{i}_conversion_time) step_{i}_median_conversion_time_inner"
if inner_query
else f"median(step_{i}_median_conversion_time_inner) step_{i}_median_conversion_time"
)

formatted = ", ".join(conditions)
return f", {formatted}" if formatted else ""
Expand Down
13 changes: 9 additions & 4 deletions ee/clickhouse/queries/funnels/funnel.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def get_query(self):
breakdown_clause = self._get_breakdown_prop()

return f"""
SELECT {self._get_count_columns(max_steps)} {self._get_people_columns(max_steps)} {self._get_step_time_avgs(max_steps)} {breakdown_clause} FROM (
SELECT {self._get_count_columns(max_steps)} {self._get_people_columns(max_steps)} {self._get_step_time_avgs(max_steps)} {self._get_step_time_median(max_steps)} {breakdown_clause} FROM (
{self.get_step_counts_query()}
) {'GROUP BY prop' if breakdown_clause != '' else ''} SETTINGS allow_experimental_window_functions = 1
"""
Expand All @@ -52,7 +52,7 @@ def get_step_counts_query(self):
max_steps = len(self._filter.entities)
breakdown_clause = self._get_breakdown_prop()

return f"""SELECT person_id, max(steps) AS steps {self._get_step_time_avgs(max_steps)} {breakdown_clause} FROM (
return f"""SELECT person_id, max(steps) AS steps {self._get_step_time_avgs(max_steps, inner_query=True)} {self._get_step_time_median(max_steps, inner_query=True)} {breakdown_clause} FROM (
{steps_per_person_query}
) GROUP BY person_id {breakdown_clause}
"""
Expand Down Expand Up @@ -82,9 +82,14 @@ def _format_single_funnel(self, result, with_breakdown=False):

serialized_result = self._serialize_step(step, total_people, relevant_people[0:100])
if step.order > 0:
serialized_result.update({"average_conversion_time": result[step.order + num_entities * 2 - 1]})
serialized_result.update(
{
"average_conversion_time": result[step.order + num_entities * 2 - 1],
"median_conversion_time": result[step.order + num_entities * 3 - 2],
}
)
else:
serialized_result.update({"average_conversion_time": None})
serialized_result.update({"average_conversion_time": None, "median_conversion_time": None})

if with_breakdown:
serialized_result.update(
Expand Down
4 changes: 2 additions & 2 deletions ee/clickhouse/queries/funnels/funnel_strict.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ def get_query(self):
breakdown_clause = self._get_breakdown_prop()

return f"""
SELECT {self._get_count_columns(max_steps)} {self._get_step_time_avgs(max_steps)} {breakdown_clause} FROM (
SELECT {self._get_count_columns(max_steps)} {self._get_step_time_avgs(max_steps)} {self._get_step_time_median(max_steps)} {breakdown_clause} FROM (
{self.get_step_counts_query()}
) {'GROUP BY prop' if breakdown_clause != '' else ''} SETTINGS allow_experimental_window_functions = 1
"""
Expand All @@ -23,7 +23,7 @@ def get_step_counts_query(self):
breakdown_clause = self._get_breakdown_prop()

return f"""
SELECT person_id, max(steps) AS steps {self._get_step_time_avgs(max_steps)} {breakdown_clause} FROM (
SELECT person_id, max(steps) AS steps {self._get_step_time_avgs(max_steps, inner_query=True)} {self._get_step_time_median(max_steps, inner_query=True)} {breakdown_clause} FROM (
{formatted_query}
) GROUP BY person_id {breakdown_clause}
"""
Expand Down
4 changes: 2 additions & 2 deletions ee/clickhouse/queries/funnels/funnel_time_to_convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def get_query(self) -> str:
)

steps_average_conversion_time_identifiers = [
f"step_{step+1}_average_conversion_time" for step in range(from_step, to_step)
f"step_{step+1}_average_conversion_time_inner" for step in range(from_step, to_step)
]
steps_average_conversion_time_expression_sum = " + ".join(steps_average_conversion_time_identifiers)

Expand Down Expand Up @@ -99,7 +99,7 @@ def get_query(self) -> str:
count() AS person_count
FROM step_runs
-- We only need to check step to_step here, because it depends on all the other ones being NOT NULL too
WHERE step_{to_step}_average_conversion_time IS NOT NULL
WHERE step_{to_step}_average_conversion_time_inner IS NOT NULL
GROUP BY bin_from_seconds
) results
FULL OUTER JOIN (
Expand Down
8 changes: 4 additions & 4 deletions ee/clickhouse/queries/funnels/funnel_unordered.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def get_query(self):
breakdown_clause = self._get_breakdown_prop()

return f"""
SELECT {self._get_count_columns(max_steps)} {self._get_step_time_avgs(max_steps)} {breakdown_clause} FROM (
SELECT {self._get_count_columns(max_steps)} {self._get_step_time_avgs(max_steps)} {self._get_step_time_median(max_steps)} {breakdown_clause} FROM (
{self.get_step_counts_query()}
) {'GROUP BY prop' if breakdown_clause != '' else ''} SETTINGS allow_experimental_window_functions = 1
"""
Expand All @@ -43,7 +43,7 @@ def get_step_counts_query(self):
breakdown_clause = self._get_breakdown_prop()

return f"""
SELECT person_id, steps {self._get_step_time_avgs(max_steps)} {breakdown_clause} FROM (
SELECT person_id, steps {self._get_step_time_avgs(max_steps, inner_query=True)} {self._get_step_time_median(max_steps, inner_query=True)} {breakdown_clause} FROM (
SELECT person_id, steps, max(steps) over (PARTITION BY person_id {breakdown_clause}) as max_steps {self._get_step_time_names(max_steps)} {breakdown_clause} FROM (
{union_query}
)
Expand Down Expand Up @@ -84,7 +84,7 @@ def get_step_counts_without_aggregation_query(self):
def _get_step_time_names(self, max_steps: int):
names = []
for i in range(1, max_steps):
names.append(f"step_{i}_average_conversion_time")
names.append(f"step_{i}_conversion_time")

formatted = ",".join(names)
return f", {formatted}" if formatted else ""
Expand All @@ -100,7 +100,7 @@ def _get_step_times(self, max_steps: int):

for i in range(1, max_steps):
conditions.append(
f"if(isNotNull(conversion_times[{i+1}]), dateDiff('second', conversion_times[{i}], conversion_times[{i+1}]), NULL) step_{i}_average_conversion_time"
f"if(isNotNull(conversion_times[{i+1}]), dateDiff('second', conversion_times[{i}], conversion_times[{i+1}]), NULL) step_{i}_conversion_time"
)
# array indices in ClickHouse are 1-based :shrug:

Expand Down
26 changes: 26 additions & 0 deletions ee/clickhouse/queries/funnels/test/breakdown_cases.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ def test_funnel_step_breakdown_event(self):
"count": 1,
"type": "events",
"average_conversion_time": None,
"median_conversion_time": None,
"breakdown": "Chrome",
},
{
Expand All @@ -98,6 +99,7 @@ def test_funnel_step_breakdown_event(self):
"count": 1,
"type": "events",
"average_conversion_time": 3600.0,
"median_conversion_time": 3600.0,
"breakdown": "Chrome",
},
{
Expand All @@ -108,6 +110,7 @@ def test_funnel_step_breakdown_event(self):
"count": 1,
"type": "events",
"average_conversion_time": 7200.0,
"median_conversion_time": 7200.0,
"breakdown": "Chrome",
},
],
Expand All @@ -127,6 +130,7 @@ def test_funnel_step_breakdown_event(self):
"count": 2,
"type": "events",
"average_conversion_time": None,
"median_conversion_time": None,
"breakdown": "Safari",
},
{
Expand All @@ -137,6 +141,7 @@ def test_funnel_step_breakdown_event(self):
"count": 1,
"type": "events",
"average_conversion_time": 7200.0,
"median_conversion_time": 7200.0,
"breakdown": "Safari",
},
{
Expand All @@ -147,6 +152,7 @@ def test_funnel_step_breakdown_event(self):
"count": 0,
"type": "events",
"average_conversion_time": None,
"median_conversion_time": None,
"breakdown": "Safari",
},
],
Expand Down Expand Up @@ -230,6 +236,7 @@ def test_funnel_step_breakdown_event_no_type(self):
"count": 1,
"type": "events",
"average_conversion_time": None,
"median_conversion_time": None,
"breakdown": "Chrome",
},
{
Expand All @@ -240,6 +247,7 @@ def test_funnel_step_breakdown_event_no_type(self):
"count": 1,
"type": "events",
"average_conversion_time": 3600.0,
"median_conversion_time": 3600.0,
"breakdown": "Chrome",
},
{
Expand All @@ -250,6 +258,7 @@ def test_funnel_step_breakdown_event_no_type(self):
"count": 1,
"type": "events",
"average_conversion_time": 7200.0,
"median_conversion_time": 7200.0,
"breakdown": "Chrome",
},
],
Expand All @@ -269,6 +278,7 @@ def test_funnel_step_breakdown_event_no_type(self):
"count": 2,
"type": "events",
"average_conversion_time": None,
"median_conversion_time": None,
"breakdown": "Safari",
},
{
Expand All @@ -279,6 +289,7 @@ def test_funnel_step_breakdown_event_no_type(self):
"count": 1,
"type": "events",
"average_conversion_time": 7200.0,
"median_conversion_time": 7200.0,
"breakdown": "Safari",
},
{
Expand All @@ -289,6 +300,7 @@ def test_funnel_step_breakdown_event_no_type(self):
"count": 0,
"type": "events",
"average_conversion_time": None,
"median_conversion_time": None,
"breakdown": "Safari",
},
],
Expand Down Expand Up @@ -364,6 +376,7 @@ def test_funnel_step_breakdown_person(self):
"count": 1,
"type": "events",
"average_conversion_time": None,
"median_conversion_time": None,
"breakdown": "Chrome",
},
{
Expand All @@ -374,6 +387,7 @@ def test_funnel_step_breakdown_person(self):
"count": 1,
"type": "events",
"average_conversion_time": 3600.0,
"median_conversion_time": 3600.0,
"breakdown": "Chrome",
},
{
Expand All @@ -384,6 +398,7 @@ def test_funnel_step_breakdown_person(self):
"count": 1,
"type": "events",
"average_conversion_time": 7200.0,
"median_conversion_time": 7200.0,
"breakdown": "Chrome",
},
],
Expand All @@ -402,6 +417,7 @@ def test_funnel_step_breakdown_person(self):
"count": 1,
"type": "events",
"average_conversion_time": None,
"median_conversion_time": None,
"breakdown": "Safari",
},
{
Expand All @@ -412,6 +428,7 @@ def test_funnel_step_breakdown_person(self):
"count": 1,
"type": "events",
"average_conversion_time": 7200.0,
"median_conversion_time": 7200.0,
"breakdown": "Safari",
},
{
Expand All @@ -422,6 +439,7 @@ def test_funnel_step_breakdown_person(self):
"count": 0,
"type": "events",
"average_conversion_time": None,
"median_conversion_time": None,
"breakdown": "Safari",
},
],
Expand Down Expand Up @@ -535,6 +553,7 @@ def test_funnel_step_breakdown_event_single_person_multiple_breakdowns(self):
"count": 1,
"type": "events",
"average_conversion_time": None,
"median_conversion_time": None,
"breakdown": "0",
},
],
Expand All @@ -552,6 +571,7 @@ def test_funnel_step_breakdown_event_single_person_multiple_breakdowns(self):
"count": 1,
"type": "events",
"average_conversion_time": None,
"median_conversion_time": None,
"breakdown": "Chrome",
},
],
Expand All @@ -569,6 +589,7 @@ def test_funnel_step_breakdown_event_single_person_multiple_breakdowns(self):
"count": 1,
"type": "events",
"average_conversion_time": None,
"median_conversion_time": None,
"breakdown": "Mac",
},
],
Expand All @@ -586,6 +607,7 @@ def test_funnel_step_breakdown_event_single_person_multiple_breakdowns(self):
"count": 1,
"type": "events",
"average_conversion_time": None,
"median_conversion_time": None,
"breakdown": "Safari",
},
],
Expand Down Expand Up @@ -650,6 +672,7 @@ def test_funnel_step_breakdown_event_single_person_events_with_multiple_properti
"count": 1,
"type": "events",
"average_conversion_time": None,
"median_conversion_time": None,
"breakdown": "Chrome",
},
{
Expand All @@ -660,6 +683,7 @@ def test_funnel_step_breakdown_event_single_person_events_with_multiple_properti
"count": 0,
"type": "events",
"average_conversion_time": None,
"median_conversion_time": None,
"breakdown": "Chrome",
},
],
Expand All @@ -678,6 +702,7 @@ def test_funnel_step_breakdown_event_single_person_events_with_multiple_properti
"count": 1,
"type": "events",
"average_conversion_time": None,
"median_conversion_time": None,
"breakdown": "Safari",
},
{
Expand All @@ -688,6 +713,7 @@ def test_funnel_step_breakdown_event_single_person_events_with_multiple_properti
"count": 1,
"type": "events",
"average_conversion_time": 3600,
"median_conversion_time": 3600,
"breakdown": "Safari",
},
],
Expand Down
5 changes: 5 additions & 0 deletions ee/clickhouse/queries/funnels/test/test_funnel.py
Original file line number Diff line number Diff line change
Expand Up @@ -930,10 +930,15 @@ def test_funnel_step_conversion_times(self):
)

result = funnel.run()

self.assertEqual(result[0]["average_conversion_time"], None)
self.assertEqual(result[1]["average_conversion_time"], 6000)
self.assertEqual(result[2]["average_conversion_time"], 5400)

self.assertEqual(result[0]["median_conversion_time"], None)
self.assertEqual(result[1]["median_conversion_time"], 7200)
self.assertEqual(result[2]["median_conversion_time"], 5400)

def test_funnel_exclusions_invalid_params(self):
filters = {
"events": [
Expand Down
Loading

0 comments on commit cedf33c

Please sign in to comment.