Skip to content

Commit

Permalink
Group reamining breakdown values into "Other" for funnels (#5538)
Browse files Browse the repository at this point in the history
* breakdown grouping

* resolve pagination woes

* explain the offset override

* document assumption
  • Loading branch information
neilkakkar authored Aug 12, 2021
1 parent 19c68ed commit a245c15
Show file tree
Hide file tree
Showing 5 changed files with 216 additions and 28 deletions.
44 changes: 23 additions & 21 deletions ee/clickhouse/queries/funnels/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -397,30 +397,32 @@ def _get_cohort_breakdown_join(self) -> str:

def _get_breakdown_conditions(self) -> str:
if self._filter.breakdown:
if self._filter.funnel_step_breakdown:
values = self._parse_breakdown_prop_value()
else:
limit = self._filter.breakdown_limit_or_default
first_entity = next(x for x in self._filter.entities if x.order == 0)
if not first_entity:
ValidationError("An entity with order 0 was not provided")
values = []
if self._filter.breakdown_type == "person":
values = get_breakdown_person_prop_values(
self._filter, first_entity, "count(*)", self._team.pk, limit
)
elif self._filter.breakdown_type == "event":
values = get_breakdown_event_prop_values(
self._filter, first_entity, "count(*)", self._team.pk, limit
)
limit = self._filter.breakdown_limit_or_default
first_entity = self._filter.entities[0]

values = []
if self._filter.breakdown_type == "person":
values = get_breakdown_person_prop_values(
self._filter, first_entity, "count(*)", self._team.pk, limit, extra_params={"offset": 0}
)
# people pagination sets the offset param, which is common across filters
# and gives us the wrong breakdown values here, so we override it.
elif self._filter.breakdown_type == "event":
values = get_breakdown_event_prop_values(
self._filter, first_entity, "count(*)", self._team.pk, limit, extra_params={"offset": 0}
)
# We assume breakdown values remain stable across the funnel, so using
# just the first entity to get breakdown values is ok.

self.params.update({"breakdown_values": values})
return "prop IN %(breakdown_values)s"
else:
return ""

def _get_breakdown_prop(self) -> str:
return ""

def _get_breakdown_prop(self, group_remaining=False) -> str:
if self._filter.breakdown:
return ", prop"
if group_remaining and self._filter.breakdown_type != "cohort":
return ", if(has(%(breakdown_values)s, prop), prop, 'Other') as prop"
else:
return ", prop"
else:
return ""
6 changes: 4 additions & 2 deletions ee/clickhouse/queries/funnels/funnel.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,13 +118,15 @@ def get_step_counts_without_aggregation_query(self):
max_steps = len(self._filter.entities)
if max_steps >= 2:
formatted_query = self.build_step_subquery(2, max_steps)
breakdown_query = self._get_breakdown_prop()
else:
formatted_query = self._get_inner_event_query()
breakdown_query = self._get_breakdown_prop(group_remaining=True)

exclusion_clause = self._get_exclusion_condition()

return f"""
SELECT *, {self._get_sorting_condition(max_steps, max_steps)} AS steps {exclusion_clause} {self._get_step_times(max_steps)} {self._get_breakdown_prop()} FROM (
SELECT *, {self._get_sorting_condition(max_steps, max_steps)} AS steps {exclusion_clause} {self._get_step_times(max_steps)} {breakdown_query} FROM (
{formatted_query}
) WHERE step_0 = 1
{'AND exclusion = 0' if exclusion_clause else ''}
Expand Down Expand Up @@ -172,7 +174,7 @@ def build_step_subquery(self, level_index: int, max_steps: int):
person_id,
timestamp,
{self._get_partition_cols(1, max_steps)}
{self._get_breakdown_prop()}
{self._get_breakdown_prop(group_remaining=True)}
FROM ({self._get_inner_event_query()})
"""
else:
Expand Down
2 changes: 1 addition & 1 deletion ee/clickhouse/queries/funnels/funnel_strict.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def get_step_counts_without_aggregation_query(self):

partition_select = self._get_partition_cols(1, max_steps)
sorting_condition = self._get_sorting_condition(max_steps, max_steps)
breakdown_clause = self._get_breakdown_prop()
breakdown_clause = self._get_breakdown_prop(group_remaining=True)

inner_query = f"""
SELECT
Expand Down
2 changes: 1 addition & 1 deletion ee/clickhouse/queries/funnels/funnel_unordered.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def get_step_counts_without_aggregation_query(self):

partition_select = self._get_partition_cols(1, max_steps)
sorting_condition = self.get_sorting_condition(max_steps)
breakdown_clause = self._get_breakdown_prop()
breakdown_clause = self._get_breakdown_prop(group_remaining=True)
exclusion_clause = self._get_exclusion_condition()

for i in range(max_steps):
Expand Down
190 changes: 187 additions & 3 deletions ee/clickhouse/queries/funnels/test/breakdown_cases.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,189 @@ def test_funnel_step_breakdown_event(self):
self.assertCountEqual(self._get_people_at_step(filter, 1, "Safari"), [person2.uuid, person3.uuid])
self.assertCountEqual(self._get_people_at_step(filter, 2, "Safari"), [person2.uuid])

def test_funnel_step_breakdown_event_with_other(self):

filters = {
"events": [{"id": "sign up", "order": 0}, {"id": "play movie", "order": 1}, {"id": "buy", "order": 2},],
"insight": INSIGHT_FUNNELS,
"date_from": "2020-01-01",
"date_to": "2020-01-08",
"funnel_window_days": 7,
"breakdown_type": "event",
"breakdown": "$browser",
"breakdown_limit": 1,
}

filter = Filter(data=filters)
funnel = Funnel(filter, self.team)

# event
person1 = _create_person(distinct_ids=["person1"], team_id=self.team.pk)
_create_event(
team=self.team,
event="sign up",
distinct_id="person1",
properties={"key": "val", "$browser": "Chrome"},
timestamp="2020-01-01T12:00:00Z",
)
_create_event(
team=self.team,
event="play movie",
distinct_id="person1",
properties={"key": "val", "$browser": "Chrome"},
timestamp="2020-01-01T13:00:00Z",
)
_create_event(
team=self.team,
event="buy",
distinct_id="person1",
properties={"key": "val", "$browser": "Chrome"},
timestamp="2020-01-01T15:00:00Z",
)

person2 = _create_person(distinct_ids=["person2"], team_id=self.team.pk)
_create_event(
team=self.team,
event="sign up",
distinct_id="person2",
properties={"key": "val", "$browser": "Safari"},
timestamp="2020-01-02T14:00:00Z",
)
_create_event(
team=self.team,
event="play movie",
distinct_id="person2",
properties={"key": "val", "$browser": "Safari"},
timestamp="2020-01-02T16:00:00Z",
)

person3 = _create_person(distinct_ids=["person3"], team_id=self.team.pk)
_create_event(
team=self.team,
event="sign up",
distinct_id="person3",
properties={"key": "val", "$browser": "Safari"},
timestamp="2020-01-02T14:00:00Z",
)

person4 = _create_person(distinct_ids=["person4"], team_id=self.team.pk)
_create_event(
team=self.team,
event="sign up",
distinct_id="person4",
properties={"key": "val", "$browser": "random"},
timestamp="2020-01-02T14:00:00Z",
)

person5 = _create_person(distinct_ids=["person5"], team_id=self.team.pk)
_create_event(
team=self.team,
event="sign up",
distinct_id="person5",
properties={"key": "val", "$browser": "another one"},
timestamp="2020-01-02T15:00:00Z",
)

result = funnel.run()

people = result[0][0].pop("people")
self.assertCountEqual(
people, [person1.uuid, person4.uuid, person5.uuid] if Funnel == ClickhouseFunnel else []
)

self.assertEqual(
result[0],
[
{
"action_id": "sign up",
"name": "sign up",
"order": 0,
# popped people because flakey ordering for assertEqual
"count": 3,
"type": "events",
"average_conversion_time": None,
"median_conversion_time": None,
"breakdown": "Other",
"breakdown_value": "Other",
},
{
"action_id": "play movie",
"name": "play movie",
"order": 1,
"people": [person1.uuid] if Funnel == ClickhouseFunnel else [], # backwards compatibility
"count": 1,
"type": "events",
"average_conversion_time": 3600.0,
"median_conversion_time": 3600.0,
"breakdown": "Other",
"breakdown_value": "Other",
},
{
"action_id": "buy",
"name": "buy",
"order": 2,
"people": [person1.uuid] if Funnel == ClickhouseFunnel else [], # backwards compatibility
"count": 1,
"type": "events",
"average_conversion_time": 7200.0,
"median_conversion_time": 7200.0,
"breakdown": "Other",
"breakdown_value": "Other",
},
],
)
self.assertCountEqual(
self._get_people_at_step(filter, 1, "Other"), [person1.uuid, person4.uuid, person5.uuid]
)
self.assertCountEqual(self._get_people_at_step(filter, 2, "Other"), [person1.uuid])

self.assertEqual(
result[1],
[
{
"action_id": "sign up",
"name": "sign up",
"order": 0,
"people": [person2.uuid, person3.uuid]
if Funnel == ClickhouseFunnel
else [], # backwards compatibility
"count": 2,
"type": "events",
"average_conversion_time": None,
"median_conversion_time": None,
"breakdown": "Safari",
"breakdown_value": "Safari",
},
{
"action_id": "play movie",
"name": "play movie",
"order": 1,
"people": [person2.uuid] if Funnel == ClickhouseFunnel else [], # backwards compatibility
"count": 1,
"type": "events",
"average_conversion_time": 7200.0,
"median_conversion_time": 7200.0,
"breakdown": "Safari",
"breakdown_value": "Safari",
},
{
"action_id": "buy",
"name": "buy",
"order": 2,
"people": [],
"count": 0,
"type": "events",
"average_conversion_time": None,
"median_conversion_time": None,
"breakdown": "Safari",
"breakdown_value": "Safari",
},
],
)

self.assertCountEqual(self._get_people_at_step(filter, 1, "Safari"), [person2.uuid, person3.uuid])
self.assertCountEqual(self._get_people_at_step(filter, 2, "Safari"), [person2.uuid])

def test_funnel_step_breakdown_event_no_type(self):

filters = {
Expand Down Expand Up @@ -510,7 +693,7 @@ def test_funnel_step_breakdown_limit(self):

# assert that we give 5 at a time at most and that those values are the most popular ones
breakdown_vals = sorted([res[0]["breakdown"] for res in result])
self.assertEqual(["5", "6", "7", "8", "9"], breakdown_vals)
self.assertEqual(["5", "6", "7", "8", "9", "Other"], breakdown_vals)

def test_funnel_step_custom_breakdown_limit_with_nulls(self):

Expand Down Expand Up @@ -554,7 +737,7 @@ def test_funnel_step_custom_breakdown_limit_with_nulls(self):
)

# no breakdown value for this guy
_create_person(distinct_ids=[f"person_null"], team_id=self.team.pk)
person0 = _create_person(distinct_ids=[f"person_null"], team_id=self.team.pk)
_create_event(
team=self.team,
event="sign up",
Expand All @@ -580,8 +763,9 @@ def test_funnel_step_custom_breakdown_limit_with_nulls(self):
result = funnel.run()

breakdown_vals = sorted([res[0]["breakdown"] for res in result])
self.assertEqual(["2", "3", "4"], breakdown_vals)
self.assertEqual(["2", "3", "4", "Other"], breakdown_vals)
# skipped 1 and '' because the limit was 3.
self.assertTrue(person0.uuid in self._get_people_at_step(filter, 1, "Other"))

def test_funnel_step_custom_breakdown_limit_with_nulls_included(self):

Expand Down

0 comments on commit a245c15

Please sign in to comment.