Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Paths Person API dropoff functionality #6124

Merged
merged 1 commit into from
Sep 27, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 17 additions & 8 deletions ee/clickhouse/queries/paths/paths_persons.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,19 @@

class ClickhousePathsPersons(ClickhousePaths):
"""
`path_start_key` and `path_end_key` are two new params for this class.
These determine the start and end point of Paths you want. Both of these are optional.
`path_start_key`, `path_end_key`, and `path_dropoff_key` are three new params for this class.
These determine the start, end, and dropoff points of the Paths you want. All of these are optional.

Not specifying them means "get me all users on this path query".

Only specifying `path_start_key` means "get me all users whose paths start at this key"
Only specifying `path_end_key` means "get me all users whose paths end at this key"

Specifying both means "get me all users whose path starts at `start_key` and ends at `end_key`."

Specifying `path_dropoff_key` means "get me users who dropped off after this key". If you specify
this key, the other two keys are ignored.

Note that:
Persons are calculated only between direct paths. There should not be any
other path item between start and end key.
Expand Down Expand Up @@ -48,13 +52,18 @@ def get_query(self):

def get_person_path_filter(self) -> str:
conditions = []
if self._filter.path_start_key:
conditions.append("last_path_key = %(path_start_key)s")
self.params["path_start_key"] = self._filter.path_start_key

if self._filter.path_end_key:
conditions.append("path_key = %(path_end_key)s")
self.params["path_end_key"] = self._filter.path_end_key
if self._filter.path_dropoff_key:
conditions.append("path_dropoff_key = %(path_dropoff_key)s")
self.params["path_dropoff_key"] = self._filter.path_dropoff_key
else:
if self._filter.path_start_key:
conditions.append("last_path_key = %(path_start_key)s")
self.params["path_start_key"] = self._filter.path_start_key

if self._filter.path_end_key:
conditions.append("path_key = %(path_end_key)s")
self.params["path_end_key"] = self._filter.path_end_key

if conditions:
return " AND ".join(conditions)
Expand Down
54 changes: 52 additions & 2 deletions ee/clickhouse/queries/test/test_paths.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,10 @@ class TestClickhousePaths(ClickhouseTestMixin, paths_test_factory(ClickhousePath

maxDiff = None

def _get_people_at_path(self, filter, path_start, path_end, funnel_filter=None):
person_filter = filter.with_data({"path_start_key": path_start, "path_end_key": path_end})
def _get_people_at_path(self, filter, path_start=None, path_end=None, funnel_filter=None, path_dropoff=None):
person_filter = filter.with_data(
{"path_start_key": path_start, "path_end_key": path_end, "path_dropoff_key": path_dropoff}
)
result = ClickhousePathsPersons(person_filter, self.team, funnel_filter)._exec_query()
return [row[0] for row in result]

Expand Down Expand Up @@ -1494,3 +1496,51 @@ def test_path_grouping_with_evil_input(self):
{"source": "2_/2/bar/aaa", "target": "3_/3*", "value": 1, "average_conversion_time": 2 * ONE_MINUTE},
],
)

def test_paths_person_dropoffs(self):
    """
    Check person counts for dropoff, end and start keys along a four-step path.

    35 users all perform "step one" and "step two"; 5 of them stop there,
    10 more stop after "step three", and the remaining 20 go on to "step four".
    """
    ordered_steps = [
        ("step one", "2021-05-01 00:00:00"),
        ("step two", "2021-05-01 00:04:00"),
        ("step three", "2021-05-01 00:05:00"),
        ("step four", "2021-05-01 00:06:00"),
    ]
    # (user indices, how many of the leading steps each of those users performs)
    cohorts = [
        (range(5), 2),
        (range(5, 15), 3),
        (range(15, 35), 4),
    ]

    for user_indices, steps_completed in cohorts:
        for idx in user_indices:
            distinct_id = f"user_{idx}"
            Person.objects.create(distinct_ids=[distinct_id], team=self.team)
            for event_name, event_time in ordered_steps[:steps_completed]:
                _create_event(event=event_name, distinct_id=distinct_id, team=self.team, timestamp=event_time)

    path_filter = PathFilter(
        data={
            "include_event_types": ["custom_event"],
            "date_from": "2021-05-01 00:00:00",
            "date_to": "2021-05-07 00:00:00",
        }
    )

    # (path key, users who dropped off at it, users who reached it, users who continued past it)
    expected_counts = [
        ("2_step two", 5, 35, 30),
        ("3_step three", 10, 30, 20),
        ("4_step four", 20, 20, 0),
    ]
    for path_key, dropped_off, reached, continued in expected_counts:
        self.assertEqual(dropped_off, len(self._get_people_at_path(path_filter, path_dropoff=path_key)))
        self.assertEqual(reached, len(self._get_people_at_path(path_filter, path_end=path_key)))
        self.assertEqual(continued, len(self._get_people_at_path(path_filter, path_start=path_key)))
4 changes: 3 additions & 1 deletion ee/clickhouse/sql/paths/path.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
conversion_time,
event_in_session_index,
concat(toString(event_in_session_index), '_', path) as path_key,
if(event_in_session_index > 1, neighbor(path_key, -1), null) AS last_path_key
if(event_in_session_index > 1, neighbor(path_key, -1), null) AS last_path_key,
path_dropoff_key
FROM (

SELECT person_id
Expand All @@ -19,6 +20,7 @@
{target_clause}
, arrayDifference(limited_timings) as timings_diff
, arrayZip(limited_path, timings_diff) as limited_path_timings
, concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */
FROM (
SELECT person_id
, path_time_tuple.1 as path_basic
Expand Down
1 change: 1 addition & 0 deletions posthog/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ class AvailableFeature(str, Enum):
PATH_GROUPINGS = "path_groupings"
PATH_START_KEY = "path_start_key"
PATH_END_KEY = "path_end_key"
PATH_DROPOFF_KEY = "path_dropoff_key"


class FunnelOrderType(str, Enum):
Expand Down
12 changes: 10 additions & 2 deletions posthog/models/filters/mixins/paths.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
END_POINT,
FUNNEL_PATHS,
PAGEVIEW_EVENT,
PATH_DROPOFF_KEY,
PATH_END_KEY,
PATH_GROUPINGS,
PATH_START_KEY,
Expand Down Expand Up @@ -165,11 +166,15 @@ def path_groupings_to_dict(self):
class PathPersonsMixin(BaseParamMixin):
@cached_property
def path_start_key(self) -> Optional[str]:
return self._data.get(PATH_START_KEY, None)
return self._data.get(PATH_START_KEY)

@cached_property
def path_end_key(self) -> Optional[str]:
return self._data.get(PATH_END_KEY, None)
return self._data.get(PATH_END_KEY)

@cached_property
def path_dropoff_key(self) -> Optional[str]:
return self._data.get(PATH_DROPOFF_KEY)

@include_dict
def path_start_end_to_dict(self):
Expand All @@ -180,4 +185,7 @@ def path_start_end_to_dict(self):
if self.path_end_key:
result[PATH_END_KEY] = self.path_end_key

if self.path_dropoff_key:
result[PATH_DROPOFF_KEY] = self.path_dropoff_key

return result