Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix issues with step conversion times #5174

Merged
merged 12 commits into from
Jul 22, 2021
8 changes: 8 additions & 0 deletions ee/clickhouse/queries/funnels/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -318,6 +318,14 @@ def _get_count_columns(self, max_steps: int):

return ", ".join(cols)

def _get_step_time_names(self, max_steps: int):
names = []
for i in range(1, max_steps):
names.append(f"step_{i}_conversion_time")

formatted = ",".join(names)
return f", {formatted}" if formatted else ""

def _get_step_time_avgs(self, max_steps: int, inner_query: bool = False):
conditions: List[str] = []
for i in range(1, max_steps):
Expand Down
11 changes: 8 additions & 3 deletions ee/clickhouse/queries/funnels/funnel.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,14 @@ def get_step_counts_query(self):
max_steps = len(self._filter.entities)
breakdown_clause = self._get_breakdown_prop()

return f"""SELECT person_id, max(steps) AS steps {self._get_step_time_avgs(max_steps, inner_query=True)} {self._get_step_time_median(max_steps, inner_query=True)} {breakdown_clause} FROM (
{steps_per_person_query}
) GROUP BY person_id {breakdown_clause}
return f"""
SELECT person_id, steps {self._get_step_time_avgs(max_steps, inner_query=True)} {self._get_step_time_median(max_steps, inner_query=True)} {breakdown_clause} FROM (
SELECT person_id, steps, max(steps) over (PARTITION BY person_id {breakdown_clause}) as max_steps {self._get_step_time_names(max_steps)} {breakdown_clause} FROM (
{steps_per_person_query}
)
) GROUP BY person_id, steps {breakdown_clause}
HAVING steps = max_steps
SETTINGS allow_experimental_window_functions = 1
"""

def _format_results(self, results):
Expand Down
12 changes: 7 additions & 5 deletions ee/clickhouse/queries/funnels/funnel_strict.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,17 @@ def get_query(self):

def get_step_counts_query(self):

steps_per_person_query = self.get_step_counts_without_aggregation_query()
max_steps = len(self._filter.entities)

formatted_query = self.get_step_counts_without_aggregation_query()
breakdown_clause = self._get_breakdown_prop()

return f"""
SELECT person_id, max(steps) AS steps {self._get_step_time_avgs(max_steps, inner_query=True)} {self._get_step_time_median(max_steps, inner_query=True)} {breakdown_clause} FROM (
{formatted_query}
) GROUP BY person_id {breakdown_clause}
SELECT person_id, steps {self._get_step_time_avgs(max_steps)} {self._get_step_time_median(max_steps, inner_query=True)} {breakdown_clause} FROM (
SELECT person_id, steps, max(steps) over (PARTITION BY person_id {breakdown_clause}) as max_steps {self._get_step_time_names(max_steps)} {breakdown_clause} FROM (
{steps_per_person_query}
)
) GROUP BY person_id, steps {breakdown_clause}
HAVING steps = max_steps
"""

def get_step_counts_without_aggregation_query(self):
Expand Down
8 changes: 0 additions & 8 deletions ee/clickhouse/queries/funnels/funnel_unordered.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,14 +81,6 @@ def get_step_counts_without_aggregation_query(self):

return " UNION ALL ".join(union_queries)

def _get_step_time_names(self, max_steps: int):
names = []
for i in range(1, max_steps):
names.append(f"step_{i}_conversion_time")

formatted = ",".join(names)
return f", {formatted}" if formatted else ""

def _get_step_times(self, max_steps: int):
conditions: List[str] = []

Expand Down
155 changes: 155 additions & 0 deletions ee/clickhouse/queries/funnels/test/conversion_time_cases.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
from posthog.constants import INSIGHT_FUNNELS
from posthog.models.filters import Filter
from posthog.test.base import APIBaseTest


def funnel_conversion_time_test_factory(Funnel, FunnelPerson, _create_event, _create_person):
class TestFunnelConversionTime(APIBaseTest):
def _get_people_at_step(self, filter, funnel_step):
person_filter = filter.with_data({"funnel_step": funnel_step})
result = FunnelPerson(person_filter, self.team)._exec_query()
return [row[0] for row in result]

def test_funnel_with_multiple_incomplete_tries(self):
filters = {
"events": [
{"id": "user signed up", "type": "events", "order": 0},
{"id": "$pageview", "type": "events", "order": 1},
{"id": "something else", "type": "events", "order": 2},
],
"funnel_window_days": 1,
"date_from": "2021-05-01 00:00:00",
"date_to": "2021-05-14 00:00:00",
"insight": INSIGHT_FUNNELS,
}

filter = Filter(data=filters)
funnel = Funnel(filter, self.team)

# event
person1 = _create_person(distinct_ids=["person1"], team_id=self.team.pk)
_create_event(
team=self.team, event="user signed up", distinct_id="person1", timestamp="2021-05-01 01:00:00"
)
_create_event(team=self.team, event="$pageview", distinct_id="person1", timestamp="2021-05-01 02:00:00")
_create_event(
team=self.team, event="something else", distinct_id="person1", timestamp="2021-05-01 03:00:00"
)
# person1 completed funnel on 2021-05-01

_create_event(
team=self.team, event="user signed up", distinct_id="person1", timestamp="2021-05-03 04:00:00"
)
_create_event(team=self.team, event="$pageview", distinct_id="person1", timestamp="2021-05-03 06:00:00")
# person1 completed part of funnel on 2021-05-03 and took 2 hours to convert

_create_event(
team=self.team, event="user signed up", distinct_id="person1", timestamp="2021-05-04 07:00:00"
)
_create_event(team=self.team, event="$pageview", distinct_id="person1", timestamp="2021-05-04 10:00:00")
# person1 completed part of funnel on 2021-05-04 and took 3 hours to convert

result = funnel.run()

print(result)
self.assertEqual(result[0]["name"], "user signed up")
self.assertEqual(result[1]["name"], "$pageview")
self.assertEqual(result[2]["name"], "something else")
self.assertEqual(result[0]["count"], 1)
self.assertEqual(
result[1]["average_conversion_time"], 3600
) # one hour to convert, disregard the incomplete tries
self.assertEqual(result[1]["median_conversion_time"], 3600)

self.assertEqual(len(result[0]["people"]), 1)
# check ordering of people in every step
self.assertCountEqual(
self._get_people_at_step(filter, 1), [person1.uuid,],
)

def test_funnel_step_conversion_times(self):
filters = {
"events": [{"id": "sign up", "order": 0}, {"id": "play movie", "order": 1}, {"id": "buy", "order": 2},],
"insight": INSIGHT_FUNNELS,
"date_from": "2020-01-01",
"date_to": "2020-01-08",
"funnel_window_days": 7,
}

filter = Filter(data=filters)
funnel = Funnel(filter, self.team)

# event
person1 = _create_person(distinct_ids=["person1"], team_id=self.team.pk)
_create_event(
team=self.team,
event="sign up",
distinct_id="person1",
properties={"key": "val"},
timestamp="2020-01-01T12:00:00Z",
)
_create_event(
team=self.team,
event="play movie",
distinct_id="person1",
properties={"key": "val"},
timestamp="2020-01-01T13:00:00Z",
)
_create_event(
team=self.team,
event="buy",
distinct_id="person1",
properties={"key": "val"},
timestamp="2020-01-01T15:00:00Z",
)

person2 = _create_person(distinct_ids=["person2"], team_id=self.team.pk)
_create_event(
team=self.team,
event="sign up",
distinct_id="person2",
properties={"key": "val"},
timestamp="2020-01-02T14:00:00Z",
)
_create_event(
team=self.team,
event="play movie",
distinct_id="person2",
properties={"key": "val"},
timestamp="2020-01-02T16:00:00Z",
)

person3 = _create_person(distinct_ids=["person3"], team_id=self.team.pk)
_create_event(
team=self.team,
event="sign up",
distinct_id="person3",
properties={"key": "val"},
timestamp="2020-01-02T14:00:00Z",
)
_create_event(
team=self.team,
event="play movie",
distinct_id="person3",
properties={"key": "val"},
timestamp="2020-01-02T16:00:00Z",
)
_create_event(
team=self.team,
event="buy",
distinct_id="person3",
properties={"key": "val"},
timestamp="2020-01-02T17:00:00Z",
)

result = funnel.run()

self.assertEqual(result[0]["average_conversion_time"], None)
self.assertEqual(result[1]["average_conversion_time"], 6000)
self.assertEqual(result[2]["average_conversion_time"], 5400)

self.assertEqual(result[0]["median_conversion_time"], None)
self.assertEqual(result[1]["median_conversion_time"], 7200)
self.assertEqual(result[2]["median_conversion_time"], 5400)

return TestFunnelConversionTime
95 changes: 8 additions & 87 deletions ee/clickhouse/queries/funnels/test/test_funnel.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@
from ee.clickhouse.client import sync_execute
from ee.clickhouse.models.event import create_event
from ee.clickhouse.queries.funnels.funnel import ClickhouseFunnel
from ee.clickhouse.queries.funnels.funnel_event_query import FunnelEventQuery
from ee.clickhouse.queries.funnels.funnel_persons import ClickhouseFunnelPersons
from ee.clickhouse.queries.funnels.test.breakdown_cases import funnel_breakdown_test_factory
from ee.clickhouse.queries.funnels.test.conversion_time_cases import funnel_conversion_time_test_factory
from ee.clickhouse.util import ClickhouseTestMixin
from posthog.constants import INSIGHT_FUNNELS
from posthog.models.action import Action
Expand All @@ -15,6 +17,7 @@
from posthog.models.filters import Filter
from posthog.models.person import Person
from posthog.queries.test.test_funnel import funnel_test_factory
from posthog.settings import SHELL_PLUS_PRINT_SQL

FORMAT_TIME = "%Y-%m-%d 00:00:00"
MAX_STEP_COLUMN = 0
Expand Down Expand Up @@ -46,6 +49,11 @@ class TestFunnelBreakdown(ClickhouseTestMixin, funnel_breakdown_test_factory(Cli
pass


class TestFunnelConversionTime(ClickhouseTestMixin, funnel_conversion_time_test_factory(ClickhouseFunnel, ClickhouseFunnelPersons, _create_event, _create_person)): # type: ignore
maxDiff = None
pass


class TestFunnel(ClickhouseTestMixin, funnel_test_factory(ClickhouseFunnel, _create_event, _create_person)): # type: ignore

maxDiff = None
Expand Down Expand Up @@ -111,7 +119,6 @@ def test_basic_funnel_with_repeat_steps(self):
_create_event(team=self.team, event="user signed up", distinct_id="stopped_after_signup2")

result = funnel.run()

self.assertEqual(result[0]["name"], "user signed up")
self.assertEqual(result[0]["count"], 2)
self.assertEqual(len(result[0]["people"]), 2)
Expand Down Expand Up @@ -853,92 +860,6 @@ def test_funnel_conversion_window(self):

self.assertCountEqual([str(id) for id in self._get_people_at_step(filter, 2)], ids_to_compare)

def test_funnel_step_conversion_times(self):

filters = {
"events": [{"id": "sign up", "order": 0}, {"id": "play movie", "order": 1}, {"id": "buy", "order": 2},],
"insight": INSIGHT_FUNNELS,
"date_from": "2020-01-01",
"date_to": "2020-01-08",
"funnel_window_days": 7,
}

filter = Filter(data=filters)
funnel = ClickhouseFunnel(filter, self.team)

# event
person1 = _create_person(distinct_ids=["person1"], team_id=self.team.pk)
_create_event(
team=self.team,
event="sign up",
distinct_id="person1",
properties={"key": "val"},
timestamp="2020-01-01T12:00:00Z",
)
_create_event(
team=self.team,
event="play movie",
distinct_id="person1",
properties={"key": "val"},
timestamp="2020-01-01T13:00:00Z",
)
_create_event(
team=self.team,
event="buy",
distinct_id="person1",
properties={"key": "val"},
timestamp="2020-01-01T15:00:00Z",
)

person2 = _create_person(distinct_ids=["person2"], team_id=self.team.pk)
_create_event(
team=self.team,
event="sign up",
distinct_id="person2",
properties={"key": "val"},
timestamp="2020-01-02T14:00:00Z",
)
_create_event(
team=self.team,
event="play movie",
distinct_id="person2",
properties={"key": "val"},
timestamp="2020-01-02T16:00:00Z",
)

person3 = _create_person(distinct_ids=["person3"], team_id=self.team.pk)
_create_event(
team=self.team,
event="sign up",
distinct_id="person3",
properties={"key": "val"},
timestamp="2020-01-02T14:00:00Z",
)
_create_event(
team=self.team,
event="play movie",
distinct_id="person3",
properties={"key": "val"},
timestamp="2020-01-02T16:00:00Z",
)
_create_event(
team=self.team,
event="buy",
distinct_id="person3",
properties={"key": "val"},
timestamp="2020-01-02T17:00:00Z",
)

result = funnel.run()

self.assertEqual(result[0]["average_conversion_time"], None)
self.assertEqual(result[1]["average_conversion_time"], 6000)
self.assertEqual(result[2]["average_conversion_time"], 5400)

self.assertEqual(result[0]["median_conversion_time"], None)
self.assertEqual(result[1]["median_conversion_time"], 7200)
self.assertEqual(result[2]["median_conversion_time"], 5400)

def test_funnel_exclusions_invalid_params(self):
filters = {
"events": [
Expand Down
7 changes: 7 additions & 0 deletions ee/clickhouse/queries/funnels/test/test_funnel_strict.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from ee.clickhouse.queries.funnels.funnel_strict import ClickhouseFunnelStrict
from ee.clickhouse.queries.funnels.funnel_strict_persons import ClickhouseFunnelStrictPersons
from ee.clickhouse.queries.funnels.test.breakdown_cases import funnel_breakdown_test_factory
from ee.clickhouse.queries.funnels.test.conversion_time_cases import funnel_conversion_time_test_factory
from ee.clickhouse.util import ClickhouseTestMixin
from posthog.constants import INSIGHT_FUNNELS
from posthog.models.action import Action
Expand Down Expand Up @@ -157,6 +158,12 @@ def test_strict_breakdown_events_with_multiple_properties(self):
self.assertCountEqual(self._get_people_at_step(filter, 2, "Safari"), [person2.uuid])


class TestFunnelStrictStepsConversionTime(ClickhouseTestMixin, funnel_conversion_time_test_factory(ClickhouseFunnelStrict, ClickhouseFunnelStrictPersons, _create_event, _create_person)): # type: ignore

maxDiff = None
pass


class TestFunnelStrictSteps(ClickhouseTestMixin, APIBaseTest):

maxDiff = None
Expand Down
Loading