diff --git a/frontend/__snapshots__/filters-propertyfilters--comparing-property-filters.png b/frontend/__snapshots__/filters-propertyfilters--comparing-property-filters.png
index 620ccd6fb77e8..ae1ea8041e099 100644
Binary files a/frontend/__snapshots__/filters-propertyfilters--comparing-property-filters.png and b/frontend/__snapshots__/filters-propertyfilters--comparing-property-filters.png differ
diff --git a/frontend/__snapshots__/filters-propertygroupfilters--group-property-filters.png b/frontend/__snapshots__/filters-propertygroupfilters--group-property-filters.png
index bbec104c60598..3cdcb265ef5c5 100644
Binary files a/frontend/__snapshots__/filters-propertygroupfilters--group-property-filters.png and b/frontend/__snapshots__/filters-propertygroupfilters--group-property-filters.png differ
diff --git a/frontend/src/lib/components/PropertyFilters/components/OperatorValueSelect.tsx b/frontend/src/lib/components/PropertyFilters/components/OperatorValueSelect.tsx
index ca6b089ff3935..1ee5265a9e5bb 100644
--- a/frontend/src/lib/components/PropertyFilters/components/OperatorValueSelect.tsx
+++ b/frontend/src/lib/components/PropertyFilters/components/OperatorValueSelect.tsx
@@ -78,15 +78,15 @@ export function OperatorValueSelect({
     const [operators, setOperators] = useState([] as Array)

     useEffect(() => {
-        const isAutocaptureElementProperty = propkey === 'selector'
+        const limitedElementProperty = propkey === 'selector' || propkey === 'tag_name'
         const operatorMapping: Record = chooseOperatorMap(
-            isAutocaptureElementProperty ? PropertyType.Selector : propertyDefinition?.property_type
+            limitedElementProperty ? PropertyType.Selector : propertyDefinition?.property_type
         )
-        setOperators(Object.keys(operatorMapping) as Array)
+        const operators = Object.keys(operatorMapping) as Array
+        setOperators(operators)
         if (currentOperator !== operator) {
             setCurrentOperator(startingOperator)
-        }
-        if (isAutocaptureElementProperty) {
+        } else if (limitedElementProperty && !operators.includes(currentOperator)) {
             setCurrentOperator(PropertyOperator.Exact)
         }
     }, [propertyDefinition, propkey, operator])
diff --git a/frontend/src/queries/nodes/DataNode/dataNodeLogic.ts b/frontend/src/queries/nodes/DataNode/dataNodeLogic.ts
index 738d89edd07b1..62f6637d6dd02 100644
--- a/frontend/src/queries/nodes/DataNode/dataNodeLogic.ts
+++ b/frontend/src/queries/nodes/DataNode/dataNodeLogic.ts
@@ -223,8 +223,8 @@ export const dataNodeLogic = kea([
                 return null
             }
             if (isEventsQuery(query) && !query.before) {
-                const sortKey = query.orderBy?.[0] ?? '-timestamp'
-                if (sortKey === '-timestamp') {
+                const sortKey = query.orderBy?.[0] ?? 'timestamp DESC'
+                if (sortKey === 'timestamp DESC') {
                     const sortColumnIndex = query.select
                         .map((hql) => removeExpressionComment(hql))
                         .indexOf('timestamp')
@@ -249,9 +249,9 @@ export const dataNodeLogic = kea([
             (query, response, responseError, dataLoading): DataNode | null => {
                 if (isEventsQuery(query) && !responseError && !dataLoading) {
                     if ((response as EventsQuery['response'])?.hasMore) {
-                        const sortKey = query.orderBy?.[0] ?? '-timestamp'
+                        const sortKey = query.orderBy?.[0] ?? 'timestamp DESC'
                         const typedResults = (response as EventsQuery['response'])?.results
-                        if (sortKey === '-timestamp') {
+                        if (sortKey === 'timestamp DESC') {
                             const sortColumnIndex = query.select
                                 .map((hql) => removeExpressionComment(hql))
                                 .indexOf('timestamp')
diff --git a/frontend/src/queries/nodes/DataTable/ColumnConfigurator/ColumnConfigurator.tsx b/frontend/src/queries/nodes/DataTable/ColumnConfigurator/ColumnConfigurator.tsx
index 613be887c0417..b6608bfb7dfa2 100644
--- a/frontend/src/queries/nodes/DataTable/ColumnConfigurator/ColumnConfigurator.tsx
+++ b/frontend/src/queries/nodes/DataTable/ColumnConfigurator/ColumnConfigurator.tsx
@@ -45,7 +45,7 @@ export function ColumnConfigurator({ query, setQuery }: ColumnConfiguratorProps)
         let orderBy = query.source.orderBy
         if (orderBy && orderBy.length > 0) {
             const orderColumn = removeExpressionComment(
-                orderBy[0].startsWith('-') ? orderBy[0].slice(1) : orderBy[0]
+                orderBy[0].endsWith(' DESC') ? orderBy[0].replace(/ DESC$/, '') : orderBy[0]
             )
             // the old orderBy column was removed, so remove it from the new query
             if (!columns.some((c) => removeExpressionComment(c) === orderColumn)) {
diff --git a/frontend/src/queries/nodes/DataTable/DataTable.tsx b/frontend/src/queries/nodes/DataTable/DataTable.tsx
index 298fffde66a7b..b9d76a44f1459 100644
--- a/frontend/src/queries/nodes/DataTable/DataTable.tsx
+++ b/frontend/src/queries/nodes/DataTable/DataTable.tsx
@@ -133,7 +133,7 @@ export function DataTable({ query, setQuery, context }: DataTableProps): JSX.Ele
                     if (hogQl && isEventsQuery(query.source)) {
                         const isAggregation = isHogQlAggregation(hogQl)
                         const isOrderBy = query.source?.orderBy?.[0] === key
-                        const isDescOrderBy = query.source?.orderBy?.[0] === `-${key}`
+                        const isDescOrderBy = query.source?.orderBy?.[0] === `${key} DESC`
                         setQuery?.({
                             ...query,
                             source: {
@@ -143,7 +143,7 @@ export function DataTable({ query, setQuery, context }: DataTableProps): JSX.Ele
                                     .filter((c) => (isAggregation ? c !== '*' : true)),
                                 orderBy:
                                     isOrderBy || isDescOrderBy
-                                        ? [isDescOrderBy ? `-${hogQl}` : hogQl]
+                                        ? [isDescOrderBy ? `${hogQl} DESC` : hogQl]
                                         : query.source?.orderBy,
                             },
                         })
@@ -173,14 +173,14 @@ export function DataTable({ query, setQuery, context }: DataTableProps): JSX.Ele
                               {
                                   setQuery?.({
                                       ...query,
                                       source: {
                                           ...query.source,
-                                          orderBy: [`-${key}`],
+                                          orderBy: [`${key} DESC`],
                                       } as EventsQuery,
                                   })
                               }}
diff --git a/frontend/src/queries/nodes/DataTable/dataTableLogic.test.ts b/frontend/src/queries/nodes/DataTable/dataTableLogic.test.ts
index efa5ea2df0217..a802005d6d7aa 100644
--- a/frontend/src/queries/nodes/DataTable/dataTableLogic.test.ts
+++ b/frontend/src/queries/nodes/DataTable/dataTableLogic.test.ts
@@ -75,7 +75,7 @@ describe('dataTableLogic', () => {
         logic.mount()
         await expectLogic(logic).toMatchValues({
             sourceKind: NodeKind.EventsQuery,
-            orderBy: ['-timestamp'],
+            orderBy: ['timestamp DESC'],
         })

         // change props
diff --git a/frontend/src/queries/nodes/DataTable/dataTableLogic.ts b/frontend/src/queries/nodes/DataTable/dataTableLogic.ts
index e4bf1edbf9c3d..443ba56568028 100644
--- a/frontend/src/queries/nodes/DataTable/dataTableLogic.ts
+++ b/frontend/src/queries/nodes/DataTable/dataTableLogic.ts
@@ -53,7 +53,8 @@ export const dataTableLogic = kea([
         sourceKind: [(_, p) => [p.query], (query): NodeKind | null => query.source?.kind],
         orderBy: [
             (_, p) => [p.query],
-            (query): string[] | null => (isEventsQuery(query.source) ? query.source.orderBy || ['-timestamp'] : null),
+            (query): string[] | null =>
+                isEventsQuery(query.source) ? query.source.orderBy || ['timestamp DESC'] : null,
             { resultEqualityCheck: objectsEqual },
         ],
         columnsInResponse: [
@@ -79,7 +80,9 @@ export const dataTableLogic = kea([
             }
             const { results } = eventsQueryResponse
-            const orderKey = orderBy?.[0]?.startsWith('-') ? orderBy[0].slice(1) : orderBy?.[0]
+            const orderKey = orderBy?.[0]?.endsWith(' DESC')
+                ? orderBy[0].replace(/ DESC$/, '')
+                : orderBy?.[0]
             const orderKeyIndex = columnsInResponse?.findIndex(
                 (column) =>
diff --git a/frontend/src/queries/nodes/DataTable/utils.ts b/frontend/src/queries/nodes/DataTable/utils.ts
index 159915ab4dc2d..3464cbc2e1e71 100644
--- a/frontend/src/queries/nodes/DataTable/utils.ts
+++ b/frontend/src/queries/nodes/DataTable/utils.ts
@@ -4,7 +4,7 @@ import { isEventsQuery } from '~/queries/utils'
 export const defaultDataTableEventColumns: HogQLExpression[] = [
     '*',
     'event',
-    'distinct_id',
+    'person',
     'coalesce(properties.$current_url, properties.$screen_name) -- Url / Screen',
     'properties.$lib',
     'timestamp',
diff --git a/frontend/src/scenes/events/eventsSceneLogic.tsx b/frontend/src/scenes/events/eventsSceneLogic.tsx
index 13aac5174d6ec..fc02d2c85bc9d 100644
--- a/frontend/src/scenes/events/eventsSceneLogic.tsx
+++ b/frontend/src/scenes/events/eventsSceneLogic.tsx
@@ -15,7 +15,7 @@ export const getDefaultEventsSceneQuery = (): DataTableNode => ({
     source: {
         kind: NodeKind.EventsQuery,
         select: defaultDataTableColumns(NodeKind.EventsQuery),
-        orderBy: ['-timestamp'],
+        orderBy: ['timestamp DESC'],
         after: '-24h',
         limit: 100,
     },
diff --git a/posthog/api/event.py b/posthog/api/event.py
index f24fc829fd063..a6c265d450e9e 100644
--- a/posthog/api/event.py
+++ b/posthog/api/event.py
@@ -18,12 +18,8 @@
 from posthog.api.routing import StructuredViewSetMixin
 from posthog.client import query_with_columns, sync_execute
 from posthog.models import Element, Filter, Person
-from posthog.models.event.query_event_list import (
-    QUERY_DEFAULT_EXPORT_LIMIT,
-    QUERY_DEFAULT_LIMIT,
-    QUERY_MAXIMUM_LIMIT,
-    query_events_list,
-)
+from posthog.models.event.events_query import QUERY_DEFAULT_EXPORT_LIMIT, QUERY_DEFAULT_LIMIT, QUERY_MAXIMUM_LIMIT
+from posthog.models.event.query_event_list import query_events_list
 from posthog.models.event.sql import GET_CUSTOM_EVENTS, SELECT_ONE_EVENT_SQL
 from posthog.models.event.util import ClickhouseEventSerializer
 from posthog.models.person.util import get_persons_by_distinct_ids
diff --git a/posthog/api/query.py b/posthog/api/query.py
index 535fdd80e3d77..13b6002024bc2 100644
--- a/posthog/api/query.py
+++ b/posthog/api/query.py
@@ -16,7 +16,7 @@
 from posthog.cloud_utils import is_cloud
 from posthog.hogql.query import execute_hogql_query
 from posthog.models import Team, User
-from posthog.models.event.query_event_list import run_events_query
+from posthog.models.event.events_query import run_events_query
 from posthog.permissions import ProjectMembershipNecessaryPermissions, TeamMemberAccessPermission
 from posthog.rate_limit import ClickHouseBurstRateThrottle, ClickHouseSustainedRateThrottle
 from posthog.schema import EventsQuery, HogQLQuery
diff --git a/posthog/api/test/__snapshots__/test_query.ambr b/posthog/api/test/__snapshots__/test_query.ambr
index 34702611c2ded..40b336754f9e7 100644
--- a/posthog/api/test/__snapshots__/test_query.ambr
+++ b/posthog/api/test/__snapshots__/test_query.ambr
@@ -7,10 +7,10 @@
            'a%sd',
            concat(event, ' ', replaceRegexpAll(JSONExtractRaw(properties, 'key'), '^"|"$', ''))
     FROM events
-    WHERE team_id = 2
-      AND timestamp < '2020-01-10 12:14:05.000000'
+    WHERE 
and(equals(team_id, 66), less(timestamp, '2020-01-10 12:14:05.000000'), greater(timestamp, '2020-01-09 12:00:00.000000')) ORDER BY event ASC LIMIT 101 + OFFSET 0 ' --- # name: TestQuery.test_event_property_filter.1 @@ -22,11 +22,10 @@ 'a%sd', concat(event, ' ', replaceRegexpAll(JSONExtractRaw(properties, 'key'), '^"|"$', '')) FROM events - WHERE team_id = 2 - AND timestamp < '2020-01-10 12:14:05.000000' - AND (has(['test_val3'], replaceRegexpAll(JSONExtractRaw(properties, 'key'), '^"|"$', ''))) + WHERE and(equals(team_id, 66), equals(replaceRegexpAll(JSONExtractRaw(properties, 'key'), '^"|"$', ''), 'test_val3'), less(timestamp, '2020-01-10 12:14:05.000000'), greater(timestamp, '2020-01-09 12:00:00.000000')) ORDER BY event ASC LIMIT 101 + OFFSET 0 ' --- # name: TestQuery.test_event_property_filter.2 @@ -38,11 +37,10 @@ 'a%sd', concat(event, ' ', replaceRegexpAll(JSONExtractRaw(properties, 'key'), '^"|"$', '')) FROM events - WHERE team_id = 2 - AND timestamp < '2020-01-10 12:14:05.000000' - AND (replaceRegexpAll(JSONExtractRaw(properties, 'path'), '^"|"$', '') ILIKE '%/%') + WHERE and(equals(team_id, 66), ilike(replaceRegexpAll(JSONExtractRaw(properties, 'path'), '^"|"$', ''), '%/%'), less(timestamp, '2020-01-10 12:14:05.000000'), greater(timestamp, '2020-01-09 12:00:00.000000')) ORDER BY event ASC LIMIT 101 + OFFSET 0 ' --- # name: TestQuery.test_event_property_filter_materialized @@ -54,10 +52,10 @@ 'a%sd', concat(event, ' ', mat_key) FROM events - WHERE team_id = 2 - AND timestamp < '2020-01-10 12:14:05.000000' + WHERE and(equals(team_id, 67), less(timestamp, '2020-01-10 12:14:05.000000'), greater(timestamp, '2020-01-09 12:00:00.000000')) ORDER BY event ASC LIMIT 101 + OFFSET 0 ' --- # name: TestQuery.test_event_property_filter_materialized.1 @@ -69,11 +67,10 @@ 'a%sd', concat(event, ' ', mat_key) FROM events - WHERE team_id = 2 - AND timestamp < '2020-01-10 12:14:05.000000' - AND (has(['test_val3'], "mat_key")) + WHERE and(equals(team_id, 67), equals(mat_key, 'test_val3'), less(timestamp, '2020-01-10 12:14:05.000000'), greater(timestamp, '2020-01-09 12:00:00.000000')) ORDER BY event ASC LIMIT 101 + OFFSET 0 ' --- # name: TestQuery.test_event_property_filter_materialized.2 @@ -85,11 +82,10 @@ 'a%sd', concat(event, ' ', mat_key) FROM events - WHERE team_id = 2 - AND timestamp < '2020-01-10 12:14:05.000000' - AND ("mat_path" ILIKE '%/%') + WHERE and(equals(team_id, 67), ilike(mat_path, '%/%'), less(timestamp, '2020-01-10 12:14:05.000000'), greater(timestamp, '2020-01-09 12:00:00.000000')) ORDER BY event ASC LIMIT 101 + OFFSET 0 ' --- # name: TestQuery.test_full_hogql_query @@ -101,7 +97,7 @@ FROM events WHERE equals(team_id, 68) ORDER BY timestamp ASC - LIMIT 1000 + LIMIT 100 ' --- # name: TestQuery.test_full_hogql_query_materialized @@ -113,7 +109,7 @@ FROM events WHERE equals(team_id, 69) ORDER BY timestamp ASC - LIMIT 1000 + LIMIT 100 ' --- # name: TestQuery.test_hogql_property_filter @@ -125,10 +121,10 @@ 'a%sd', concat(event, ' ', replaceRegexpAll(JSONExtractRaw(properties, 'key'), '^"|"$', '')) FROM events - WHERE team_id = 2 - AND timestamp < '2020-01-10 12:14:05.000000' + WHERE and(equals(team_id, 70), less(timestamp, '2020-01-10 12:14:05.000000'), greater(timestamp, '2020-01-09 12:00:00.000000')) ORDER BY event ASC LIMIT 101 + OFFSET 0 ' --- # name: TestQuery.test_hogql_property_filter.1 @@ -140,11 +136,10 @@ 'a%sd', concat(event, ' ', replaceRegexpAll(JSONExtractRaw(properties, 'key'), '^"|"$', '')) FROM events - WHERE team_id = 2 - AND timestamp < '2020-01-10 12:14:05.000000' - 
AND equals('a%sd', 'foo') + WHERE and(equals(team_id, 70), equals('a%sd', 'foo'), less(timestamp, '2020-01-10 12:14:05.000000'), greater(timestamp, '2020-01-09 12:00:00.000000')) ORDER BY event ASC LIMIT 101 + OFFSET 0 ' --- # name: TestQuery.test_hogql_property_filter.2 @@ -156,11 +151,10 @@ 'a%sd', concat(event, ' ', replaceRegexpAll(JSONExtractRaw(properties, 'key'), '^"|"$', '')) FROM events - WHERE team_id = 2 - AND timestamp < '2020-01-10 12:14:05.000000' - AND equals('a%sd', 'a%sd') + WHERE and(equals(team_id, 70), equals('a%sd', 'a%sd'), less(timestamp, '2020-01-10 12:14:05.000000'), greater(timestamp, '2020-01-09 12:00:00.000000')) ORDER BY event ASC LIMIT 101 + OFFSET 0 ' --- # name: TestQuery.test_hogql_property_filter.3 @@ -172,11 +166,10 @@ 'a%sd', concat(event, ' ', replaceRegexpAll(JSONExtractRaw(properties, 'key'), '^"|"$', '')) FROM events - WHERE team_id = 2 - AND timestamp < '2020-01-10 12:14:05.000000' - AND equals(replaceRegexpAll(JSONExtractRaw(properties, 'key'), '^"|"$', ''), 'test_val2') + WHERE and(equals(team_id, 70), equals(replaceRegexpAll(JSONExtractRaw(properties, 'key'), '^"|"$', ''), 'test_val2'), less(timestamp, '2020-01-10 12:14:05.000000'), greater(timestamp, '2020-01-09 12:00:00.000000')) ORDER BY event ASC LIMIT 101 + OFFSET 0 ' --- # name: TestQuery.test_hogql_property_filter_materialized @@ -188,10 +181,10 @@ 'a%sd', concat(event, ' ', mat_key) FROM events - WHERE team_id = 2 - AND timestamp < '2020-01-10 12:14:05.000000' + WHERE and(equals(team_id, 71), less(timestamp, '2020-01-10 12:14:05.000000'), greater(timestamp, '2020-01-09 12:00:00.000000')) ORDER BY event ASC LIMIT 101 + OFFSET 0 ' --- # name: TestQuery.test_hogql_property_filter_materialized.1 @@ -203,11 +196,10 @@ 'a%sd', concat(event, ' ', mat_key) FROM events - WHERE team_id = 2 - AND timestamp < '2020-01-10 12:14:05.000000' - AND equals('a%sd', 'foo') + WHERE and(equals(team_id, 71), equals('a%sd', 'foo'), less(timestamp, '2020-01-10 12:14:05.000000'), greater(timestamp, '2020-01-09 12:00:00.000000')) ORDER BY event ASC LIMIT 101 + OFFSET 0 ' --- # name: TestQuery.test_hogql_property_filter_materialized.2 @@ -219,11 +211,10 @@ 'a%sd', concat(event, ' ', mat_key) FROM events - WHERE team_id = 2 - AND timestamp < '2020-01-10 12:14:05.000000' - AND equals('a%sd', 'a%sd') + WHERE and(equals(team_id, 71), equals('a%sd', 'a%sd'), less(timestamp, '2020-01-10 12:14:05.000000'), greater(timestamp, '2020-01-09 12:00:00.000000')) ORDER BY event ASC LIMIT 101 + OFFSET 0 ' --- # name: TestQuery.test_hogql_property_filter_materialized.3 @@ -235,46 +226,39 @@ 'a%sd', concat(event, ' ', mat_key) FROM events - WHERE team_id = 2 - AND timestamp < '2020-01-10 12:14:05.000000' - AND equals(mat_key, 'test_val2') + WHERE and(equals(team_id, 71), equals(mat_key, 'test_val2'), less(timestamp, '2020-01-10 12:14:05.000000'), greater(timestamp, '2020-01-09 12:00:00.000000')) ORDER BY event ASC LIMIT 101 + OFFSET 0 ' --- # name: TestQuery.test_person_property_filter ' /* user_id:0 request:_snapshot_ */ SELECT event, - distinct_id, - replaceRegexpAll(JSONExtractRaw(properties, 'key'), '^"|"$', ''), + events.distinct_id, + replaceRegexpAll(JSONExtractRaw(events.properties, 'key'), '^"|"$', ''), 'a%sd', - concat(event, ' ', replaceRegexpAll(JSONExtractRaw(properties, 'key'), '^"|"$', '')) + concat(event, ' ', replaceRegexpAll(JSONExtractRaw(events.properties, 'key'), '^"|"$', '')) FROM events - WHERE team_id = 2 - AND timestamp < '2020-01-10 12:14:05.000000' - AND (distinct_id IN - (SELECT distinct_id - FROM - 
(SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 2 - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) - WHERE person_id IN - (SELECT id - FROM - (SELECT id, - argMax(properties, person._timestamp) as properties, - max(is_deleted) as is_deleted - FROM person - WHERE team_id = 2 - GROUP BY id - HAVING is_deleted = 0) - WHERE has(['tom@posthog.com'], replaceRegexpAll(JSONExtractRaw(properties, 'email'), '^"|"$', '')) ) )) + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, version) AS person_id, + distinct_id + FROM person_distinct_id2 + WHERE equals(team_id, 73) + GROUP BY distinct_id + HAVING equals(argMax(is_deleted, version), 0)) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) + INNER JOIN + (SELECT argMax(person.properties, version) AS properties, + id + FROM person + WHERE equals(team_id, 73) + GROUP BY id + HAVING equals(argMax(is_deleted, version), 0)) AS events__pdi__person ON equals(events__pdi.person_id, events__pdi__person.id) + WHERE and(equals(team_id, 73), equals(replaceRegexpAll(JSONExtractRaw(events__pdi__person.properties, 'email'), '^"|"$', ''), 'tom@posthog.com'), less(timestamp, '2020-01-10 12:14:05.000000'), greater(timestamp, '2020-01-09 12:00:00.000000')) ORDER BY event ASC LIMIT 101 + OFFSET 0 ' --- # name: TestQuery.test_property_filter_aggregations @@ -283,11 +267,11 @@ SELECT replaceRegexpAll(JSONExtractRaw(properties, 'key'), '^"|"$', ''), count(*) FROM events - WHERE team_id = 2 - AND timestamp < '2020-01-10 12:14:05.000000' + WHERE and(equals(team_id, 74), less(timestamp, '2020-01-10 12:14:05.000000'), greater(timestamp, '2020-01-09 12:00:00.000000')) GROUP BY replaceRegexpAll(JSONExtractRaw(properties, 'key'), '^"|"$', '') - ORDER BY count() DESC + ORDER BY count(*) DESC LIMIT 101 + OFFSET 0 ' --- # name: TestQuery.test_property_filter_aggregations.1 @@ -296,12 +280,12 @@ SELECT replaceRegexpAll(JSONExtractRaw(properties, 'key'), '^"|"$', ''), count(*) FROM events - WHERE team_id = 2 - AND timestamp < '2020-01-10 12:14:05.000000' + WHERE and(equals(team_id, 74), less(timestamp, '2020-01-10 12:14:05.000000'), greater(timestamp, '2020-01-09 12:00:00.000000')) GROUP BY replaceRegexpAll(JSONExtractRaw(properties, 'key'), '^"|"$', '') - HAVING greater(count(*), 1) - ORDER BY count() DESC + HAVING and(greater(count(*), 1)) + ORDER BY count(*) DESC LIMIT 101 + OFFSET 0 ' --- # name: TestQuery.test_property_filter_aggregations_materialized @@ -310,11 +294,11 @@ SELECT mat_key, count(*) FROM events - WHERE team_id = 2 - AND timestamp < '2020-01-10 12:14:05.000000' + WHERE and(equals(team_id, 75), less(timestamp, '2020-01-10 12:14:05.000000'), greater(timestamp, '2020-01-09 12:00:00.000000')) GROUP BY mat_key - ORDER BY count() DESC + ORDER BY count(*) DESC LIMIT 101 + OFFSET 0 ' --- # name: TestQuery.test_property_filter_aggregations_materialized.1 @@ -323,38 +307,53 @@ SELECT mat_key, count(*) FROM events - WHERE team_id = 2 - AND timestamp < '2020-01-10 12:14:05.000000' + WHERE and(equals(team_id, 75), less(timestamp, '2020-01-10 12:14:05.000000'), greater(timestamp, '2020-01-09 12:00:00.000000')) GROUP BY mat_key - HAVING greater(count(*), 1) - ORDER BY count() DESC + HAVING and(greater(count(*), 1)) + ORDER BY count(*) DESC LIMIT 101 + OFFSET 0 ' --- # name: TestQuery.test_select_hogql_expressions ' /* user_id:0 request:_snapshot_ */ - SELECT event, + SELECT replaceRegexpAll(JSONExtractRaw(properties, 'key'), '^"|"$', ''), + event, distinct_id, - 
replaceRegexpAll(JSONExtractRaw(properties, 'key'), '^"|"$', ''), concat(event, ' ', replaceRegexpAll(JSONExtractRaw(properties, 'key'), '^"|"$', '')) FROM events - WHERE team_id = 2 - AND timestamp < '2020-01-10 12:14:05.000000' - ORDER BY event ASC + WHERE and(equals(team_id, 76), less(timestamp, '2020-01-10 12:14:05.000000'), greater(timestamp, '2020-01-09 12:00:00.000000')) + ORDER BY replaceRegexpAll(JSONExtractRaw(properties, 'key'), '^"|"$', '') ASC LIMIT 101 + OFFSET 0 ' --- # name: TestQuery.test_select_hogql_expressions.1 ' /* user_id:0 request:_snapshot_ */ - SELECT tuple(uuid, event, properties, timestamp, team_id, distinct_id, elements_chain, created_at), + SELECT tuple(uuid, event, events.properties, timestamp, team_id, events.distinct_id, elements_chain, events.created_at, events__pdi.person_id, events__pdi__person.created_at, events__pdi__person.properties), event FROM events - WHERE team_id = 2 - AND timestamp < '2020-01-10 12:14:05.000000' - ORDER BY tuple(uuid, event, properties, timestamp, team_id, distinct_id, elements_chain, created_at) ASC + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, version) AS person_id, + distinct_id + FROM person_distinct_id2 + WHERE equals(team_id, 76) + GROUP BY distinct_id + HAVING equals(argMax(is_deleted, version), 0)) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) + INNER JOIN + (SELECT argMax(person.created_at, version) AS created_at, + argMax(person.properties, version) AS properties, + id + FROM person + WHERE equals(team_id, 76) + GROUP BY id + HAVING equals(argMax(is_deleted, version), 0)) AS events__pdi__person ON equals(events__pdi.person_id, events__pdi__person.id) + WHERE and(equals(team_id, 76), less(timestamp, '2020-01-10 12:14:05.000000'), greater(timestamp, '2020-01-09 12:00:00.000000')) + ORDER BY tuple(uuid, event, events.properties, timestamp, team_id, events.distinct_id, elements_chain, events.created_at, events__pdi.person_id, events__pdi__person.created_at, events__pdi__person.properties) ASC LIMIT 101 + OFFSET 0 ' --- # name: TestQuery.test_select_hogql_expressions.2 @@ -363,11 +362,11 @@ SELECT count(*), event FROM events - WHERE team_id = 2 - AND timestamp < '2020-01-10 12:14:05.000000' + WHERE and(equals(team_id, 76), less(timestamp, '2020-01-10 12:14:05.000000'), greater(timestamp, '2020-01-09 12:00:00.000000')) GROUP BY event - ORDER BY count() DESC + ORDER BY count(*) DESC LIMIT 101 + OFFSET 0 ' --- # name: TestQuery.test_select_hogql_expressions.3 @@ -376,11 +375,10 @@ SELECT count(*), event FROM events - WHERE team_id = 2 - AND timestamp < '2020-01-10 12:14:05.000000' - AND or(equals(event, 'sign up'), like(replaceRegexpAll(JSONExtractRaw(properties, 'key'), '^"|"$', ''), '%val2')) + WHERE and(equals(team_id, 76), or(equals(event, 'sign up'), like(replaceRegexpAll(JSONExtractRaw(properties, 'key'), '^"|"$', ''), '%val2')), less(timestamp, '2020-01-10 12:14:05.000000'), greater(timestamp, '2020-01-09 12:00:00.000000')) GROUP BY event ORDER BY count(*) DESC, event ASC LIMIT 101 + OFFSET 0 ' --- diff --git a/posthog/api/test/test_insight.py b/posthog/api/test/test_insight.py index 54bfb8d8a7c55..e354508f76fa8 100644 --- a/posthog/api/test/test_insight.py +++ b/posthog/api/test/test_insight.py @@ -2375,7 +2375,7 @@ def test_insight_trend_hogql_breakdown(self) -> None: data={ "events": json.dumps([{"id": "$pageview"}]), "breakdown_type": "hogql", - "breakdown": "ifElse(toInt(properties.int_value) < 10, 'le%ss', 'more')", + "breakdown": "if(toInt(properties.int_value) < 10, 
'le%ss', 'more')", }, ) result = response.json()["result"] diff --git a/posthog/api/test/test_query.py b/posthog/api/test/test_query.py index 4b164743f5580..7a3876999a1ca 100644 --- a/posthog/api/test/test_query.py +++ b/posthog/api/test/test_query.py @@ -37,26 +37,26 @@ def test_select_hogql_expressions(self): with freeze_time("2020-01-10 12:11:00"): _create_event(team=self.team, event="sign out", distinct_id="2", properties={"key": "test_val2"}) with freeze_time("2020-01-10 12:12:00"): - _create_event(team=self.team, event="sign out", distinct_id="3", properties={"key": "test_val2"}) + _create_event(team=self.team, event="sign out", distinct_id="2", properties={"key": "test_val2"}) with freeze_time("2020-01-10 12:13:00"): - _create_event(team=self.team, event="sign out", distinct_id="4", properties={"key": "test_val3"}) + _create_event(team=self.team, event="sign out", distinct_id="2", properties={"key": "test_val3"}) flush_persons_and_events() with freeze_time("2020-01-10 12:14:00"): - query = EventsQuery(select=["event", "distinct_id", "properties.key", "concat(event, ' ', properties.key)"]) + query = EventsQuery(select=["properties.key", "event", "distinct_id", "concat(event, ' ', properties.key)"]) response = self.client.post(f"/api/projects/{self.team.id}/query/", query.dict()).json() self.assertEqual( response, { - "columns": ["event", "distinct_id", "properties.key", "concat(event, ' ', properties.key)"], + "columns": ["properties.key", "event", "distinct_id", "concat(event, ' ', properties.key)"], "hasMore": False, - "types": ["String", "String", "String", "String"], "results": [ - ["sign out", "4", "test_val3", "sign out test_val3"], - ["sign out", "3", "test_val2", "sign out test_val2"], - ["sign out", "2", "test_val2", "sign out test_val2"], - ["sign up", "2", "test_val1", "sign up test_val1"], + ["test_val1", "sign up", "2", "sign up test_val1"], + ["test_val2", "sign out", "2", "sign out test_val2"], + ["test_val2", "sign out", "2", "sign out test_val2"], + ["test_val3", "sign out", "2", "sign out test_val3"], ], + "types": ["String", "String", "String", "String"], }, ) @@ -83,7 +83,7 @@ def test_select_hogql_expressions(self): query.select = ["count()", "event"] query.where = ["event == 'sign up' or like(properties.key, '%val2')"] - query.orderBy = ["-count()", "event"] + query.orderBy = ["count() DESC", "event"] response = self.client.post(f"/api/projects/{self.team.id}/query/", query.dict()).json() self.assertEqual( response, diff --git a/posthog/hogql/ast.py b/posthog/hogql/ast.py index 55d13dccf5707..a2d076f8736ce 100644 --- a/posthog/hogql/ast.py +++ b/posthog/hogql/ast.py @@ -256,6 +256,8 @@ class CompareOperationType(str, Enum): NotILike = "not ilike" In = "in" NotIn = "not in" + Regex = "=~" + NotRegex = "!~" class CompareOperation(Expr): diff --git a/posthog/hogql/constants.py b/posthog/hogql/constants.py index 27e180bec6574..c48a712847633 100644 --- a/posthog/hogql/constants.py +++ b/posthog/hogql/constants.py @@ -59,6 +59,7 @@ "trimLeft": "trimLeft", "trimRight": "trimRight", "extractTextFromHTML": "extractTextFromHTML", + "match": "match", "like": "like", "ilike": "ilike", "notLike": "notLike", @@ -67,7 +68,8 @@ # array functions "tuple": "tuple", # conditional - "ifElse": "if", + "if": "if", + "not": "not", "multiIf": "multiIf", # rounding "round": "round", @@ -110,7 +112,11 @@ "distinct_id", "elements_chain", "created_at", + "person_id", + "person.created_at", + "person.properties", ] # Never return more rows than this in top level HogQL SELECT statements 
+DEFAULT_RETURNED_ROWS = 100 MAX_SELECT_RETURNED_ROWS = 65535 diff --git a/posthog/hogql/parser.py b/posthog/hogql/parser.py index de1eef6975045..caedee2133eb5 100644 --- a/posthog/hogql/parser.py +++ b/posthog/hogql/parser.py @@ -22,6 +22,19 @@ def parse_expr(expr: str, placeholders: Optional[Dict[str, ast.Expr]] = None, no return node +def parse_order_expr( + order_expr: str, placeholders: Optional[Dict[str, ast.Expr]] = None, no_placeholders=False +) -> ast.Expr: + parse_tree = get_parser(order_expr).orderExpr() + node = HogQLParseTreeConverter().visit(parse_tree) + if placeholders: + return replace_placeholders(node, placeholders) + elif no_placeholders: + assert_no_placeholders(node) + + return node + + def parse_select( statement: str, placeholders: Optional[Dict[str, ast.Expr]] = None, no_placeholders=False ) -> ast.SelectQuery: @@ -330,7 +343,9 @@ def visitColumnExprExtract(self, ctx: HogQLParser.ColumnExprExtractContext): raise NotImplementedError(f"Unsupported node: ColumnExprExtract") def visitColumnExprNegate(self, ctx: HogQLParser.ColumnExprNegateContext): - raise NotImplementedError(f"Unsupported node: ColumnExprNegate") + return ast.BinaryOperation( + op=ast.BinaryOperationType.Sub, left=ast.Constant(value=0), right=self.visit(ctx.columnExpr()) + ) def visitColumnExprSubquery(self, ctx: HogQLParser.ColumnExprSubqueryContext): return self.visit(ctx.selectUnionStmt()) diff --git a/posthog/hogql/printer.py b/posthog/hogql/printer.py index 32a42378eeaa1..db41645bbf0df 100644 --- a/posthog/hogql/printer.py +++ b/posthog/hogql/printer.py @@ -252,6 +252,10 @@ def visit_compare_operation(self, node: ast.CompareOperation): return f"in({left}, {right})" elif node.op == ast.CompareOperationType.NotIn: return f"not(in({left}, {right}))" + elif node.op == ast.CompareOperationType.Regex: + return f"match({left}, {right})" + elif node.op == ast.CompareOperationType.NotRegex: + return f"not(match({left}, {right}))" else: raise ValueError(f"Unknown CompareOperationType: {type(node.op).__name__}") diff --git a/posthog/hogql/property.py b/posthog/hogql/property.py new file mode 100644 index 0000000000000..a0de118726e28 --- /dev/null +++ b/posthog/hogql/property.py @@ -0,0 +1,260 @@ +import re +from typing import Any, List, Optional, Union, cast + +from pydantic import BaseModel + +from posthog.constants import AUTOCAPTURE_EVENT, PropertyOperatorType +from posthog.hogql import ast +from posthog.hogql.constants import HOGQL_AGGREGATIONS +from posthog.hogql.parser import parse_expr +from posthog.hogql.visitor import TraversingVisitor +from posthog.models import Action, ActionStep, Property +from posthog.models.event import Selector +from posthog.models.property import PropertyGroup +from posthog.models.property.util import build_selector_regex +from posthog.schema import PropertyOperator + + +def has_aggregation(expr: ast.AST) -> bool: + finder = AggregationFinder() + finder.visit(expr) + return finder.has_aggregation + + +class AggregationFinder(TraversingVisitor): + def __init__(self): + super().__init__() + self.has_aggregation = False + + def visit(self, node): + if self.has_aggregation: + return + else: + super().visit(node) + + def visit_call(self, node: ast.Call): + if node.name in HOGQL_AGGREGATIONS: + self.has_aggregation = True + else: + for arg in node.args: + self.visit(arg) + + +def property_to_expr(property: Union[BaseModel, PropertyGroup, Property, dict, list]) -> ast.Expr: + if isinstance(property, dict): + property = Property(**property) + elif isinstance(property, list): + 
properties = [property_to_expr(p) for p in property] + if len(properties) == 1: + return properties[0] + return ast.And(exprs=properties) + elif isinstance(property, Property): + pass + elif isinstance(property, PropertyGroup): + if property.type == PropertyOperatorType.AND: + if len(property.values) == 1: + return property_to_expr(property.values[0]) + return ast.And(exprs=[property_to_expr(p) for p in property.values]) + if property.type == PropertyOperatorType.OR: + if len(property.values) == 1: + return property_to_expr(property.values[0]) + return ast.Or(exprs=[property_to_expr(p) for p in property.values]) + raise NotImplementedError(f'PropertyGroup of unknown type "{property.type}"') + elif isinstance(property, BaseModel): + property = Property(**property.dict()) + else: + raise NotImplementedError(f"property_to_expr with property of type {type(property).__name__} not implemented") + + if property.type == "hogql": + return parse_expr(property.key) + elif property.type == "event" or cast(Any, property.type) == "feature" or property.type == "person": + operator = cast(Optional[PropertyOperator], property.operator) or PropertyOperator.exact + value = property.value + if isinstance(value, list): + if len(value) == 1: + value = value[0] + else: + exprs = [ + property_to_expr( + Property(type=property.type, key=property.key, operator=property.operator, value=v) + ) + for v in value + ] + if ( + operator == PropertyOperator.is_not + or operator == PropertyOperator.not_icontains + or operator == PropertyOperator.not_regex + ): + return ast.And(exprs=exprs) + return ast.Or(exprs=exprs) + + chain = ["person", "properties"] if property.type == "person" else ["properties"] + field = ast.Field(chain=chain + [property.key]) + + if operator == PropertyOperator.is_set: + return ast.CompareOperation(op=ast.CompareOperationType.NotEq, left=field, right=ast.Constant(value=None)) + elif operator == PropertyOperator.is_not_set: + return ast.CompareOperation(op=ast.CompareOperationType.Eq, left=field, right=ast.Constant(value=None)) + elif operator == PropertyOperator.icontains: + return ast.CompareOperation( + op=ast.CompareOperationType.ILike, + left=field, + right=ast.Constant(value=f"%{value}%"), + ) + elif operator == PropertyOperator.not_icontains: + return ast.CompareOperation( + op=ast.CompareOperationType.NotILike, + left=field, + right=ast.Constant(value=f"%{value}%"), + ) + elif operator == PropertyOperator.regex: + return ast.Call(name="match", args=[field, ast.Constant(value=value)]) + elif operator == PropertyOperator.not_regex: + return ast.Call(name="not", args=[ast.Call(name="match", args=[field, ast.Constant(value=value)])]) + elif operator == PropertyOperator.exact or operator == PropertyOperator.is_date_exact: + op = ast.CompareOperationType.Eq + elif operator == PropertyOperator.is_not: + op = ast.CompareOperationType.NotEq + elif operator == PropertyOperator.lt or operator == PropertyOperator.is_date_before: + op = ast.CompareOperationType.Lt + elif operator == PropertyOperator.gt or operator == PropertyOperator.is_date_after: + op = ast.CompareOperationType.Gt + elif operator == PropertyOperator.lte: + op = ast.CompareOperationType.LtE + elif operator == PropertyOperator.gte: + op = ast.CompareOperationType.GtE + else: + raise NotImplementedError(f"PropertyOperator {operator} not implemented") + + return ast.CompareOperation(op=op, left=field, right=ast.Constant(value=value)) + + elif property.type == "element": + value = property.value + operator = cast(Optional[PropertyOperator], 
property.operator) or PropertyOperator.exact + if isinstance(value, list): + if len(value) == 1: + value = value[0] + else: + exprs = [ + property_to_expr( + Property(type=property.type, key=property.key, operator=property.operator, value=v) + ) + for v in value + ] + if ( + operator == PropertyOperator.is_not + or operator == PropertyOperator.not_icontains + or operator == PropertyOperator.not_regex + ): + return ast.And(exprs=exprs) + return ast.Or(exprs=exprs) + + if property.key == "selector" or property.key == "tag_name": + if operator != PropertyOperator.exact and operator != PropertyOperator.is_not: + raise NotImplementedError( + f"property_to_expr for element {property.key} only supports exact and is_not operators, not {operator}" + ) + expr = selector_to_expr(str(value)) if property.key == "selector" else tag_name_to_expr(str(value)) + if operator == PropertyOperator.is_not: + return ast.Call(name="not", args=[expr]) + return expr + + if property.key == "href": + return element_chain_key_filter("href", str(value), operator) + + if property.key == "text": + return element_chain_key_filter("text", str(value), operator) + + raise NotImplementedError(f"property_to_expr for type element not implemented for key {property.key}") + # "cohort", + # "element", + # "static-cohort", + # "precalculated-cohort", + # "group", + # "recording", + # "behavioral", + # "session", + + raise NotImplementedError(f"property_to_expr not implemented for filter type {type(property).__name__}") + + +def action_to_expr(action: Action) -> ast.Expr: + steps = action.steps.all() + + if len(steps) == 0: + return ast.Constant(value=True) + + or_queries = [] + for step in steps: + exprs: List[ast.Expr] = [parse_expr("event = {event}", {"event": ast.Constant(value=step.event)})] + + if step.event == AUTOCAPTURE_EVENT: + if step.selector: + exprs.append(selector_to_expr(step.selector)) + if step.tag_name is not None: + exprs.append(tag_name_to_expr(step.tag_name)) + if step.href is not None: + exprs.append(element_chain_key_filter("href", step.href, PropertyOperator.exact)) + if step.text is not None: + exprs.append(element_chain_key_filter("text", step.text, PropertyOperator.exact)) + + if step.url: + if step.url_matching == ActionStep.EXACT: + expr = parse_expr("properties.$current_url = {url}", {"url": ast.Constant(value=step.url)}) + elif step.url_matching == ActionStep.REGEX: + expr = parse_expr("match(properties.$current_url, {regex})", {"regex": ast.Constant(value=step.url)}) + else: + expr = parse_expr("properties.$current_url like {url}", {"url": ast.Constant(value=f"%{step.url}%")}) + exprs.append(expr) + + if step.properties: + exprs.append(property_to_expr(step.properties)) + + if len(exprs) == 1: + or_queries.append(exprs[0]) + elif len(exprs) > 1: + or_queries.append(ast.And(exprs=exprs)) + + if len(or_queries) == 1: + return or_queries[0] + else: + return ast.Or(exprs=or_queries) + + +def element_chain_key_filter(key: str, text: str, operator: PropertyOperator): + escaped = text.replace('"', r"\"") + if operator == PropertyOperator.is_set or operator == PropertyOperator.is_not_set: + value = r'[^"]+' + elif operator == PropertyOperator.icontains or operator == PropertyOperator.not_icontains: + value = rf'[^"]*{re.escape(escaped)}[^"]*' + elif operator == PropertyOperator.regex or operator == PropertyOperator.not_regex: + value = escaped + elif operator == PropertyOperator.exact or operator == PropertyOperator.is_not: + value = re.escape(escaped) + else: + raise 
NotImplementedError(f"element_href_to_expr not implemented for operator {operator}") + optional_flag = ( + "(?i)" if operator == PropertyOperator.icontains or operator == PropertyOperator.not_icontains else "" + ) + regex = f'{optional_flag}({key}="{value}")' + expr = parse_expr("match(elements_chain, {regex})", {"regex": ast.Constant(value=str(regex))}) + if ( + operator == PropertyOperator.is_not_set + or operator == PropertyOperator.not_icontains + or operator == PropertyOperator.is_not + or operator == PropertyOperator.not_regex + ): + expr = ast.Call(name="not", args=[expr]) + return expr + + +def tag_name_to_expr(tag_name: str): + regex = rf"(^|;){tag_name}(\.|$|;|:)" + expr = parse_expr("match(elements_chain, {regex})", {"regex": ast.Constant(value=str(regex))}) + return expr + + +def selector_to_expr(selector: str): + regex = build_selector_regex(Selector(selector, escape_slashes=False)) + expr = parse_expr("match(elements_chain, {regex})", {"regex": ast.Constant(value=regex)}) + return expr diff --git a/posthog/hogql/query.py b/posthog/hogql/query.py index 8f8b504a11c05..d3b7d9f9d25f0 100644 --- a/posthog/hogql/query.py +++ b/posthog/hogql/query.py @@ -1,9 +1,10 @@ -from typing import Dict, List, Optional, Union +from typing import Dict, List, Optional, Union, cast from pydantic import BaseModel, Extra from posthog.clickhouse.client.connection import Workload from posthog.hogql import ast +from posthog.hogql.constants import DEFAULT_RETURNED_ROWS from posthog.hogql.hogql import HogQLContext from posthog.hogql.parser import parse_select from posthog.hogql.placeholders import assert_no_placeholders, replace_placeholders @@ -44,7 +45,10 @@ def execute_hogql_query( assert_no_placeholders(select_query) if select_query.limit is None: - select_query.limit = ast.Constant(value=1000) + select_query.limit = ast.Constant(value=DEFAULT_RETURNED_ROWS) + + # Make a copy for hogql printing later. we don't want it to contain joined SQL tables for example + select_query_hogql = cast(ast.SelectQuery, clone_expr(select_query)) # Make a copy for hogql printing later. 
we don't want it to contain joined SQL tables for example select_query_hogql = clone_expr(select_query) diff --git a/posthog/hogql/test/test_parser.py b/posthog/hogql/test/test_parser.py index 2a1455d5538d6..2c98794d411c0 100644 --- a/posthog/hogql/test/test_parser.py +++ b/posthog/hogql/test/test_parser.py @@ -1,5 +1,5 @@ from posthog.hogql import ast -from posthog.hogql.parser import parse_expr, parse_select +from posthog.hogql.parser import parse_expr, parse_order_expr, parse_select from posthog.test.base import BaseTest @@ -621,6 +621,20 @@ def test_select_group_by(self): ), ) + def test_order_by(self): + self.assertEqual( + parse_order_expr("1 ASC"), + ast.OrderExpr(expr=ast.Constant(value=1), order="ASC"), + ) + self.assertEqual( + parse_order_expr("event"), + ast.OrderExpr(expr=ast.Field(chain=["event"]), order="ASC"), + ) + self.assertEqual( + parse_order_expr("timestamp DESC"), + ast.OrderExpr(expr=ast.Field(chain=["timestamp"]), order="DESC"), + ) + def test_select_order_by(self): self.assertEqual( parse_select("select 1 from events ORDER BY 1 ASC, event, timestamp DESC"), diff --git a/posthog/hogql/test/test_property.py b/posthog/hogql/test/test_property.py new file mode 100644 index 0000000000000..c3a1da5fe5209 --- /dev/null +++ b/posthog/hogql/test/test_property.py @@ -0,0 +1,325 @@ +from typing import List, Union, cast + +from posthog.constants import PropertyOperatorType +from posthog.hogql import ast +from posthog.hogql.parser import parse_expr +from posthog.hogql.property import ( + action_to_expr, + element_chain_key_filter, + has_aggregation, + property_to_expr, + selector_to_expr, + tag_name_to_expr, +) +from posthog.models import Action, ActionStep, Property +from posthog.models.property import PropertyGroup +from posthog.schema import HogQLPropertyFilter, PropertyOperator +from posthog.test.base import BaseTest + +elements_chain_match = lambda x: parse_expr("match(elements_chain, {regex})", {"regex": ast.Constant(value=str(x))}) +not_call = lambda x: ast.Call(name="not", args=[x]) + + +class TestProperty(BaseTest): + def test_has_aggregation(self): + self.assertEqual(has_aggregation(parse_expr("properties.a = 'b'")), False) + self.assertEqual(has_aggregation(parse_expr("if(1,2,3)")), False) + self.assertEqual(has_aggregation(parse_expr("if(1,2,avg(3))")), True) + self.assertEqual(has_aggregation(parse_expr("count()")), True) + self.assertEqual(has_aggregation(parse_expr("sum(properties.bla)")), True) + + def test_property_to_expr_hogql(self): + self.assertEqual(property_to_expr({"type": "hogql", "key": "1"}), ast.Constant(value=1)) + self.assertEqual(property_to_expr(Property(type="hogql", key="1")), ast.Constant(value=1)) + self.assertEqual(property_to_expr(HogQLPropertyFilter(type="hogql", key="1")), ast.Constant(value=1)) + + def test_property_to_expr_event(self): + self.assertEqual( + property_to_expr({"key": "a", "value": "b"}), + parse_expr("properties.a = 'b'"), + ) + self.assertEqual( + property_to_expr({"type": "event", "key": "a", "value": "b"}), + parse_expr("properties.a = 'b'"), + ) + self.assertEqual( + property_to_expr({"type": "event", "key": "a", "value": "b", "operator": "is_set"}), + parse_expr("properties.a is not null"), + ) + self.assertEqual( + property_to_expr({"type": "event", "key": "a", "value": "b", "operator": "is_not_set"}), + parse_expr("properties.a is null"), + ) + self.assertEqual( + property_to_expr({"type": "event", "key": "a", "value": "b", "operator": "exact"}), + parse_expr("properties.a = 'b'"), + ) + self.assertEqual( + 
property_to_expr({"type": "event", "key": "a", "value": "b", "operator": "is_not"}), + parse_expr("properties.a != 'b'"), + ) + self.assertEqual( + property_to_expr({"type": "event", "key": "a", "value": "3", "operator": "gt"}), + parse_expr("properties.a > '3'"), + ) + self.assertEqual( + property_to_expr({"type": "event", "key": "a", "value": "3", "operator": "lt"}), + parse_expr("properties.a < '3'"), + ) + self.assertEqual( + property_to_expr({"type": "event", "key": "a", "value": "3", "operator": "gte"}), + parse_expr("properties.a >= '3'"), + ) + self.assertEqual( + property_to_expr({"type": "event", "key": "a", "value": "3", "operator": "lte"}), + parse_expr("properties.a <= '3'"), + ) + self.assertEqual( + property_to_expr({"type": "event", "key": "a", "value": "3", "operator": "icontains"}), + parse_expr("properties.a ilike '%3%'"), + ) + self.assertEqual( + property_to_expr({"type": "event", "key": "a", "value": "3", "operator": "not_icontains"}), + parse_expr("properties.a not ilike '%3%'"), + ) + self.assertEqual( + property_to_expr({"type": "event", "key": "a", "value": ".*", "operator": "regex"}), + parse_expr("match(properties.a, '.*')"), + ) + self.assertEqual( + property_to_expr({"type": "event", "key": "a", "value": ".*", "operator": "not_regex"}), + parse_expr("not(match(properties.a, '.*'))"), + ) + + def test_property_to_expr_event_list(self): + # positive + self.assertEqual( + property_to_expr({"type": "event", "key": "a", "value": ["b", "c"], "operator": "exact"}), + parse_expr("properties.a = 'b' or properties.a = 'c'"), + ) + self.assertEqual( + property_to_expr({"type": "event", "key": "a", "value": ["b", "c"], "operator": "icontains"}), + parse_expr("properties.a ilike '%b%' or properties.a ilike '%c%'"), + ) + self.assertEqual( + property_to_expr({"type": "event", "key": "a", "value": ["b", "c"], "operator": "regex"}), + parse_expr("match(properties.a, 'b') or match(properties.a, 'c')"), + ) + # negative + self.assertEqual( + property_to_expr({"type": "event", "key": "a", "value": ["b", "c"], "operator": "is_not"}), + parse_expr("properties.a != 'b' and properties.a != 'c'"), + ) + self.assertEqual( + property_to_expr({"type": "event", "key": "a", "value": ["b", "c"], "operator": "not_icontains"}), + parse_expr("properties.a not ilike '%b%' and properties.a not ilike '%c%'"), + ) + self.assertEqual( + property_to_expr({"type": "event", "key": "a", "value": ["b", "c"], "operator": "not_regex"}), + parse_expr("not(match(properties.a, 'b')) and not(match(properties.a, 'c'))"), + ) + + def test_property_to_expr_feature(self): + self.assertEqual( + property_to_expr({"type": "event", "key": "a", "value": "b", "operator": "exact"}), + parse_expr("properties.a = 'b'"), + ) + + def test_property_to_expr_person(self): + self.assertEqual( + property_to_expr({"type": "person", "key": "a", "value": "b", "operator": "exact"}), + parse_expr("person.properties.a = 'b'"), + ) + + def test_property_to_expr_element(self): + self.assertEqual( + property_to_expr({"type": "element", "key": "selector", "value": "div", "operator": "exact"}), + selector_to_expr("div"), + ) + self.assertEqual( + property_to_expr({"type": "element", "key": "selector", "value": "div", "operator": "is_not"}), + not_call(selector_to_expr("div")), + ) + self.assertEqual( + property_to_expr({"type": "element", "key": "tag_name", "value": "div", "operator": "exact"}), + tag_name_to_expr("div"), + ) + self.assertEqual( + property_to_expr({"type": "element", "key": "tag_name", "value": "div", "operator": 
"is_not"}), + not_call(tag_name_to_expr("div")), + ) + self.assertEqual( + property_to_expr({"type": "element", "key": "href", "value": "href-text.", "operator": "exact"}), + element_chain_key_filter("href", "href-text.", PropertyOperator.exact), + ) + self.assertEqual( + property_to_expr({"type": "element", "key": "text", "value": "text-text.", "operator": "regex"}), + element_chain_key_filter("text", "text-text.", PropertyOperator.regex), + ) + + def test_property_groups(self): + self.assertEqual( + property_to_expr( + PropertyGroup( + type=PropertyOperatorType.AND, + values=[ + Property(type="person", key="a", value="b", operator="exact"), + Property(type="event", key="e", value="b", operator="exact"), + ], + ) + ), + parse_expr("person.properties.a = 'b' and properties.e = 'b'"), + ) + + self.assertEqual( + property_to_expr( + PropertyGroup( + type=PropertyOperatorType.OR, + values=[ + Property(type="person", key="a", value="b", operator="exact"), + Property(type="event", key="e", value="b", operator="exact"), + ], + ) + ), + parse_expr("person.properties.a = 'b' or properties.e = 'b'"), + ) + + def test_property_groups_single(self): + self.assertEqual( + property_to_expr( + PropertyGroup( + type=PropertyOperatorType.AND, + values=[ + Property(type="person", key="a", value="b", operator="exact"), + ], + ) + ), + parse_expr("person.properties.a = 'b'"), + ) + + self.assertEqual( + property_to_expr( + PropertyGroup( + type=PropertyOperatorType.OR, values=[Property(type="event", key="e", value="b", operator="exact")] + ) + ), + parse_expr("properties.e = 'b'"), + ) + + def test_property_groups_combined(self): + self.assertEqual( + property_to_expr( + PropertyGroup( + type=PropertyOperatorType.AND, + values=cast( + Union[List[Property], List[PropertyGroup]], + [ + Property(type="person", key="a", value="b", operator="exact"), + PropertyGroup( + type=PropertyOperatorType.OR, + values=[ + Property(type="person", key="a", value="b", operator="exact"), + Property(type="event", key="e", value="b", operator="exact"), + ], + ), + ], + ), + ) + ), + parse_expr("person.properties.a = 'b' and (person.properties.a = 'b' or properties.e = 'b')"), + ) + + def test_tag_name_to_expr(self): + self.assertEqual(tag_name_to_expr("a"), elements_chain_match("(^|;)a(\\.|$|;|:)")) + + def test_selector_to_expr(self): + self.assertEqual( + selector_to_expr("div"), elements_chain_match('div([-_a-zA-Z0-9\\.:"= ]*?)?($|;|:([^;^\\s]*(;|$|\\s)))') + ) + self.assertEqual( + selector_to_expr("div > div"), + elements_chain_match( + 'div([-_a-zA-Z0-9\\.:"= ]*?)?($|;|:([^;^\\s]*(;|$|\\s)))div([-_a-zA-Z0-9\\.:"= ]*?)?($|;|:([^;^\\s]*(;|$|\\s))).*' + ), + ) + self.assertEqual( + selector_to_expr("a[href='boo']"), + elements_chain_match('a.*?href="boo".*?([-_a-zA-Z0-9\\.:"= ]*?)?($|;|:([^;^\\s]*(;|$|\\s)))'), + ) + self.assertEqual( + selector_to_expr(".class"), + elements_chain_match('.*?\\.class([-_a-zA-Z0-9\\.:"= ]*?)?($|;|:([^;^\\s]*(;|$|\\s)))'), + ) + self.assertEqual( + selector_to_expr("#withid"), + elements_chain_match('#withid([-_a-zA-Z0-9\\.:"= ]*?)?($|;|:([^;^\\s]*(;|$|\\s)))'), + ) + + def test_elements_chain_key_filter(self): + self.assertEqual( + element_chain_key_filter("href", "boo..", PropertyOperator.is_set), elements_chain_match('(href="[^"]+")') + ) + self.assertEqual( + element_chain_key_filter("href", "boo..", PropertyOperator.is_not_set), + not_call(elements_chain_match('(href="[^"]+")')), + ) + self.assertEqual( + element_chain_key_filter("href", "boo..", PropertyOperator.icontains), + 
elements_chain_match('(?i)(href="[^"]*boo\\.\\.[^"]*")'), + ) + self.assertEqual( + element_chain_key_filter("href", "boo..", PropertyOperator.not_icontains), + not_call(elements_chain_match('(?i)(href="[^"]*boo\\.\\.[^"]*")')), + ) + self.assertEqual( + element_chain_key_filter("href", "boo..", PropertyOperator.regex), elements_chain_match('(href="boo..")') + ) + self.assertEqual( + element_chain_key_filter("href", "boo..", PropertyOperator.not_regex), + not_call(elements_chain_match('(href="boo..")')), + ) + self.assertEqual( + element_chain_key_filter("href", "boo..", PropertyOperator.exact), + elements_chain_match('(href="boo\\.\\.")'), + ) + self.assertEqual( + element_chain_key_filter("href", "boo..", PropertyOperator.is_not), + not_call(elements_chain_match('(href="boo\\.\\.")')), + ) + + def test_action_to_expr(self): + action1 = Action.objects.create(team=self.team) + ActionStep.objects.create(event="$autocapture", action=action1, selector="a.nav-link.active", tag_name="a") + self.assertEqual( + action_to_expr(action1), + parse_expr( + "event = '$autocapture' and match(elements_chain, {regex1}) and match(elements_chain, {regex2})", + { + "regex1": ast.Constant( + value='a.*?\\.active\\..*?nav-link([-_a-zA-Z0-9\\.:"= ]*?)?($|;|:([^;^\\s]*(;|$|\\s)))' + ), + "regex2": ast.Constant(value="(^|;)a(\\.|$|;|:)"), + }, + ), + ) + + action2 = Action.objects.create(team=self.team) + ActionStep.objects.create(event="$pageview", action=action2, url="https://example.com", url_matching="contains") + self.assertEqual( + action_to_expr(action2), + parse_expr("event = '$pageview' and properties.$current_url like '%https://example.com%'"), + ) + + action3 = Action.objects.create(team=self.team) + ActionStep.objects.create(event="$pageview", action=action3, url="https://example2.com", url_matching="regex") + ActionStep.objects.create(event="custom", action=action3, url="https://example3.com", url_matching="exact") + self.assertEqual( + action_to_expr(action3), + parse_expr( + "{s1} or {s2}", + { + "s1": parse_expr("event = '$pageview' and match(properties.$current_url, 'https://example2.com')"), + "s2": parse_expr("event = 'custom' and properties.$current_url = 'https://example3.com'"), + }, + ), + ) diff --git a/posthog/hogql/test/test_query.py b/posthog/hogql/test/test_query.py index 37fcdeecfc39a..e998ce60528e7 100644 --- a/posthog/hogql/test/test_query.py +++ b/posthog/hogql/test/test_query.py @@ -40,11 +40,11 @@ def test_query(self): ) self.assertEqual( response.clickhouse, - f"SELECT count(*), event FROM events WHERE and(equals(team_id, {self.team.id}), equals(replaceRegexpAll(JSONExtractRaw(properties, %(hogql_val_0)s), '^\"|\"$', ''), %(hogql_val_1)s)) GROUP BY event LIMIT 1000", + f"SELECT count(*), event FROM events WHERE and(equals(team_id, {self.team.id}), equals(replaceRegexpAll(JSONExtractRaw(properties, %(hogql_val_0)s), '^\"|\"$', ''), %(hogql_val_1)s)) GROUP BY event LIMIT 100", ) self.assertEqual( response.hogql, - "SELECT count(), event FROM events WHERE equals(properties.random_uuid, %(hogql_val_2)s) GROUP BY event LIMIT 1000", + "SELECT count(), event FROM events WHERE equals(properties.random_uuid, %(hogql_val_2)s) GROUP BY event LIMIT 100", ) self.assertEqual(response.results, [(2, "random event")]) @@ -55,11 +55,11 @@ def test_query(self): ) self.assertEqual( response.clickhouse, - f"SELECT count, event FROM (SELECT count(*) AS count, event FROM events WHERE and(equals(team_id, {self.team.id}), equals(replaceRegexpAll(JSONExtractRaw(properties, %(hogql_val_0)s), '^\"|\"$', 
''), %(hogql_val_1)s)) GROUP BY event) GROUP BY count, event LIMIT 1000", + f"SELECT count, event FROM (SELECT count(*) AS count, event FROM events WHERE and(equals(team_id, {self.team.id}), equals(replaceRegexpAll(JSONExtractRaw(properties, %(hogql_val_0)s), '^\"|\"$', ''), %(hogql_val_1)s)) GROUP BY event) GROUP BY count, event LIMIT 100", ) self.assertEqual( response.hogql, - "SELECT count, event FROM (SELECT count() AS count, event FROM events WHERE equals(properties.random_uuid, %(hogql_val_2)s) GROUP BY event) GROUP BY count, event LIMIT 1000", + "SELECT count, event FROM (SELECT count() AS count, event FROM events WHERE equals(properties.random_uuid, %(hogql_val_2)s) GROUP BY event) GROUP BY count, event LIMIT 100", ) self.assertEqual(response.results, [(2, "random event")]) @@ -70,11 +70,11 @@ def test_query(self): ) self.assertEqual( response.clickhouse, - f"SELECT c.count, c.event FROM (SELECT count(*) AS count, event FROM events WHERE and(equals(team_id, {self.team.id}), equals(replaceRegexpAll(JSONExtractRaw(properties, %(hogql_val_0)s), '^\"|\"$', ''), %(hogql_val_1)s)) GROUP BY event) AS c GROUP BY c.count, c.event LIMIT 1000", + f"SELECT c.count, c.event FROM (SELECT count(*) AS count, event FROM events WHERE and(equals(team_id, {self.team.id}), equals(replaceRegexpAll(JSONExtractRaw(properties, %(hogql_val_0)s), '^\"|\"$', ''), %(hogql_val_1)s)) GROUP BY event) AS c GROUP BY c.count, c.event LIMIT 100", ) self.assertEqual( response.hogql, - "SELECT count, event FROM (SELECT count() AS count, event FROM events WHERE equals(properties.random_uuid, %(hogql_val_2)s) GROUP BY event) AS c GROUP BY count, event LIMIT 1000", + "SELECT count, event FROM (SELECT count() AS count, event FROM events WHERE equals(properties.random_uuid, %(hogql_val_2)s) GROUP BY event) AS c GROUP BY count, event LIMIT 100", ) self.assertEqual(response.results, [(2, "random event")]) @@ -85,11 +85,11 @@ def test_query(self): ) self.assertEqual( response.clickhouse, - f"SELECT DISTINCT replaceRegexpAll(JSONExtractRaw(properties, %(hogql_val_0)s), '^\"|\"$', '') FROM person WHERE and(equals(team_id, {self.team.id}), equals(replaceRegexpAll(JSONExtractRaw(properties, %(hogql_val_1)s), '^\"|\"$', ''), %(hogql_val_2)s)) LIMIT 1000", + f"SELECT DISTINCT replaceRegexpAll(JSONExtractRaw(properties, %(hogql_val_0)s), '^\"|\"$', '') FROM person WHERE and(equals(team_id, {self.team.id}), equals(replaceRegexpAll(JSONExtractRaw(properties, %(hogql_val_1)s), '^\"|\"$', ''), %(hogql_val_2)s)) LIMIT 100", ) self.assertEqual( response.hogql, - "SELECT DISTINCT properties.email FROM person WHERE equals(properties.random_uuid, %(hogql_val_3)s) LIMIT 1000", + "SELECT DISTINCT properties.email FROM person WHERE equals(properties.random_uuid, %(hogql_val_3)s) LIMIT 100", ) self.assertEqual(response.results, [("tim@posthog.com",)]) @@ -99,11 +99,11 @@ def test_query(self): ) self.assertEqual( response.clickhouse, - f"SELECT DISTINCT person_id, distinct_id FROM person_distinct_id2 WHERE equals(team_id, {self.team.id}) LIMIT 1000", + f"SELECT DISTINCT person_id, distinct_id FROM person_distinct_id2 WHERE equals(team_id, {self.team.id}) LIMIT 100", ) self.assertEqual( response.hogql, - "SELECT DISTINCT person_id, distinct_id FROM person_distinct_id2 LIMIT 1000", + "SELECT DISTINCT person_id, distinct_id FROM person_distinct_id2 LIMIT 100", ) self.assertTrue(len(response.results) > 0) @@ -124,11 +124,11 @@ def test_query_joins_simple(self): ) self.assertEqual( response.clickhouse, - f"SELECT e.event, e.timestamp, pdi.distinct_id, 
p.id, replaceRegexpAll(JSONExtractRaw(p.properties, %(hogql_val_0)s), '^\"|\"$', '') FROM events AS e LEFT JOIN person_distinct_id2 AS pdi ON equals(pdi.distinct_id, e.distinct_id) LEFT JOIN person AS p ON equals(p.id, pdi.person_id) WHERE and(equals(p.team_id, {self.team.id}), equals(pdi.team_id, {self.team.id}), equals(e.team_id, {self.team.id})) LIMIT 1000", + f"SELECT e.event, e.timestamp, pdi.distinct_id, p.id, replaceRegexpAll(JSONExtractRaw(p.properties, %(hogql_val_0)s), '^\"|\"$', '') FROM events AS e LEFT JOIN person_distinct_id2 AS pdi ON equals(pdi.distinct_id, e.distinct_id) LEFT JOIN person AS p ON equals(p.id, pdi.person_id) WHERE and(equals(p.team_id, {self.team.id}), equals(pdi.team_id, {self.team.id}), equals(e.team_id, {self.team.id})) LIMIT 100", ) self.assertEqual( response.hogql, - "SELECT event, timestamp, pdi.distinct_id, p.id, p.properties.email FROM events AS e LEFT JOIN person_distinct_id2 AS pdi ON equals(pdi.distinct_id, e.distinct_id) LEFT JOIN person AS p ON equals(p.id, pdi.person_id) LIMIT 1000", + "SELECT event, timestamp, pdi.distinct_id, p.id, p.properties.email FROM events AS e LEFT JOIN person_distinct_id2 AS pdi ON equals(pdi.distinct_id, e.distinct_id) LEFT JOIN person AS p ON equals(p.id, pdi.person_id) LIMIT 100", ) self.assertEqual(response.results[0][0], "random event") self.assertEqual(response.results[0][2], "bla") @@ -155,11 +155,11 @@ def test_query_joins_pdi(self): self.assertEqual( response.clickhouse, - f"SELECT e.event, e.timestamp, pdi.person_id FROM events AS e INNER JOIN (SELECT distinct_id, argMax(person_distinct_id2.person_id, version) AS person_id FROM person_distinct_id2 WHERE equals(team_id, {self.team.id}) GROUP BY distinct_id HAVING equals(argMax(is_deleted, version), 0)) AS pdi ON equals(e.distinct_id, pdi.distinct_id) WHERE equals(e.team_id, {self.team.id}) LIMIT 1000", + f"SELECT e.event, e.timestamp, pdi.person_id FROM events AS e INNER JOIN (SELECT distinct_id, argMax(person_distinct_id2.person_id, version) AS person_id FROM person_distinct_id2 WHERE equals(team_id, {self.team.id}) GROUP BY distinct_id HAVING equals(argMax(is_deleted, version), 0)) AS pdi ON equals(e.distinct_id, pdi.distinct_id) WHERE equals(e.team_id, {self.team.id}) LIMIT 100", ) self.assertEqual( response.hogql, - "SELECT event, timestamp, pdi.person_id FROM events AS e INNER JOIN (SELECT distinct_id, argMax(person_id, version) AS person_id FROM person_distinct_id2 GROUP BY distinct_id HAVING equals(argMax(is_deleted, version), 0)) AS pdi ON equals(e.distinct_id, pdi.distinct_id) LIMIT 1000", + "SELECT event, timestamp, pdi.person_id FROM events AS e INNER JOIN (SELECT distinct_id, argMax(person_id, version) AS person_id FROM person_distinct_id2 GROUP BY distinct_id HAVING equals(argMax(is_deleted, version), 0)) AS pdi ON equals(e.distinct_id, pdi.distinct_id) LIMIT 100", ) self.assertTrue(len(response.results) > 0) diff --git a/posthog/models/event/events_query.py b/posthog/models/event/events_query.py new file mode 100644 index 0000000000000..b5ab14a949d0b --- /dev/null +++ b/posthog/models/event/events_query.py @@ -0,0 +1,173 @@ +import json +from datetime import timedelta +from typing import List, Optional, Tuple + +from dateutil.parser import isoparse +from django.utils.timezone import now + +from posthog.api.element import ElementSerializer +from posthog.api.utils import get_pk_or_uuid +from posthog.clickhouse.client.connection import Workload +from posthog.hogql import ast +from posthog.hogql.constants import SELECT_STAR_FROM_EVENTS_FIELDS +from 
posthog.hogql.parser import parse_expr, parse_order_expr +from posthog.hogql.property import action_to_expr, has_aggregation, property_to_expr +from posthog.hogql.query import execute_hogql_query +from posthog.models import Action, Person, Team +from posthog.models.element import chain_to_elements +from posthog.schema import EventsQuery, EventsQueryResponse +from posthog.utils import relative_date_parse + +QUERY_DEFAULT_LIMIT = 100 +QUERY_DEFAULT_EXPORT_LIMIT = 3_500 +QUERY_MAXIMUM_LIMIT = 100_000 + + +def run_events_query( + team: Team, + query: EventsQuery, +) -> EventsQueryResponse: + # Note: This code is inefficient and problematic, see https://github.com/PostHog/posthog/issues/13485 for details. + # To isolate its impact from rest of the queries its queries are run on different nodes as part of "offline" workloads. + + # limit & offset + # adding +1 to the limit to check if there's a "next page" after the requested results + limit = min(QUERY_MAXIMUM_LIMIT, QUERY_DEFAULT_LIMIT if query.limit is None else query.limit) + 1 + offset = 0 if query.offset is None else query.offset + + # columns & group_by + select_input_raw = ["*"] if len(query.select) == 0 else query.select + select_input: List[str] = [] + for col in select_input_raw: + # Selecting a "*" expands the list of columns, resulting in a table that's not what we asked for. + # Instead, ask for a tuple with all the columns we want. Later transform this back into a dict. + if col == "*": + select_input.append(f"tuple({', '.join(SELECT_STAR_FROM_EVENTS_FIELDS)})") + elif col == "person": + # Select just enough person fields to show the name/email in the UI. Put it back into a dict later. + select_input.append( + "tuple(distinct_id, person_id, person.created_at, person.properties.name, person.properties.email)" + ) + else: + select_input.append(col) + + select: List[ast.Expr] = [parse_expr(column) for column in select_input] + group_by: List[ast.Expr] = [column for column in select if not has_aggregation(column)] + aggregations: List[ast.Expr] = [column for column in select if has_aggregation(column)] + has_any_aggregation = len(aggregations) > 0 + + # filters + where_input = query.where or [] + where_exprs = [parse_expr(expr) for expr in where_input] + if query.properties: + where_exprs.extend(property_to_expr(property) for property in query.properties) + if query.fixedProperties: + where_exprs.extend(property_to_expr(property) for property in query.fixedProperties) + if query.event: + where_exprs.append(parse_expr("event = {event}", {"event": ast.Constant(value=query.event)})) + if query.actionId: + try: + action = Action.objects.get(pk=query.actionId, team_id=team.pk) + except Action.DoesNotExist: + raise Exception("Action does not exist") + if action.steps.count() == 0: + raise Exception("Action does not have any match groups") + where_exprs.append(action_to_expr(action)) + if query.personId: + person: Optional[Person] = get_pk_or_uuid(Person.objects.all(), query.personId).first() + distinct_ids = person.distinct_ids if person is not None else [] + ids_list = list(map(str, distinct_ids)) + where_exprs.append(parse_expr("distinct_id in {list}", {"list": ast.Constant(value=ids_list)})) + + # prevent accidentally future events from being visible by default + before = query.before or (now() + timedelta(seconds=5)).isoformat() + try: + timestamp = isoparse(before).strftime("%Y-%m-%d %H:%M:%S.%f") + except ValueError: + timestamp = relative_date_parse(before).strftime("%Y-%m-%d %H:%M:%S.%f") + where_exprs.append(parse_expr("timestamp < 
{timestamp}", {"timestamp": ast.Constant(value=timestamp)})) + + # limit to the last 24h by default + after = query.after or "-24h" + try: + timestamp = isoparse(after).strftime("%Y-%m-%d %H:%M:%S.%f") + except ValueError: + timestamp = relative_date_parse(after).strftime("%Y-%m-%d %H:%M:%S.%f") + where_exprs.append(parse_expr("timestamp > {timestamp}", {"timestamp": ast.Constant(value=timestamp)})) + + # where & having + where_list = [expr for expr in where_exprs if not has_aggregation(expr)] + where = ast.And(exprs=where_list) if len(where_list) > 0 else None + having_list = [expr for expr in where_exprs if has_aggregation(expr)] + having = ast.And(exprs=having_list) if len(having_list) > 0 else None + + # order by + if query.orderBy is not None: + order_by = [parse_order_expr(column) for column in query.orderBy] + elif "count()" in select_input: + order_by = [ast.OrderExpr(expr=parse_expr("count()"), order="DESC")] + elif len(aggregations) > 0: + order_by = [ast.OrderExpr(expr=aggregations[0], order="DESC")] + elif "timestamp" in select_input: + order_by = [ast.OrderExpr(expr=ast.Field(chain=["timestamp"]), order="DESC")] + elif len(select) > 0: + order_by = [ast.OrderExpr(expr=select[0], order="ASC")] + else: + order_by = [] + + stmt = ast.SelectQuery( + select=select, + select_from=ast.JoinExpr(table=ast.Field(chain=["events"])), + where=where, + having=having, + group_by=group_by if has_any_aggregation else None, + order_by=order_by, + limit=ast.Constant(value=limit), + offset=ast.Constant(value=offset), + ) + + query_result = execute_hogql_query(query=stmt, team=team, workload=Workload.OFFLINE, query_type="EventsQuery") + + # Convert star field from tuple to dict in each result + if "*" in select_input_raw: + star_idx = select_input_raw.index("*") + for index, result in enumerate(query_result.results): + query_result.results[index] = list(result) + select = result[star_idx] + new_result = dict(zip(SELECT_STAR_FROM_EVENTS_FIELDS, select)) + new_result["properties"] = json.loads(new_result["properties"]) + if new_result["elements_chain"]: + new_result["elements"] = ElementSerializer( + chain_to_elements(new_result["elements_chain"]), many=True + ).data + new_result["person"] = { + "id": new_result["person_id"], + "created_at": new_result["person.created_at"], + "properties": json.loads(new_result["person.properties"]), + "distinct_ids": [new_result["distinct_id"]], + } + del new_result["person_id"] + del new_result["person.created_at"] + del new_result["person.properties"] + query_result.results[index][star_idx] = new_result + + # Convert person field from tuple to dict in each result + if "person" in select_input_raw: + person_idx = select_input_raw.index("person") + for index, result in enumerate(query_result.results): + person_tuple: Tuple = result[person_idx] + query_result.results[index] = list(result) + query_result.results[index][person_idx] = { + "id": person_tuple[1], + "created_at": person_tuple[2], + "properties": {"name": person_tuple[3], "email": person_tuple[4]}, + "distinct_ids": [person_tuple[0]], + } + + received_extra_row = len(query_result.results) == limit # limit was +=1'd above + return EventsQueryResponse( + results=query_result.results[: limit - 1] if received_extra_row else query_result.results, + columns=select_input_raw, + types=[type for _, type in query_result.types], + hasMore=received_extra_row, + ) diff --git a/posthog/models/event/query_event_list.py b/posthog/models/event/query_event_list.py index c84358665f6a3..e00259ea59436 100644 --- 
a/posthog/models/event/query_event_list.py +++ b/posthog/models/event/query_event_list.py @@ -1,34 +1,23 @@ -import json from datetime import timedelta -from typing import Any, Dict, List, Optional, Tuple, Union +from typing import Dict, List, Optional, Tuple, Union from dateutil.parser import isoparse from django.utils.timezone import now from posthog.api.utils import get_pk_or_uuid from posthog.clickhouse.client.connection import Workload -from posthog.hogql.constants import SELECT_STAR_FROM_EVENTS_FIELDS from posthog.hogql.context import HogQLContext -from posthog.hogql.hogql import translate_hogql from posthog.models import Action, Filter, Person, Team from posthog.models.action.util import format_action_filter -from posthog.models.element import chain_to_elements +from posthog.models.event.events_query import QUERY_DEFAULT_LIMIT from posthog.models.event.sql import ( SELECT_EVENT_BY_TEAM_AND_CONDITIONS_FILTERS_SQL, SELECT_EVENT_BY_TEAM_AND_CONDITIONS_SQL, - SELECT_EVENT_FIELDS_BY_TEAM_AND_CONDITIONS_FILTERS_PART, ) -from posthog.models.event.util import ElementSerializer from posthog.models.property.util import parse_prop_grouped_clauses -from posthog.queries.insight import insight_query_with_columns, insight_sync_execute -from posthog.schema import EventsQuery, EventsQueryResponse +from posthog.queries.insight import insight_query_with_columns from posthog.utils import relative_date_parse -# Return at most this number of events in CSV export -QUERY_DEFAULT_LIMIT = 100 -QUERY_DEFAULT_EXPORT_LIMIT = 3_500 -QUERY_MAXIMUM_LIMIT = 100_000 - def determine_event_conditions(conditions: Dict[str, Union[None, str, List[str]]]) -> Tuple[str, Dict]: result = "" @@ -137,157 +126,3 @@ def query_events_list( query_type="events_list", workload=Workload.OFFLINE, ) - - -def run_events_query( - team: Team, - query: EventsQuery, -) -> EventsQueryResponse: - # Note: This code is inefficient and problematic, see https://github.com/PostHog/posthog/issues/13485 for details. - # To isolate its impact from rest of the queries its queries are run on different nodes as part of "offline" workloads. - hogql_context = HogQLContext(within_non_hogql_query=True) - - # adding +1 to the limit to check if there's a "next page" after the requested results - limit = min(QUERY_MAXIMUM_LIMIT, QUERY_DEFAULT_LIMIT if query.limit is None else query.limit) + 1 - offset = 0 if query.offset is None else query.offset - action_id = query.actionId - person_id = query.personId - order_by = query.orderBy - select = query.select - where = query.where.copy() if query.where else [] # Shallow-copy since we'll be modifying it - event = query.event - - classic_properties = [] - classic_properties.extend(query.fixedProperties or []) - classic_properties.extend(query.properties or []) - - # Split HogQL properties from the rest, as "where" supports filtering by "having" aggregations like "count() > 2" - properties = [] - for prop in classic_properties: - if prop.type == "hogql": - where.append(str(prop.key)) - else: - properties.append(prop.dict()) - - limit_sql = "LIMIT %(limit)s" - if offset > 0: - limit_sql += " OFFSET %(offset)s" - - conditions, condition_params = determine_event_conditions( - { - # Don't show events that have been ingested with timestamps in the future. Would break new event polling. 
- "after": query.after, - "before": query.before or (now() + timedelta(seconds=5)).isoformat(), - "person_id": person_id, - "event": event, - } - ) - filter = Filter(team=team, data={"properties": properties}, hogql_context=hogql_context) - prop_filters, prop_filter_params = parse_prop_grouped_clauses( - team_id=team.pk, property_group=filter.property_groups, has_person_id_joined=False, hogql_context=hogql_context - ) - - if action_id: - try: - action = Action.objects.get(pk=action_id, team_id=team.pk) - except Action.DoesNotExist: - raise Exception("Action does not exist") - if action.steps.count() == 0: - raise Exception("Action does not have any match groups") - - action_query, params = format_action_filter(team_id=team.pk, action=action, hogql_context=hogql_context) - prop_filters += " AND {}".format(action_query) - prop_filter_params = {**prop_filter_params, **params} - - select_columns: List[str] = [] - group_by_columns: List[str] = [] - where_filters: List[str] = [] - having_filters: List[str] = [] - order_by_list: List[str] = [] - - if len(select) == 0: - select = ["*"] - - for expr in select: - hogql_context.found_aggregation = False - if expr == "*": - expr = f'tuple({", ".join(SELECT_STAR_FROM_EVENTS_FIELDS)})' - clickhouse_sql = translate_hogql(expr, hogql_context) - select_columns.append(clickhouse_sql) - if not hogql_context.found_aggregation: - group_by_columns.append(clickhouse_sql) - - for expr in where or []: - hogql_context.found_aggregation = False - clickhouse_sql = translate_hogql(expr, hogql_context) - if hogql_context.found_aggregation: - having_filters.append(clickhouse_sql) - else: - where_filters.append(clickhouse_sql) - - if order_by: - for fragment in order_by: - order_direction = "ASC" - if fragment.startswith("-"): - order_direction = "DESC" - fragment = fragment[1:] - order_by_list.append(translate_hogql(fragment, hogql_context) + " " + order_direction) - else: - if "count(*)" in select_columns or "count()" in select_columns: - order_by_list.append("count() DESC") - elif "timestamp" in select_columns: - order_by_list.append("timestamp DESC") - else: - order_by_list.append(select_columns[0] + " ASC") - - if select_columns == group_by_columns: - group_by_columns = [] - - results, types = insight_sync_execute( - SELECT_EVENT_FIELDS_BY_TEAM_AND_CONDITIONS_FILTERS_PART.format( - columns=", ".join(select_columns), - conditions=conditions, - filters=prop_filters, - where="AND {}".format(" AND ".join(where_filters)) if where_filters else "", - group="GROUP BY {}".format(", ".join(group_by_columns)) if group_by_columns else "", - having="HAVING {}".format(" AND ".join(having_filters)) if having_filters else "", - order="ORDER BY {}".format(", ".join(order_by_list)) if order_by_list else "", - limit=limit_sql, - ), - { - "team_id": team.pk, - "limit": limit, - "offset": offset, - **condition_params, - **prop_filter_params, - **hogql_context.values, - }, - with_column_types=True, - query_type="events_list", - workload=Workload.OFFLINE, - filter=filter, - ) - - # Convert star field from tuple to dict in each result - if "*" in select: - star = select.index("*") - for index, result in enumerate(results): - results[index] = list(result) - results[index][star] = convert_star_select_to_dict(result[star]) - - received_extra_row = len(results) == limit # limit was +=1'd above - - return EventsQueryResponse( - results=results[: limit - 1] if received_extra_row else results, - columns=select, - types=[type for _, type in types], - hasMore=received_extra_row, - ) - - -def 
convert_star_select_to_dict(select: Tuple[Any]) -> Dict[str, Any]: - new_result = dict(zip(SELECT_STAR_FROM_EVENTS_FIELDS, select)) - new_result["properties"] = json.loads(new_result["properties"]) - if new_result["elements_chain"]: - new_result["elements"] = ElementSerializer(chain_to_elements(new_result["elements_chain"]), many=True).data - return new_result diff --git a/posthog/models/event/sql.py b/posthog/models/event/sql.py index 2306fdc22a0a3..2fc981d2bf604 100644 --- a/posthog/models/event/sql.py +++ b/posthog/models/event/sql.py @@ -310,20 +310,6 @@ ORDER BY timestamp {order} {limit} """ -SELECT_EVENT_FIELDS_BY_TEAM_AND_CONDITIONS_FILTERS_PART = """ -SELECT {columns} -FROM events -WHERE -team_id = %(team_id)s -{conditions} -{filters} -{where} -{group} -{having} -{order} -{limit} -""" - SELECT_ONE_EVENT_SQL = """ SELECT uuid,
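The new `run_events_query` over-fetches a single row past the requested limit to decide `hasMore`. Below is a minimal sketch of that pattern, assuming a hypothetical `fetch_rows` callable in place of `execute_hogql_query`; it is an illustration of the approach, not the PR's implementation.

```python
# Minimal sketch of the limit+1 pagination used in run_events_query.
# fetch_rows is a hypothetical stand-in for execute_hogql_query.
from typing import Any, Callable, List, Optional, Tuple

QUERY_DEFAULT_LIMIT = 100
QUERY_MAXIMUM_LIMIT = 100_000


def paginate(
    fetch_rows: Callable[[int, int], List[Any]],
    requested_limit: Optional[int] = None,
    offset: int = 0,
) -> Tuple[List[Any], bool]:
    # Clamp the requested limit, then ask for one extra row to detect a next page.
    limit = min(QUERY_MAXIMUM_LIMIT, QUERY_DEFAULT_LIMIT if requested_limit is None else requested_limit) + 1
    rows = fetch_rows(limit, offset)
    has_more = len(rows) == limit  # the extra row came back, so another page exists
    return (rows[: limit - 1] if has_more else rows), has_more


# Example: a fake source with 250 rows yields 100 rows and has_more=True.
rows, has_more = paginate(lambda limit, offset: list(range(250))[offset : offset + limit])
assert len(rows) == 100 and has_more
```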
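With this change the backend accepts `orderBy` entries in the `expr [ASC|DESC]` form (e.g. `timestamp DESC`) and hands them to `parse_order_expr`, replacing the old `-`-prefix convention. The sketch below only illustrates how such an entry splits into an expression and a direction; the real parsing is done by HogQL's `parse_order_expr`, not by this hypothetical helper.

```python
# Simplified illustration of the "expr [ASC|DESC]" orderBy form; the actual
# backend delegates to posthog.hogql.parser.parse_order_expr rather than this helper.
from typing import Tuple


def split_order_entry(entry: str) -> Tuple[str, str]:
    stripped = entry.strip()
    upper = stripped.upper()
    for direction in ("DESC", "ASC"):
        if upper.endswith(" " + direction):
            return stripped[: -(len(direction) + 1)].rstrip(), direction
    return stripped, "ASC"  # no suffix means ascending


assert split_order_entry("timestamp DESC") == ("timestamp", "DESC")
assert split_order_entry("count()") == ("count()", "ASC")
```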
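`run_events_query` selects `*` as a `tuple(...)` of known columns and then zips the result row back into a dict, decoding the JSON-encoded `properties` column. A reduced sketch of that post-processing follows; `STAR_FIELDS` here is a hypothetical subset standing in for the real `SELECT_STAR_FROM_EVENTS_FIELDS` list.

```python
# Reduced sketch of the "*" post-processing: the tuple selected for "*" is
# zipped back into a dict and the JSON-encoded properties column is decoded.
# STAR_FIELDS is a hypothetical subset; the real code uses SELECT_STAR_FROM_EVENTS_FIELDS.
import json
from typing import Any, Dict, Sequence, Tuple

STAR_FIELDS: Sequence[str] = ("uuid", "event", "properties", "timestamp", "distinct_id", "elements_chain")


def star_tuple_to_dict(row: Tuple[Any, ...], fields: Sequence[str] = STAR_FIELDS) -> Dict[str, Any]:
    result = dict(zip(fields, row))
    result["properties"] = json.loads(result["properties"])
    return result


example = star_tuple_to_dict(("uuid-1", "$pageview", '{"$browser": "Chrome"}', "2023-03-01T00:00:00Z", "user-1", ""))
assert example["properties"]["$browser"] == "Chrome"
```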