Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: error clustering UI #20958

Merged
merged 18 commits into from
Mar 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 27 additions & 13 deletions ee/session_recordings/ai/error_clustering.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
from prometheus_client import Histogram
from django.conf import settings
from posthog.clickhouse.client import sync_execute
from posthog.models.team import Team
from posthog.models import Team, User
from sklearn.cluster import DBSCAN
import pandas as pd
import numpy as np
from posthog.session_recordings.models.session_recording_event import SessionRecordingViewed

CLUSTER_REPLAY_ERRORS_TIMING = Histogram(
"posthog_session_recordings_cluster_replay_errors",
Expand All @@ -22,7 +24,7 @@
DBSCAN_MIN_SAMPLES = settings.REPLAY_EMBEDDINGS_CLUSTERING_DBSCAN_MIN_SAMPLES


def error_clustering(team: Team):
def error_clustering(team: Team, user: User):
results = fetch_error_embeddings(team.pk)

if not results:
Expand All @@ -34,7 +36,7 @@ def error_clustering(team: Team):

CLUSTER_REPLAY_ERRORS_CLUSTER_COUNT.labels(team_id=team.pk).observe(df["cluster"].nunique())

return construct_response(df)
return construct_response(df, team, user)


def fetch_error_embeddings(team_id: int):
Expand Down Expand Up @@ -64,13 +66,25 @@ def cluster_embeddings(embeddings):
return dbscan.labels_


def construct_response(df):
return [
{
"cluster": cluster,
"samples": rows.head(n=DBSCAN_MIN_SAMPLES)[["session_id", "input"]].to_dict("records"),
"occurrences": rows.size,
"unique_sessions": rows["session_id"].count(),
}
for cluster, rows in df.groupby("cluster")
]
def construct_response(df: pd.DataFrame, team: Team, user: User):
    """Summarise clustered errors into the list returned to the client.

    Args:
        df: One row per error occurrence. This function reads the
            "cluster", "session_id" and "input" columns.
        team: Team used to scope the viewed-recordings lookup.
        user: User whose viewed recordings are counted.

    Returns:
        A list with one dict per cluster holding the cluster label, a single
        randomly chosen sample (``session_id`` and ``error``), the distinct
        session ids, the number of occurrences (rows), the number of distinct
        sessions, and how many of those sessions the user has viewed.
    """
    # NOTE(review): the IN (...) list holds one entry per distinct session id
    # in df. Reviewers asked whether it could become too long for Postgres and
    # accepted it because the query is scoped to a single team and user and
    # the feature is not generally released; the upper bound is untested.
    viewed_session_ids = set(
        SessionRecordingViewed.objects.filter(team=team, user=user, session_id__in=df["session_id"].unique())
        .values_list("session_id", flat=True)
        .distinct()
    )

    clusters = []
    for cluster, rows in df.groupby("cluster"):
        session_ids = rows["session_id"].unique()
        # to_dict("records") returns a list; the client reads `sample.error`
        # and `sample.session_id`, so hand back the single record itself.
        sample = rows.sample(n=1)[["session_id", "input"]].rename(columns={"input": "error"}).to_dict("records")[0]
        clusters.append(
            {
                "cluster": cluster,
                "sample": sample,
                "session_ids": session_ids,
                # DataFrame.size is rows * columns; occurrences is a row count.
                "occurrences": len(rows),
                "unique_sessions": len(session_ids),
                "viewed": sum(1 for session_id in session_ids if session_id in viewed_session_ids),
            }
        )
    return clusters
158 changes: 109 additions & 49 deletions frontend/src/scenes/session-recordings/errors/SessionRecordingErrors.tsx
Original file line number Diff line number Diff line change
@@ -1,13 +1,18 @@
import { IconFeatures } from '@posthog/icons'
import { LemonButton, LemonCollapse, Spinner } from '@posthog/lemon-ui'
import { LemonButton, LemonTable, LemonTabs, Spinner } from '@posthog/lemon-ui'
import { useActions, useValues } from 'kea'
import { JSONViewer } from 'lib/components/JSONViewer'
import { useState } from 'react'
import { urls } from 'scenes/urls'

import { ErrorClusterSample } from '~/types'

import { SessionPlayerModal } from '../player/modal/SessionPlayerModal'
import { sessionPlayerModalLogic } from '../player/modal/sessionPlayerModalLogic'
import { sessionRecordingErrorsLogic } from './sessionRecordingErrorsLogic'

const MAX_TITLE_LENGTH = 75

export function SessionRecordingErrors(): JSX.Element {
const { openSessionPlayer } = useActions(sessionPlayerModalLogic)
const { errors, errorsLoading } = useValues(sessionRecordingErrorsLogic)
const { loadErrorClusters } = useActions(sessionRecordingErrorsLogic)

Expand All @@ -24,57 +29,112 @@ export function SessionRecordingErrors(): JSX.Element {
}

return (
<LemonCollapse
panels={errors.map((error) => ({
key: error.cluster,
header: (
<ErrorPanelHeader
occurrenceCount={error.occurrences}
sessionCount={error.unique_sessions}
example={error.samples[0]}
/>
),
content: <ErrorPanelContent samples={error.samples} />,
}))}
/>
<>
<LemonTable
columns={[
{
title: 'Error',
dataIndex: 'cluster',
render: (_, cluster) => {
const displayTitle = parseTitle(cluster.sample.error)
return (
<div title={displayTitle} className="font-semibold text-sm text-default line-clamp-1">
{displayTitle}
</div>
)
},
width: '50%',
},
{
title: 'Occurrences',
dataIndex: 'occurrences',
sorter: (a, b) => a.occurrences - b.occurrences,
},
{
title: 'Sessions',
dataIndex: 'unique_sessions',
sorter: (a, b) => a.unique_sessions - b.unique_sessions,
},
{
title: 'Viewed',
tooltip: "How many of these you've already viewed",
dataIndex: 'viewed',
render: function Render(_, cluster) {
return `${((cluster.viewed / cluster.unique_sessions) * 100).toFixed(0)}%`
},
sorter: (a, b) => a.viewed / a.unique_sessions - b.viewed / b.unique_sessions,
},
{
title: 'Actions',
render: function Render(_, cluster) {
return (
<LemonButton
to={urls.replaySingle(cluster.sample.session_id)}
onClick={(e) => {
e.preventDefault()
openSessionPlayer({ id: cluster.sample.session_id })
}}
className="p-2 whitespace-nowrap"
type="primary"
>
Watch example
</LemonButton>
)
},
},
]}
dataSource={errors}
expandable={{ expandedRowRender: (cluster) => <ExpandedError error={cluster.sample.error} /> }}
/>
<SessionPlayerModal />
</>
)
}

const ErrorPanelHeader = ({
occurrenceCount,
sessionCount,
example,
}: {
occurrenceCount: number
sessionCount: number
example: ErrorClusterSample
}): JSX.Element => {
return (
<div className="w-full flex justify-between items-center gap-2">
<span className="truncate">{example.input}</span>
<div className="flex items-center gap-2">
<span className="text-muted">
{occurrenceCount} occurrences / {sessionCount} sessions
</span>
<LemonButton type="secondary" to={urls.replaySingle(example.session_id)}>
Watch recording
</LemonButton>
</div>
// Expanded-row content for one error: JSON errors get a JSON/Raw tab switcher,
// anything else is shown as plain text under an "Example error" heading.
const ExpandedError = ({ error }: { error: string }): JSX.Element => {
    const hasJson = isJSON(error)
    // The hook has to run on every render, so it stays above the early return.
    const [activeTab, setActiveTab] = useState(hasJson ? 'json' : 'raw')

    if (!hasJson) {
        return (
            <div className="py-3 space-y-1">
                <h3>Example error</h3>
                <div className="whitespace-pre-line">{error}</div>
            </div>
        )
    }

    const jsonTab = {
        key: 'json',
        label: 'JSON',
        content: <JSONViewer src={JSON.parse(error)} style={{ whiteSpace: 'pre-wrap' }} />,
    }
    const rawTab = {
        key: 'raw',
        label: 'Raw',
        content: <span className="whitespace-pre-line">{error}</span>,
    }

    return (
        <div className="pb-3">
            <LemonTabs activeKey={activeTab} onChange={setActiveTab} tabs={[jsonTab, rawTab]} />
        </div>
    )
}

const ErrorPanelContent = ({ samples }: { samples: ErrorClusterSample[] }): JSX.Element => {
return (
<div className="flex flex-col space-y-2">
{samples.map((error) => (
<div key={error.session_id} className="flex justify-between items-center">
<span>{error.input}</span>
<LemonButton type="secondary" to={urls.replaySingle(error.session_id)}>
Watch recording
</LemonButton>
</div>
))}
</div>
)
/** Reports whether `str` can be parsed by JSON.parse without throwing. */
function isJSON(str: string): boolean {
    let parses = true
    try {
        JSON.parse(str)
    } catch {
        // Any parse failure means the text is not valid JSON.
        parses = false
    }
    return parses
}

/**
 * Builds the single-line title shown for an error.
 *
 * If `error` is JSON with a non-empty string `error` property, that string is
 * used; otherwise the raw text is used. The result is the first line of that
 * text, trimmed and cut to MAX_TITLE_LENGTH characters.
 */
function parseTitle(error: string): string {
    // Guard against a non-string slipping through at runtime so `.split` below cannot throw.
    let input: string = typeof error === 'string' ? error : ''
    try {
        const parsedError = JSON.parse(error)
        // Only accept a string `error` field: a nested object or number has no
        // `.split` and previously crashed after the try block.
        if (parsedError && typeof parsedError.error === 'string' && parsedError.error) {
            input = parsedError.error
        }
    } catch {
        // Not JSON: keep the raw text.
    }

    return input.split('\n')[0].trim().substring(0, MAX_TITLE_LENGTH)
}
Original file line number Diff line number Diff line change
Expand Up @@ -237,8 +237,8 @@ function PinnedIndicator(): JSX.Element | null {
)
}

function ViewedIndicator(props: { viewed: boolean }): JSX.Element | null {
return !props.viewed ? (
function ViewedIndicator({ viewed }: { viewed: boolean }): JSX.Element | null {
return !viewed ? (
<Tooltip title="Indicates the recording has not been watched yet">
<div className="w-2 h-2 m-1 rounded-full bg-primary-3000" aria-label="unwatched-recording-label" />
</Tooltip>
Expand Down
7 changes: 3 additions & 4 deletions frontend/src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -901,13 +901,12 @@ export interface SessionRecordingsResponse {
has_next: boolean
}

export type ErrorClusterSample = { session_id: string; input: string }

type ErrorCluster = {
export type ErrorCluster = {
cluster: number
samples: ErrorClusterSample[]
sample: { session_id: string; error: string }
occurrences: number
unique_sessions: number
viewed: number
}
export type ErrorClusterResponse = ErrorCluster[] | null

Expand Down
2 changes: 1 addition & 1 deletion posthog/session_recordings/session_recording_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -603,7 +603,7 @@ def error_clusters(self, request: request.Request, **kwargs):
raise exceptions.ValidationError("clustered errors is not enabled for this user")

# Clustering will eventually be done during a scheduled background task
clusters = error_clustering(self.team)
clusters = error_clustering(self.team, user)

if clusters:
cache.set(cache_key, clusters, timeout=30)
Expand Down
Loading