-
Notifications
You must be signed in to change notification settings - Fork 13.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat(reports): execute as other than selenium user #21931
Changes from all commits
7734d1c
62b853c
87d1151
ad97e05
439beea
9c66a58
4e5d2a8
fbab953
b56b76c
7b820c6
adeaeb2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,10 +22,9 @@ | |
|
||
import pandas as pd | ||
from celery.exceptions import SoftTimeLimitExceeded | ||
from flask_appbuilder.security.sqla.models import User | ||
from sqlalchemy.orm import Session | ||
|
||
from superset import app, security_manager | ||
from superset import app | ||
from superset.commands.base import BaseCommand | ||
from superset.commands.exceptions import CommandException | ||
from superset.common.chart_data import ChartDataResultFormat, ChartDataResultType | ||
|
@@ -45,7 +44,6 @@ | |
ReportSchedulePreviousWorkingError, | ||
ReportScheduleScreenshotFailedError, | ||
ReportScheduleScreenshotTimeout, | ||
ReportScheduleSelleniumUserNotFoundError, | ||
ReportScheduleStateNotFoundError, | ||
ReportScheduleUnexpectedError, | ||
ReportScheduleWorkingTimeoutError, | ||
|
@@ -67,6 +65,7 @@ | |
from superset.reports.notifications import create_notification | ||
from superset.reports.notifications.base import NotificationContent | ||
from superset.reports.notifications.exceptions import NotificationError | ||
from superset.reports.utils import get_executor | ||
from superset.utils.celery import session_scope | ||
from superset.utils.core import HeaderDataType, override_user | ||
from superset.utils.csv import get_chart_csv_data, get_chart_dataframe | ||
|
@@ -77,13 +76,6 @@ | |
logger = logging.getLogger(__name__) | ||
|
||
|
||
def _get_user() -> User: | ||
user = security_manager.find_user(username=app.config["THUMBNAIL_SELENIUM_USER"]) | ||
if not user: | ||
raise ReportScheduleSelleniumUserNotFoundError() | ||
return user | ||
|
||
|
||
class BaseReportState: | ||
current_states: List[ReportState] = [] | ||
initial: bool = False | ||
|
@@ -182,11 +174,11 @@ def _get_url( | |
**kwargs, | ||
) | ||
|
||
# If we need to render dashboard in a specific sate, use stateful permalink | ||
# If we need to render dashboard in a specific state, use stateful permalink | ||
dashboard_state = self._report_schedule.extra.get("dashboard") | ||
if dashboard_state: | ||
permalink_key = CreateDashboardPermalinkCommand( | ||
dashboard_id=self._report_schedule.dashboard_id, | ||
dashboard_id=str(self._report_schedule.dashboard_id), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is Mypy not working as expected? Also why should the ID be a string? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Mypy actually picked this up. This is due to slugs and ids being used interchangeably in the codebase (I remember being frustrated about this when working on the permalink feature). In older dashboard and chart code this is usually referred to as |
||
state=dashboard_state, | ||
).run() | ||
return get_url_path("Superset.dashboard_permalink", key=permalink_key) | ||
|
@@ -206,7 +198,7 @@ def _get_screenshots(self) -> List[bytes]: | |
:raises: ReportScheduleScreenshotFailedError | ||
""" | ||
url = self._get_url() | ||
user = _get_user() | ||
user = get_executor(self._report_schedule) | ||
if self._report_schedule.chart: | ||
screenshot: Union[ChartScreenshot, DashboardScreenshot] = ChartScreenshot( | ||
url, | ||
|
@@ -236,16 +228,15 @@ def _get_screenshots(self) -> List[bytes]: | |
|
||
def _get_csv_data(self) -> bytes: | ||
url = self._get_url(result_format=ChartDataResultFormat.CSV) | ||
auth_cookies = machine_auth_provider_factory.instance.get_auth_cookies( | ||
_get_user() | ||
) | ||
user = get_executor(self._report_schedule) | ||
auth_cookies = machine_auth_provider_factory.instance.get_auth_cookies(user) | ||
|
||
if self._report_schedule.chart.query_context is None: | ||
logger.warning("No query context found, taking a screenshot to generate it") | ||
self._update_query_context() | ||
|
||
try: | ||
logger.info("Getting chart from %s", url) | ||
logger.info("Getting chart from %s as user %s", url, user.username) | ||
csv_data = get_chart_csv_data(url, auth_cookies) | ||
except SoftTimeLimitExceeded as ex: | ||
raise ReportScheduleCsvTimeout() from ex | ||
|
@@ -262,16 +253,15 @@ def _get_embedded_data(self) -> pd.DataFrame: | |
Return data as a Pandas dataframe, to embed in notifications as a table. | ||
""" | ||
url = self._get_url(result_format=ChartDataResultFormat.JSON) | ||
auth_cookies = machine_auth_provider_factory.instance.get_auth_cookies( | ||
_get_user() | ||
) | ||
user = get_executor(self._report_schedule) | ||
auth_cookies = machine_auth_provider_factory.instance.get_auth_cookies(user) | ||
|
||
if self._report_schedule.chart.query_context is None: | ||
logger.warning("No query context found, taking a screenshot to generate it") | ||
self._update_query_context() | ||
|
||
try: | ||
logger.info("Getting chart from %s", url) | ||
logger.info("Getting chart from %s as user %s", url, user.username) | ||
dataframe = get_chart_dataframe(url, auth_cookies) | ||
except SoftTimeLimitExceeded as ex: | ||
raise ReportScheduleDataFrameTimeout() from ex | ||
|
@@ -674,10 +664,16 @@ def __init__(self, task_id: str, model_id: int, scheduled_dttm: datetime): | |
def run(self) -> None: | ||
with session_scope(nullpool=True) as session: | ||
try: | ||
with override_user(_get_user()): | ||
self.validate(session=session) | ||
if not self._model: | ||
raise ReportScheduleExecuteUnexpectedError() | ||
self.validate(session=session) | ||
if not self._model: | ||
raise ReportScheduleExecuteUnexpectedError() | ||
user = get_executor(self._model) | ||
with override_user(user): | ||
logger.info( | ||
"Running report schedule %s as user %s", | ||
self._execution_id, | ||
user.username, | ||
) | ||
ReportScheduleStateMachine( | ||
session, self._execution_id, self._model, self._scheduled_dttm | ||
).run() | ||
|
@@ -695,6 +691,8 @@ def validate( # pylint: disable=arguments-differ | |
self._model_id, | ||
self._execution_id, | ||
) | ||
self._model = ReportScheduleDAO.find_by_id(self._model_id, session=session) | ||
self._model = ( | ||
session.query(ReportSchedule).filter_by(id=self._model_id).one_or_none() | ||
) | ||
if not self._model: | ||
raise ReportScheduleNotFoundError() |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
# Licensed to the Apache Software Foundation (ASF) under one | ||
# or more contributor license agreements. See the NOTICE file | ||
# distributed with this work for additional information | ||
# regarding copyright ownership. The ASF licenses this file | ||
# to you under the Apache License, Version 2.0 (the | ||
# "License"); you may not use this file except in compliance | ||
# with the License. You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, | ||
# software distributed under the License is distributed on an | ||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
# KIND, either express or implied. See the License for the | ||
# specific language governing permissions and limitations | ||
# under the License. | ||
|
||
from flask_appbuilder.security.sqla.models import User | ||
|
||
from superset import app, security_manager | ||
from superset.reports.commands.exceptions import ReportScheduleUserNotFoundError | ||
from superset.reports.models import ReportSchedule | ||
from superset.reports.types import ReportScheduleExecutor | ||
|
||
|
||
# pylint: disable=too-many-branches | ||
def get_executor(report_schedule: ReportSchedule) -> User: | ||
""" | ||
Extract the user that should be used to execute a report schedule as. | ||
|
||
:param report_schedule: The report to execute | ||
:return: User to execute the report as | ||
""" | ||
user_types = app.config["ALERT_REPORTS_EXECUTE_AS"] | ||
owners = report_schedule.owners | ||
owner_dict = {owner.id: owner for owner in owners} | ||
for user_type in user_types: | ||
if user_type == ReportScheduleExecutor.SELENIUM: | ||
username = app.config["THUMBNAIL_SELENIUM_USER"] | ||
if username and (user := security_manager.find_user(username=username)): | ||
return user | ||
if user_type == ReportScheduleExecutor.CREATOR_OWNER: | ||
if (user := report_schedule.created_by) and ( | ||
owner := owner_dict.get(user.id) | ||
): | ||
return owner | ||
if user_type == ReportScheduleExecutor.CREATOR: | ||
if user := report_schedule.created_by: | ||
return user | ||
if user_type == ReportScheduleExecutor.MODIFIER_OWNER: | ||
if (user := report_schedule.changed_by) and ( | ||
owner := owner_dict.get(user.id) | ||
): | ||
return owner | ||
if user_type == ReportScheduleExecutor.MODIFIER: | ||
if user := report_schedule.changed_by: | ||
return user | ||
if user_type == ReportScheduleExecutor.OWNER: | ||
owners = report_schedule.owners | ||
if len(owners) == 1: | ||
return owners[0] | ||
if len(owners) > 1: | ||
if modifier := report_schedule.changed_by: | ||
if modifier and (user := owner_dict.get(modifier.id)): | ||
return user | ||
if creator := report_schedule.created_by: | ||
if creator and (user := owner_dict.get(creator.id)): | ||
return user | ||
return owners[0] | ||
|
||
raise ReportScheduleUserNotFoundError() |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,6 +15,8 @@ | |
# specific language governing permissions and limitations | ||
# under the License. | ||
|
||
from __future__ import annotations | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Minor bycatches |
||
|
||
import importlib | ||
import logging | ||
from typing import Callable, Dict, TYPE_CHECKING | ||
|
@@ -34,7 +36,7 @@ | |
|
||
class MachineAuthProvider: | ||
def __init__( | ||
self, auth_webdriver_func_override: Callable[[WebDriver, "User"], WebDriver] | ||
self, auth_webdriver_func_override: Callable[[WebDriver, User], WebDriver] | ||
): | ||
# This is here in order to allow for the authenticate_webdriver func to be | ||
# overridden via config, as opposed to the entire provider implementation | ||
|
@@ -43,7 +45,7 @@ def __init__( | |
def authenticate_webdriver( | ||
self, | ||
driver: WebDriver, | ||
user: "User", | ||
user: User, | ||
dpgaspar marked this conversation as resolved.
Show resolved
Hide resolved
|
||
) -> WebDriver: | ||
""" | ||
Default AuthDriverFuncType type that sets a session cookie flask-login style | ||
|
@@ -69,7 +71,7 @@ def authenticate_webdriver( | |
return driver | ||
|
||
@staticmethod | ||
def get_auth_cookies(user: "User") -> Dict[str, str]: | ||
def get_auth_cookies(user: User) -> Dict[str, str]: | ||
# Login with the user specified to get the reports | ||
with current_app.test_request_context("/login"): | ||
login_user(user) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There was a typo here + we should make this more generic now as it's not really bound to a specific Selenium user