Skip to content

Commit

Permalink
fixup! Prevent large objects from being stored in the RTIF
Browse files Browse the repository at this point in the history
  • Loading branch information
ephraimbuddy committed Mar 14, 2024
1 parent a62810d commit bdd600e
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 5 deletions.
6 changes: 4 additions & 2 deletions airflow/serialization/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,8 @@ def is_jsonable(x):

if not is_jsonable(template_field):
if isinstance(template_field, (list, tuple)):
if sys.getsizeof(template_field[0]) > max_size:
size = sum(sys.getsizeof(x) for x in template_field)
if size > max_size:
return (
"Value redacted as it is too large to be stored in the database. "
"You can change this behaviour in [core]max_templated_field_size"
Expand All @@ -56,7 +57,8 @@ def is_jsonable(x):
return str(template_field)
else:
if template_field and isinstance(template_field, (list, tuple)):
if len(template_field) and sys.getsizeof(template_field[0]) > max_size:
size = sum(sys.getsizeof(x) for x in template_field)
if size > max_size:
return (
"Value redacted as it is too large to be stored in the database. "
"You can change this behaviour in [core]max_templated_field_size"
Expand Down
12 changes: 9 additions & 3 deletions tests/models/test_renderedtifields.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
from airflow.utils.task_instance_session import set_current_task_instance_session
from airflow.utils.timezone import datetime
from tests.test_utils.asserts import assert_queries_count
from tests.test_utils.db import clear_db_dags, clear_db_runs, clear_rendered_ti_fields
from tests.test_utils.db import clear_db_dags, clear_db_runs, clear_db_xcom, clear_rendered_ti_fields

pytestmark = pytest.mark.db_test

Expand Down Expand Up @@ -70,6 +70,7 @@ def clean_db():
clear_db_runs()
clear_db_dags()
clear_rendered_ti_fields()
clear_db_xcom()

def setup_method(self):
self.clean_db()
Expand Down Expand Up @@ -340,7 +341,10 @@ def consumer_task(value):
ti, ti2 = dr.task_instances
ti.xcom_push(value=large_string, key="return_value")
rtif = RTIF(ti=ti2)
rtif.write()
rtif.write(session=session)
session.flush()
rtif = session.query(RTIF).filter(RTIF.dag_id == rtif.dag_id, RTIF.task_id == rtif.task_id).first()

assert rtif.rendered_fields == {
"op_args": "Value redacted as it is too large to be stored in the database. "
"You can change this behaviour in [core]max_templated_field_size",
Expand Down Expand Up @@ -371,7 +375,9 @@ def consumer_task(value):
ti, ti2 = dr.task_instances
ti.xcom_push(value=large_dataframe, key="return_value")
rtif = RTIF(ti=ti2)
rtif.write()
rtif.write(session=session)
session.flush()
rtif = session.query(RTIF).filter(RTIF.dag_id == rtif.dag_id, RTIF.task_id == rtif.task_id).first()
assert rtif.rendered_fields == {
"op_args": "Value redacted as it is too large to be stored in the database. "
"You can change this behaviour in [core]max_templated_field_size",
Expand Down

0 comments on commit bdd600e

Please sign in to comment.