Skip to content

Commit

Permalink
Fix AwsTaskLogFetcher missing logs (apache#41515)
Browse files Browse the repository at this point in the history
  • Loading branch information
vincbeck authored and romsharon98 committed Aug 20, 2024
1 parent dc832ea commit 779af40
Showing 1 changed file with 12 additions and 0 deletions.
12 changes: 12 additions & 0 deletions airflow/providers/amazon/aws/utils/task_log_fetcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,20 @@ def run(self) -> None:
while not self.is_stopped():
time.sleep(self.fetch_interval.total_seconds())
log_events = self._get_log_events(continuation_token)
prev_timestamp_event = None
for log_event in log_events:
current_timestamp_event = datetime.fromtimestamp(
log_event["timestamp"] / 1000.0, tz=timezone.utc
)
if current_timestamp_event == prev_timestamp_event:
# When multiple events have the same timestamp, somehow, only one event is logged
# As a consequence, some logs are missed in the log group (in case they have the same
# timestamp)
# When a slight delay is added before logging the event, that solves the issue
# See https://github.com/apache/airflow/issues/40875
time.sleep(0.1)
self.logger.info(self.event_to_str(log_event))
prev_timestamp_event = current_timestamp_event

def _get_log_events(self, skip_token: AwsLogsHook.ContinuationToken | None = None) -> Generator:
if skip_token is None:
Expand Down

0 comments on commit 779af40

Please sign in to comment.