diff --git a/src/ray/gcs/gcs_server/gcs_job_manager.cc b/src/ray/gcs/gcs_server/gcs_job_manager.cc index a73d4bf85378..bcf2e8b25640 100644 --- a/src/ray/gcs/gcs_server/gcs_job_manager.cc +++ b/src/ray/gcs/gcs_server/gcs_job_manager.cc @@ -115,7 +115,7 @@ void GcsJobManager::HandleAddJob(rpc::AddJobRequest request, // multiple times and requires idempotency (i.e. due to retry). running_job_ids_.insert(job_id); } - WriteDriverJobExportEvent(mutable_job_table_data); + WriteDriverJobExportEvent(job_table_data); GCS_RPC_SEND_REPLY(send_reply_callback, reply, status); }; @@ -146,6 +146,13 @@ void GcsJobManager::MarkJobAsFinished(rpc::JobTableData job_table_data, } function_manager_.RemoveJobReference(job_id); WriteDriverJobExportEvent(job_table_data); + + // Update running job status. + auto iter = running_job_ids_.find(job_id); + RAY_CHECK(iter != running_job_ids_.end()); + running_job_ids_.erase(iter); + ++finished_jobs_count_; + done_callback(status); }; diff --git a/src/ray/gcs/gcs_server/gcs_job_manager.h b/src/ray/gcs/gcs_server/gcs_job_manager.h index 63de6dceb7a4..b74b24f3e1d1 100644 --- a/src/ray/gcs/gcs_server/gcs_job_manager.h +++ b/src/ray/gcs/gcs_server/gcs_job_manager.h @@ -96,6 +96,11 @@ class GcsJobManager : public rpc::JobInfoHandler { void WriteDriverJobExportEvent(rpc::JobTableData job_data) const; + /// Record metrics. + /// For job manager, (1) running jobs count gauge and (2) new finished jobs (whether + /// succeed or fail) will be reported periodically. + void RecordMetrics(); + private: void ClearJobInfos(const rpc::JobTableData &job_data);