Skip to content

Commit

Permalink
Revert "[observability][export-api] Write actor events" (ray-project#…
Browse files Browse the repository at this point in the history
…47516)

Reverts ray-project#47303

Signed-off-by: ujjawal-khare <[email protected]>
  • Loading branch information
can-anyscale authored and ujjawal-khare committed Oct 15, 2024
1 parent eca534a commit 184e293
Show file tree
Hide file tree
Showing 5 changed files with 5 additions and 114 deletions.
40 changes: 2 additions & 38 deletions src/ray/gcs/gcs_server/gcs_actor_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -241,36 +241,6 @@ const rpc::ActorTableData &GcsActor::GetActorTableData() const {

/// Returns a writable pointer to this actor's `actor_table_data_` member.
rpc::ActorTableData *GcsActor::GetMutableActorTableData() {
  return &actor_table_data_;
}

void GcsActor::WriteActorExportEvent() const {
/// Write actor_table_data_ as a export actor event if
/// enable_export_api_write() is enabled.
if (!RayConfig::instance().enable_export_api_write()) {
return;
}
std::shared_ptr<rpc::ExportActorData> export_actor_data_ptr =
std::make_shared<rpc::ExportActorData>();

export_actor_data_ptr->set_actor_id(actor_table_data_.actor_id());
export_actor_data_ptr->set_job_id(actor_table_data_.job_id());
export_actor_data_ptr->set_state(ConvertActorStateToExport(actor_table_data_.state()));
export_actor_data_ptr->set_is_detached(actor_table_data_.is_detached());
export_actor_data_ptr->set_name(actor_table_data_.name());
export_actor_data_ptr->set_pid(actor_table_data_.pid());
export_actor_data_ptr->set_ray_namespace(actor_table_data_.ray_namespace());
export_actor_data_ptr->set_serialized_runtime_env(
actor_table_data_.serialized_runtime_env());
export_actor_data_ptr->set_class_name(actor_table_data_.class_name());
export_actor_data_ptr->mutable_death_cause()->CopyFrom(actor_table_data_.death_cause());
export_actor_data_ptr->mutable_required_resources()->insert(
actor_table_data_.required_resources().begin(),
actor_table_data_.required_resources().end());
export_actor_data_ptr->set_node_id(actor_table_data_.node_id());
export_actor_data_ptr->set_placement_group_id(actor_table_data_.placement_group_id());
export_actor_data_ptr->set_repr_name(actor_table_data_.repr_name());

RayExportEvent(export_actor_data_ptr).SendEvent();
}

/// Returns the raw `rpc::TaskSpec` pointer obtained from `task_spec_.get()`.
rpc::TaskSpec *GcsActor::GetMutableTaskSpec() {
  return task_spec_.get();
}

const ResourceRequest &GcsActor::GetAcquiredResources() const {
Expand Down Expand Up @@ -800,7 +770,6 @@ Status GcsActorManager::RegisterActor(const ray::rpc::RegisterActorRequest &requ
[this, actor](const Status &status) {
// The backend storage is supposed to be reliable, so the status must be ok.
RAY_CHECK_OK(status);
actor->WriteActorExportEvent();
// If a creator dies before this callback is called, the actor could have
// been already destroyed. It is okay not to invoke a callback because we
// don't need to reply to the creator as it is already dead.
Expand Down Expand Up @@ -897,7 +866,6 @@ Status GcsActorManager::CreateActor(const ray::rpc::CreateActorRequest &request,

// Pub this state for dashboard showing.
RAY_CHECK_OK(gcs_publisher_->PublishActor(actor_id, actor_table_data, nullptr));
actor->WriteActorExportEvent();
RemoveUnresolvedActor(actor);

// Update the registered actor as its creation task specification may have changed due
Expand Down Expand Up @@ -1107,11 +1075,10 @@ void GcsActorManager::DestroyActor(const ActorID &actor_id,
RAY_CHECK_OK(gcs_table_storage_->ActorTable().Put(
actor->GetActorID(),
*actor_table_data,
[this, actor, actor_id, actor_table_data](Status status) {
[this, actor_id, actor_table_data](Status status) {
RAY_CHECK_OK(gcs_publisher_->PublishActor(
actor_id, *GenActorDataOnlyWithStates(*actor_table_data), nullptr));
RAY_CHECK_OK(gcs_table_storage_->ActorTaskSpecTable().Delete(actor_id, nullptr));
actor->WriteActorExportEvent();
// Destroy placement group owned by this actor.
destroy_owned_placement_group_if_needed_(actor_id);
}));
Expand Down Expand Up @@ -1399,10 +1366,9 @@ void GcsActorManager::RestartActor(const ActorID &actor_id,
RAY_CHECK_OK(gcs_table_storage_->ActorTable().Put(
actor_id,
*mutable_actor_table_data,
[this, actor, actor_id, mutable_actor_table_data](Status status) {
[this, actor_id, mutable_actor_table_data](Status status) {
RAY_CHECK_OK(gcs_publisher_->PublishActor(
actor_id, *GenActorDataOnlyWithStates(*mutable_actor_table_data), nullptr));
actor->WriteActorExportEvent();
}));
gcs_actor_scheduler_->Schedule(actor);
} else {
Expand Down Expand Up @@ -1432,7 +1398,6 @@ void GcsActorManager::RestartActor(const ActorID &actor_id,
actor_id, *GenActorDataOnlyWithStates(*mutable_actor_table_data), nullptr));
RAY_CHECK_OK(
gcs_table_storage_->ActorTaskSpecTable().Delete(actor_id, nullptr));
actor->WriteActorExportEvent();
}));
// The actor is dead, but we should not remove the entry from the
// registered actors yet. If the actor is owned, we will destroy the actor
Expand Down Expand Up @@ -1540,7 +1505,6 @@ void GcsActorManager::OnActorCreationSuccess(const std::shared_ptr<GcsActor> &ac
[this, actor_id, actor_table_data, actor, reply](Status status) {
RAY_CHECK_OK(gcs_publisher_->PublishActor(
actor_id, *GenActorDataOnlyWithStates(actor_table_data), nullptr));
actor->WriteActorExportEvent();
// Invoke all callbacks for all registration requests of this actor (duplicated
// requests are included) and remove all of them from
// actor_to_create_callbacks_.
Expand Down
25 changes: 0 additions & 25 deletions src/ray/gcs/gcs_server/gcs_actor_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@
#include "ray/rpc/gcs_server/gcs_rpc_server.h"
#include "ray/rpc/worker/core_worker_client.h"
#include "ray/util/counter_map.h"
#include "ray/util/event.h"
#include "src/ray/protobuf/gcs_service.pb.h"

namespace ray {
Expand Down Expand Up @@ -189,9 +188,6 @@ class GcsActor {
/// Get the mutable ActorTableData of this actor.
rpc::ActorTableData *GetMutableActorTableData();
rpc::TaskSpec *GetMutableTaskSpec();
/// Write an event containing this actor's ActorTableData
/// to file for the Export API.
void WriteActorExportEvent() const;

const ResourceRequest &GetAcquiredResources() const;
void SetAcquiredResources(ResourceRequest &&resource_request);
Expand All @@ -218,27 +214,6 @@ class GcsActor {
last_metric_state_ = cur_state;
}

/// Maps an `rpc::ActorTableData::ActorState` value onto the
/// `rpc::ExportActorData::ActorState` enumerator of the same name.
///
/// \param actor_state The actor state taken from the actor table data.
/// \return The matching export state. Any value not listed below is reported
/// through `RAY_LOG(FATAL)`.
rpc::ExportActorData::ActorState ConvertActorStateToExport(
    rpc::ActorTableData::ActorState actor_state) const {
  switch (actor_state) {
  case rpc::ActorTableData::DEPENDENCIES_UNREADY:
    return rpc::ExportActorData::DEPENDENCIES_UNREADY;
  case rpc::ActorTableData::PENDING_CREATION:
    return rpc::ExportActorData::PENDING_CREATION;
  case rpc::ActorTableData::ALIVE:
    return rpc::ExportActorData::ALIVE;
  case rpc::ActorTableData::RESTARTING:
    return rpc::ExportActorData::RESTARTING;
  case rpc::ActorTableData::DEAD:
    return rpc::ExportActorData::DEAD;
  default:
    // Unknown rpc::ActorTableData::ActorState value. Log the numeric value:
    // the generated ActorState_Name() helper yields an empty string for values
    // that are not declared enumerators, which would leave the message without
    // any indication of what was actually received.
    RAY_LOG(FATAL) << "Invalid value for rpc::ActorTableData::ActorState: "
                   << static_cast<int>(actor_state);
    // Keeps every control path returning a value.
    return rpc::ExportActorData::DEAD;
  }
}

/// The actor meta data which contains the task specification as well as the state of
/// the gcs actor and so on (see gcs.proto).
rpc::ActorTableData actor_table_data_;
Expand Down
5 changes: 2 additions & 3 deletions src/ray/gcs/gcs_server/gcs_server_main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -88,13 +88,12 @@ int main(int argc, char *argv[]) {

// Initialize event framework.
if (RayConfig::instance().event_log_reporter_enabled() && !log_dir.empty()) {
// This GCS server process emits GCS standard events, and Node and Actor export events
// This GCS server process emits GCS standard events, and Node export events
// so the various source types are passed to RayEventInit. The type of an
// event is determined by the schema of its event data.
const std::vector<ray::SourceTypeVariant> source_types = {
ray::rpc::Event_SourceType::Event_SourceType_GCS,
ray::rpc::ExportEvent_SourceType::ExportEvent_SourceType_EXPORT_NODE,
ray::rpc::ExportEvent_SourceType_EXPORT_ACTOR};
ray::rpc::ExportEvent_SourceType::ExportEvent_SourceType_EXPORT_NODE};
ray::RayEventInit(source_types,
absl::flat_hash_map<std::string, std::string>(),
log_dir,
Expand Down
43 changes: 1 addition & 42 deletions src/ray/gcs/gcs_server/test/gcs_actor_manager_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.

#include <chrono>
#include <memory>
#include <thread>

// clang-format off
#include "gtest/gtest.h"
Expand All @@ -26,7 +24,6 @@
#include "mock/ray/gcs/gcs_server/gcs_kv_manager.h"
#include "mock/ray/gcs/gcs_server/gcs_node_manager.h"
#include "mock/ray/pubsub/publisher.h"
#include "ray/util/event.h"
// clang-format on

namespace ray {
Expand Down Expand Up @@ -121,8 +118,7 @@ class GcsActorManagerTest : public ::testing::Test {
RayConfig::instance().initialize(
R"(
{
"maximum_gcs_destroyed_actor_cached_count": 10,
"enable_export_api_write": true
"maximum_gcs_destroyed_actor_cached_count": 10
}
)");
std::promise<bool> promise;
Expand Down Expand Up @@ -164,13 +160,11 @@ class GcsActorManagerTest : public ::testing::Test {
auto job_id = JobID::FromInt(i);
job_namespace_table_[job_id] = "";
}
log_dir_ = "event_123";
}

virtual ~GcsActorManagerTest() {
io_service_.stop();
thread_io_service_->join();
std::filesystem::remove_all(log_dir_.c_str());
}

void WaitActorCreated(const ActorID &actor_id) {
Expand Down Expand Up @@ -292,13 +286,9 @@ class GcsActorManagerTest : public ::testing::Test {
std::unique_ptr<gcs::GcsFunctionManager> function_manager_;
std::unique_ptr<gcs::MockInternalKVInterface> kv_;
PeriodicalRunner periodical_runner_;
std::string log_dir_;
};

TEST_F(GcsActorManagerTest, TestBasic) {
std::vector<SourceTypeVariant> source_types = {
rpc::ExportEvent_SourceType::ExportEvent_SourceType_EXPORT_ACTOR};
RayEventInit(source_types, absl::flat_hash_map<std::string, std::string>(), log_dir_);
auto job_id = JobID::FromInt(1);
auto registered_actor = RegisterActor(job_id);
rpc::CreateActorRequest create_actor_request;
Expand Down Expand Up @@ -333,37 +323,6 @@ TEST_F(GcsActorManagerTest, TestBasic) {
ASSERT_EQ(actor->GetState(), rpc::ActorTableData::DEAD);
RAY_CHECK_EQ(gcs_actor_manager_->CountFor(rpc::ActorTableData::ALIVE, ""), 0);
RAY_CHECK_EQ(gcs_actor_manager_->CountFor(rpc::ActorTableData::DEAD, ""), 1);

// Check correct export events are written for each of the 4 state transitions
int num_retry = 5;
int num_export_events = 4;
std::vector<std::string> expected_states = {
"DEPENDENCIES_UNREADY", "PENDING_CREATION", "ALIVE", "DEAD"};
std::vector<std::string> vc;
for (int i = 0; i < num_retry; i++) {
Mocker::ReadContentFromFile(vc, log_dir_ + "/events/event_EXPORT_ACTOR.log");
if ((int)vc.size() == num_export_events) {
for (int event_idx = 0; event_idx < num_export_events; event_idx++) {
json export_event_as_json = json::parse(vc[event_idx]);
json event_data = export_event_as_json["event_data"].get<json>();
ASSERT_EQ(event_data["state"], expected_states[event_idx]);
if (event_idx == num_export_events - 1) {
// Verify death cause for last actor DEAD event
ASSERT_EQ(
event_data["death_cause"]["actor_died_error_context"]["error_message"],
"The actor is dead because all references to the actor were removed.");
}
}
return;
} else {
// Sleep and retry
std::this_thread::sleep_for(std::chrono::seconds(1));
vc.clear();
}
}
Mocker::ReadContentFromFile(vc, log_dir_ + "/events/event_EXPORT_ACTOR.log");
ASSERT_TRUE(false) << "Export API only wrote " << (int)vc.size()
<< " lines, but expecting 4.\n";
}

TEST_F(GcsActorManagerTest, TestDeadCount) {
Expand Down
6 changes: 0 additions & 6 deletions src/ray/util/event.cc
Original file line number Diff line number Diff line change
Expand Up @@ -129,8 +129,6 @@ std::string LogEventReporter::ExportEventToString(const rpc::ExportEvent &export
j["source_type"] = ExportEvent_SourceType_Name(export_event.source_type());
std::string event_data_as_string;
google::protobuf::util::JsonPrintOptions options;
// Required so enum with value 0 is not omitted
options.always_print_primitive_fields = true;
options.preserve_proto_field_names = true;
// Required so enum with value 0 is not omitted
options.always_print_primitive_fields = true;
Expand All @@ -142,10 +140,6 @@ std::string LogEventReporter::ExportEventToString(const rpc::ExportEvent &export
RAY_CHECK(google::protobuf::util::MessageToJsonString(
export_event.node_event_data(), &event_data_as_string, options)
.ok());
} else if (export_event.has_actor_event_data()) {
RAY_CHECK(google::protobuf::util::MessageToJsonString(
export_event.actor_event_data(), &event_data_as_string, options)
.ok());
} else {
RAY_LOG(FATAL)
<< "event_data missing from export event with id " << export_event.event_id()
Expand Down

0 comments on commit 184e293

Please sign in to comment.