Skip to content

Commit

Permalink
fix(system_monitor): separate S.M.A.R.T. request and lazy unmount request for hdd_reader
Browse files Browse the repository at this point in the history

Signed-off-by: v-nakayama7440-esol <[email protected]>
  • Loading branch information
v-nakayama7440-esol committed Sep 16, 2022
1 parent b679b8a commit c97b8bc
Show file tree
Hide file tree
Showing 4 changed files with 274 additions and 103 deletions.
34 changes: 29 additions & 5 deletions system/system_monitor/include/hdd_reader/hdd_reader.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,14 @@
#include <map>
#include <string>

/**
 * @brief Identifiers of the requests that can be sent to hdd_reader
 * @note The values are written out explicitly because the ID is exchanged as an
 *       integer between the requester and hdd_reader.
 */
enum HDDReaderRequestID {
  GetHDDInfo = 0,     //!< @brief request dispatched to get_hdd_info()
  UnmountDevice = 1,  //!< @brief request dispatched to unmount_device_with_lazy()
};

/**
* @brief HDD device
*/
Expand All @@ -40,9 +48,6 @@ struct HDDDevice
total_data_written_attribute_id_; //!< @brief S.M.A.R.T attribute ID of total data written
uint8_t recovered_error_attribute_id_; //!< @brief S.M.A.R.T attribute ID of recovered error

uint8_t unmount_request_flag_; //!< @brief unmount request flag
std::string part_device_; //!< @brief partition device

/**
* @brief Load or save data members.
* @param [inout] ar archive reference to load or save the serialized data members
Expand All @@ -58,8 +63,6 @@ struct HDDDevice
ar & power_on_hours_attribute_id_;
ar & total_data_written_attribute_id_;
ar & recovered_error_attribute_id_;
ar & unmount_request_flag_;
ar & part_device_;
}
};

Expand Down Expand Up @@ -106,6 +109,27 @@ struct HDDInfo
}
};

/**
 * @brief Description of one partition that is requested to be unmounted
 */
struct UnmountDeviceInfo
{
  std::string part_device_;  //!< @brief partition device

  /**
   * @brief Read or write every data member through the given archive.
   * @tparam Archive archive type supplied by the serialization library
   * @param [inout] ar archive the members are loaded from or saved to
   * @param [in] version version for the archive (unused)
   * @note NOLINT syntax is needed since this is an interface to serialization and
   *       used inside boost serialization.
   */
  template <typename Archive>
  void serialize(Archive & ar, const unsigned int /*version*/)  // NOLINT(runtime/references)
  {
    ar & this->part_device_;
  }
};

/**
* @brief HDD information list
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,13 @@ class HDDMonitor : public rclcpp::Node
*/
void updateHDDConnections();

/**
* @brief unmount device
* @param [in] device device name
* @return result of success or failure
* @note NOTE(review): the definition is not visible from this declaration; presumably it
*       sends an UnmountDevice request to hdd_reader -- confirm, and document which
*       return value means success.
*/
int unmountDevice(std::string & device);

diagnostic_updater::Updater updater_; //!< @brief Updater class which advertises to /diagnostics
rclcpp::TimerBase::SharedPtr timer_; //!< @brief timer to get HDD information from HDDReader

Expand All @@ -322,8 +329,6 @@ class HDDMonitor : public rclcpp::Node
std::map<std::string, HDDParam> hdd_params_; //!< @brief list of error and warning levels
std::map<std::string, bool>
hdd_connected_flags_; //!< @brief list of flag whether HDD is connected
std::map<std::string, bool>
device_unmount_request_flags_; //!< @brief list of flag requesting device unmount
std::map<std::string, uint32_t>
initial_recovered_errors_; //!< @brief list of initial recovered error count
std::map<std::string, HDDStat> hdd_stats_; //!< @brief list of HDD statistics
Expand Down
212 changes: 131 additions & 81 deletions system/system_monitor/reader/hdd_reader/hdd_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -407,27 +407,127 @@ int get_nvme_SMARTData(int fd, HDDInfo * info)
return EXIT_SUCCESS;
}

/**
 * @brief Collect identification and S.M.A.R.T. data for every requested device
 * @param [in] ia input archive from which the list of HDDDevice entries is read
 * @param [out] oa output archive to which the resulting HDDInfoList is written
 * @return 0 on success, -1 if the device list could not be deserialized
 * @note A device whose open or query step fails is logged to syslog and left out of
 *       the reply; a failing close() is reported through error_code_ of its entry.
 */
int get_hdd_info(boost::archive::text_iarchive & ia, boost::archive::text_oarchive & oa)
{
  std::vector<HDDDevice> requested;

  try {
    ia & requested;
  } catch (const std::exception & e) {
    syslog(LOG_ERR, "exception. %s\n", e.what());
    return -1;
  }

  HDDInfoList results;

  for (auto & device : requested) {
    HDDInfo info{};

    // Open the device node read-only
    const int fd = open(device.name_.c_str(), O_RDONLY);
    if (fd < 0) {
      info.error_code_ = errno;
      syslog(LOG_ERR, "Failed to open a file. %s\n", strerror(info.error_code_));
      continue;
    }

    // The device path prefix selects the query protocol
    const bool is_ata = boost::starts_with(device.name_.c_str(), "/dev/sd");
    const bool is_nvme = !is_ata && boost::starts_with(device.name_.c_str(), "/dev/nvme");

    // Set as soon as one query step reports an error; the descriptor is released once below
    bool query_failed = false;

    if (is_ata) {
      // AHCI device: IDENTIFY DEVICE first, then SMART DATA
      info.error_code_ = get_ata_identify(fd, &info);
      if (info.error_code_ != 0) {
        syslog(
          LOG_ERR, "Failed to get IDENTIFY DEVICE for ATA drive. %s\n", strerror(info.error_code_));
        query_failed = true;
      } else {
        info.error_code_ = get_ata_SMARTData(fd, &info, device);
        if (info.error_code_ != 0) {
          syslog(LOG_ERR, "Failed to get SMART LOG for ATA drive. %s\n", strerror(info.error_code_));
          query_failed = true;
        }
      }
    } else if (is_nvme) {
      // NVMe device: Identify first, then SMART / Health Information
      info.error_code_ = get_nvme_identify(fd, &info);
      if (info.error_code_ != 0) {
        syslog(LOG_ERR, "Failed to get Identify for NVMe drive. %s\n", strerror(info.error_code_));
        query_failed = true;
      } else {
        info.error_code_ = get_nvme_SMARTData(fd, &info);
        if (info.error_code_ != 0) {
          syslog(
            LOG_ERR, "Failed to get SMART / Health Information for NVMe drive. %s\n",
            strerror(info.error_code_));
          query_failed = true;
        }
      }
    }

    if (query_failed) {
      close(fd);
      continue;
    }

    // Close the file descriptor FD
    info.error_code_ = close(fd);
    if (info.error_code_ < 0) {
      info.error_code_ = errno;
      syslog(LOG_ERR, "Failed to close the file descriptor FD. %s\n", strerror(info.error_code_));
    }

    results[device.name_] = info;
  }

  oa << results;
  return 0;
}

/**
* @brief unmount device with lazy option
* @param [in] device device name
* @param [in] boost::archive::text_iarchive object
* @param [out] boost::archive::text_oarchive object
* @return 0 on success, otherwise error
*/
void unmount_device_with_lazy(std::string & device)
int unmount_device_with_lazy(boost::archive::text_iarchive & ia, boost::archive::text_oarchive & oa)
{
boost::process::ipstream is_out;
boost::process::ipstream is_err;
std::vector<UnmountDeviceInfo> unmount_devices;
std::vector<int> responses;

try {
ia & unmount_devices;
} catch (const std::exception & e) {
syslog(LOG_ERR, "exception. %s\n", e.what());
return -1;
}

for (auto & unmount_device : unmount_devices) {
int ret = 0;
boost::process::ipstream is_out;
boost::process::ipstream is_err;

boost::process::child c(
"/bin/sh", "-c", fmt::format("umount -l {}", device.c_str()), boost::process::std_out > is_out,
boost::process::std_err > is_err);
c.wait();
boost::process::child c(
"/bin/sh", "-c", fmt::format("umount -l {}", unmount_device.part_device_.c_str()),
boost::process::std_out > is_out, boost::process::std_err > is_err);
c.wait();

if (c.exit_code() != 0) {
syslog(LOG_ERR, "Failed to execute umount command. %s\n", device.c_str());
if (c.exit_code() != 0) {
syslog(
LOG_ERR, "Failed to execute umount command. %s\n", unmount_device.part_device_.c_str());
ret = -1;
}
responses.push_back(ret);
}

oa << responses;
return 0;
}

/**
* @brief hdd_reader main procedure
* @param [in] port port to listen
*/
void run(int port)
Expand Down Expand Up @@ -500,91 +600,41 @@ void run(int port)
return;
}

// Restore list of devices
std::vector<HDDDevice> hdd_devices;
uint8_t request_id;

buf[sizeof(buf) - 1] = '\0';
std::istringstream iss(buf);
boost::archive::text_iarchive ia(iss);

try {
buf[sizeof(buf) - 1] = '\0';
std::istringstream iss(buf);
boost::archive::text_iarchive oa(iss);
oa & hdd_devices;
ia & request_id;
} catch (const std::exception & e) {
syslog(LOG_ERR, "exception. %s\n", e.what());
close(new_sock);
close(sock);
return;
}

HDDInfoList list;
std::ostringstream oss;
boost::archive::text_oarchive oa(oss);

for (auto & hdd_device : hdd_devices) {
if (hdd_device.unmount_request_flag_) {
unmount_device_with_lazy(hdd_device.part_device_);
continue;
}

HDDInfo info{};

// Open a file
int fd = open(hdd_device.name_.c_str(), O_RDONLY);
if (fd < 0) {
info.error_code_ = errno;
syslog(LOG_ERR, "Failed to open a file. %s\n", strerror(info.error_code_));
switch (request_id) {
case HDDReaderRequestID::GetHDDInfo:
ret = get_hdd_info(ia, oa);
break;
case HDDReaderRequestID::UnmountDevice:
ret = unmount_device_with_lazy(ia, oa);
break;
default:
syslog(LOG_ERR, "Request ID is invalid. %d\n", request_id);
continue;
}

// AHCI device
if (boost::starts_with(hdd_device.name_.c_str(), "/dev/sd")) {
// Get IDENTIFY DEVICE for ATA drive
info.error_code_ = get_ata_identify(fd, &info);
if (info.error_code_ != 0) {
syslog(
LOG_ERR, "Failed to get IDENTIFY DEVICE for ATA drive. %s\n",
strerror(info.error_code_));
close(fd);
continue;
}
// Get SMART DATA for ATA drive
info.error_code_ = get_ata_SMARTData(fd, &info, hdd_device);
if (info.error_code_ != 0) {
syslog(
LOG_ERR, "Failed to get SMART LOG for ATA drive. %s\n", strerror(info.error_code_));
close(fd);
continue;
}
} else if (boost::starts_with(hdd_device.name_.c_str(), "/dev/nvme")) { // NVMe device
// Get Identify for NVMe drive
info.error_code_ = get_nvme_identify(fd, &info);
if (info.error_code_ != 0) {
syslog(
LOG_ERR, "Failed to get Identify for NVMe drive. %s\n", strerror(info.error_code_));
close(fd);
continue;
}
// Get SMART / Health Information for NVMe drive
info.error_code_ = get_nvme_SMARTData(fd, &info);
if (info.error_code_ != 0) {
syslog(
LOG_ERR, "Failed to get SMART / Health Information for NVMe drive. %s\n",
strerror(info.error_code_));
close(fd);
continue;
}
}

// Close the file descriptor FD
info.error_code_ = close(fd);
if (info.error_code_ < 0) {
info.error_code_ = errno;
syslog(LOG_ERR, "Failed to close the file descriptor FD. %s\n", strerror(info.error_code_));
}

list[hdd_device.name_] = info;
}
if (ret != 0) {
close(new_sock);
close(sock);
return;
}

oa << list;
// Write N bytes of BUF to FD
ret = write(new_sock, oss.str().c_str(), oss.str().length());
if (ret < 0) {
Expand Down
Loading

0 comments on commit c97b8bc

Please sign in to comment.