Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

perf(nebula): optimize AT128 decoder performance #37

Merged
merged 7 commits into from
Aug 9, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .cspell.json
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@
"vccint",
"adctp",
"Adctp",

"stds",
"nproc",
"nohup",
"schedutil"
]
}
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ build/
install/
log/

# profiling
profiling_output/

# Python
srv/_*.py
*.pyc
Expand Down
33 changes: 27 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,39 +37,39 @@ colcon build --symlink-install --cmake-args -DCMAKE_BUILD_TYPE=Release

Run tests:

```
```bash
colcon test --event-handlers console_cohesion+ --packages-above nebula_common
```

Show results:

```
```bash
colcon test-result --all
```

## Generic launch file

You can easily run the sensor hardware interface, the sensor hardware monitor and sensor driver using (e.g. Pandar64):

```
```bash
ros2 launch nebula_ros nebula_launch.py sensor_model:=Pandar64
```

If you don't want to launch the hardware (i.e. when you are working from a rosbag), set the `launch_hw` flag to false:

```
```bash
ros2 launch nebula_ros nebula_launch.py sensor_model:=Pandar64 launch_hw:=false
```

If you don't want the hardware driver to perform the sensor configuration communication (i.e. limited number of connections) set the `setup_sensor` flag to false:

```
```bash
ros2 launch nebula_ros nebula_launch.py sensor_model:=Pandar64 setup_sensor:=false
```

You should ideally provide a config file for your specific sensor, but default ones are provided `nebula_drivers/config`:

```
```bash
ros2 launch nebula_ros nebula_launch.py sensor_model:=Pandar64 config_file:=your_sensor.yaml
```

Expand Down Expand Up @@ -217,3 +217,24 @@ Parameters shared by all supported models:
## Software design overview

![DriverOrganization](docs/diagram.png)


## How to evaluate performance

You can evaluate Nebula performance on a given rosbag and sensor model using the below tools.
The profiling runner is most accurate when assigned isolated CPU cores via the `-c <core_id>` option.
CPU frequencies are locked/unlocked automatically by the runner to increase repeatability.

Run profiling for each version you want to compare:

```bash
./scripts/profiling_runner.bash baseline -m Pandar64 -b ~/my_rosbag -c 2 -t 20 -n 3
git checkout my_improved_branch
./scripts/profiling_runner.bash improved -m Pandar64 -b ~/my_rosbag -c 2 -t 20 -n 3
```

Show results:

```bash
pip3 install -r scripts/requirements.txt # first-time setup
python3 scripts/plot_times.py baseline improved
```
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,11 @@ const uint16_t MAX_AZIMUTH_DEGREE_NUM = 36000;
const uint16_t LIDAR_AZIMUTH_UNIT = 256;
const uint32_t MAX_AZI_LEN = 36000 * 256;

/// @brief The resolution of one scan (in single return mode)
const uint32_t SCAN_POINTS_NUM = 1200 * LASER_COUNT;
/// @brief This sensor supports a maximum of 2 returns (= dual return)
const uint16_t MAX_RETURN_COUNT = 2;

/// @brief Hesai LiDAR decoder (AT128)
class PandarATDecoder : public HesaiScanDecoder
{
Expand Down Expand Up @@ -79,6 +84,10 @@ class PandarATDecoder : public HesaiScanDecoder
/// @param block_id target block
/// @return Point cloud
drivers::NebulaPointCloudPtr convert(size_t block_id) override;
/// @brief Convert to point cloud (without temporary buffer)
/// @param block_id target block
/// @param out_pc Point cloud to append the decoded points to
void convert(size_t block_id, NebulaPointCloudPtr & out_pc);
/// @brief Convert to point cloud for dual return
/// @param block_id target block
/// @return Point cloud
Expand All @@ -93,13 +102,8 @@ class PandarATDecoder : public HesaiScanDecoder
std::array<float, BLOCKS_PER_PACKET> block_offset_dual_{};
std::array<float, BLOCKS_PER_PACKET> block_offset_triple_{};

std::vector<float> m_elevation_rad_map_;
std::vector<float> m_sin_elevation_map_;
std::vector<float> m_cos_elevation_map_;

std::vector<float> m_azimuth_rad_map_;
std::vector<float> m_sin_azimuth_map_;
std::vector<float> m_cos_azimuth_map_;
std::vector<float> m_sin_map_;
std::vector<float> m_cos_map_;

Packet packet_{};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,19 +32,12 @@ PandarATDecoder::PandarATDecoder(
azimuth_offset_[laser] = calibration_configuration->azimuth_offset_map[laser];
}

m_elevation_rad_map_.resize(MAX_AZI_LEN);
m_sin_elevation_map_.resize(MAX_AZI_LEN);
m_cos_elevation_map_.resize(MAX_AZI_LEN);
m_azimuth_rad_map_.resize(MAX_AZI_LEN);
m_sin_azimuth_map_.resize(MAX_AZI_LEN);
m_cos_azimuth_map_.resize(MAX_AZI_LEN);
m_sin_map_.resize(MAX_AZI_LEN);
m_cos_map_.resize(MAX_AZI_LEN);
for (size_t i = 0; i < MAX_AZI_LEN; ++i) {
m_elevation_rad_map_[i] = 2.f * i * M_PI / MAX_AZI_LEN;
m_sin_elevation_map_[i] = sinf(m_elevation_rad_map_[i]);
m_cos_elevation_map_[i] = cosf(m_elevation_rad_map_[i]);
m_azimuth_rad_map_[i] = 2.f * i * M_PI / MAX_AZI_LEN;
m_sin_azimuth_map_[i] = sinf(m_azimuth_rad_map_[i]);
m_cos_azimuth_map_[i] = cosf(m_azimuth_rad_map_[i]);
const float rad = 2.f * i * M_PI / MAX_AZI_LEN;
m_sin_map_[i] = sinf(rad);
m_cos_map_[i] = cosf(rad);
}

dual_return_distance_threshold_ = sensor_configuration_->dual_return_distance_threshold;
Expand All @@ -57,14 +50,16 @@ PandarATDecoder::PandarATDecoder(
last_field_ = -1;

scan_pc_.reset(new NebulaPointCloud);
scan_pc_->reserve(SCAN_POINTS_NUM * MAX_RETURN_COUNT);
overflow_pc_.reset(new NebulaPointCloud);
overflow_pc_->reserve(SCAN_POINTS_NUM * MAX_RETURN_COUNT);
}

/// @brief Whether a complete scan has been decoded and is ready for retrieval
bool PandarATDecoder::hasScanned()
{
  return has_scanned_;
}

/// @brief Get the last completed scan and its timestamp.
/// @return Tuple of (point cloud, scan timestamp in seconds)
std::tuple<drivers::NebulaPointCloudPtr, double> PandarATDecoder::get_pointcloud()
{
  // On field change the completed scan is swapped into overflow_pc_
  // (see unpack()), so that buffer holds the finished cloud to hand out.
  // The earlier duplicate `return std::make_tuple(scan_pc_, ...)` was
  // unreachable dead code and has been removed.
  return std::make_tuple(overflow_pc_, scan_timestamp_);
}

int PandarATDecoder::unpack(const pandar_msgs::msg::PandarPacket & pandar_packet)
Expand All @@ -75,8 +70,6 @@ int PandarATDecoder::unpack(const pandar_msgs::msg::PandarPacket & pandar_packet
}

if (has_scanned_) {
scan_pc_ = overflow_pc_;
overflow_pc_.reset(new NebulaPointCloud);
has_scanned_ = false;
}

Expand All @@ -96,17 +89,19 @@ int PandarATDecoder::unpack(const pandar_msgs::msg::PandarPacket & pandar_packet
field = (field + 1) % correction_configuration_->frameNumber;
count++;
}

// The field (= mirror) has changed, meaning the previous scan is complete
if (last_field_ != field) {
if (max_azimuth_ < azimuth) {
max_azimuth_ = azimuth;
}
scan_timestamp_ = packet_.unix_second + static_cast<double>(packet_.usec) / 1000000.f;
auto block_pc = convert(block_id);
*overflow_pc_ += *block_pc;
std::swap(scan_pc_, overflow_pc_);
scan_pc_->points.clear();
convert(block_id, scan_pc_);
has_scanned_ = true;
} else {
auto block_pc = convert(block_id);
*scan_pc_ += *block_pc;
convert(block_id, scan_pc_);
}
last_azimuth_ = azimuth;
last_field_ = field;
Expand Down Expand Up @@ -171,10 +166,10 @@ void PandarATDecoder::CalcXTPointXYZIT(
point.distance = unit.distance;
point.elevation = 2.f * elevation * M_PI / MAX_AZI_LEN;
{
float xyDistance = unit.distance * m_cos_elevation_map_[elevation];
point.x = static_cast<float>(xyDistance * m_sin_azimuth_map_[azimuth]);
point.y = static_cast<float>(xyDistance * m_cos_azimuth_map_[azimuth]);
point.z = static_cast<float>(unit.distance * m_sin_elevation_map_[elevation]);
float xyDistance = unit.distance * m_cos_map_[elevation];
point.x = xyDistance * m_sin_map_[azimuth];
point.y = xyDistance * m_cos_map_[azimuth];
point.z = unit.distance * m_sin_map_[elevation];
}
if (scan_timestamp_ < 0) { // invalid timestamp
scan_timestamp_ = packet_.unix_second + static_cast<double>(packet_.usec) / 1000000.;
Expand Down Expand Up @@ -227,17 +222,18 @@ void PandarATDecoder::CalcXTPointXYZIT(
}
}

/// @brief Decode the given block of the current packet, appending the points to out_pc
/// @param block_id target block
/// @param out_pc point cloud to append the decoded points to
void PandarATDecoder::convert(size_t block_id, NebulaPointCloudPtr & out_pc)
{
  const auto laser_count = static_cast<int>(packet_.header.chLaserNumber);
  CalcXTPointXYZIT(block_id, laser_count, out_pc);
}

/// @brief Deprecated buffer-allocating overload; decoding now appends directly
/// to an existing cloud via convert(size_t, NebulaPointCloudPtr &) to avoid a
/// temporary buffer.
/// @throws std::runtime_error always — not implemented
drivers::NebulaPointCloudPtr PandarATDecoder::convert(size_t block_id)
{
  // The old allocate-and-return body was unreachable dead code sitting in
  // front of this throw; it has been removed.
  (void)block_id;
  throw std::runtime_error("Not implemented");
}

/// @brief Deprecated dual-return conversion; superseded by the appending
/// convert(size_t, NebulaPointCloudPtr &) overload.
/// @throws std::runtime_error always — not implemented
drivers::NebulaPointCloudPtr PandarATDecoder::convert_dual(size_t block_id)
{
  // The old `return convert(block_id);` made this throw unreachable dead
  // code; the stale return has been removed.
  (void)block_id;
  throw std::runtime_error("Not implemented");
}

bool PandarATDecoder::parsePacket(const pandar_msgs::msg::PandarPacket & pandar_packet)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
#include <rclcpp/rclcpp.hpp>
#include <rclcpp_components/register_node_macro.hpp>

#include <chrono>

#include "pandar_msgs/msg/pandar_packet.hpp"
#include "pandar_msgs/msg/pandar_scan.hpp"

Expand Down
6 changes: 6 additions & 0 deletions nebula_ros/launch/nebula_launch.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,10 @@ def launch_setup(context, *args, **kwargs):
target_container=LaunchConfiguration("container"),
)

container_kwargs = {}
if LaunchConfiguration("debug_logging").perform(context) == "true":
container_kwargs["ros_arguments"] = ['--log-level', 'debug']

container = ComposableNodeContainer(
name="nebula_ros_node",
namespace="",
Expand All @@ -110,6 +114,7 @@ def launch_setup(context, *args, **kwargs):
composable_node_descriptions=nodes,
output="screen",
condition=LaunchConfigurationEquals("container", ""),
**container_kwargs,
)

group = GroupAction(
Expand All @@ -135,6 +140,7 @@ def add_launch_arg(name: str, default_value=None):
add_launch_arg("return_mode", "Dual"),
add_launch_arg("launch_hw", "true"),
add_launch_arg("setup_sensor", "true"),
add_launch_arg("debug_logging", "false"),
]
+ [OpaqueFunction(function=launch_setup)]
)
6 changes: 4 additions & 2 deletions nebula_ros/src/hesai/hesai_decoder_ros_wrapper.cpp
mojomex marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,7 @@ HesaiDriverRosWrapper::HesaiDriverRosWrapper(const rclcpp::NodeOptions & options
void HesaiDriverRosWrapper::ReceiveScanMsgCallback(
const pandar_msgs::msg::PandarScan::SharedPtr scan_msg)
{
// take packets out of scan msg
std::vector<pandar_msgs::msg::PandarPacket> pkt_msgs = scan_msg->packets;
auto t_start = std::chrono::high_resolution_clock::now();

std::tuple<nebula::drivers::NebulaPointCloudPtr, double> pointcloud_ts =
driver_ptr_->ConvertScanToPointcloud(scan_msg);
Expand Down Expand Up @@ -98,6 +97,9 @@ void HesaiDriverRosWrapper::ReceiveScanMsgCallback(
rclcpp::Time(SecondsToChronoNanoSeconds(std::get<1>(pointcloud_ts)).count());
PublishCloud(std::move(ros_pc_msg_ptr), aw_points_ex_pub_);
}

auto runtime = std::chrono::high_resolution_clock::now() - t_start;
RCLCPP_DEBUG(get_logger(), "PROFILING {'d_total': %lu, 'n_out': %lu}", runtime.count(), pointcloud->size());
}

void HesaiDriverRosWrapper::PublishCloud(
Expand Down
88 changes: 88 additions & 0 deletions scripts/plot_times.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
#%%

import argparse
import ast
import glob
import os
import re

import pandas as pd
from matplotlib import pyplot as plt

def parse_logs(run_name):
    """Parse the profiling logs of one run into a single DataFrame.

    Reads all files matching ``profiling_output/<run_name>-[0-9].log``
    (resolved relative to this script's parent directory), extracts the
    ``PROFILING {...}`` records, converts all duration (``d_*``) columns
    from nanoseconds to milliseconds, and adds a ``d_total`` column that is
    the sum of all duration columns.

    :param run_name: run name as passed to profiling_runner.bash
    :return: DataFrame with one row per PROFILING record
    :raises FileNotFoundError: if no log files match the run name
    """
    dfs = []
    script_dir = os.path.dirname(os.path.abspath(__file__))
    glob_pattern = os.path.join(script_dir, f"../profiling_output/{run_name}-[0-9].log")
    for path in glob.glob(glob_pattern):
        with open(path) as f:
            lines = f.readlines()
        lines = filter(lambda line: "PROFILING" in line, lines)
        lines = [re.sub(r'.*PROFILING (\{.*?\}).*', r'\1', line) for line in lines]
        # literal_eval instead of eval: log lines are data, not trusted code
        records = [ast.literal_eval(line) for line in lines]

        dfs.append(pd.DataFrame(records))

    # pd.concat([]) raises an opaque ValueError; fail with a clear message instead
    if not dfs:
        raise FileNotFoundError(f"No profiling logs found matching '{glob_pattern}'")

    df = pd.concat(dfs)

    for col in [c for c in df.columns if c.startswith("d_")]:
        df[col] /= 1e6  # ns to ms
    df['d_total'] = sum([df[c] for c in df.columns if c.startswith("d_")])  # type: ignore
    return df

def plot_timing_comparison(run_names):
    """Plot and tabulate timing statistics for the given profiling runs.

    Produces a scatter plot of total duration per iteration (with the number
    of published points on a secondary axis), one box plot per duration
    column, and prints a Markdown table of per-run means and standard
    deviations including percentages relative to the first run.

    :param run_names: run names to compare; the first one is the baseline
    """
    scenario_dfs = {
        run_name: parse_logs(run_name) for run_name in run_names
    }

    # One box-plot axis per duration column (columns are assumed identical
    # across runs, so the first run's columns are representative).
    n_duration_cols = sum(
        1 for c in next(iter(scenario_dfs.values())).columns if c.startswith("d_"))

    fig, axs = plt.subplots(1 + n_duration_cols, figsize=(10, 10), num="Timing comparison")
    ax_d = axs[0]
    ax_n = ax_d.twinx()
    ax_n.set_ylabel("# points published")
    boxes = axs[1:]

    for i, (label, df) in enumerate(scenario_dfs.items()):
        durations = df['d_total']

        ax_d.plot(durations.index, durations, label=label, linestyle='', marker='.')
        for col in filter(lambda col: col.startswith("n_"), df.columns):
            ax_n.plot(df.index, df[col], label=f"{label}::{col}", linestyle='', marker='.', color='black')

        d_columns = [col for col in df.columns if col.startswith("d_")]
        # (the old `n_cols = len(d_columns)` here shadowed the outer count and
        # was never read — removed)
        for j, col in enumerate(d_columns):
            if i == 0:
                # Label each box-plot row only once, on the first run
                boxes[j].set_ylabel(f"{col} (ms)")
            boxes[j].boxplot(df[col], positions=[i], labels=[label])

    ax_d.legend(loc="upper right")
    ax_d.set_ylabel("time (ms)")
    ax_d.set_xlabel("iteration")

    df_means = pd.DataFrame({
        label: df.describe().loc["mean"] for label, df in scenario_dfs.items()
    })

    df_stds = pd.DataFrame({
        label: df.describe().loc["std"] for label, df in scenario_dfs.items()
    })

    df_means = df_means.rename(index=lambda label: f"{label} AVG")
    df_stds = df_stds.rename(index=lambda label: f"{label} STD")

    df_stat = pd.concat([df_means, df_stds], axis=0)

    # Express every statistic as a percentage of the baseline (first) run
    baseline_name = run_names[0]
    df_base = df_stat[baseline_name]
    for label in df_base.index:
        df_stat.loc[f"{label} % rel to {baseline_name}", :] = df_stat.loc[label, :] / df_base.loc[label] * 100

    print(df_stat.to_markdown())
    plt.show()
# %%

if __name__ == "__main__":
    # CLI entry point: plot all runs named on the command line.
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "run_names",
        nargs='+',
        help="The names of the runs to plot, as entered in test_runner.bash (e.g. 'baseline', 'optimized')\n"
             "All files of the form <run-name>-[0-9].log will be plotted.")
    cli_args = arg_parser.parse_args()

    plot_timing_comparison(cli_args.run_names)
Loading
Loading