From 63802d0607edbc5520c336e02857990b089ac26e Mon Sep 17 00:00:00 2001 From: "Stein M. Nornes" Date: Thu, 5 Dec 2019 11:46:36 +0100 Subject: [PATCH] refactor(doTransform): 10-100x faster transform of pointcloud It seems like generating an Eigen::Vector3f and transforming it for each iteration is very inefficient. In our case, this mod took transform-time of a point-cloud from 90-100ms to 1-2ms. Verified to give same result as old doTransform using: ``` sensor_msgs::PointCloud2ConstIterator<float> x_old(cloud1, "x"); sensor_msgs::PointCloud2ConstIterator<float> y_old(cloud1, "y"); sensor_msgs::PointCloud2ConstIterator<float> z_old(cloud1, "z"); sensor_msgs::PointCloud2ConstIterator<float> x_new(cloud2, "x"); sensor_msgs::PointCloud2ConstIterator<float> y_new(cloud2, "y"); sensor_msgs::PointCloud2ConstIterator<float> z_new(cloud2, "z"); std::vector<double> compare_vector; for (; x_new != x_new.end(); ++x_new, ++y_new, ++z_new, ++x_old, ++y_old, ++z_old) { compare_vector.push_back(*x_new - *x_old); compare_vector.push_back(*y_new - *y_old); compare_vector.push_back(*z_new - *z_old); } double max_diff = *max_element(compare_vector.begin(), compare_vector.end()); double min_diff = *min_element(compare_vector.begin(), compare_vector.end()); ROS_INFO("Biggest differences: %f, %f", max_diff, min_diff); ``` Not sure how/where to put this test-code in a proper test, so I'll leave it here until I get some feedback. 
--- .../include/tf2_sensor_msgs/tf2_sensor_msgs.h | 23 +++++++++++++++---- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/tf2_sensor_msgs/include/tf2_sensor_msgs/tf2_sensor_msgs.h b/tf2_sensor_msgs/include/tf2_sensor_msgs/tf2_sensor_msgs.h index 9e16e0d2e..2b618a40f 100644 --- a/tf2_sensor_msgs/include/tf2_sensor_msgs/tf2_sensor_msgs.h +++ b/tf2_sensor_msgs/include/tf2_sensor_msgs/tf2_sensor_msgs.h @@ -83,12 +83,25 @@ void doTransform(const sensor_msgs::PointCloud2 &p_in, sensor_msgs::PointCloud2 sensor_msgs::PointCloud2Iterator y_out(p_out, "y"); sensor_msgs::PointCloud2Iterator z_out(p_out, "z"); - Eigen::Vector3f point; + // Using individual matrix elements directly is apparently faster than relying on Eigen + double r11 = t(0, 0); + double r12 = t(0, 1); + double r13 = t(0, 2); + double r21 = t(1, 0); + double r22 = t(1, 1); + double r23 = t(1, 2); + double r31 = t(2, 0); + double r32 = t(2, 1); + double r33 = t(2, 2); + double t1 = t(0, 3); + double t2 = t(1, 3); + double t3 = t(2, 3); + for(; x_in != x_in.end(); ++x_in, ++y_in, ++z_in, ++x_out, ++y_out, ++z_out) { - point = t * Eigen::Vector3f(*x_in, *y_in, *z_in); - *x_out = point.x(); - *y_out = point.y(); - *z_out = point.z(); + // Equivalent to "point = t * Eigen::Vector3f(*x_in, *y_in, *z_in);" + *x_out = *x_in * r11 + *y_in * r12 + *z_in * r13 + t1; + *y_out = *x_in * r21 + *y_in * r22 + *z_in * r23 + t2; + *z_out = *x_in * r31 + *y_in * r32 + *z_in * r33 + t3; } } inline