Add updated performance report (#255)

- adds a new performance report, since the latest one predates important performance enhancing changes in the code. - fixes a launch file missing the argument to change the configuration yaml. - adds the possiblity to measure the processing latency for beluga_amcl. Signed-off-by: Gerardo Puga <[email protected]>
Ekumen-OS · Sep 7, 2023 · 5c44158 · 5c44158
1 parent c0e2de1
commit 5c44158
Show file tree

Hide file tree

Showing 13 changed files with 581 additions and 23 deletions.
diff --git a/beluga_amcl/README.md b/beluga_amcl/README.md
@@ -5,21 +5,23 @@ The compatibility between `beluga_amcl` and its longstanding counterparts in the
 
 ## Table of Contents
 
-- [ROS 2 Interface](#ros-2-interface)
-  - [Parameters](#parameters)
-  - [Subscribed Topics](#subscribed-topics)
-  - [Published Topics](#published-topics)
-  - [Published Transforms](#published-transforms)
-  - [Advertised Services](#advertised-services)
-- [ROS 1 Interface](#ros-1-interface)
-  - [Parameters](#parameters-1)
-  - [Subscribed Topics](#subscribed-topics-1)
-  - [Published Topics](#published-topics-1)
-  - [Published Transforms](#published-transforms-1)
-  - [Advertised Services](#advertised-services-1)
-  - [Called Services](#called-services)
-- [Performance](#performance)
-- [Next Steps](#next-steps)
+- [Beluga AMCL](#beluga-amcl)
+  - [Table of Contents](#table-of-contents)
+  - [ROS 2 Interface](#ros-2-interface)
+    - [Parameters](#parameters)
+    - [Subscribed Topics](#subscribed-topics)
+    - [Published Topics](#published-topics)
+    - [Published Transforms](#published-transforms)
+    - [Advertised Services](#advertised-services)
+  - [ROS 1 Interface](#ros-1-interface)
+    - [Parameters](#parameters-1)
+    - [Subscribed Topics](#subscribed-topics-1)
+    - [Published Topics](#published-topics-1)
+    - [Published Transforms](#published-transforms-1)
+    - [Advertised Services](#advertised-services-1)
+    - [Called Services](#called-services)
+  - [Performance](#performance)
+  - [Next Steps](#next-steps)
 
 ## ROS 2 Interface
 
@@ -117,13 +119,13 @@ Defaults are `map`, `odom` and `base`.
 
 Performance reports are periodically generated and uploaded to track performance improvements and regressions. These reports are generated using a set of scripts in the [beluga_benchmark](../beluga_benchmark) package which can be used to compare the performance of `beluga_amcl` against that of `nav2_amcl` using a synthetic dataset.
 
-The following plot displays the RSS (Resident Set Size), CPU usage and APE (Absolute Pose Error) statistics for both  `beluga_amcl` and `nav2_amcl`, with particle sizes ranging between 250 and 200000 and sensor model `likelihood field`.
+The following plot displays the RSS (Resident Set Size), CPU usage, APE (Absolute Pose Error) and processing latency statistics for both  `beluga_amcl` and `nav2_amcl`, with particle sizes ranging between 250 and 200000 and sensor model `likelihood field`.
 
-![Beluga vs Nav2 AMCL](../beluga_benchmark/docs/reports/2023-06-03/likelihood_beluga_seq_vs_amcl_log.png)
+![Beluga vs Nav2 AMCL](../beluga_benchmark/docs/reports/2023-09-02/likelihood_beluga_vs_beluga_vs_amcl.png)
 
-The following plot displays the RSS (Resident Set Size), CPU usage and APE (Absolute Pose Error) statistics for both  `beluga_amcl` and `nav2_amcl`, with particle sizes ranging between 250 and 200000 and sensor model `beam`.
+The following plot displays the RSS (Resident Set Size), CPU usage, APE (Absolute Pose Error) and processing latency statistics for both  `beluga_amcl` and `nav2_amcl`, with particle sizes ranging between 250 and 200000 and sensor model `beam`.
 
-![Beluga vs Nav2 AMCL](../beluga_benchmark/docs/reports/2023-06-03/beam_beluga_seq_vs_amcl_log.png)
+![Beluga vs Nav2 AMCL](../beluga_benchmark/docs/reports/2023-09-02/beam_beluga_vs_beluga_vs_amcl.png)
 
 Further details can be found in [the reports folder here](../beluga_benchmark/docs/reports/).
 

diff --git a/beluga_benchmark/beluga_benchmark/compare_results.py b/beluga_benchmark/beluga_benchmark/compare_results.py
@@ -29,6 +29,7 @@
 DIR_NAME_PATTERN = 'benchmark_([0-9]+)_particles_output'
 SUPPORTED_TOPIC_NAMES = ('/pose', '/amcl_pose')
 EVO_RESULTS_FILE_NAME = 'evo_results.zip'
+TERMINAL_OUTPUT_LOG_FILE_NAME = 'output.log'
 
 
 def get_bag_est_topic(bag_path: Path):
@@ -69,6 +70,39 @@ def read_evo_stats(zip_file_path):
     return file_interface.load_res_file(zip_file_path).stats
 
 
+def parse_latency_data(output_file_path):
+    import re
+
+    # Example line:
+    pattern = re.compile(
+        r'\[.*\] \[INFO\] \[([0-9]*\.[0-9]*)\] \[amcl\]: Particle filter update iteration stats: '
+        + r'[0-9]* particles [0-9]* points - ([0-9]*\.[0-9]*)ms'
+    )
+    latencies_seq = []
+    with open(output_file_path, 'r') as f:
+        for line in f:
+            if not pattern.match(line):
+                continue
+            # first match is the whole line, second match the timestamp, and third the latency
+            latency = pattern.match(line).group(2)
+            latencies_seq.append(float(latency) * 1e-3)  # convert to seconds
+    return (
+        {
+            'latency_min': min(latencies_seq),
+            'latency_max': max(latencies_seq),
+            'latency_mean': sum(latencies_seq) / len(latencies_seq),
+            'latency_median': sorted(latencies_seq)[len(latencies_seq) // 2],
+        }
+        if latencies_seq
+        else {
+            'latency_min': 0.0,
+            'latency_max': 0.0,
+            'latency_mean': 0.0,
+            'latency_median': 0.0,
+        }
+    )
+
+
 def create_parameterized_series(results_path: Path):
     particles = []
     peak_rss = []
@@ -77,6 +111,10 @@ def create_parameterized_series(results_path: Path):
     ape_mean = []
     ape_median = []
     ape_max = []
+    latency_min = []
+    latency_max = []
+    latency_mean = []
+    latency_median = []
 
     for dir in results_path.iterdir():
         if not dir.is_dir():
@@ -100,6 +138,13 @@ def create_parameterized_series(results_path: Path):
         ape_max.append(stats["max"])
         ape_mean.append(stats["mean"])
         ape_median.append(stats["median"])
+        terminal_output_path = dir / TERMINAL_OUTPUT_LOG_FILE_NAME
+        latency_stats = parse_latency_data(terminal_output_path)
+        latency_min.append(latency_stats['latency_min'])
+        latency_max.append(latency_stats['latency_max'])
+        latency_mean.append(latency_stats['latency_mean'])
+        latency_median.append(latency_stats['latency_median'])
+
     return (
         pd.DataFrame(
             {
@@ -110,6 +155,10 @@ def create_parameterized_series(results_path: Path):
                 'ape_mean': ape_mean,
                 'ape_median': ape_median,
                 'ape_max': ape_max,
+                'latency_min': latency_min,
+                'latency_max': latency_max,
+                'latency_mean': latency_mean,
+                'latency_median': latency_median,
             }
         )
         .set_index('particles_n')
@@ -145,12 +194,23 @@ def main():
         help='Use log scale on y axis',
     )
 
+    default_plots = ['cpu_usage', 'peak_rss', 'ape_rmse', 'latency_median']
+    arg_parser.add_argument(
+        '--plot-names',
+        type=str,
+        nargs='*',  # 0 or more values expected => creates a list
+        default=default_plots,
+        help='List of plots to generate. Default: ' + ', '.join(default_plots),
+    )
+
     args = arg_parser.parse_args()
 
     assert len(args.series) == len(args.label), 'Number of series and labels must match'
 
     series = [
-        create_parameterized_series(series).add_prefix(label + '_')
+        create_parameterized_series(series)
+        .filter(items=args.plot_names)
+        .add_prefix(label + '_')
         for label, series in zip(args.label, args.series)
     ]
 

diff --git a/beluga_benchmark/docs/reports/2023-09-02/REPORT.md b/beluga_benchmark/docs/reports/2023-09-02/REPORT.md
@@ -0,0 +1,122 @@
+# Performance comparison between beluga_amcl and nav2_amcl
+
+## Environment details
+
+- CPU: **Intel(R) Core(TM) i9-9900 CPU @ 3.10GHz x 16 cores**
+- CPU Caches: L1 Data 32 KiB (x8), L1 Instruction 32 KiB (x8), L2 Unified 256 KiB (x8), L3 Unified 16384 KiB (x1)
+- RAM: 16384 MB
+- Host OS: Ubuntu 22.04.6 LTS
+- ROS 2 version: **Humble Hawksbill**
+- AMCL version: `ros-humble-nav2-amcl` package, version 1.1.9-1jammy.20230807.174459
+
+## Experimental setup
+
+The following configuration was used during the experiments:
+
+- The benchmarks were run using 250, 300, 400, 500, 750, 1000, 2000, 5000, 10000, 20000, 50000, 100000 and 200000 particles.
+- `beluga_amcl` was run both using multi-threaded (`par`) and single-threaded (`seq`) configurations. `nav2_amcl` only provides single-threaded execution.
+- Both the `beam sensor` and the `likelihood field` sensor model were tested.
+- The bagfile containing the synthetic dataset was replayed at 1x speed (real time).
+
+More specific configuration details can be found in the `yaml` files in the `baseline_configurations/` folder:
+
+- `nav2_amcl` (likelihood field) uses [likelihood_params.yaml](likelihood_params.yaml)
+- `beluga_amcl` (likelihood field, single-threaded) uses [likelihood_params.yaml](likelihood_params.yaml)
+- `beluga_amcl` (likelihood field, multi-threaded) uses [likelihood_params_par.yaml](likelihood_params_par.yaml)
+- `nav2_amcl` (beam) uses [beam_params.yaml](beam_params.yaml)
+- `beluga_amcl` (beam, single-threaded) uses [beam_params.yaml](beam_params.yaml)
+- `beluga_amcl` (beam, multi-threaded) uses [beam_params_par.yaml](beam_params_par.yaml)
+
+Except for the multithreading and sensor model parameters, the configuration on all of the files is identical.
+
+## Recorded metrics
+
+The following metrics were recorded during each run:
+
+- RSS (Resident Set Size), amount of memory alloated to the process. Measured in megabytes.
+- CPU usage. Measured in percentage of the total CPU usage.
+- rms APE (root-mean-squared Absolute Pose Error) statistics. In meters.
+- Processing latency (time interval between laser-scan reception and pose estimation). Measured in milliseconds.
+
+The processing latency was only recorded for `beluga_amcl`. The unmodified `nav2_amcl` binary does not provide this metric in the process output.
+
+## Results
+
+#### Beluga vs. Nav2 AMCL using Likelihood Field sensor model
+
+In the following plot the results of the benchmark are shown for all three of the tested alternatives. The vertical scale is logarithmic to better show the differences between the configurations throughout the whole range of particle counts.
+
+![Beluga Seq vs Beluga Par vs. Nav2 AMCL with Likelihood Field Sensor Model](likelihood_beluga_vs_beluga_vs_amcl.png)
+
+Comments on the results:
+
+- The memory usage of `beluga_amcl` (single and multi-threaded) is significantly lower than that of `nav2_amcl`.
+- Both `beluga_amcl` configurations have a lower CPU load than `nav2_amcl` when using the `likelihood` sensor model. The multi-threaded configuration of `beluga_amcl` causes more CPU load than the single-threaded one due to the additional synchronization overhead.
+- Above $50k$ particles `nav2_amcl` single-threaded process begins to saturate the CPU and its APE metrics begin to deteriorate, while `beluga_amcl` in both configurations remains stable thanks to its lower CPU load. For particle counts below $50k$ the APE metrics of all three alternatives are similar.
+- The multi-threaded configuration of `beluga_amcl` has lower latency than the single-threaded one, at the expense of additional CPU load. The latency of `nav2_amcl` was not measured for the reasons explained above.
+
+#### Beluga vs. Nav2 AMCL using Beam sensor model
+
+In the following plot the results of the benchmark are shown for all three of the tested configurations when using the Beam Sensor model. The vertical scale is logarithmic to better show the differences between the configurations throughout the whole range of particle counts.
+
+![Beluga Seq vs Beluga Par vs. Nav2 AMCL with Beam Sensor Model](beam_beluga_vs_beluga_vs_amcl.png)
+
+Comments on the results:
+
+- `beluga_amcl` in both multi-threaded and single-threaded configurations uses significantly less memory than `nav2_amcl`.
+- Both the single-threaded configuration of `beluga_amcl` and `nav2_amcl` have similar CPU load performance when using the `beam` sensor model. The multi-threaded configuration of `beluga_amcl` uses more CPU than the single-threaded one due to the additional synchronization overhead.
+- Above $50k$ particles both `nav2_amcl` and the single-threaded configuration of `beluga_amcl` begin to saturate the CPU and their APE metrics begin to deteriorate, while `beluga_amcl` in multi-threaded configuration remains stable. For particle counts below $50k$ the APE metrics of all three alternatives are similar.
+- The multi-threaded configuration of `beluga_amcl` has lower latency than the single-threaded one, at the expense of additional CPU load. The latency of `nav2_amcl` was not measured for the reasons explained above.
+
+## Conclusions
+
+- `beluga_amcl`'s memory usage is significantly lower than that of `nav2_amcl` in all configurations.
+- The single-threaded configuration of `beluga_amcl` performs at a lower or equal CPU load than that of `nav2_amcl`. When configured to use the `likelihood` sensor model the performance of the single-threaded `beluga_amcl` configuration is significantly better than that of `nav2_amcl`.
+- The multi-threaded configuration of `beluga_amcl` always uses more CPU than the single-threaded configuration.
+- `beluga_amcl`'s APE performance is similar to that of `nav2_amcl` for lower particle counts. For higher particle counts the performance begins to deteriorate when the processes saturate the available CPU resources.
+- The multi-threaded configuration of `beluga_amcl` has lower latency than the single-threaded one, at the expense of additional CPU load. Also, Thanks to the reduced latency and higher CPU saturation ceiling the localization solution using `beluga_amcl` in multi-threaded configuration remains stable for higher particle counts.
+
+## How to reproduce
+
+To replicate the benchmarks, after building and sourcing the workspace, run the following commands from the current directory:
+
+```bash
+mkdir beam_beluga_seq
+cd beam_beluga_seq
+ros2 run beluga_benchmark parameterized_run --initial-pose-y 2.0 250 300 400 500 750 1000 2000 5000 10000 20000 50000 100000 200000  --params-file $(pwd)/../../baseline_configurations/beam_params.yaml
+cd -
+mkdir beam_beluga_par
+cd beam_beluga_par
+ros2 run beluga_benchmark parameterized_run --initial-pose-y 2.0 250 300 400 500 750 1000 2000 5000 10000 20000 50000 100000 200000  --params-file $(pwd)/../../baseline_configurations/beam_params_par.yaml
+cd -
+mkdir beam_nav2_amcl
+cd beam_nav2_amcl
+ros2 run beluga_benchmark parameterized_run --initial-pose-y 2.0 250 300 400 500 750 1000 2000 5000 10000 20000 50000 100000 200000 --params-file $(pwd)/../../baseline_configurations/beam_params.yaml --package nav2_amcl --executable amcl
+cd -
+mkdir likelihood_beluga_seq
+cd likelihood_beluga_seq
+ros2 run beluga_benchmark parameterized_run --initial-pose-y 2.0 250 300 400 500 750 1000 2000 5000 10000 20000 50000 100000 200000  --params-file $(pwd)/../../baseline_configurations/likelihood_params.yaml
+cd -
+mkdir likelihood_beluga_par
+cd likelihood_beluga_par
+ros2 run beluga_benchmark parameterized_run --initial-pose-y 2.0 250 300 400 500 750 1000 2000 5000 10000 20000 50000 100000 200000  --params-file $(pwd)/../../baseline_configurations/likelihood_params_par.yaml
+cd -
+mkdir likelihood_nav2_amcl
+cd likelihood_nav2_amcl
+ros2 run beluga_benchmark parameterized_run --initial-pose-y 2.0 250 300 400 500 750 1000 2000 5000 10000 20000 50000 100000 200000 --params-file $(pwd)/../../baseline_configurations/likelihood_params.yaml --package nav2_amcl --executable amcl
+cd -
+```
+
+Once the data has been acquired, it can be visualized using the following commands:
+
+```bash
+ros2 run beluga_benchmark compare_results \
+    -s beam_beluga_seq -l beam_beluga_seq \
+    -s beam_beluga_par -l beam_beluga_par \
+    -s beam_nav2_amcl  -l beam_nav2_amcl --use-ylog
+
+ros2 run beluga_benchmark compare_results \
+    -s likelihood_beluga_seq -l likelihood_beluga_seq \
+    -s likelihood_beluga_par -l likelihood_beluga_par \
+    -s likelihood_nav2_amcl  -l likelihood_nav2_amcl --use-ylog
+```
diff --git a/beluga_benchmark/docs/reports/2023-09-02/beam_beluga_vs_beluga_vs_amcl.png b/beluga_benchmark/docs/reports/2023-09-02/beam_beluga_vs_beluga_vs_amcl.png
diff --git a/beluga_benchmark/docs/reports/2023-09-02/beam_beluga_vs_beluga_vs_amcl_linear.png b/beluga_benchmark/docs/reports/2023-09-02/beam_beluga_vs_beluga_vs_amcl_linear.png
diff --git a/beluga_benchmark/docs/reports/2023-09-02/likelihood_beluga_vs_beluga_vs_amcl.png b/beluga_benchmark/docs/reports/2023-09-02/likelihood_beluga_vs_beluga_vs_amcl.png
diff --git a/...enchmark/docs/reports/2023-09-02/likelihood_beluga_vs_beluga_vs_amcl_linear.png b/...enchmark/docs/reports/2023-09-02/likelihood_beluga_vs_beluga_vs_amcl_linear.png
diff --git a/beluga_benchmark/docs/reports/baseline_configurations/beam_params.yaml b/beluga_benchmark/docs/reports/baseline_configurations/beam_params.yaml
@@ -0,0 +1,93 @@
+amcl:
+  ros__parameters:
+    # Odometry motion model type.
+    robot_model_type: nav2_amcl::DifferentialMotionModel
+    # Expected process noise in odometry’s rotation estimate from rotation.
+    alpha1: 0.1
+    # Expected process noise in odometry’s rotation estimate from translation.
+    alpha2: 0.05
+    # Expected process noise in odometry’s translation estimate from translation.
+    alpha3: 0.1
+    # Expected process noise in odometry’s translation estimate from rotation.
+    alpha4: 0.05
+    # Expected process noise in odometry's strafe estimate from translation.
+    alpha5: 0.1
+    # The name of the coordinate frame published by the localization system.
+    global_frame_id: map
+    # The name of the coordinate frame published by the odometry system.
+    odom_frame_id: odom
+    # The name of the coordinate frame of the robot base.
+    base_frame_id: base
+    # The name of the topic where the map is published by the map server.
+    map_topic: map
+    # The name of the topic where scans are being published.
+    scan_topic: scan
+    # The name of the topic where an initial pose can be published.
+    # The particle filter will be reset using the provided pose with covariance.
+    initial_pose_topic: initialpose
+    # Maximum number of particles that will be used.
+    max_particles: 2000
+    # Minimum number of particles that will be used.
+    min_particles: 500
+    # Error allowed by KLD criteria.
+    pf_err: 0.05
+    # KLD criteria parameter.
+    # Upper standard normal quantile for the probability that the error in the
+    # estimated distribution is less than pf_err.
+    pf_z: 3.0
+    # Fast exponential filter constant, used to filter the average particles weights.
+    # Random particles are added if the fast filter result is larger than the slow filter result
+    # allowing the particle filter to recover from a bad approximation.
+    recovery_alpha_fast: 0.
+    # Slow exponential filter constant, used to filter the average particles weights.
+    # Random particles are added if the fast filter result is larger than the slow filter result
+    # allowing the particle filter to recover from a bad approximation.
+    recovery_alpha_slow: 0.
+    # Resample will happen after the amount of updates specified here happen.
+    resample_interval: 1
+    # Minimum angle difference from last resample for resampling to happen again.
+    update_min_a: 0.2
+    # Maximum angle difference from last resample for resampling to happen again.
+    update_min_d: 0.25
+    # Laser sensor model type.
+    laser_model_type: beam
+    # Maximum distance of an obstacle (if the distance is higher, this one will be used in the likelihood map).
+    laser_likelihood_max_dist: 2.0
+    # Maximum range of the laser.
+    laser_max_range: 100.0
+    # Maximum number of beams to use in the likelihood field sensor model.
+    max_beams: 100
+    # Weight used to combine the probability of hitting an obstacle.
+    z_hit: 0.5
+    # Weight used to combine the probability of random noise in perception.
+    z_rand: 0.5
+    # Standard deviation of a gaussian centered arounds obstacles.
+    sigma_hit: 0.2
+    # Whether to broadcast map to odom transform or not.
+    tf_broadcast: true
+    # Transform tolerance allowed.
+    transform_tolerance: 1.0
+    # Execution policy used to apply the motion update and importance weight steps.
+    # Valid options: "seq", "par".
+    execution_policy: seq
+    # Whether to set initial pose based on parameters.
+    # When enabled, particles will be initialized with the specified pose coordinates and covariance.
+    set_initial_pose: false
+    # Initial pose x coordinate.
+    initial_pose.x: 0.0
+    # Initial pose y coordinate.
+    initial_pose.y: 0.0
+    # Initial pose yaw coordinate.
+    initial_pose.yaw: 0.0
+    # Initial pose xx covariance.
+    initial_pose.covariance_x: 0.25
+    # Initial pose yy covariance.
+    initial_pose.covariance_y: 0.25
+    # Initial pose yawyaw covariance.
+    initial_pose.covariance_yaw: 0.0685
+    # Initial pose xy covariance.
+    initial_pose.covariance_xy: 0.0
+    # Initial pose xyaw covariance.
+    initial_pose.covariance_xyaw: 0.0
+    # Initial pose yyaw covariance.
+    initial_pose.covariance_yyaw: 0.0