From 6d547d7ce6d9847bcf47e121fd799718f356d90d Mon Sep 17 00:00:00 2001
From: James Liu <contact@jamessliu.com>
Date: Mon, 19 Feb 2024 16:12:41 -0800
Subject: [PATCH] Allow Mesh-related queue phase systems to parallelize
 (#11804)

# Objective
Partially addresses #3548. `queue_shadows` and `queue_material_meshes`
cannot parallelize because of the `ResMut<RenderMeshInstances>`
parameter for `queue_material_meshes`.

## Solution
Change the `material_bind_group` field to use atomics instead of needing
full mutable access. Change the `ResMut` to a `Res`, which should allow
both sets of systems to parallelize without issue.

## Performance
Tested against `many_foxes`, this has a significant improvement over the
entire render schedule. (Yellow is this PR, red is main)

![image](https://github.com/bevyengine/bevy/assets/3137680/6cc7f346-4f50-4f12-a383-682a9ce1daf6)

The use of atomics does seem to have a negative effect on
`queue_material_meshes` (roughly a 8.29% increase in time spent in the
system).

![image](https://github.com/bevyengine/bevy/assets/3137680/7907079a-863d-4760-aa5b-df68c006ea36)

`queue_shadows` seems to be ever so slightly slower (1.6% more time
spent) in the system.

![image](https://github.com/bevyengine/bevy/assets/3137680/6d90af73-b922-45e4-bae5-df200e8b9784)

`batch_and_prepare_render_phase` seems to be a mix, but overall seems to
be slightly *faster* by about 5%.

![image](https://github.com/bevyengine/bevy/assets/3137680/fac638ff-8c90-436b-9362-c6209b18957c)
---
 crates/bevy_pbr/src/material.rs               | 43 ++++++++++++++++---
 crates/bevy_pbr/src/render/light.rs           |  8 ++--
 crates/bevy_pbr/src/render/mesh.rs            | 15 ++++---
 .../src/render_resource/resource_macros.rs    | 12 ++++++
 4 files changed, 61 insertions(+), 17 deletions(-)
diff --git a/crates/bevy_pbr/src/material.rs b/crates/bevy_pbr/src/material.rs
index a4a8eb8401e8a..5f8bce5dfe7bb 100644
--- a/crates/bevy_pbr/src/material.rs
+++ b/crates/bevy_pbr/src/material.rs
@@ -31,8 +31,9 @@ use bevy_render::{
     Extract, ExtractSchedule, Render, RenderApp, RenderSet,
 };
 use bevy_utils::{tracing::error, HashMap, HashSet};
-use std::hash::Hash;
 use std::marker::PhantomData;
+use std::sync::atomic::{AtomicU32, Ordering};
+use std::{hash::Hash, num::NonZeroU32};
 
 use self::{irradiance_volume::IrradianceVolume, prelude::EnvironmentMapLight};
 
@@ -236,9 +237,7 @@ where
                             .after(prepare_materials::<M>),
                         queue_material_meshes::<M>
                             .in_set(RenderSet::QueueMeshes)
-                            .after(prepare_materials::<M>)
-                            // queue_material_meshes only writes to `material_bind_group_id`, which `queue_shadows` doesn't read
-                            .ambiguous_with(render::queue_shadows::<M>),
+                            .after(prepare_materials::<M>),
                     ),
                 );
         }
@@ -466,7 +465,7 @@ pub fn queue_material_meshes<M: Material>(
     msaa: Res<Msaa>,
     render_meshes: Res<RenderAssets<Mesh>>,
     render_materials: Res<RenderMaterials<M>>,
-    mut render_mesh_instances: ResMut<RenderMeshInstances>,
+    render_mesh_instances: Res<RenderMeshInstances>,
     render_material_instances: Res<RenderMaterialInstances<M>>,
     render_lightmaps: Res<RenderLightmaps>,
     mut views: Query<(
@@ -592,7 +591,7 @@ pub fn queue_material_meshes<M: Material>(
             let Some(material_asset_id) = render_material_instances.get(visible_entity) else {
                 continue;
             };
-            let Some(mesh_instance) = render_mesh_instances.get_mut(visible_entity) else {
+            let Some(mesh_instance) = render_mesh_instances.get(visible_entity) else {
                 continue;
             };
             let Some(mesh) = render_meshes.get(mesh_instance.mesh_asset_id) else {
@@ -646,7 +645,9 @@ pub fn queue_material_meshes<M: Material>(
                 }
             };
 
-            mesh_instance.material_bind_group_id = material.get_bind_group_id();
+            mesh_instance
+                .material_bind_group_id
+                .set(material.get_bind_group_id());
 
             match material.properties.alpha_mode {
                 AlphaMode::Opaque => {
@@ -795,6 +796,34 @@ pub struct PreparedMaterial<T: Material> {
 #[derive(Component, Clone, Copy, Default, PartialEq, Eq, Deref, DerefMut)]
 pub struct MaterialBindGroupId(Option<BindGroupId>);
 
+/// An atomic version of [`MaterialBindGroupId`] that can be read from and written to
+/// safely from multiple threads.
+#[derive(Default)]
+pub struct AtomicMaterialBindGroupId(AtomicU32);
+
+impl AtomicMaterialBindGroupId {
+    /// Stores a value atomically. Uses [`Ordering::Relaxed`] so there is zero guarentee of ordering
+    /// relative to other operations.
+    ///
+    /// See also:  [`AtomicU32::store`].
+    pub fn set(&self, id: MaterialBindGroupId) {
+        let id = if let Some(id) = id.0 {
+            NonZeroU32::from(id).get()
+        } else {
+            0
+        };
+        self.0.store(id, Ordering::Relaxed);
+    }
+
+    /// Loads a value atomically. Uses [`Ordering::Relaxed`] so there is zero guarentee of ordering
+    /// relative to other operations.
+    ///
+    /// See also:  [`AtomicU32::load`].
+    pub fn get(&self) -> MaterialBindGroupId {
+        MaterialBindGroupId(NonZeroU32::new(self.0.load(Ordering::Relaxed)).map(BindGroupId::from))
+    }
+}
+
 impl<T: Material> PreparedMaterial<T> {
     pub fn get_bind_group_id(&self) -> MaterialBindGroupId {
         MaterialBindGroupId(Some(self.bind_group.id()))
diff --git a/crates/bevy_pbr/src/render/light.rs b/crates/bevy_pbr/src/render/light.rs
index e57dd38f4a956..5a923e73b17ad 100644
--- a/crates/bevy_pbr/src/render/light.rs
+++ b/crates/bevy_pbr/src/render/light.rs
@@ -1591,7 +1591,7 @@ pub fn queue_shadows<M: Material>(
     shadow_draw_functions: Res<DrawFunctions<Shadow>>,
     prepass_pipeline: Res<PrepassPipeline<M>>,
     render_meshes: Res<RenderAssets<Mesh>>,
-    mut render_mesh_instances: ResMut<RenderMeshInstances>,
+    render_mesh_instances: Res<RenderMeshInstances>,
     render_materials: Res<RenderMaterials<M>>,
     render_material_instances: Res<RenderMaterialInstances<M>>,
     mut pipelines: ResMut<SpecializedMeshPipelines<PrepassPipeline<M>>>,
@@ -1637,7 +1637,7 @@ pub fn queue_shadows<M: Material>(
             // NOTE: Lights with shadow mapping disabled will have no visible entities
             // so no meshes will be queued
             for entity in visible_entities.iter().copied() {
-                let Some(mesh_instance) = render_mesh_instances.get_mut(&entity) else {
+                let Some(mesh_instance) = render_mesh_instances.get(&entity) else {
                     continue;
                 };
                 if !mesh_instance.shadow_caster {
@@ -1697,7 +1697,9 @@ pub fn queue_shadows<M: Material>(
                     }
                 };
 
-                mesh_instance.material_bind_group_id = material.get_bind_group_id();
+                mesh_instance
+                    .material_bind_group_id
+                    .set(material.get_bind_group_id());
 
                 shadow_phase.add(Shadow {
                     draw_function: draw_shadow_mesh,
diff --git a/crates/bevy_pbr/src/render/mesh.rs b/crates/bevy_pbr/src/render/mesh.rs
index d34121c919e89..7ea546749f744 100644
--- a/crates/bevy_pbr/src/render/mesh.rs
+++ b/crates/bevy_pbr/src/render/mesh.rs
@@ -1,7 +1,8 @@
 use crate::{
-    MaterialBindGroupId, NotShadowCaster, NotShadowReceiver, PreviousGlobalTransform, Shadow,
-    ViewFogUniformOffset, ViewLightProbesUniformOffset, ViewLightsUniformOffset,
-    CLUSTERED_FORWARD_STORAGE_BUFFER_COUNT, MAX_CASCADES_PER_LIGHT, MAX_DIRECTIONAL_LIGHTS,
+    AtomicMaterialBindGroupId, MaterialBindGroupId, NotShadowCaster, NotShadowReceiver,
+    PreviousGlobalTransform, Shadow, ViewFogUniformOffset, ViewLightProbesUniformOffset,
+    ViewLightsUniformOffset, CLUSTERED_FORWARD_STORAGE_BUFFER_COUNT, MAX_CASCADES_PER_LIGHT,
+    MAX_DIRECTIONAL_LIGHTS,
 };
 use bevy_app::{Plugin, PostUpdate};
 use bevy_asset::{load_internal_asset, AssetId, Handle};
@@ -251,14 +252,14 @@ bitflags::bitflags! {
 pub struct RenderMeshInstance {
     pub transforms: MeshTransforms,
     pub mesh_asset_id: AssetId<Mesh>,
-    pub material_bind_group_id: MaterialBindGroupId,
+    pub material_bind_group_id: AtomicMaterialBindGroupId,
     pub shadow_caster: bool,
     pub automatic_batching: bool,
 }
 
 impl RenderMeshInstance {
     pub fn should_batch(&self) -> bool {
-        self.automatic_batching && self.material_bind_group_id.is_some()
+        self.automatic_batching && self.material_bind_group_id.get().is_some()
     }
 }
 
@@ -323,7 +324,7 @@ pub fn extract_meshes(
                     mesh_asset_id: handle.id(),
                     transforms,
                     shadow_caster: !not_shadow_caster,
-                    material_bind_group_id: MaterialBindGroupId::default(),
+                    material_bind_group_id: AtomicMaterialBindGroupId::default(),
                     automatic_batching: !no_automatic_batching,
                 },
             ));
@@ -475,7 +476,7 @@ impl GetBatchData for MeshPipeline {
                 maybe_lightmap.map(|lightmap| lightmap.uv_rect),
             ),
             mesh_instance.should_batch().then_some((
-                mesh_instance.material_bind_group_id,
+                mesh_instance.material_bind_group_id.get(),
                 mesh_instance.mesh_asset_id,
                 maybe_lightmap.map(|lightmap| lightmap.image),
             )),
diff --git a/crates/bevy_render/src/render_resource/resource_macros.rs b/crates/bevy_render/src/render_resource/resource_macros.rs
index ab5ce2cbebe19..de2ea0ec00e58 100644
--- a/crates/bevy_render/src/render_resource/resource_macros.rs
+++ b/crates/bevy_render/src/render_resource/resource_macros.rs
@@ -136,6 +136,18 @@ macro_rules! define_atomic_id {
                 }))
             }
         }
+
+        impl From<$atomic_id_type> for core::num::NonZeroU32 {
+            fn from(value: $atomic_id_type) -> Self {
+                value.0
+            }
+        }
+
+        impl From<core::num::NonZeroU32> for $atomic_id_type {
+            fn from(value: core::num::NonZeroU32) -> Self {
+                Self(value)
+            }
+        }
     };
 }