bevy_render/renderer/mod.rs

mod graph_runner;
mod render_device;

use bevy_derive::{Deref, DerefMut};
use bevy_tasks::ComputeTaskPool;
use bevy_utils::tracing::{error, info, info_span, warn};
pub use graph_runner::*;
pub use render_device::*;

use crate::{
    diagnostic::{internal::DiagnosticsRecorder, RecordDiagnostics},
    render_graph::RenderGraph,
    render_phase::TrackedRenderPass,
    render_resource::RenderPassDescriptor,
    settings::{WgpuSettings, WgpuSettingsPriority},
    view::{ExtractedWindows, ViewTarget},
};
use bevy_ecs::{prelude::*, system::SystemState};
use bevy_time::TimeSender;
use bevy_utils::Instant;
use std::sync::Arc;
use wgpu::{
    Adapter, AdapterInfo, CommandBuffer, CommandEncoder, DeviceType, Instance, Queue,
    RequestAdapterOptions,
};

/// Updates the [`RenderGraph`] with all of its nodes and then runs it to render the entire frame.
pub fn render_system(world: &mut World, state: &mut SystemState<Query<Entity, With<ViewTarget>>>) {
    world.resource_scope(|world, mut graph: Mut<RenderGraph>| {
        graph.update(world);
    });

    let diagnostics_recorder = world.remove_resource::<DiagnosticsRecorder>();

    let graph = world.resource::<RenderGraph>();
    let render_device = world.resource::<RenderDevice>();
    let render_queue = world.resource::<RenderQueue>();
    let render_adapter = world.resource::<RenderAdapter>();

    let res = RenderGraphRunner::run(
        graph,
        render_device.clone(), // TODO: is this clone really necessary?
        diagnostics_recorder,
        &render_queue.0,
        &render_adapter.0,
        world,
        |encoder| {
            crate::view::screenshot::submit_screenshot_commands(world, encoder);
        },
    );

    match res {
        Ok(Some(diagnostics_recorder)) => {
            world.insert_resource(diagnostics_recorder);
        }
        Ok(None) => {}
        Err(e) => {
            error!("Error running render graph:");
            {
                let mut src: &dyn std::error::Error = &e;
                loop {
                    error!("> {}", src);
                    match src.source() {
                        Some(s) => src = s,
                        None => break,
                    }
                }
            }

            panic!("Error running render graph: {e}");
        }
    }

    {
        let _span = info_span!("present_frames").entered();

        // Remove ViewTarget components to ensure swap chain TextureViews are dropped.
        // If all TextureViews aren't dropped before present, acquiring the next swap chain texture will fail.
        let view_entities = state.get(world).iter().collect::<Vec<_>>();
        for view_entity in view_entities {
            world.entity_mut(view_entity).remove::<ViewTarget>();
        }

        let mut windows = world.resource_mut::<ExtractedWindows>();
        for window in windows.values_mut() {
            if let Some(wrapped_texture) = window.swap_chain_texture.take() {
                if let Some(surface_texture) = wrapped_texture.try_unwrap() {
                    // TODO(clean): the winit docs recommend calling pre_present_notify before this.
                    // Although `present()` doesn't present the frame itself, it schedules the frame
                    // to be presented by wgpu.
                    // https://docs.rs/winit/0.29.9/wasm32-unknown-unknown/winit/window/struct.Window.html#method.pre_present_notify
                    surface_texture.present();
                }
            }
        }

        #[cfg(feature = "tracing-tracy")]
        bevy_utils::tracing::event!(
            bevy_utils::tracing::Level::INFO,
            message = "finished frame",
            tracy.frame_mark = true
        );
    }

    crate::view::screenshot::collect_screenshots(world);

    // update the time and send it to the app world
    let time_sender = world.resource::<TimeSender>();
    if let Err(error) = time_sender.0.try_send(Instant::now()) {
        match error {
            bevy_time::TrySendError::Full(_) => {
                panic!("The TimeSender channel should always be empty during render. You might need to add the bevy::core::time_system to your app.");
            }
            bevy_time::TrySendError::Disconnected(_) => {
                // ignore disconnected errors, the main world probably just got dropped during shutdown
            }
        }
    }
}

/// A wrapper to safely make `wgpu` types Send / Sync on web with atomics enabled.
/// On web with `atomics` enabled the inner value can only be accessed
/// or dropped on the `wgpu` thread or else a panic will occur.
/// On other platforms the wrapper simply contains the wrapped value.
#[cfg(not(all(target_arch = "wasm32", target_feature = "atomics")))]
#[derive(Debug, Clone, Deref, DerefMut)]
pub struct WgpuWrapper<T>(T);
#[cfg(all(target_arch = "wasm32", target_feature = "atomics"))]
#[derive(Debug, Clone, Deref, DerefMut)]
pub struct WgpuWrapper<T>(send_wrapper::SendWrapper<T>);

// SAFETY: SendWrapper is always Send + Sync.
#[cfg(all(target_arch = "wasm32", target_feature = "atomics"))]
unsafe impl<T> Send for WgpuWrapper<T> {}
#[cfg(all(target_arch = "wasm32", target_feature = "atomics"))]
unsafe impl<T> Sync for WgpuWrapper<T> {}

#[cfg(not(all(target_arch = "wasm32", target_feature = "atomics")))]
impl<T> WgpuWrapper<T> {
    pub fn new(t: T) -> Self {
        Self(t)
    }
}

#[cfg(all(target_arch = "wasm32", target_feature = "atomics"))]
impl<T> WgpuWrapper<T> {
    pub fn new(t: T) -> Self {
        Self(send_wrapper::SendWrapper::new(t))
    }
}

/// This queue is used to enqueue tasks for the GPU to execute asynchronously.
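///
/// A minimal construction sketch (illustrative; `queue` is assumed to be a
/// `wgpu::Queue` obtained from `Adapter::request_device`):
///
/// ```ignore
/// let render_queue = RenderQueue(Arc::new(WgpuWrapper::new(queue)));
/// ```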
#[derive(Resource, Clone, Deref, DerefMut)]
pub struct RenderQueue(pub Arc<WgpuWrapper<Queue>>);

/// The handle to the physical device being used for rendering.
/// See [`Adapter`] for more info.
#[derive(Resource, Clone, Debug, Deref, DerefMut)]
pub struct RenderAdapter(pub Arc<WgpuWrapper<Adapter>>);

/// The GPU instance is used to initialize the [`RenderQueue`] and [`RenderDevice`],
/// as well as to create [`WindowSurfaces`](crate::view::window::WindowSurfaces).
#[derive(Resource, Clone, Deref, DerefMut)]
pub struct RenderInstance(pub Arc<WgpuWrapper<Instance>>);

/// The [`AdapterInfo`] of the adapter in use by the renderer.
#[derive(Resource, Clone, Deref, DerefMut)]
pub struct RenderAdapterInfo(pub WgpuWrapper<AdapterInfo>);

const GPU_NOT_FOUND_ERROR_MESSAGE: &str = if cfg!(target_os = "linux") {
    "Unable to find a GPU! Make sure you have installed required drivers! For extra information, see: https://github.com/bevyengine/bevy/blob/latest/docs/linux_dependencies.md"
} else {
    "Unable to find a GPU! Make sure you have installed required drivers!"
};

/// Initializes the renderer by retrieving and preparing the GPU instance, device and queue
/// for the specified backend.
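///
/// A minimal call sketch (illustrative; assumes an async context and default settings):
///
/// ```ignore
/// let instance = wgpu::Instance::default();
/// let (device, queue, adapter_info, adapter) = initialize_renderer(
///     &instance,
///     &WgpuSettings::default(),
///     &wgpu::RequestAdapterOptions::default(),
/// )
/// .await;
/// ```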
pub async fn initialize_renderer(
    instance: &Instance,
    options: &WgpuSettings,
    request_adapter_options: &RequestAdapterOptions<'_, '_>,
) -> (RenderDevice, RenderQueue, RenderAdapterInfo, RenderAdapter) {
    let adapter = instance
        .request_adapter(request_adapter_options)
        .await
        .expect(GPU_NOT_FOUND_ERROR_MESSAGE);

    let adapter_info = adapter.get_info();
    info!("{:?}", adapter_info);

    if adapter_info.device_type == DeviceType::Cpu {
        warn!(
            "The selected adapter is using a driver that only supports software rendering. \
             This is likely to be very slow. See https://bevyengine.org/learn/errors/b0006/"
        );
    }

    #[cfg(feature = "wgpu_trace")]
    let trace_path = {
        let path = std::path::Path::new("wgpu_trace");
        // ignore potential error, wgpu will log it
        let _ = std::fs::create_dir(path);
        Some(path)
    };
    #[cfg(not(feature = "wgpu_trace"))]
    let trace_path = None;

    // Maybe get features and limits based on what is supported by the adapter/backend
    let mut features = wgpu::Features::empty();
    let mut limits = options.limits.clone();
    if matches!(options.priority, WgpuSettingsPriority::Functionality) {
        features = adapter.features();
        if adapter_info.device_type == wgpu::DeviceType::DiscreteGpu {
            // `MAPPABLE_PRIMARY_BUFFERS` can have a significant, negative performance impact for
            // discrete GPUs due to having to transfer data across the PCI-E bus and so it
            // should not be automatically enabled in this case. It is however beneficial for
            // integrated GPUs.
            features -= wgpu::Features::MAPPABLE_PRIMARY_BUFFERS;
        }

        // RAY_QUERY and RAY_TRACING_ACCELERATION_STRUCTURE will sometimes cause DeviceLost failures
        // on platforms that report them as supported:
        // <https://github.com/gfx-rs/wgpu/issues/5488>
        // wgpu also doesn't fully support these features yet, so disable them until they are
        // safe to enable.
        features -= wgpu::Features::RAY_QUERY;
        features -= wgpu::Features::RAY_TRACING_ACCELERATION_STRUCTURE;

        limits = adapter.limits();
    }

    // Enforce the disabled features
    if let Some(disabled_features) = options.disabled_features {
        features -= disabled_features;
    }
    // NOTE: |= is used here to ensure that any explicitly-enabled features are respected.
    features |= options.features;

    // Enforce the limit constraints
    if let Some(constrained_limits) = options.constrained_limits.as_ref() {
        // NOTE: Respect the configured limits as an 'upper bound'. This means for 'max' limits, we
        // take the minimum of the calculated limits according to the adapter/backend and the
        // specified max_limits. For 'min' limits, take the maximum instead. This is intended to
        // err on the side of being conservative. We can't claim 'higher' limits than the
        // adapter supports, but we can constrain to 'lower' limits.
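        // For example (illustrative numbers): if the adapter reports
        // max_texture_dimension_2d = 16384 and the constrained limit is 8192, the effective
        // limit is 8192; if the adapter reports min_uniform_buffer_offset_alignment = 64 and
        // the constrained minimum is 256, the effective alignment is 256.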
        limits = wgpu::Limits {
            max_texture_dimension_1d: limits
                .max_texture_dimension_1d
                .min(constrained_limits.max_texture_dimension_1d),
            max_texture_dimension_2d: limits
                .max_texture_dimension_2d
                .min(constrained_limits.max_texture_dimension_2d),
            max_texture_dimension_3d: limits
                .max_texture_dimension_3d
                .min(constrained_limits.max_texture_dimension_3d),
            max_texture_array_layers: limits
                .max_texture_array_layers
                .min(constrained_limits.max_texture_array_layers),
            max_bind_groups: limits
                .max_bind_groups
                .min(constrained_limits.max_bind_groups),
            max_dynamic_uniform_buffers_per_pipeline_layout: limits
                .max_dynamic_uniform_buffers_per_pipeline_layout
                .min(constrained_limits.max_dynamic_uniform_buffers_per_pipeline_layout),
            max_dynamic_storage_buffers_per_pipeline_layout: limits
                .max_dynamic_storage_buffers_per_pipeline_layout
                .min(constrained_limits.max_dynamic_storage_buffers_per_pipeline_layout),
            max_sampled_textures_per_shader_stage: limits
                .max_sampled_textures_per_shader_stage
                .min(constrained_limits.max_sampled_textures_per_shader_stage),
            max_samplers_per_shader_stage: limits
                .max_samplers_per_shader_stage
                .min(constrained_limits.max_samplers_per_shader_stage),
            max_storage_buffers_per_shader_stage: limits
                .max_storage_buffers_per_shader_stage
                .min(constrained_limits.max_storage_buffers_per_shader_stage),
            max_storage_textures_per_shader_stage: limits
                .max_storage_textures_per_shader_stage
                .min(constrained_limits.max_storage_textures_per_shader_stage),
            max_uniform_buffers_per_shader_stage: limits
                .max_uniform_buffers_per_shader_stage
                .min(constrained_limits.max_uniform_buffers_per_shader_stage),
            max_uniform_buffer_binding_size: limits
                .max_uniform_buffer_binding_size
                .min(constrained_limits.max_uniform_buffer_binding_size),
            max_storage_buffer_binding_size: limits
                .max_storage_buffer_binding_size
                .min(constrained_limits.max_storage_buffer_binding_size),
            max_vertex_buffers: limits
                .max_vertex_buffers
                .min(constrained_limits.max_vertex_buffers),
            max_vertex_attributes: limits
                .max_vertex_attributes
                .min(constrained_limits.max_vertex_attributes),
            max_vertex_buffer_array_stride: limits
                .max_vertex_buffer_array_stride
                .min(constrained_limits.max_vertex_buffer_array_stride),
            max_push_constant_size: limits
                .max_push_constant_size
                .min(constrained_limits.max_push_constant_size),
            min_uniform_buffer_offset_alignment: limits
                .min_uniform_buffer_offset_alignment
                .max(constrained_limits.min_uniform_buffer_offset_alignment),
            min_storage_buffer_offset_alignment: limits
                .min_storage_buffer_offset_alignment
                .max(constrained_limits.min_storage_buffer_offset_alignment),
            max_inter_stage_shader_components: limits
                .max_inter_stage_shader_components
                .min(constrained_limits.max_inter_stage_shader_components),
            max_compute_workgroup_storage_size: limits
                .max_compute_workgroup_storage_size
                .min(constrained_limits.max_compute_workgroup_storage_size),
            max_compute_invocations_per_workgroup: limits
                .max_compute_invocations_per_workgroup
                .min(constrained_limits.max_compute_invocations_per_workgroup),
            max_compute_workgroup_size_x: limits
                .max_compute_workgroup_size_x
                .min(constrained_limits.max_compute_workgroup_size_x),
            max_compute_workgroup_size_y: limits
                .max_compute_workgroup_size_y
                .min(constrained_limits.max_compute_workgroup_size_y),
            max_compute_workgroup_size_z: limits
                .max_compute_workgroup_size_z
                .min(constrained_limits.max_compute_workgroup_size_z),
            max_compute_workgroups_per_dimension: limits
                .max_compute_workgroups_per_dimension
                .min(constrained_limits.max_compute_workgroups_per_dimension),
            max_buffer_size: limits
                .max_buffer_size
                .min(constrained_limits.max_buffer_size),
            max_bindings_per_bind_group: limits
                .max_bindings_per_bind_group
                .min(constrained_limits.max_bindings_per_bind_group),
            max_non_sampler_bindings: limits
                .max_non_sampler_bindings
                .min(constrained_limits.max_non_sampler_bindings),
            max_color_attachments: limits
                .max_color_attachments
                .min(constrained_limits.max_color_attachments),
            max_color_attachment_bytes_per_sample: limits
                .max_color_attachment_bytes_per_sample
                .min(constrained_limits.max_color_attachment_bytes_per_sample),
            min_subgroup_size: limits
                .min_subgroup_size
                .max(constrained_limits.min_subgroup_size),
            max_subgroup_size: limits
                .max_subgroup_size
                .min(constrained_limits.max_subgroup_size),
        };
    }

    let (device, queue) = adapter
        .request_device(
            &wgpu::DeviceDescriptor {
                label: options.device_label.as_ref().map(|a| a.as_ref()),
                required_features: features,
                required_limits: limits,
            },
            trace_path,
        )
        .await
        .unwrap();
    let queue = Arc::new(WgpuWrapper::new(queue));
    let adapter = Arc::new(WgpuWrapper::new(adapter));
    (
        RenderDevice::from(device),
        RenderQueue(queue),
        RenderAdapterInfo(WgpuWrapper::new(adapter_info)),
        RenderAdapter(adapter),
    )
}

/// The context with all information required to interact with the GPU.
///
/// The [`RenderDevice`] is used to create render resources and the
/// [`CommandEncoder`] is used to record a series of GPU operations.
pub struct RenderContext<'w> {
    render_device: RenderDevice,
    command_encoder: Option<CommandEncoder>,
    command_buffer_queue: Vec<QueuedCommandBuffer<'w>>,
    force_serial: bool,
    diagnostics_recorder: Option<Arc<DiagnosticsRecorder>>,
}

impl<'w> RenderContext<'w> {
    /// Creates a new [`RenderContext`] from a [`RenderDevice`].
    pub fn new(
        render_device: RenderDevice,
        adapter_info: AdapterInfo,
        diagnostics_recorder: Option<DiagnosticsRecorder>,
    ) -> Self {
        // HACK: Parallel command encoding is currently bugged on AMD + Windows + Vulkan with wgpu 0.19.1
        #[cfg(target_os = "windows")]
        let force_serial =
            adapter_info.driver.contains("AMD") && adapter_info.backend == wgpu::Backend::Vulkan;
        #[cfg(not(target_os = "windows"))]
        let force_serial = {
            drop(adapter_info);
            false
        };

        Self {
            render_device,
            command_encoder: None,
            command_buffer_queue: Vec::new(),
            force_serial,
            diagnostics_recorder: diagnostics_recorder.map(Arc::new),
        }
    }

    /// Gets the underlying [`RenderDevice`].
    pub fn render_device(&self) -> &RenderDevice {
        &self.render_device
    }

    /// Gets the diagnostics recorder, used to track elapsed time and pipeline statistics
    /// of various render and compute passes.
    pub fn diagnostic_recorder(&self) -> impl RecordDiagnostics {
        self.diagnostics_recorder.clone()
    }

    /// Gets the current [`CommandEncoder`].
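    ///
    /// A minimal usage sketch (illustrative; `src`, `dst`, and `size` are hypothetical
    /// buffers and a copy length created elsewhere):
    ///
    /// ```ignore
    /// let encoder = render_context.command_encoder();
    /// encoder.copy_buffer_to_buffer(&src, 0, &dst, 0, size);
    /// ```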
    pub fn command_encoder(&mut self) -> &mut CommandEncoder {
        self.command_encoder.get_or_insert_with(|| {
            self.render_device
                .create_command_encoder(&wgpu::CommandEncoderDescriptor::default())
        })
    }

    /// Creates a new [`TrackedRenderPass`] for the context,
    /// configured using the provided `descriptor`.
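    ///
    /// A minimal usage sketch (illustrative; `color_attachment` is assumed to be a
    /// `wgpu::RenderPassColorAttachment` built elsewhere, e.g. from a view target):
    ///
    /// ```ignore
    /// let mut pass = render_context.begin_tracked_render_pass(RenderPassDescriptor {
    ///     label: Some("example_pass"),
    ///     color_attachments: &[Some(color_attachment)],
    ///     depth_stencil_attachment: None,
    ///     timestamp_writes: None,
    ///     occlusion_query_set: None,
    /// });
    /// ```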
    pub fn begin_tracked_render_pass<'a>(
        &'a mut self,
        descriptor: RenderPassDescriptor<'a, '_>,
    ) -> TrackedRenderPass<'a> {
        // Cannot use command_encoder() as we need to split the borrow on self
        let command_encoder = self.command_encoder.get_or_insert_with(|| {
            self.render_device
                .create_command_encoder(&wgpu::CommandEncoderDescriptor::default())
        });

        let render_pass = command_encoder.begin_render_pass(&descriptor);
        TrackedRenderPass::new(&self.render_device, render_pass)
    }

    /// Append a [`CommandBuffer`] to the command buffer queue.
    ///
    /// If a [`CommandEncoder`] is open, it is first flushed into a [`CommandBuffer`]
    /// and pushed onto the queue before the provided buffer is appended.
    pub fn add_command_buffer(&mut self, command_buffer: CommandBuffer) {
        self.flush_encoder();

        self.command_buffer_queue
            .push(QueuedCommandBuffer::Ready(command_buffer));
    }

    /// Append a function that will generate a [`CommandBuffer`] to the
    /// command buffer queue, to be run later.
    ///
    /// If a [`CommandEncoder`] is open, it is first flushed into a [`CommandBuffer`]
    /// and pushed onto the queue before the provided task is appended.
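    ///
    /// A minimal usage sketch (illustrative; typically called from a render graph node):
    ///
    /// ```ignore
    /// render_context.add_command_buffer_generation_task(move |render_device| {
    ///     let mut encoder = render_device
    ///         .create_command_encoder(&wgpu::CommandEncoderDescriptor::default());
    ///     // ... record GPU commands here ...
    ///     encoder.finish()
    /// });
    /// ```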
    pub fn add_command_buffer_generation_task(
        &mut self,
        #[cfg(not(all(target_arch = "wasm32", target_feature = "atomics")))]
        task: impl FnOnce(RenderDevice) -> CommandBuffer + 'w + Send,
        #[cfg(all(target_arch = "wasm32", target_feature = "atomics"))]
        task: impl FnOnce(RenderDevice) -> CommandBuffer + 'w,
    ) {
        self.flush_encoder();

        self.command_buffer_queue
            .push(QueuedCommandBuffer::Task(Box::new(task)));
    }

    /// Finalizes and returns the queue of [`CommandBuffer`]s.
    ///
    /// This function will wait until all command buffer generation tasks are complete
    /// by running them in parallel (where supported).
    ///
    /// The [`CommandBuffer`]s will be returned in the order that they were added.
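    ///
    /// A minimal end-of-frame sketch (illustrative; this mirrors what the render graph
    /// runner does with the returned buffers):
    ///
    /// ```ignore
    /// let (command_buffers, render_device, diagnostics_recorder) = render_context.finish();
    /// queue.submit(command_buffers);
    /// ```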
    pub fn finish(
        mut self,
    ) -> (
        Vec<CommandBuffer>,
        RenderDevice,
        Option<DiagnosticsRecorder>,
    ) {
        self.flush_encoder();

        let mut command_buffers = Vec::with_capacity(self.command_buffer_queue.len());

        #[cfg(not(all(target_arch = "wasm32", target_feature = "atomics")))]
        {
            let mut task_based_command_buffers = ComputeTaskPool::get().scope(|task_pool| {
                for (i, queued_command_buffer) in self.command_buffer_queue.into_iter().enumerate()
                {
                    match queued_command_buffer {
                        QueuedCommandBuffer::Ready(command_buffer) => {
                            command_buffers.push((i, command_buffer));
                        }
                        QueuedCommandBuffer::Task(command_buffer_generation_task) => {
                            let render_device = self.render_device.clone();
                            if self.force_serial {
                                command_buffers
                                    .push((i, command_buffer_generation_task(render_device)));
                            } else {
                                task_pool.spawn(async move {
                                    (i, command_buffer_generation_task(render_device))
                                });
                            }
                        }
                    }
                }
            });
            command_buffers.append(&mut task_based_command_buffers);
        }

        #[cfg(all(target_arch = "wasm32", target_feature = "atomics"))]
        for (i, queued_command_buffer) in self.command_buffer_queue.into_iter().enumerate() {
            match queued_command_buffer {
                QueuedCommandBuffer::Ready(command_buffer) => {
                    command_buffers.push((i, command_buffer));
                }
                QueuedCommandBuffer::Task(command_buffer_generation_task) => {
                    let render_device = self.render_device.clone();
                    command_buffers.push((i, command_buffer_generation_task(render_device)));
                }
            }
        }

        command_buffers.sort_unstable_by_key(|(i, _)| *i);

        let mut command_buffers = command_buffers
            .into_iter()
            .map(|(_, cb)| cb)
            .collect::<Vec<CommandBuffer>>();

        let mut diagnostics_recorder = self.diagnostics_recorder.take().map(|v| {
            Arc::try_unwrap(v)
                .ok()
                .expect("diagnostic recorder shouldn't be held longer than necessary")
        });

        if let Some(recorder) = &mut diagnostics_recorder {
            let mut command_encoder = self
                .render_device
                .create_command_encoder(&wgpu::CommandEncoderDescriptor::default());
            recorder.resolve(&mut command_encoder);
            command_buffers.push(command_encoder.finish());
        }

        (command_buffers, self.render_device, diagnostics_recorder)
    }

    fn flush_encoder(&mut self) {
        if let Some(encoder) = self.command_encoder.take() {
            self.command_buffer_queue
                .push(QueuedCommandBuffer::Ready(encoder.finish()));
        }
    }
}

enum QueuedCommandBuffer<'w> {
    Ready(CommandBuffer),
    #[cfg(not(all(target_arch = "wasm32", target_feature = "atomics")))]
    Task(Box<dyn FnOnce(RenderDevice) -> CommandBuffer + 'w + Send>),
    #[cfg(all(target_arch = "wasm32", target_feature = "atomics"))]
    Task(Box<dyn FnOnce(RenderDevice) -> CommandBuffer + 'w>),
}