1use bevy_app::{App, Plugin};
4use bevy_derive::{Deref, DerefMut};
5use bevy_ecs::{
6 entity::Entity,
7 query::{Has, With},
8 schedule::IntoSystemConfigs as _,
9 system::{Query, Res, ResMut, Resource, StaticSystemParam},
10 world::{FromWorld, World},
11};
12use bevy_encase_derive::ShaderType;
13use bevy_utils::EntityHashMap;
14use bytemuck::{Pod, Zeroable};
15use nonmax::NonMaxU32;
16use smallvec::smallvec;
17use wgpu::{BindingResource, BufferUsages, DownlevelFlags, Features};
18
19use crate::{
20 render_phase::{
21 BinnedPhaseItem, BinnedRenderPhaseBatch, CachedRenderPipelinePhaseItem,
22 PhaseItemExtraIndex, SortedPhaseItem, SortedRenderPhase, UnbatchableBinnedEntityIndices,
23 ViewBinnedRenderPhases, ViewSortedRenderPhases,
24 },
25 render_resource::{BufferVec, GpuArrayBufferable, RawBufferVec, UninitBufferVec},
26 renderer::{RenderAdapter, RenderDevice, RenderQueue},
27 view::{GpuCulling, ViewTarget},
28 Render, RenderApp, RenderSet,
29};
30
31use super::{BatchMeta, GetBatchData, GetFullBatchData};
32
/// Registers the resources and systems required for GPU-driven batching:
/// the [`IndirectParametersBuffer`] resource, its per-frame upload system,
/// and detection of [`GpuPreprocessingSupport`].
pub struct BatchingPlugin;
34
35impl Plugin for BatchingPlugin {
36 fn build(&self, app: &mut App) {
37 let Some(render_app) = app.get_sub_app_mut(RenderApp) else {
38 return;
39 };
40
41 render_app
42 .insert_resource(IndirectParametersBuffer::new())
43 .add_systems(
44 Render,
45 write_indirect_parameters_buffer.in_set(RenderSet::PrepareResourcesFlush),
46 );
47 }
48
49 fn finish(&self, app: &mut App) {
50 let Some(render_app) = app.get_sub_app_mut(RenderApp) else {
51 return;
52 };
53
54 render_app.init_resource::<GpuPreprocessingSupport>();
55 }
56}
57
/// The level of GPU-driven mesh preprocessing that the current platform
/// supports, as probed in this type's `FromWorld` implementation below.
#[derive(Clone, Copy, PartialEq, Resource)]
pub enum GpuPreprocessingSupport {
    /// No GPU preprocessing at all (no usable compute support, or a
    /// known-problematic GPU).
    None,
    /// Transforms may be computed on GPU, but GPU culling is unavailable
    /// (indirect-draw first-instance support is missing).
    PreprocessingOnly,
    /// Both GPU preprocessing and GPU culling are supported.
    Culling,
}
75
/// The GPU buffers involved when instance data is built on the GPU.
///
/// `BD` is the per-instance data the preprocessing shader produces; `BDI` is
/// the CPU-supplied input data it consumes.
#[derive(Resource)]
pub struct BatchedInstanceBuffers<BD, BDI>
where
    BD: GpuArrayBufferable + Sync + Send + 'static,
    BDI: Pod,
{
    /// The output buffer of per-instance data. It is only ever allocated from
    /// the CPU side, never written (hence `UninitBufferVec`); the GPU fills it.
    pub data_buffer: UninitBufferVec<BD>,

    /// One work-item list per view entity. Each item tells the preprocessing
    /// shader which input index to read and where to write the result.
    pub work_item_buffers: EntityHashMap<Entity, PreprocessWorkItemBuffer>,

    /// The CPU-built input data (`BDI`) for the current frame.
    pub current_input_buffer: RawBufferVec<BDI>,

    /// Input data retained from the previous frame. NOTE(review): this buffer
    /// is cleared here but never filled in this file — presumably the swap
    /// with `current_input_buffer` happens at a call site; confirm.
    pub previous_input_buffer: RawBufferVec<BDI>,
}
118
/// The per-view list of preprocessing work items, plus whether that view
/// performs GPU culling.
pub struct PreprocessWorkItemBuffer {
    /// The work items to issue to the preprocessing shader for this view.
    pub buffer: BufferVec<PreprocessWorkItem>,
    /// True if the view entity has the `GpuCulling` component.
    pub gpu_culling: bool,
}
126
/// One unit of work for the GPU preprocessing shader: process a single
/// instance's input data.
#[derive(Clone, Copy, Pod, Zeroable, ShaderType)]
#[repr(C)]
pub struct PreprocessWorkItem {
    /// The index of this instance's data in the input buffer.
    pub input_index: u32,
    /// Where the result goes. When GPU culling is enabled, this is instead the
    /// index of the batch's [`IndirectParameters`] entry (see the
    /// batch-and-prepare systems in this file, which set it either way).
    pub output_index: u32,
}
140
/// GPU-side indirect draw arguments.
///
/// The field names are dual-purpose because this single layout must serve both
/// indexed draws (wgpu's `DrawIndexedIndirectArgs`, five words) and
/// non-indexed draws (`DrawIndirectArgs`, four words).
#[derive(Clone, Copy, Pod, Zeroable, ShaderType)]
#[repr(C)]
pub struct IndirectParameters {
    /// For non-indexed draws, the number of vertices; for indexed draws, the
    /// number of indices.
    pub vertex_or_index_count: u32,

    /// The number of instances to draw.
    pub instance_count: u32,

    /// For non-indexed draws, the first vertex; for indexed draws, the first
    /// index.
    pub first_vertex: u32,

    /// For non-indexed draws, the first instance (the 4th and last word); for
    /// indexed draws, the base vertex.
    pub base_vertex_or_first_instance: u32,

    /// For indexed draws, the first instance (the 5th word); unused by
    /// non-indexed draws.
    pub first_instance: u32,
}
203
/// The buffer of [`IndirectParameters`] entries consumed by indirect draws
/// when GPU culling is in use. Filled during batching, uploaded and cleared
/// each frame by [`write_indirect_parameters_buffer`].
#[derive(Resource, Deref, DerefMut)]
pub struct IndirectParametersBuffer(pub BufferVec<IndirectParameters>);
207
208impl IndirectParametersBuffer {
209 pub fn new() -> IndirectParametersBuffer {
211 IndirectParametersBuffer(BufferVec::new(
212 BufferUsages::STORAGE | BufferUsages::INDIRECT,
213 ))
214 }
215}
216
217impl Default for IndirectParametersBuffer {
218 fn default() -> Self {
219 Self::new()
220 }
221}
222
impl FromWorld for GpuPreprocessingSupport {
    /// Probes the render adapter and device to decide how much of the GPU
    /// preprocessing pipeline this platform can use.
    fn from_world(world: &mut World) -> Self {
        let adapter = world.resource::<RenderAdapter>();
        let device = world.resource::<RenderDevice>();

        // Disable entirely if compute is unavailable (a max compute workgroup
        // size of 0 means no compute shader support), or on certain Android
        // Adreno GPUs: models numbered <= 730, except the 720, are excluded.
        // NOTE(review): the 720 carve-out and the <= 730 cutoff are
        // driver-workaround heuristics — confirm against the upstream bug
        // reports before altering.
        if device.limits().max_compute_workgroup_size_x == 0 ||
            (cfg!(target_os = "android") && {
                let name = adapter.get_info().name;
                name.strip_prefix("Adreno (TM) ").is_some_and(|version|
                    version != "720" && version.parse::<u16>().is_ok_and(|version| version <= 730)
                )
            })
        {
            GpuPreprocessingSupport::None
        // GPU culling additionally requires first-instance support in
        // indirect draws: both the device feature and the downlevel guarantee
        // that shaders actually observe the first-instance value.
        } else if !device
            .features()
            .contains(Features::INDIRECT_FIRST_INSTANCE) ||
            !adapter.get_downlevel_capabilities().flags.contains(
                DownlevelFlags::VERTEX_AND_INSTANCE_INDEX_RESPECTS_RESPECTIVE_FIRST_VALUE_IN_INDIRECT_DRAW)
        {
            GpuPreprocessingSupport::PreprocessingOnly
        } else {
            GpuPreprocessingSupport::Culling
        }
    }
}
251
252impl<BD, BDI> BatchedInstanceBuffers<BD, BDI>
253where
254 BD: GpuArrayBufferable + Sync + Send + 'static,
255 BDI: Pod,
256{
257 pub fn new() -> Self {
259 BatchedInstanceBuffers {
260 data_buffer: UninitBufferVec::new(BufferUsages::STORAGE),
261 work_item_buffers: EntityHashMap::default(),
262 current_input_buffer: RawBufferVec::new(BufferUsages::STORAGE),
263 previous_input_buffer: RawBufferVec::new(BufferUsages::STORAGE),
264 }
265 }
266
267 pub fn instance_data_binding(&self) -> Option<BindingResource> {
271 self.data_buffer
272 .buffer()
273 .map(|buffer| buffer.as_entire_binding())
274 }
275
276 pub fn clear(&mut self) {
278 self.data_buffer.clear();
279 self.current_input_buffer.clear();
280 self.previous_input_buffer.clear();
281 for work_item_buffer in self.work_item_buffers.values_mut() {
282 work_item_buffer.buffer.clear();
283 }
284 }
285}
286
287impl<BD, BDI> Default for BatchedInstanceBuffers<BD, BDI>
288where
289 BD: GpuArrayBufferable + Sync + Send + 'static,
290 BDI: Pod,
291{
292 fn default() -> Self {
293 Self::new()
294 }
295}
296
/// The in-progress state of a batch being accumulated while walking a sorted
/// render phase.
struct SortedRenderBatch<F>
where
    F: GetBatchData,
{
    /// The index, in the phase's item list, of the first item in this batch.
    phase_item_start_index: u32,

    /// The index, in the output data buffer, of this batch's first instance.
    instance_start_index: u32,

    /// The index of this batch's [`IndirectParameters`] entry, if GPU culling
    /// (and therefore indirect drawing) is in use for this view.
    indirect_parameters_index: Option<NonMaxU32>,

    /// The comparison data used to decide whether a subsequent item can join
    /// this batch; `None` means nothing can be merged into it.
    meta: Option<BatchMeta<F::CompareData>>,
}
322
impl<F> SortedRenderBatch<F>
where
    F: GetBatchData,
{
    /// Finalizes this batch: writes the instance range
    /// `instance_start_index..instance_end_index` and the indirect-parameters
    /// index (if any) onto the batch's first phase item, which is the one that
    /// will be drawn.
    fn flush<I>(self, instance_end_index: u32, phase: &mut SortedRenderPhase<I>)
    where
        I: CachedRenderPipelinePhaseItem + SortedPhaseItem,
    {
        let (batch_range, batch_extra_index) =
            phase.items[self.phase_item_start_index as usize].batch_range_and_extra_index_mut();
        *batch_range = self.instance_start_index..instance_end_index;
        *batch_extra_index =
            PhaseItemExtraIndex::maybe_indirect_parameters_index(self.indirect_parameters_index);
    }
}
343
344pub fn clear_batched_gpu_instance_buffers<GFBD>(
351 gpu_batched_instance_buffers: Option<
352 ResMut<BatchedInstanceBuffers<GFBD::BufferData, GFBD::BufferInputData>>,
353 >,
354) where
355 GFBD: GetFullBatchData,
356{
357 if let Some(mut gpu_batched_instance_buffers) = gpu_batched_instance_buffers {
358 gpu_batched_instance_buffers.clear();
359 }
360}
361
362pub fn delete_old_work_item_buffers<GFBD>(
369 mut gpu_batched_instance_buffers: ResMut<
370 BatchedInstanceBuffers<GFBD::BufferData, GFBD::BufferInputData>,
371 >,
372 view_targets: Query<Entity, With<ViewTarget>>,
373) where
374 GFBD: GetFullBatchData,
375{
376 gpu_batched_instance_buffers
377 .work_item_buffers
378 .retain(|entity, _| view_targets.contains(*entity));
379}
380
381pub fn batch_and_prepare_sorted_render_phase<I, GFBD>(
385 gpu_array_buffer: ResMut<BatchedInstanceBuffers<GFBD::BufferData, GFBD::BufferInputData>>,
386 mut indirect_parameters_buffer: ResMut<IndirectParametersBuffer>,
387 mut sorted_render_phases: ResMut<ViewSortedRenderPhases<I>>,
388 mut views: Query<(Entity, Has<GpuCulling>)>,
389 system_param_item: StaticSystemParam<GFBD::Param>,
390) where
391 I: CachedRenderPipelinePhaseItem + SortedPhaseItem,
392 GFBD: GetFullBatchData,
393{
394 let BatchedInstanceBuffers {
396 ref mut data_buffer,
397 ref mut work_item_buffers,
398 ..
399 } = gpu_array_buffer.into_inner();
400
401 for (view, gpu_culling) in &mut views {
402 let Some(phase) = sorted_render_phases.get_mut(&view) else {
403 continue;
404 };
405
406 let work_item_buffer =
408 work_item_buffers
409 .entry(view)
410 .or_insert_with(|| PreprocessWorkItemBuffer {
411 buffer: BufferVec::new(BufferUsages::STORAGE),
412 gpu_culling,
413 });
414
415 let mut batch: Option<SortedRenderBatch<GFBD>> = None;
417 for current_index in 0..phase.items.len() {
418 let current_batch_input_index = GFBD::get_index_and_compare_data(
421 &system_param_item,
422 phase.items[current_index].entity(),
423 );
424
425 let Some((current_input_index, current_meta)) = current_batch_input_index else {
430 if let Some(batch) = batch.take() {
432 batch.flush(data_buffer.len() as u32, phase);
433 }
434
435 continue;
436 };
437 let current_meta =
438 current_meta.map(|meta| BatchMeta::new(&phase.items[current_index], meta));
439
440 let can_batch = batch.as_ref().is_some_and(|batch| {
443 match (¤t_meta, &batch.meta) {
445 (Some(current_meta), Some(batch_meta)) => current_meta == batch_meta,
446 (_, _) => false,
447 }
448 });
449
450 let current_entity = phase.items[current_index].entity();
452 let output_index = data_buffer.add() as u32;
453
454 if !can_batch {
456 if let Some(batch) = batch.take() {
458 batch.flush(output_index, phase);
459 }
460
461 let indirect_parameters_index = if gpu_culling {
463 GFBD::get_batch_indirect_parameters_index(
464 &system_param_item,
465 &mut indirect_parameters_buffer,
466 current_entity,
467 output_index,
468 )
469 } else {
470 None
471 };
472 batch = Some(SortedRenderBatch {
473 phase_item_start_index: current_index as u32,
474 instance_start_index: output_index,
475 indirect_parameters_index,
476 meta: current_meta,
477 });
478 }
479
480 if let Some(batch) = batch.as_ref() {
483 work_item_buffer.buffer.push(PreprocessWorkItem {
484 input_index: current_input_index.into(),
485 output_index: match batch.indirect_parameters_index {
486 Some(indirect_parameters_index) => indirect_parameters_index.into(),
487 None => output_index,
488 },
489 });
490 }
491 }
492
493 if let Some(batch) = batch.take() {
495 batch.flush(data_buffer.len() as u32, phase);
496 }
497 }
498}
499
/// A system that batches the items of a binned render phase and builds the GPU
/// preprocessing work items for each view.
///
/// Batchable bins become one batch per bin; unbatchable bins get one instance
/// (and, under GPU culling, one indirect-parameters entry) per entity.
pub fn batch_and_prepare_binned_render_phase<BPI, GFBD>(
    gpu_array_buffer: ResMut<BatchedInstanceBuffers<GFBD::BufferData, GFBD::BufferInputData>>,
    mut indirect_parameters_buffer: ResMut<IndirectParametersBuffer>,
    mut binned_render_phases: ResMut<ViewBinnedRenderPhases<BPI>>,
    mut views: Query<(Entity, Has<GpuCulling>)>,
    param: StaticSystemParam<GFBD::Param>,
) where
    BPI: BinnedPhaseItem,
    GFBD: GetFullBatchData,
{
    let system_param_item = param.into_inner();

    // We only need the output buffer and the per-view work-item lists here.
    let BatchedInstanceBuffers {
        ref mut data_buffer,
        ref mut work_item_buffers,
        ..
    } = gpu_array_buffer.into_inner();

    for (view, gpu_culling) in &mut views {
        // Skip views that don't have this phase.
        let Some(phase) = binned_render_phases.get_mut(&view) else {
            continue;
        };

        // Fetch or create the work-item buffer for this view.
        let work_item_buffer =
            work_item_buffers
                .entry(view)
                .or_insert_with(|| PreprocessWorkItemBuffer {
                    buffer: BufferVec::new(BufferUsages::STORAGE),
                    gpu_culling,
                });

        // Batchable bins: all entities in one bin share a single batch.
        for key in &phase.batchable_mesh_keys {
            let mut batch: Option<BinnedRenderPhaseBatch> = None;
            for &entity in &phase.batchable_mesh_values[key] {
                // Skip entities with no input data this frame.
                let Some(input_index) = GFBD::get_binned_index(&system_param_item, entity) else {
                    continue;
                };
                // Reserve this instance's slot in the output buffer.
                let output_index = data_buffer.add() as u32;

                match batch {
                    // Extend the open batch by one instance. The work item's
                    // output routes through the batch's indirect-parameters
                    // entry when one exists (GPU culling), otherwise it's a
                    // direct output slot.
                    Some(ref mut batch) => {
                        batch.instance_range.end = output_index + 1;
                        work_item_buffer.buffer.push(PreprocessWorkItem {
                            input_index: input_index.into(),
                            output_index: batch
                                .extra_index
                                .as_indirect_parameters_index()
                                .unwrap_or(output_index),
                        });
                    }

                    // Open a new batch with GPU culling: allocate the indirect
                    // draw parameters and route the work item through them.
                    None if gpu_culling => {
                        let indirect_parameters_index = GFBD::get_batch_indirect_parameters_index(
                            &system_param_item,
                            &mut indirect_parameters_buffer,
                            entity,
                            output_index,
                        );
                        work_item_buffer.buffer.push(PreprocessWorkItem {
                            input_index: input_index.into(),
                            output_index: indirect_parameters_index.unwrap_or_default().into(),
                        });
                        batch = Some(BinnedRenderPhaseBatch {
                            representative_entity: entity,
                            instance_range: output_index..output_index + 1,
                            extra_index: PhaseItemExtraIndex::maybe_indirect_parameters_index(
                                indirect_parameters_index,
                            ),
                        });
                    }

                    // Open a new batch without GPU culling: plain output slot.
                    None => {
                        work_item_buffer.buffer.push(PreprocessWorkItem {
                            input_index: input_index.into(),
                            output_index,
                        });
                        batch = Some(BinnedRenderPhaseBatch {
                            representative_entity: entity,
                            instance_range: output_index..output_index + 1,
                            extra_index: PhaseItemExtraIndex::NONE,
                        });
                    }
                }
            }

            // Record the bin's finished batch, if it produced one.
            if let Some(batch) = batch {
                phase.batch_sets.push(smallvec![batch]);
            }
        }

        // Unbatchable bins: one instance per entity, no merging.
        for key in &phase.unbatchable_mesh_keys {
            let unbatchables = phase.unbatchable_mesh_values.get_mut(key).unwrap();
            for &entity in &unbatchables.entities {
                let Some(input_index) = GFBD::get_binned_index(&system_param_item, entity) else {
                    continue;
                };
                let output_index = data_buffer.add() as u32;

                if gpu_culling {
                    // Each unbatchable entity gets its own indirect-parameters
                    // entry, recorded both in the work item and in the bin's
                    // buffer indices for drawing.
                    let indirect_parameters_index = GFBD::get_batch_indirect_parameters_index(
                        &system_param_item,
                        &mut indirect_parameters_buffer,
                        entity,
                        output_index,
                    )
                    .unwrap_or_default();
                    work_item_buffer.buffer.push(PreprocessWorkItem {
                        input_index: input_index.into(),
                        output_index: indirect_parameters_index.into(),
                    });
                    unbatchables
                        .buffer_indices
                        .add(UnbatchableBinnedEntityIndices {
                            instance_index: indirect_parameters_index.into(),
                            extra_index: PhaseItemExtraIndex::indirect_parameters_index(
                                indirect_parameters_index.into(),
                            ),
                        });
                } else {
                    // Without GPU culling, the instance draws directly from
                    // its output slot.
                    work_item_buffer.buffer.push(PreprocessWorkItem {
                        input_index: input_index.into(),
                        output_index,
                    });
                    unbatchables
                        .buffer_indices
                        .add(UnbatchableBinnedEntityIndices {
                            instance_index: output_index,
                            extra_index: PhaseItemExtraIndex::NONE,
                        });
                }
            }
        }
    }
}
640
641pub fn write_batched_instance_buffers<GFBD>(
643 render_device: Res<RenderDevice>,
644 render_queue: Res<RenderQueue>,
645 gpu_array_buffer: ResMut<BatchedInstanceBuffers<GFBD::BufferData, GFBD::BufferInputData>>,
646) where
647 GFBD: GetFullBatchData,
648{
649 let BatchedInstanceBuffers {
650 ref mut data_buffer,
651 work_item_buffers: ref mut index_buffers,
652 ref mut current_input_buffer,
653 previous_input_buffer: _,
654 } = gpu_array_buffer.into_inner();
655
656 data_buffer.write_buffer(&render_device);
657 current_input_buffer.write_buffer(&render_device, &render_queue);
658 for index_buffer in index_buffers.values_mut() {
662 index_buffer
663 .buffer
664 .write_buffer(&render_device, &render_queue);
665 }
666}
667
/// A system that uploads the [`IndirectParametersBuffer`] to the GPU and then
/// clears the CPU-side list so the next frame starts fresh.
pub fn write_indirect_parameters_buffer(
    render_device: Res<RenderDevice>,
    render_queue: Res<RenderQueue>,
    mut indirect_parameters_buffer: ResMut<IndirectParametersBuffer>,
) {
    indirect_parameters_buffer.write_buffer(&render_device, &render_queue);
    // Clearing after the upload resets only the CPU-side contents for the next
    // frame's batching pass.
    indirect_parameters_buffer.clear();
}