wgpu_hal/vulkan/mod.rs
/*!
# Vulkan API internals.

## Stack memory

Ash expects slices, which we don't generally have available.
We cope with this requirement in a combination of the following ways:
 - temporarily allocating `Vec` on the heap, where the overhead is permitted
 - growing temporary local storage
 - using `inplace_it` on iterators

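For example, here is a sketch of the `inplace_it` pattern (the closure body
and names like `views` and `consume_slice` are illustrative, not code from
this crate):

```ignore
// Collect an iterator into a slice on the stack when it's small,
// falling back to a heap allocation when it's large.
inplace_it::inplace_or_alloc_from_iter(views.iter().map(|v| v.raw), |raw_views| {
    // `raw_views: &mut [vk::ImageView]` is valid only inside this closure.
    consume_slice(raw_views)
});
```
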
## Framebuffers and Render passes

Render passes are cached on the device and kept forever.

Framebuffers are also cached on the device, but they are removed when
any of the image views they reference is destroyed.
If Vulkan supports image-less framebuffers,
then the actual views are excluded from the framebuffer key.

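Both caches follow the same lock-then-lookup-or-create pattern; a minimal
sketch (assuming a hypothetical `make_render_pass` constructor):

```ignore
use std::collections::hash_map::Entry;

let mut cache = device.render_passes.lock();
let raw_pass = match cache.entry(key) {
    Entry::Occupied(e) => *e.get(),
    Entry::Vacant(e) => {
        let pass = make_render_pass(&device.raw, e.key())?;
        *e.insert(pass)
    }
};
```
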
## Fences

If timeline semaphores are available, they are used 1:1 with wgpu-hal fences.
Otherwise, we manage a pool of `VkFence` objects behind each `hal::Fence`.

!*/

mod adapter;
mod command;
mod conv;
mod device;
mod instance;

use std::{borrow::Borrow, collections::HashSet, ffi::CStr, fmt, mem, num::NonZeroU32, sync::Arc};

use arrayvec::ArrayVec;
use ash::{
    extensions::{ext, khr},
    vk,
};
use parking_lot::{Mutex, RwLock};

const MILLIS_TO_NANOS: u64 = 1_000_000;
const MAX_TOTAL_ATTACHMENTS: usize = crate::MAX_COLOR_ATTACHMENTS * 2 + 1;

#[derive(Clone, Debug)]
pub struct Api;

impl crate::Api for Api {
    type Instance = Instance;
    type Surface = Surface;
    type Adapter = Adapter;
    type Device = Device;

    type Queue = Queue;
    type CommandEncoder = CommandEncoder;
    type CommandBuffer = CommandBuffer;

    type Buffer = Buffer;
    type Texture = Texture;
    type SurfaceTexture = SurfaceTexture;
    type TextureView = TextureView;
    type Sampler = Sampler;
    type QuerySet = QuerySet;
    type Fence = Fence;
    type AccelerationStructure = AccelerationStructure;

    type BindGroupLayout = BindGroupLayout;
    type BindGroup = BindGroup;
    type PipelineLayout = PipelineLayout;
    type ShaderModule = ShaderModule;
    type RenderPipeline = RenderPipeline;
    type ComputePipeline = ComputePipeline;
}

struct DebugUtils {
    extension: ext::DebugUtils,
    messenger: vk::DebugUtilsMessengerEXT,

    /// Owning pointer to the debug messenger callback user data.
    ///
    /// `InstanceShared::drop` destroys the debug messenger before
    /// dropping this, so the callback should never receive a dangling
    /// user data pointer.
    #[allow(dead_code)]
    callback_data: Box<DebugUtilsMessengerUserData>,
}

pub struct DebugUtilsCreateInfo {
    severity: vk::DebugUtilsMessageSeverityFlagsEXT,
    message_type: vk::DebugUtilsMessageTypeFlagsEXT,
    callback_data: Box<DebugUtilsMessengerUserData>,
}

/// Properties of the validation layer that the `DebugUtilsMessenger`
/// needs for its workarounds.
#[derive(Debug)]
struct ValidationLayerProperties {
    /// Validation layer description, from `vk::LayerProperties`.
    layer_description: std::ffi::CString,

    /// Validation layer specification version, from `vk::LayerProperties`.
    layer_spec_version: u32,
}

/// User data needed by `instance::debug_utils_messenger_callback`.
///
/// When we create the [`vk::DebugUtilsMessengerEXT`], the `pUserData`
/// pointer refers to one of these values.
#[derive(Debug)]
pub struct DebugUtilsMessengerUserData {
    /// The properties related to the validation layer, if present.
    validation_layer_properties: Option<ValidationLayerProperties>,

    /// Whether the OBS layer is present. OBS never increments the version
    /// of its layer, so there's no reason to track it.
    has_obs_layer: bool,
}

pub struct InstanceShared {
    raw: ash::Instance,
    extensions: Vec<&'static CStr>,
    drop_guard: Option<crate::DropGuard>,
    flags: wgt::InstanceFlags,
    debug_utils: Option<DebugUtils>,
    get_physical_device_properties: Option<khr::GetPhysicalDeviceProperties2>,
    entry: ash::Entry,
    has_nv_optimus: bool,
    android_sdk_version: u32,
    /// The instance API version: the version of Vulkan supported for
    /// instance-level functionality.
    ///
    /// It is associated with a `VkInstance` and its children,
    /// except for a `VkPhysicalDevice` and its children.
    instance_api_version: u32,
}

pub struct Instance {
    shared: Arc<InstanceShared>,
}

/// The semaphores needed to use one image in a swapchain.
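///
/// For orientation, a sketch of how one acquire/draw/present cycle uses
/// these methods (the surrounding glue is hypothetical, not this crate's
/// actual call sites):
///
/// ```ignore
/// // vkAcquireNextImageKHR signals `acquire` for this image.
/// sems.set_used_fence_value(submission_index);
/// // Only the first submission touching the image waits on `acquire`:
/// if let Some(sem) = sems.get_acquire_wait_semaphore() {
///     wait_semaphores.push(sem);
/// }
/// // Every submission that draws to the image signals a present semaphore:
/// signal_semaphores.push(sems.get_submit_signal_semaphore(device)?);
/// // Finally, vkQueuePresentKHR waits on all accumulated present semaphores:
/// let waits = sems.get_present_wait_semaphores();
/// ```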
#[derive(Debug)]
struct SwapchainImageSemaphores {
    /// A semaphore that is signaled when this image is safe for us to modify.
    ///
    /// When [`vkAcquireNextImageKHR`] returns the index of the next swapchain
    /// image that we should use, that image may actually still be in use by the
    /// presentation engine, and is not yet safe to modify. However, that
    /// function does accept a semaphore that it will signal when the image is
    /// indeed safe to begin messing with.
    ///
    /// This semaphore is:
    ///
    /// - waited for by the first queue submission to operate on this image
    ///   since it was acquired, and
    ///
    /// - signaled by [`vkAcquireNextImageKHR`] when the acquired image is ready
    ///   for us to use.
    ///
    /// [`vkAcquireNextImageKHR`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkAcquireNextImageKHR
    acquire: vk::Semaphore,

    /// True if the next command submission operating on this image should wait
    /// for [`acquire`].
    ///
    /// We must wait for `acquire` before drawing to this swapchain image, but
    /// because `wgpu-hal` queue submissions are always strongly ordered, only
    /// the first submission that works with a swapchain image actually needs to
    /// wait. We set this flag when this image is acquired, and clear it the
    /// first time it's passed to [`Queue::submit`] as a surface texture.
    ///
    /// [`acquire`]: SwapchainImageSemaphores::acquire
    /// [`Queue::submit`]: crate::Queue::submit
    should_wait_for_acquire: bool,

    /// A pool of semaphores for ordering presentation after drawing.
    ///
    /// The first [`present_index`] semaphores in this vector are:
    ///
    /// - all waited on by the call to [`vkQueuePresentKHR`] that presents this
    ///   image, and
    ///
    /// - each signaled by some [`vkQueueSubmit`] queue submission that draws to
    ///   this image, when the submission finishes execution.
    ///
    /// This vector accumulates one semaphore per submission that writes to this
    /// image. This is awkward, but hard to avoid: [`vkQueuePresentKHR`]
    /// requires a semaphore to order it with respect to drawing commands, and
    /// we can't attach new completion semaphores to a command submission after
    /// it's been submitted. This means that, at submission time, we must create
    /// the semaphore we might need if the caller's next action is to enqueue a
    /// presentation of this image.
    ///
    /// An alternative strategy would be for presentation to enqueue an empty
    /// submit, ordered relative to other submits in the usual way, and
    /// signaling a single presentation semaphore. But we suspect that submits
    /// are usually expensive enough, and semaphores usually cheap enough, that
    /// performance-sensitive users will avoid making many submits, so that the
    /// cost of accumulated semaphores will usually be less than the cost of an
    /// additional submit.
    ///
    /// Only the first [`present_index`] semaphores in the vector are actually
    /// going to be signalled by submitted commands, and need to be waited for
    /// by the next present call. Any semaphores beyond that index were created
    /// for prior presents and are simply being retained for recycling.
    ///
    /// [`present_index`]: SwapchainImageSemaphores::present_index
    /// [`vkQueuePresentKHR`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueuePresentKHR
    /// [`vkQueueSubmit`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueueSubmit
    present: Vec<vk::Semaphore>,

    /// The number of semaphores in [`present`] to be signalled for this submission.
    ///
    /// [`present`]: SwapchainImageSemaphores::present
    present_index: usize,

    /// The fence value of the last command submission that wrote to this image.
    ///
    /// The next time we try to acquire this image, we'll block until
    /// this submission finishes, proving that [`acquire`] is ready to
    /// pass to `vkAcquireNextImageKHR` again.
    ///
    /// [`acquire`]: SwapchainImageSemaphores::acquire
    previously_used_submission_index: crate::FenceValue,
}

impl SwapchainImageSemaphores {
    fn new(device: &DeviceShared) -> Result<Self, crate::DeviceError> {
        Ok(Self {
            acquire: device.new_binary_semaphore()?,
            should_wait_for_acquire: true,
            present: Vec::new(),
            present_index: 0,
            previously_used_submission_index: 0,
        })
    }

    fn set_used_fence_value(&mut self, value: crate::FenceValue) {
        self.previously_used_submission_index = value;
    }

    /// Return the semaphore that commands drawing to this image should wait for, if any.
    ///
    /// This only returns `Some` once per acquisition; see
    /// [`SwapchainImageSemaphores::should_wait_for_acquire`] for details.
    fn get_acquire_wait_semaphore(&mut self) -> Option<vk::Semaphore> {
        if self.should_wait_for_acquire {
            self.should_wait_for_acquire = false;
            Some(self.acquire)
        } else {
            None
        }
    }

    /// Return a semaphore that a submission that writes to this image should
    /// signal when it's done.
    ///
    /// See [`SwapchainImageSemaphores::present`] for details.
    fn get_submit_signal_semaphore(
        &mut self,
        device: &DeviceShared,
    ) -> Result<vk::Semaphore, crate::DeviceError> {
        // Try to recycle a semaphore we created for a previous presentation.
        let sem = match self.present.get(self.present_index) {
            Some(sem) => *sem,
            None => {
                let sem = device.new_binary_semaphore()?;
                self.present.push(sem);
                sem
            }
        };

        self.present_index += 1;

        Ok(sem)
    }

    /// Return the semaphores that a presentation of this image should wait on.
    ///
    /// Return a slice of semaphores that the call to [`vkQueuePresentKHR`] that
    /// ends this image's acquisition should wait for. See
    /// [`SwapchainImageSemaphores::present`] for details.
    ///
    /// Reset `self` to be ready for the next acquisition cycle.
    ///
    /// [`vkQueuePresentKHR`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueuePresentKHR
    fn get_present_wait_semaphores(&mut self) -> &[vk::Semaphore] {
        let old_index = self.present_index;

        // Since this marks the end of this acquire/draw/present cycle, take the
        // opportunity to reset `self` in preparation for the next acquisition.
        self.present_index = 0;
        self.should_wait_for_acquire = true;

        &self.present[0..old_index]
    }

    unsafe fn destroy(&self, device: &ash::Device) {
        unsafe {
            device.destroy_semaphore(self.acquire, None);
            for sem in &self.present {
                device.destroy_semaphore(*sem, None);
            }
        }
    }
}

struct Swapchain {
    raw: vk::SwapchainKHR,
    raw_flags: vk::SwapchainCreateFlagsKHR,
    functor: khr::Swapchain,
    device: Arc<DeviceShared>,
    images: Vec<vk::Image>,
    config: crate::SurfaceConfiguration,
    view_formats: Vec<wgt::TextureFormat>,
    /// One wait semaphore per swapchain image. This will be associated with the
    /// surface texture, and later collected during submission.
    ///
    /// We need this to be `Arc<Mutex<>>` because we need to be able to pass this
    /// data into the surface texture, so submit/present can use it.
    surface_semaphores: Vec<Arc<Mutex<SwapchainImageSemaphores>>>,
    /// The index of the next semaphore to use. Ideally we would use the same
    /// index as the image index, but we need to specify the semaphore as an
    /// argument to `acquire_next_image`, which is the very call that tells us
    /// which image to use.
    next_semaphore_index: usize,
}

impl Swapchain {
    fn advance_surface_semaphores(&mut self) {
        let semaphore_count = self.surface_semaphores.len();
        self.next_semaphore_index = (self.next_semaphore_index + 1) % semaphore_count;
    }

    fn get_surface_semaphores(&self) -> Arc<Mutex<SwapchainImageSemaphores>> {
        self.surface_semaphores[self.next_semaphore_index].clone()
    }
}

pub struct Surface {
    raw: vk::SurfaceKHR,
    functor: khr::Surface,
    instance: Arc<InstanceShared>,
    swapchain: RwLock<Option<Swapchain>>,
}

#[derive(Debug)]
pub struct SurfaceTexture {
    index: u32,
    texture: Texture,
    surface_semaphores: Arc<Mutex<SwapchainImageSemaphores>>,
}

impl Borrow<Texture> for SurfaceTexture {
    fn borrow(&self) -> &Texture {
        &self.texture
    }
}

pub struct Adapter {
    raw: vk::PhysicalDevice,
    instance: Arc<InstanceShared>,
    //queue_families: Vec<vk::QueueFamilyProperties>,
    known_memory_flags: vk::MemoryPropertyFlags,
    phd_capabilities: adapter::PhysicalDeviceProperties,
    //phd_features: adapter::PhysicalDeviceFeatures,
    downlevel_flags: wgt::DownlevelFlags,
    private_caps: PrivateCapabilities,
    workarounds: Workarounds,
}

// TODO: there's no reason why this can't be unified; the function pointers
// should all be the same, but it's not clear how to do this with `ash`.
enum ExtensionFn<T> {
    /// The loaded function pointer struct for an extension.
    Extension(T),
    /// The extension was promoted to a core version of Vulkan and the
    /// functions on `ash`'s `DeviceV1_x` traits should be used.
    Promoted,
}

struct DeviceExtensionFunctions {
    draw_indirect_count: Option<khr::DrawIndirectCount>,
    timeline_semaphore: Option<ExtensionFn<khr::TimelineSemaphore>>,
    ray_tracing: Option<RayTracingDeviceExtensionFunctions>,
}

struct RayTracingDeviceExtensionFunctions {
    acceleration_structure: khr::AccelerationStructure,
    buffer_device_address: khr::BufferDeviceAddress,
}

/// Set of internal capabilities, which don't show up in the exposed
/// device geometry, but affect the code paths taken internally.
#[derive(Clone, Debug)]
struct PrivateCapabilities {
    /// Y-flipping is implemented with either `VK_AMD_negative_viewport_height`
    /// or `VK_KHR_maintenance1`/1.1+. The AMD extension for negative viewport
    /// height does not require a Y shift.
    ///
    /// This flag is `true` if the device has `VK_KHR_maintenance1`/1.1+ and
    /// `false` otherwise (i.e. in the case of `VK_AMD_negative_viewport_height`).
    flip_y_requires_shift: bool,
    imageless_framebuffers: bool,
    image_view_usage: bool,
    timeline_semaphores: bool,
    texture_d24: bool,
    texture_d24_s8: bool,
    texture_s8: bool,
    /// Ability to present contents to any screen. Only needed to work around
    /// broken platform configurations.
    can_present: bool,
    non_coherent_map_mask: wgt::BufferAddress,
    robust_buffer_access: bool,
    robust_image_access: bool,
    robust_buffer_access2: bool,
    robust_image_access2: bool,
    zero_initialize_workgroup_memory: bool,
    image_format_list: bool,
}

bitflags::bitflags!(
    /// Workaround flags.
    #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
    pub struct Workarounds: u32 {
        /// Only generate SPIR-V for one entry point at a time.
        const SEPARATE_ENTRY_POINTS = 0x1;
        /// Qualcomm OOMs when there are zero color attachments but a non-null pointer
        /// to a subpass resolve attachment array. This nulls out that pointer in that case.
        const EMPTY_RESOLVE_ATTACHMENT_LISTS = 0x2;
        /// If the following code returns false, then Nvidia will end up filling the wrong range.
        ///
        /// ```skip
        /// fn nvidia_succeeds() -> bool {
        ///     # let (copy_length, start_offset) = (0, 0);
        ///     if copy_length >= 4096 {
        ///         if start_offset % 16 != 0 {
        ///             if copy_length == 4096 {
        ///                 return true;
        ///             }
        ///             if copy_length % 16 == 0 {
        ///                 return false;
        ///             }
        ///         }
        ///     }
        ///     true
        /// }
        /// ```
        ///
        /// As such, we need to make sure all calls to vkCmdFillBuffer are aligned to 16 bytes
        /// if they cover a range of 4096 bytes or more.
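        ///
        /// For illustration, a sketch of honoring this at a hypothetical call
        /// site (not the actual code; `workarounds`, `offset`, and `size` are
        /// assumed names):
        ///
        /// ```skip
        /// if workarounds.contains(Workarounds::FORCE_FILL_BUFFER_WITH_SIZE_GREATER_4096_ALIGNED_OFFSET_16)
        ///     && size >= 4096
        /// {
        ///     // Both the offset and the size passed to vkCmdFillBuffer
        ///     // must be 16-byte aligned in this regime.
        ///     debug_assert!(offset % 16 == 0 && size % 16 == 0);
        /// }
        /// ```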
        const FORCE_FILL_BUFFER_WITH_SIZE_GREATER_4096_ALIGNED_OFFSET_16 = 0x4;
    }
);

#[derive(Clone, Debug, Eq, Hash, PartialEq)]
struct AttachmentKey {
    format: vk::Format,
    layout: vk::ImageLayout,
    ops: crate::AttachmentOps,
}

impl AttachmentKey {
    /// Returns an attachment key for a compatible attachment.
    fn compatible(format: vk::Format, layout: vk::ImageLayout) -> Self {
        Self {
            format,
            layout,
            ops: crate::AttachmentOps::all(),
        }
    }
}

#[derive(Clone, Eq, Hash, PartialEq)]
struct ColorAttachmentKey {
    base: AttachmentKey,
    resolve: Option<AttachmentKey>,
}

#[derive(Clone, Eq, Hash, PartialEq)]
struct DepthStencilAttachmentKey {
    base: AttachmentKey,
    stencil_ops: crate::AttachmentOps,
}

#[derive(Clone, Eq, Default, Hash, PartialEq)]
struct RenderPassKey {
    colors: ArrayVec<Option<ColorAttachmentKey>, { crate::MAX_COLOR_ATTACHMENTS }>,
    depth_stencil: Option<DepthStencilAttachmentKey>,
    sample_count: u32,
    multiview: Option<NonZeroU32>,
}

#[derive(Clone, Debug, Eq, Hash, PartialEq)]
struct FramebufferAttachment {
    /// Can be NULL if the framebuffer is image-less
    raw: vk::ImageView,
    raw_image_flags: vk::ImageCreateFlags,
    view_usage: crate::TextureUses,
    view_format: wgt::TextureFormat,
    raw_view_formats: Vec<vk::Format>,
}

#[derive(Clone, Eq, Hash, PartialEq)]
struct FramebufferKey {
    attachments: ArrayVec<FramebufferAttachment, { MAX_TOTAL_ATTACHMENTS }>,
    extent: wgt::Extent3d,
    sample_count: u32,
}

struct DeviceShared {
    raw: ash::Device,
    family_index: u32,
    queue_index: u32,
    raw_queue: ash::vk::Queue,
    handle_is_owned: bool,
    instance: Arc<InstanceShared>,
    physical_device: ash::vk::PhysicalDevice,
    enabled_extensions: Vec<&'static CStr>,
    extension_fns: DeviceExtensionFunctions,
    vendor_id: u32,
    timestamp_period: f32,
    private_caps: PrivateCapabilities,
    workarounds: Workarounds,
    features: wgt::Features,
    render_passes: Mutex<rustc_hash::FxHashMap<RenderPassKey, vk::RenderPass>>,
    framebuffers: Mutex<rustc_hash::FxHashMap<FramebufferKey, vk::Framebuffer>>,
}

pub struct Device {
    shared: Arc<DeviceShared>,
    mem_allocator: Mutex<gpu_alloc::GpuAllocator<vk::DeviceMemory>>,
    desc_allocator:
        Mutex<gpu_descriptor::DescriptorAllocator<vk::DescriptorPool, vk::DescriptorSet>>,
    valid_ash_memory_types: u32,
    naga_options: naga::back::spv::Options<'static>,
    #[cfg(feature = "renderdoc")]
    render_doc: crate::auxil::renderdoc::RenderDoc,
}

/// Semaphores for forcing queue submissions to run in order.
///
/// The [`wgpu_hal::Queue`] trait promises that if two calls to [`submit`] are
/// ordered, then the first submission will finish on the GPU before the second
/// submission begins. To get this behavior on Vulkan we need to pass semaphores
/// to [`vkQueueSubmit`] for the commands to wait on before beginning execution,
/// and to signal when their execution is done.
///
/// Normally this can be done with a single semaphore, waited on and then
/// signalled for each submission. At any given time there's exactly one
/// submission that would signal the semaphore, and exactly one waiting on it,
/// as Vulkan requires.
///
/// However, as of Oct 2021, bug [#5508] in the Mesa ANV drivers caused them to
/// hang if we use a single semaphore. The workaround is to alternate between
/// two semaphores. The bug has been fixed in Mesa, but we should probably keep
/// the workaround until, say, Oct 2026.
///
/// [`wgpu_hal::Queue`]: crate::Queue
/// [`submit`]: crate::Queue::submit
/// [`vkQueueSubmit`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueueSubmit
/// [#5508]: https://gitlab.freedesktop.org/mesa/mesa/-/issues/5508
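///
/// For illustration, with the two relayed semaphores `s0` and `s1`, the
/// submissions chain like this (the first submission waits on nothing):
///
/// ```ignore
/// // submission:  1    2    3    4    ...
/// // waits on:    -    s0   s1   s0
/// // signals:     s0   s1   s0   s1
/// ```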
#[derive(Clone)]
struct RelaySemaphores {
    /// The semaphore the next submission should wait on before beginning
    /// execution on the GPU. This is `None` for the first submission, which
    /// should not wait on anything at all.
    wait: Option<vk::Semaphore>,

    /// The semaphore the next submission should signal when it has finished
    /// execution on the GPU.
    signal: vk::Semaphore,
}

impl RelaySemaphores {
    fn new(device: &DeviceShared) -> Result<Self, crate::DeviceError> {
        Ok(Self {
            wait: None,
            signal: device.new_binary_semaphore()?,
        })
    }

    /// Advances the semaphores, returning the semaphores that should be used for a submission.
    fn advance(&mut self, device: &DeviceShared) -> Result<Self, crate::DeviceError> {
        let old = self.clone();

        // Build the state for the next submission.
        match self.wait {
            None => {
                // The `old` values describe the first submission to this queue.
                // The second submission should wait on `old.signal`, and then
                // signal a new semaphore which we'll create now.
                self.wait = Some(old.signal);
                self.signal = device.new_binary_semaphore()?;
            }
            Some(ref mut wait) => {
                // What this submission signals, the next should wait on.
                mem::swap(wait, &mut self.signal);
            }
        };

        Ok(old)
    }

    /// Destroys the semaphores.
    unsafe fn destroy(&self, device: &ash::Device) {
        unsafe {
            if let Some(wait) = self.wait {
                device.destroy_semaphore(wait, None);
            }
            device.destroy_semaphore(self.signal, None);
        }
    }
}

pub struct Queue {
    raw: vk::Queue,
    swapchain_fn: khr::Swapchain,
    device: Arc<DeviceShared>,
    family_index: u32,
    relay_semaphores: Mutex<RelaySemaphores>,
}

#[derive(Debug)]
pub struct Buffer {
    raw: vk::Buffer,
    block: Option<Mutex<gpu_alloc::MemoryBlock<vk::DeviceMemory>>>,
}

#[derive(Debug)]
pub struct AccelerationStructure {
    raw: vk::AccelerationStructureKHR,
    buffer: vk::Buffer,
    block: Mutex<gpu_alloc::MemoryBlock<vk::DeviceMemory>>,
}

#[derive(Debug)]
pub struct Texture {
    raw: vk::Image,
    drop_guard: Option<crate::DropGuard>,
    block: Option<gpu_alloc::MemoryBlock<vk::DeviceMemory>>,
    usage: crate::TextureUses,
    format: wgt::TextureFormat,
    raw_flags: vk::ImageCreateFlags,
    copy_size: crate::CopyExtent,
    view_formats: Vec<wgt::TextureFormat>,
}

impl Texture {
    /// # Safety
    ///
    /// - The image handle must not be manually destroyed
    pub unsafe fn raw_handle(&self) -> vk::Image {
        self.raw
    }
}

#[derive(Debug)]
pub struct TextureView {
    raw: vk::ImageView,
    layers: NonZeroU32,
    attachment: FramebufferAttachment,
}

impl TextureView {
    /// # Safety
    ///
    /// - The image view handle must not be manually destroyed
    pub unsafe fn raw_handle(&self) -> vk::ImageView {
        self.raw
    }
}

#[derive(Debug)]
pub struct Sampler {
    raw: vk::Sampler,
}

#[derive(Debug)]
pub struct BindGroupLayout {
    raw: vk::DescriptorSetLayout,
    desc_count: gpu_descriptor::DescriptorTotalCount,
    types: Box<[(vk::DescriptorType, u32)]>,
    /// Map of binding index to size.
    binding_arrays: Vec<(u32, NonZeroU32)>,
}

#[derive(Debug)]
pub struct PipelineLayout {
    raw: vk::PipelineLayout,
    binding_arrays: naga::back::spv::BindingMap,
}

#[derive(Debug)]
pub struct BindGroup {
    set: gpu_descriptor::DescriptorSet<vk::DescriptorSet>,
}

/// Miscellaneous allocation recycling pool for `CommandAllocator`.
#[derive(Default)]
struct Temp {
    marker: Vec<u8>,
    buffer_barriers: Vec<vk::BufferMemoryBarrier>,
    image_barriers: Vec<vk::ImageMemoryBarrier>,
}

unsafe impl Send for Temp {}
unsafe impl Sync for Temp {}

impl Temp {
    fn clear(&mut self) {
        self.marker.clear();
        self.buffer_barriers.clear();
        self.image_barriers.clear();
        //see also - https://github.com/NotIntMan/inplace_it/issues/8
    }

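    /// Build a NUL-terminated C string in the recycled `marker` buffer.
    ///
    /// The returned `CStr` borrows `self.marker`, so it is only valid until
    /// the next call to `make_c_str` or `clear`, and `name` must not contain
    /// interior NUL bytes. A usage sketch (`temp` is a hypothetical `Temp`
    /// value, not an item in this module):
    ///
    /// ```ignore
    /// let c: &CStr = temp.make_c_str("my debug marker");
    /// // Pass `c.as_ptr()` to Vulkan before touching `temp` again.
    /// ```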
    fn make_c_str(&mut self, name: &str) -> &CStr {
        self.marker.clear();
        self.marker.extend_from_slice(name.as_bytes());
        self.marker.push(0);
        unsafe { CStr::from_bytes_with_nul_unchecked(&self.marker) }
    }
}

pub struct CommandEncoder {
    raw: vk::CommandPool,
    device: Arc<DeviceShared>,

    /// The current command buffer, if `self` is in the ["recording"]
    /// state.
    ///
    /// ["recording"]: crate::CommandEncoder
    ///
    /// If non-`null`, the buffer is in the Vulkan "recording" state.
    active: vk::CommandBuffer,

    /// What kind of pass we are currently within: compute or render.
    bind_point: vk::PipelineBindPoint,

    /// Allocation recycling pool for this encoder.
    temp: Temp,

    /// A pool of available command buffers.
    ///
    /// These are all in the Vulkan "initial" state.
    free: Vec<vk::CommandBuffer>,

    /// A pool of discarded command buffers.
    ///
    /// These could be in any Vulkan state except "pending".
    discarded: Vec<vk::CommandBuffer>,

    /// If this is true, the active render pass enabled a debug span,
    /// which needs to be disabled on render pass close.
    rpass_debug_marker_active: bool,

    /// If set, the end of the next render/compute pass will write a timestamp at
    /// the given pool & location.
    end_of_pass_timer_query: Option<(vk::QueryPool, u32)>,
}

impl CommandEncoder {
    /// # Safety
    ///
    /// - The command buffer handle must not be manually destroyed
    pub unsafe fn raw_handle(&self) -> vk::CommandBuffer {
        self.active
    }
}

impl fmt::Debug for CommandEncoder {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("CommandEncoder")
            .field("raw", &self.raw)
            .finish()
    }
}

#[derive(Debug)]
pub struct CommandBuffer {
    raw: vk::CommandBuffer,
}

#[derive(Debug)]
#[allow(clippy::large_enum_variant)]
pub enum ShaderModule {
    Raw(vk::ShaderModule),
    Intermediate {
        naga_shader: crate::NagaShader,
        runtime_checks: bool,
    },
}

#[derive(Debug)]
pub struct RenderPipeline {
    raw: vk::Pipeline,
}

#[derive(Debug)]
pub struct ComputePipeline {
    raw: vk::Pipeline,
}

#[derive(Debug)]
pub struct QuerySet {
    raw: vk::QueryPool,
}

/// The [`Api::Fence`] type for [`vulkan::Api`].
///
/// This is an `enum` because there are two possible implementations of
/// `wgpu-hal` fences on Vulkan: Vulkan fences, which work on any version of
/// Vulkan, and Vulkan timeline semaphores, which are easier and cheaper but
/// require non-1.0 features.
///
/// [`Device::create_fence`] returns a [`TimelineSemaphore`] if
/// [`VK_KHR_timeline_semaphore`] is available and enabled, and a [`FencePool`]
/// otherwise.
///
/// [`Api::Fence`]: crate::Api::Fence
/// [`vulkan::Api`]: Api
/// [`Device::create_fence`]: crate::Device::create_fence
/// [`TimelineSemaphore`]: Fence::TimelineSemaphore
/// [`VK_KHR_timeline_semaphore`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VK_KHR_timeline_semaphore
/// [`FencePool`]: Fence::FencePool
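///
/// For intuition, a sketch of how the [`FencePool`] variant advances
/// (values are illustrative):
///
/// ```ignore
/// // last_completed = 3, active = [(4, f4), (5, f5)]
/// // If f4 is signalled but f5 is not, the effective fence value is 4.
/// // A later `maintain` call recycles f4 into `free` and sets
/// // last_completed = 4.
/// ```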
#[derive(Debug)]
pub enum Fence {
    /// A Vulkan [timeline semaphore].
    ///
    /// These are simpler to use than Vulkan fences, since timeline semaphores
    /// work exactly the way [`wgpu_hal::Api::Fence`] is specified to work.
    ///
    /// [timeline semaphore]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-semaphores
    /// [`wgpu_hal::Api::Fence`]: crate::Api::Fence
    TimelineSemaphore(vk::Semaphore),

    /// A collection of Vulkan [fence]s, each associated with a [`FenceValue`].
    ///
    /// The effective [`FenceValue`] of this variant is the greater of
    /// `last_completed` and the maximum value associated with a signalled fence
    /// in `active`.
    ///
    /// Fences are available in all versions of Vulkan, but since they only have
    /// two states, "signaled" and "unsignaled", we need to use a separate fence
    /// for each queue submission we might want to wait for, and remember which
    /// [`FenceValue`] each one represents.
    ///
    /// [fence]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-fences
    /// [`FenceValue`]: crate::FenceValue
    FencePool {
        last_completed: crate::FenceValue,
        /// The pending fence values have to be ascending.
        active: Vec<(crate::FenceValue, vk::Fence)>,
        free: Vec<vk::Fence>,
    },
}

impl Fence {
    /// Return the highest [`FenceValue`] among the signalled fences in `active`.
    ///
    /// As an optimization, assume that we already know that the fence has
    /// reached `last_completed`, and don't bother checking fences whose values
    /// are less than that: those fences remain in the `active` array only
    /// because we haven't called `maintain` yet to clean them up.
    ///
    /// [`FenceValue`]: crate::FenceValue
    fn check_active(
        device: &ash::Device,
        mut last_completed: crate::FenceValue,
        active: &[(crate::FenceValue, vk::Fence)],
    ) -> Result<crate::FenceValue, crate::DeviceError> {
        for &(value, raw) in active.iter() {
            unsafe {
                if value > last_completed && device.get_fence_status(raw)? {
                    last_completed = value;
                }
            }
        }
        Ok(last_completed)
    }

    /// Return the highest signalled [`FenceValue`] for `self`.
    ///
    /// [`FenceValue`]: crate::FenceValue
    fn get_latest(
        &self,
        device: &ash::Device,
        extension: Option<&ExtensionFn<khr::TimelineSemaphore>>,
    ) -> Result<crate::FenceValue, crate::DeviceError> {
        match *self {
            Self::TimelineSemaphore(raw) => unsafe {
                Ok(match *extension.unwrap() {
                    ExtensionFn::Extension(ref ext) => ext.get_semaphore_counter_value(raw)?,
                    ExtensionFn::Promoted => device.get_semaphore_counter_value(raw)?,
                })
            },
            Self::FencePool {
                last_completed,
                ref active,
                free: _,
            } => Self::check_active(device, last_completed, active),
        }
    }

    /// Trim the internal state of this [`Fence`].
    ///
    /// This function has no externally visible effect, but you should call it
    /// periodically to keep this fence's resource consumption under control.
    ///
    /// For fences using the [`FencePool`] implementation, this function
    /// recycles fences that have been signaled. If you don't call this,
    /// [`Queue::submit`] will just keep allocating a new Vulkan fence every
    /// time it's called.
    ///
    /// [`FencePool`]: Fence::FencePool
    /// [`Queue::submit`]: crate::Queue::submit
    fn maintain(&mut self, device: &ash::Device) -> Result<(), crate::DeviceError> {
        match *self {
            Self::TimelineSemaphore(_) => {}
            Self::FencePool {
                ref mut last_completed,
                ref mut active,
                ref mut free,
            } => {
                let latest = Self::check_active(device, *last_completed, active)?;
                let base_free = free.len();
                for &(value, raw) in active.iter() {
                    if value <= latest {
                        free.push(raw);
                    }
                }
                if free.len() != base_free {
                    active.retain(|&(value, _)| value > latest);
                    unsafe {
                        device.reset_fences(&free[base_free..])?;
                    }
                }
                *last_completed = latest;
            }
        }
        Ok(())
    }
}

impl crate::Queue for Queue {
    type A = Api;

    unsafe fn submit(
        &self,
        command_buffers: &[&CommandBuffer],
        surface_textures: &[&SurfaceTexture],
        (signal_fence, signal_value): (&mut Fence, crate::FenceValue),
    ) -> Result<(), crate::DeviceError> {
        let mut fence_raw = vk::Fence::null();

        let mut wait_stage_masks = Vec::new();
        let mut wait_semaphores = Vec::new();
        let mut signal_semaphores = Vec::new();
        let mut signal_values = Vec::new();

        // Double check that the same swapchain image isn't being given to us multiple times,
        // as that will deadlock when we try to lock them all.
        debug_assert!(
            {
                let mut check = HashSet::with_capacity(surface_textures.len());
                // We compare the Arcs by pointer, as Eq isn't well defined for
                // SwapchainImageSemaphores.
                for st in surface_textures {
                    check.insert(Arc::as_ptr(&st.surface_semaphores));
                }
                check.len() == surface_textures.len()
            },
            "More than one surface texture is being used from the same swapchain. This will cause a deadlock in release."
        );

        let locked_swapchain_semaphores = surface_textures
            .iter()
            .map(|st| {
                st.surface_semaphores
                    .try_lock()
                    .expect("Failed to lock surface semaphore.")
            })
            .collect::<Vec<_>>();

        for mut swapchain_semaphore in locked_swapchain_semaphores {
            swapchain_semaphore.set_used_fence_value(signal_value);

            // If we're the first submission to operate on this image, wait on
            // its acquire semaphore, to make sure the presentation engine is
            // done with it.
            if let Some(sem) = swapchain_semaphore.get_acquire_wait_semaphore() {
                wait_stage_masks.push(vk::PipelineStageFlags::TOP_OF_PIPE);
                wait_semaphores.push(sem);
            }

            // Get a semaphore to signal when we're done writing to this surface
            // image. Presentation of this image will wait for this.
            let signal_semaphore = swapchain_semaphore.get_submit_signal_semaphore(&self.device)?;
            signal_semaphores.push(signal_semaphore);
            signal_values.push(!0);
        }

        // In order for submissions to be strictly ordered, we encode a dependency between each submission
        // using a pair of semaphores. This adds a wait if it is needed, and signals the next semaphore.
        let semaphore_state = self.relay_semaphores.lock().advance(&self.device)?;

        if let Some(sem) = semaphore_state.wait {
            wait_stage_masks.push(vk::PipelineStageFlags::TOP_OF_PIPE);
            wait_semaphores.push(sem);
        }

        signal_semaphores.push(semaphore_state.signal);
        signal_values.push(!0);

        // We need to signal our wgpu-hal fence if we have one; this adds it to the signal list.
        signal_fence.maintain(&self.device.raw)?;
        match *signal_fence {
            Fence::TimelineSemaphore(raw) => {
                signal_semaphores.push(raw);
                signal_values.push(signal_value);
            }
            Fence::FencePool {
                ref mut active,
                ref mut free,
                ..
            } => {
                fence_raw = match free.pop() {
                    Some(raw) => raw,
                    None => unsafe {
                        self.device
                            .raw
                            .create_fence(&vk::FenceCreateInfo::default(), None)?
                    },
                };
                active.push((signal_value, fence_raw));
            }
        }

        let vk_cmd_buffers = command_buffers
            .iter()
            .map(|cmd| cmd.raw)
            .collect::<Vec<_>>();

        let mut vk_info = vk::SubmitInfo::builder().command_buffers(&vk_cmd_buffers);

        vk_info = vk_info
            .wait_semaphores(&wait_semaphores)
            .wait_dst_stage_mask(&wait_stage_masks)
            .signal_semaphores(&signal_semaphores);

        let mut vk_timeline_info;

        if self.device.private_caps.timeline_semaphores {
            vk_timeline_info =
                vk::TimelineSemaphoreSubmitInfo::builder().signal_semaphore_values(&signal_values);
            vk_info = vk_info.push_next(&mut vk_timeline_info);
        }

        profiling::scope!("vkQueueSubmit");
        unsafe {
            self.device
                .raw
                .queue_submit(self.raw, &[vk_info.build()], fence_raw)?
        };
        Ok(())
    }

    unsafe fn present(
        &self,
        surface: &Surface,
        texture: SurfaceTexture,
    ) -> Result<(), crate::SurfaceError> {
        let mut swapchain = surface.swapchain.write();
        let ssc = swapchain.as_mut().unwrap();
        let mut swapchain_semaphores = texture.surface_semaphores.lock();

        let swapchains = [ssc.raw];
        let image_indices = [texture.index];
        let vk_info = vk::PresentInfoKHR::builder()
            .swapchains(&swapchains)
            .image_indices(&image_indices)
            .wait_semaphores(swapchain_semaphores.get_present_wait_semaphores());

        let suboptimal = {
            profiling::scope!("vkQueuePresentKHR");
            unsafe { self.swapchain_fn.queue_present(self.raw, &vk_info) }.map_err(|error| {
                match error {
                    vk::Result::ERROR_OUT_OF_DATE_KHR => crate::SurfaceError::Outdated,
                    vk::Result::ERROR_SURFACE_LOST_KHR => crate::SurfaceError::Lost,
                    _ => crate::DeviceError::from(error).into(),
                }
            })?
        };
        if suboptimal {
            // We treat `VK_SUBOPTIMAL_KHR` as `VK_SUCCESS` on Android.
            // On Android 10+, libvulkan's `vkQueuePresentKHR` implementation returns
            // `VK_SUBOPTIMAL_KHR` if it is not doing pre-rotation (i.e. if
            // `VkSwapchainCreateInfoKHR::preTransform` is not equal to the current
            // device orientation). This is always the case when the device orientation
            // is anything other than the identity one, as we unconditionally use
            // `VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR`.
            #[cfg(not(target_os = "android"))]
            log::warn!("Suboptimal present of frame {}", texture.index);
        }
        Ok(())
    }

    unsafe fn get_timestamp_period(&self) -> f32 {
        self.device.timestamp_period
    }
}

impl From<vk::Result> for crate::DeviceError {
    fn from(result: vk::Result) -> Self {
        #![allow(unreachable_code)]
        match result {
            vk::Result::ERROR_OUT_OF_HOST_MEMORY | vk::Result::ERROR_OUT_OF_DEVICE_MEMORY => {
                #[cfg(feature = "oom_panic")]
                panic!("Out of memory ({result:?})");

                Self::OutOfMemory
            }
            vk::Result::ERROR_DEVICE_LOST => {
                #[cfg(feature = "device_lost_panic")]
                panic!("Device lost");

                Self::Lost
            }
            _ => {
                #[cfg(feature = "internal_error_panic")]
                panic!("Internal error: {result:?}");

                log::warn!("Unrecognized device error {result:?}");
                Self::Lost
            }
        }
    }
}