wgpu_hal/vulkan/mod.rs
/*!
# Vulkan API internals.

## Stack memory

Ash expects slices, which we don't generally have available.
We cope with this requirement in a combination of the following ways:
 - temporarily allocating `Vec` on the heap, where the overhead is permitted
 - growing temporary local storage
 - using `inplace_it` on iterators

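For example, here is a sketch of the `inplace_it` pattern (the closure body
and names like `views` and `consume_slice` are illustrative, not code from
this crate):

```ignore
// Collect an iterator into a slice on the stack when it's small,
// falling back to a heap allocation when it's large.
inplace_it::inplace_or_alloc_from_iter(views.iter().map(|v| v.raw), |raw_views| {
    // `raw_views: &mut [vk::ImageView]` is valid only inside this closure.
    consume_slice(raw_views)
});
```
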
## Framebuffers and Render passes

Render passes are cached on the device and kept forever.

Framebuffers are also cached on the device, but they are removed when
any of the image views they reference is destroyed.
If Vulkan supports image-less framebuffers,
then the actual views are excluded from the framebuffer key.

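Both caches follow the same lock-then-lookup-or-create pattern; a minimal
sketch (assuming a hypothetical `make_render_pass` constructor):

```ignore
use std::collections::hash_map::Entry;

let mut cache = device.render_passes.lock();
let raw_pass = match cache.entry(key) {
    Entry::Occupied(e) => *e.get(),
    Entry::Vacant(e) => {
        let pass = make_render_pass(&device.raw, e.key())?;
        *e.insert(pass)
    }
};
```
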
## Fences

If timeline semaphores are available, they are used 1:1 with wgpu-hal fences.
Otherwise, we manage a pool of `VkFence` objects behind each `hal::Fence`.

!*/

mod adapter;
mod command;
mod conv;
mod device;
mod instance;

use std::{borrow::Borrow, collections::HashSet, ffi::CStr, fmt, mem, num::NonZeroU32, sync::Arc};

use arrayvec::ArrayVec;
use ash::{
    extensions::{ext, khr},
    vk,
};
use parking_lot::{Mutex, RwLock};

const MILLIS_TO_NANOS: u64 = 1_000_000;
const MAX_TOTAL_ATTACHMENTS: usize = crate::MAX_COLOR_ATTACHMENTS * 2 + 1;

#[derive(Clone, Debug)]
pub struct Api;

impl crate::Api for Api {
    type Instance = Instance;
    type Surface = Surface;
    type Adapter = Adapter;
    type Device = Device;

    type Queue = Queue;
    type CommandEncoder = CommandEncoder;
    type CommandBuffer = CommandBuffer;

    type Buffer = Buffer;
    type Texture = Texture;
    type SurfaceTexture = SurfaceTexture;
    type TextureView = TextureView;
    type Sampler = Sampler;
    type QuerySet = QuerySet;
    type Fence = Fence;
    type AccelerationStructure = AccelerationStructure;

    type BindGroupLayout = BindGroupLayout;
    type BindGroup = BindGroup;
    type PipelineLayout = PipelineLayout;
    type ShaderModule = ShaderModule;
    type RenderPipeline = RenderPipeline;
    type ComputePipeline = ComputePipeline;
}

struct DebugUtils {
    extension: ext::DebugUtils,
    messenger: vk::DebugUtilsMessengerEXT,

    /// Owning pointer to the debug messenger callback user data.
    ///
    /// `InstanceShared::drop` destroys the debug messenger before
    /// dropping this, so the callback should never receive a dangling
    /// user data pointer.
    #[allow(dead_code)]
    callback_data: Box<DebugUtilsMessengerUserData>,
}

pub struct DebugUtilsCreateInfo {
    severity: vk::DebugUtilsMessageSeverityFlagsEXT,
    message_type: vk::DebugUtilsMessageTypeFlagsEXT,
    callback_data: Box<DebugUtilsMessengerUserData>,
}

/// Properties of the validation layer that the `DebugUtilsMessenger`
/// needs for its workarounds.
#[derive(Debug)]
struct ValidationLayerProperties {
    /// Validation layer description, from `vk::LayerProperties`.
    layer_description: std::ffi::CString,

    /// Validation layer specification version, from `vk::LayerProperties`.
    layer_spec_version: u32,
}

/// User data needed by `instance::debug_utils_messenger_callback`.
///
/// When we create the [`vk::DebugUtilsMessengerEXT`], the `pUserData`
/// pointer refers to one of these values.
#[derive(Debug)]
pub struct DebugUtilsMessengerUserData {
    /// The properties related to the validation layer, if present.
    validation_layer_properties: Option<ValidationLayerProperties>,

    /// Whether the OBS layer is present. OBS never increments the version
    /// of its layer, so there's no reason to track it.
    has_obs_layer: bool,
}

pub struct InstanceShared {
    raw: ash::Instance,
    extensions: Vec<&'static CStr>,
    drop_guard: Option<crate::DropGuard>,
    flags: wgt::InstanceFlags,
    debug_utils: Option<DebugUtils>,
    get_physical_device_properties: Option<khr::GetPhysicalDeviceProperties2>,
    entry: ash::Entry,
    has_nv_optimus: bool,
    android_sdk_version: u32,
    /// The instance API version: the version of Vulkan supported for
    /// instance-level functionality.
    ///
    /// It is associated with a `VkInstance` and its children,
    /// except for a `VkPhysicalDevice` and its children.
    instance_api_version: u32,
}

pub struct Instance {
    shared: Arc<InstanceShared>,
}

/// The semaphores needed to use one image in a swapchain.
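///
/// For orientation, a sketch of how one acquire/draw/present cycle uses
/// these methods (the surrounding glue is hypothetical, not this crate's
/// actual call sites):
///
/// ```ignore
/// // vkAcquireNextImageKHR signals `acquire` for this image.
/// sems.set_used_fence_value(submission_index);
/// // Only the first submission touching the image waits on `acquire`:
/// if let Some(sem) = sems.get_acquire_wait_semaphore() {
///     wait_semaphores.push(sem);
/// }
/// // Every submission that draws to the image signals a present semaphore:
/// signal_semaphores.push(sems.get_submit_signal_semaphore(device)?);
/// // Finally, vkQueuePresentKHR waits on all accumulated present semaphores:
/// let waits = sems.get_present_wait_semaphores();
/// ```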
#[derive(Debug)]
struct SwapchainImageSemaphores {
    /// A semaphore that is signaled when this image is safe for us to modify.
    ///
    /// When [`vkAcquireNextImageKHR`] returns the index of the next swapchain
    /// image that we should use, that image may actually still be in use by the
    /// presentation engine, and is not yet safe to modify. However, that
    /// function does accept a semaphore that it will signal when the image is
    /// indeed safe to begin messing with.
    ///
    /// This semaphore is:
    ///
    /// - waited for by the first queue submission to operate on this image
    ///   since it was acquired, and
    ///
    /// - signaled by [`vkAcquireNextImageKHR`] when the acquired image is ready
    ///   for us to use.
    ///
    /// [`vkAcquireNextImageKHR`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkAcquireNextImageKHR
    acquire: vk::Semaphore,

    /// True if the next command submission operating on this image should wait
    /// for [`acquire`].
    ///
    /// We must wait for `acquire` before drawing to this swapchain image, but
    /// because `wgpu-hal` queue submissions are always strongly ordered, only
    /// the first submission that works with a swapchain image actually needs to
    /// wait. We set this flag when this image is acquired, and clear it the
    /// first time it's passed to [`Queue::submit`] as a surface texture.
    ///
    /// [`acquire`]: SwapchainImageSemaphores::acquire
    /// [`Queue::submit`]: crate::Queue::submit
    should_wait_for_acquire: bool,

    /// A pool of semaphores for ordering presentation after drawing.
    ///
    /// The first [`present_index`] semaphores in this vector are:
    ///
    /// - all waited on by the call to [`vkQueuePresentKHR`] that presents this
    ///   image, and
    ///
    /// - each signaled by some [`vkQueueSubmit`] queue submission that draws to
    ///   this image, when the submission finishes execution.
    ///
    /// This vector accumulates one semaphore per submission that writes to this
    /// image. This is awkward, but hard to avoid: [`vkQueuePresentKHR`]
    /// requires a semaphore to order it with respect to drawing commands, and
    /// we can't attach new completion semaphores to a command submission after
    /// it's been submitted. This means that, at submission time, we must create
    /// the semaphore we might need if the caller's next action is to enqueue a
    /// presentation of this image.
    ///
    /// An alternative strategy would be for presentation to enqueue an empty
    /// submit, ordered relative to other submits in the usual way, and
    /// signaling a single presentation semaphore. But we suspect that submits
    /// are usually expensive enough, and semaphores usually cheap enough, that
    /// performance-sensitive users will avoid making many submits, so that the
    /// cost of accumulated semaphores will usually be less than the cost of an
    /// additional submit.
    ///
    /// Only the first [`present_index`] semaphores in the vector are actually
    /// going to be signalled by submitted commands, and need to be waited for
    /// by the next present call. Any semaphores beyond that index were created
    /// for prior presents and are simply being retained for recycling.
    ///
    /// [`present_index`]: SwapchainImageSemaphores::present_index
    /// [`vkQueuePresentKHR`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueuePresentKHR
    /// [`vkQueueSubmit`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueueSubmit
    present: Vec<vk::Semaphore>,

    /// The number of semaphores in [`present`] to be signalled for this submission.
    ///
    /// [`present`]: SwapchainImageSemaphores::present
    present_index: usize,

    /// The fence value of the last command submission that wrote to this image.
    ///
    /// The next time we try to acquire this image, we'll block until
    /// this submission finishes, proving that [`acquire`] is ready to
    /// pass to `vkAcquireNextImageKHR` again.
    ///
    /// [`acquire`]: SwapchainImageSemaphores::acquire
    previously_used_submission_index: crate::FenceValue,
}

impl SwapchainImageSemaphores {
    fn new(device: &DeviceShared) -> Result<Self, crate::DeviceError> {
        Ok(Self {
            acquire: device.new_binary_semaphore()?,
            should_wait_for_acquire: true,
            present: Vec::new(),
            present_index: 0,
            previously_used_submission_index: 0,
        })
    }

    fn set_used_fence_value(&mut self, value: crate::FenceValue) {
        self.previously_used_submission_index = value;
    }

    /// Return the semaphore that commands drawing to this image should wait for, if any.
    ///
    /// This only returns `Some` once per acquisition; see
    /// [`SwapchainImageSemaphores::should_wait_for_acquire`] for details.
    fn get_acquire_wait_semaphore(&mut self) -> Option<vk::Semaphore> {
        if self.should_wait_for_acquire {
            self.should_wait_for_acquire = false;
            Some(self.acquire)
        } else {
            None
        }
    }

    /// Return a semaphore that a submission that writes to this image should
    /// signal when it's done.
    ///
    /// See [`SwapchainImageSemaphores::present`] for details.
    fn get_submit_signal_semaphore(
        &mut self,
        device: &DeviceShared,
    ) -> Result<vk::Semaphore, crate::DeviceError> {
        // Try to recycle a semaphore we created for a previous presentation.
        let sem = match self.present.get(self.present_index) {
            Some(sem) => *sem,
            None => {
                let sem = device.new_binary_semaphore()?;
                self.present.push(sem);
                sem
            }
        };

        self.present_index += 1;

        Ok(sem)
    }

    /// Return the semaphores that a presentation of this image should wait on.
    ///
    /// Return a slice of semaphores that the call to [`vkQueuePresentKHR`] that
    /// ends this image's acquisition should wait for. See
    /// [`SwapchainImageSemaphores::present`] for details.
    ///
    /// Reset `self` to be ready for the next acquisition cycle.
    ///
    /// [`vkQueuePresentKHR`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueuePresentKHR
    fn get_present_wait_semaphores(&mut self) -> &[vk::Semaphore] {
        let old_index = self.present_index;

        // Since this marks the end of this acquire/draw/present cycle, take the
        // opportunity to reset `self` in preparation for the next acquisition.
        self.present_index = 0;
        self.should_wait_for_acquire = true;

        &self.present[0..old_index]
    }

    unsafe fn destroy(&self, device: &ash::Device) {
        unsafe {
            device.destroy_semaphore(self.acquire, None);
            for sem in &self.present {
                device.destroy_semaphore(*sem, None);
            }
        }
    }
}

struct Swapchain {
    raw: vk::SwapchainKHR,
    raw_flags: vk::SwapchainCreateFlagsKHR,
    functor: khr::Swapchain,
    device: Arc<DeviceShared>,
    images: Vec<vk::Image>,
    config: crate::SurfaceConfiguration,
    view_formats: Vec<wgt::TextureFormat>,
    /// One wait semaphore per swapchain image. This will be associated with the
    /// surface texture, and later collected during submission.
    ///
    /// We need this to be `Arc<Mutex<>>` because we need to be able to pass this
    /// data into the surface texture, so submit/present can use it.
    surface_semaphores: Vec<Arc<Mutex<SwapchainImageSemaphores>>>,
    /// The index of the next semaphore to use. Ideally we would use the same
    /// index as the image index, but we need to specify the semaphore as an
    /// argument to `acquire_next_image`, which is the very call that tells us
    /// which image to use.
    next_semaphore_index: usize,
}

impl Swapchain {
    fn advance_surface_semaphores(&mut self) {
        let semaphore_count = self.surface_semaphores.len();
        self.next_semaphore_index = (self.next_semaphore_index + 1) % semaphore_count;
    }

    fn get_surface_semaphores(&self) -> Arc<Mutex<SwapchainImageSemaphores>> {
        self.surface_semaphores[self.next_semaphore_index].clone()
    }
}

pub struct Surface {
    raw: vk::SurfaceKHR,
    functor: khr::Surface,
    instance: Arc<InstanceShared>,
    swapchain: RwLock<Option<Swapchain>>,
}

#[derive(Debug)]
pub struct SurfaceTexture {
    index: u32,
    texture: Texture,
    surface_semaphores: Arc<Mutex<SwapchainImageSemaphores>>,
}

impl Borrow<Texture> for SurfaceTexture {
    fn borrow(&self) -> &Texture {
        &self.texture
    }
}

pub struct Adapter {
    raw: vk::PhysicalDevice,
    instance: Arc<InstanceShared>,
    //queue_families: Vec<vk::QueueFamilyProperties>,
    known_memory_flags: vk::MemoryPropertyFlags,
    phd_capabilities: adapter::PhysicalDeviceProperties,
    //phd_features: adapter::PhysicalDeviceFeatures,
    downlevel_flags: wgt::DownlevelFlags,
    private_caps: PrivateCapabilities,
    workarounds: Workarounds,
}

// TODO: there's no reason why this can't be unified; the function pointers
// should all be the same, but it's not clear how to do this with `ash`.
enum ExtensionFn<T> {
    /// The loaded function pointer struct for an extension.
    Extension(T),
    /// The extension was promoted to a core version of Vulkan and the
    /// functions on `ash`'s `DeviceV1_x` traits should be used.
    Promoted,
}

struct DeviceExtensionFunctions {
    draw_indirect_count: Option<khr::DrawIndirectCount>,
    timeline_semaphore: Option<ExtensionFn<khr::TimelineSemaphore>>,
    ray_tracing: Option<RayTracingDeviceExtensionFunctions>,
}

struct RayTracingDeviceExtensionFunctions {
    acceleration_structure: khr::AccelerationStructure,
    buffer_device_address: khr::BufferDeviceAddress,
}

/// Set of internal capabilities, which don't show up in the exposed
/// device geometry, but affect the code paths taken internally.
#[derive(Clone, Debug)]
struct PrivateCapabilities {
    /// Y-flipping is implemented with either `VK_AMD_negative_viewport_height`
    /// or `VK_KHR_maintenance1`/1.1+. The AMD extension for negative viewport
    /// height does not require a Y shift.
    ///
    /// This flag is `true` if the device has `VK_KHR_maintenance1`/1.1+ and
    /// `false` otherwise (i.e. in the case of `VK_AMD_negative_viewport_height`).
    flip_y_requires_shift: bool,
    imageless_framebuffers: bool,
    image_view_usage: bool,
    timeline_semaphores: bool,
    texture_d24: bool,
    texture_d24_s8: bool,
    texture_s8: bool,
    /// Ability to present contents to any screen. Only needed to work around
    /// broken platform configurations.
    can_present: bool,
    non_coherent_map_mask: wgt::BufferAddress,
    robust_buffer_access: bool,
    robust_image_access: bool,
    robust_buffer_access2: bool,
    robust_image_access2: bool,
    zero_initialize_workgroup_memory: bool,
    image_format_list: bool,
}

bitflags::bitflags!(
    /// Workaround flags.
    #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
    pub struct Workarounds: u32 {
        /// Only generate SPIR-V for one entry point at a time.
        const SEPARATE_ENTRY_POINTS = 0x1;
        /// Qualcomm OOMs when there are zero color attachments but a non-null pointer
        /// to a subpass resolve attachment array. This nulls out that pointer in that case.
        const EMPTY_RESOLVE_ATTACHMENT_LISTS = 0x2;
        /// If the following code returns false, then Nvidia will end up filling the wrong range.
        ///
        /// ```skip
        /// fn nvidia_succeeds() -> bool {
        ///     # let (copy_length, start_offset) = (0, 0);
        ///     if copy_length >= 4096 {
        ///         if start_offset % 16 != 0 {
        ///             if copy_length == 4096 {
        ///                 return true;
        ///             }
        ///             if copy_length % 16 == 0 {
        ///                 return false;
        ///             }
        ///         }
        ///     }
        ///     true
        /// }
        /// ```
        ///
        /// As such, we need to make sure all calls to vkCmdFillBuffer are aligned to 16 bytes
        /// if they cover a range of 4096 bytes or more.
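        ///
        /// For illustration, a sketch of honoring this at a hypothetical call
        /// site (not the actual code; `workarounds`, `offset`, and `size` are
        /// assumed names):
        ///
        /// ```skip
        /// if workarounds.contains(Workarounds::FORCE_FILL_BUFFER_WITH_SIZE_GREATER_4096_ALIGNED_OFFSET_16)
        ///     && size >= 4096
        /// {
        ///     // Both the offset and the size passed to vkCmdFillBuffer
        ///     // must be 16-byte aligned in this regime.
        ///     debug_assert!(offset % 16 == 0 && size % 16 == 0);
        /// }
        /// ```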
        const FORCE_FILL_BUFFER_WITH_SIZE_GREATER_4096_ALIGNED_OFFSET_16 = 0x4;
    }
);

#[derive(Clone, Debug, Eq, Hash, PartialEq)]
struct AttachmentKey {
    format: vk::Format,
    layout: vk::ImageLayout,
    ops: crate::AttachmentOps,
}

impl AttachmentKey {
    /// Returns an attachment key for a compatible attachment.
    fn compatible(format: vk::Format, layout: vk::ImageLayout) -> Self {
        Self {
            format,
            layout,
            ops: crate::AttachmentOps::all(),
        }
    }
}

#[derive(Clone, Eq, Hash, PartialEq)]
struct ColorAttachmentKey {
    base: AttachmentKey,
    resolve: Option<AttachmentKey>,
}

#[derive(Clone, Eq, Hash, PartialEq)]
struct DepthStencilAttachmentKey {
    base: AttachmentKey,
    stencil_ops: crate::AttachmentOps,
}

#[derive(Clone, Eq, Default, Hash, PartialEq)]
struct RenderPassKey {
    colors: ArrayVec<Option<ColorAttachmentKey>, { crate::MAX_COLOR_ATTACHMENTS }>,
    depth_stencil: Option<DepthStencilAttachmentKey>,
    sample_count: u32,
    multiview: Option<NonZeroU32>,
}

#[derive(Clone, Debug, Eq, Hash, PartialEq)]
struct FramebufferAttachment {
    /// Can be NULL if the framebuffer is image-less
    raw: vk::ImageView,
    raw_image_flags: vk::ImageCreateFlags,
    view_usage: crate::TextureUses,
    view_format: wgt::TextureFormat,
    raw_view_formats: Vec<vk::Format>,
}

#[derive(Clone, Eq, Hash, PartialEq)]
struct FramebufferKey {
    attachments: ArrayVec<FramebufferAttachment, { MAX_TOTAL_ATTACHMENTS }>,
    extent: wgt::Extent3d,
    sample_count: u32,
}

struct DeviceShared {
    raw: ash::Device,
    family_index: u32,
    queue_index: u32,
    raw_queue: ash::vk::Queue,
    handle_is_owned: bool,
    instance: Arc<InstanceShared>,
    physical_device: ash::vk::PhysicalDevice,
    enabled_extensions: Vec<&'static CStr>,
    extension_fns: DeviceExtensionFunctions,
    vendor_id: u32,
    timestamp_period: f32,
    private_caps: PrivateCapabilities,
    workarounds: Workarounds,
    features: wgt::Features,
    render_passes: Mutex<rustc_hash::FxHashMap<RenderPassKey, vk::RenderPass>>,
    framebuffers: Mutex<rustc_hash::FxHashMap<FramebufferKey, vk::Framebuffer>>,
}

pub struct Device {
    shared: Arc<DeviceShared>,
    mem_allocator: Mutex<gpu_alloc::GpuAllocator<vk::DeviceMemory>>,
    desc_allocator:
        Mutex<gpu_descriptor::DescriptorAllocator<vk::DescriptorPool, vk::DescriptorSet>>,
    valid_ash_memory_types: u32,
    naga_options: naga::back::spv::Options<'static>,
    #[cfg(feature = "renderdoc")]
    render_doc: crate::auxil::renderdoc::RenderDoc,
}

/// Semaphores for forcing queue submissions to run in order.
///
/// The [`wgpu_hal::Queue`] trait promises that if two calls to [`submit`] are
/// ordered, then the first submission will finish on the GPU before the second
/// submission begins. To get this behavior on Vulkan we need to pass semaphores
/// to [`vkQueueSubmit`] for the commands to wait on before beginning execution,
/// and to signal when their execution is done.
///
/// Normally this can be done with a single semaphore, waited on and then
/// signalled for each submission. At any given time there's exactly one
/// submission that would signal the semaphore, and exactly one waiting on it,
/// as Vulkan requires.
///
/// However, as of Oct 2021, bug [#5508] in the Mesa ANV drivers caused them to
/// hang if we use a single semaphore. The workaround is to alternate between
/// two semaphores. The bug has been fixed in Mesa, but we should probably keep
/// the workaround until, say, Oct 2026.
///
/// [`wgpu_hal::Queue`]: crate::Queue
/// [`submit`]: crate::Queue::submit
/// [`vkQueueSubmit`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueueSubmit
/// [#5508]: https://gitlab.freedesktop.org/mesa/mesa/-/issues/5508
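///
/// For illustration, with the two relayed semaphores `s0` and `s1`, the
/// submissions chain like this (the first submission waits on nothing):
///
/// ```ignore
/// // submission:  1    2    3    4    ...
/// // waits on:    -    s0   s1   s0
/// // signals:     s0   s1   s0   s1
/// ```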
#[derive(Clone)]
struct RelaySemaphores {
    /// The semaphore the next submission should wait on before beginning
    /// execution on the GPU. This is `None` for the first submission, which
    /// should not wait on anything at all.
    wait: Option<vk::Semaphore>,

    /// The semaphore the next submission should signal when it has finished
    /// execution on the GPU.
    signal: vk::Semaphore,
}

impl RelaySemaphores {
    fn new(device: &DeviceShared) -> Result<Self, crate::DeviceError> {
        Ok(Self {
            wait: None,
            signal: device.new_binary_semaphore()?,
        })
    }

    /// Advances the semaphores, returning the semaphores that should be used for a submission.
    fn advance(&mut self, device: &DeviceShared) -> Result<Self, crate::DeviceError> {
        let old = self.clone();

        // Build the state for the next submission.
        match self.wait {
            None => {
                // The `old` values describe the first submission to this queue.
                // The second submission should wait on `old.signal`, and then
                // signal a new semaphore which we'll create now.
                self.wait = Some(old.signal);
                self.signal = device.new_binary_semaphore()?;
            }
            Some(ref mut wait) => {
                // What this submission signals, the next should wait on.
                mem::swap(wait, &mut self.signal);
            }
        };

        Ok(old)
    }

    /// Destroys the semaphores.
    unsafe fn destroy(&self, device: &ash::Device) {
        unsafe {
            if let Some(wait) = self.wait {
                device.destroy_semaphore(wait, None);
            }
            device.destroy_semaphore(self.signal, None);
        }
    }
}

pub struct Queue {
    raw: vk::Queue,
    swapchain_fn: khr::Swapchain,
    device: Arc<DeviceShared>,
    family_index: u32,
    relay_semaphores: Mutex<RelaySemaphores>,
}

#[derive(Debug)]
pub struct Buffer {
    raw: vk::Buffer,
    block: Option<Mutex<gpu_alloc::MemoryBlock<vk::DeviceMemory>>>,
}

#[derive(Debug)]
pub struct AccelerationStructure {
    raw: vk::AccelerationStructureKHR,
    buffer: vk::Buffer,
    block: Mutex<gpu_alloc::MemoryBlock<vk::DeviceMemory>>,
}

#[derive(Debug)]
pub struct Texture {
    raw: vk::Image,
    drop_guard: Option<crate::DropGuard>,
    block: Option<gpu_alloc::MemoryBlock<vk::DeviceMemory>>,
    usage: crate::TextureUses,
    format: wgt::TextureFormat,
    raw_flags: vk::ImageCreateFlags,
    copy_size: crate::CopyExtent,
    view_formats: Vec<wgt::TextureFormat>,
}

impl Texture {
    /// # Safety
    ///
    /// - The image handle must not be manually destroyed
    pub unsafe fn raw_handle(&self) -> vk::Image {
        self.raw
    }
}

#[derive(Debug)]
pub struct TextureView {
    raw: vk::ImageView,
    layers: NonZeroU32,
    attachment: FramebufferAttachment,
}

impl TextureView {
    /// # Safety
    ///
    /// - The image view handle must not be manually destroyed
    pub unsafe fn raw_handle(&self) -> vk::ImageView {
        self.raw
    }
}

#[derive(Debug)]
pub struct Sampler {
    raw: vk::Sampler,
}

#[derive(Debug)]
pub struct BindGroupLayout {
    raw: vk::DescriptorSetLayout,
    desc_count: gpu_descriptor::DescriptorTotalCount,
    types: Box<[(vk::DescriptorType, u32)]>,
    /// Map of binding index to size.
    binding_arrays: Vec<(u32, NonZeroU32)>,
}

#[derive(Debug)]
pub struct PipelineLayout {
    raw: vk::PipelineLayout,
    binding_arrays: naga::back::spv::BindingMap,
}

#[derive(Debug)]
pub struct BindGroup {
    set: gpu_descriptor::DescriptorSet<vk::DescriptorSet>,
}

/// Miscellaneous allocation recycling pool for `CommandAllocator`.
#[derive(Default)]
struct Temp {
    marker: Vec<u8>,
    buffer_barriers: Vec<vk::BufferMemoryBarrier>,
    image_barriers: Vec<vk::ImageMemoryBarrier>,
}

unsafe impl Send for Temp {}
unsafe impl Sync for Temp {}

impl Temp {
    fn clear(&mut self) {
        self.marker.clear();
        self.buffer_barriers.clear();
        self.image_barriers.clear();
        //see also - https://github.com/NotIntMan/inplace_it/issues/8
    }

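    /// Build a NUL-terminated C string in the recycled `marker` buffer.
    ///
    /// The returned `CStr` borrows `self.marker`, so it is only valid until
    /// the next call to `make_c_str` or `clear`, and `name` must not contain
    /// interior NUL bytes. A usage sketch (`temp` is a hypothetical `Temp`
    /// value, not an item in this module):
    ///
    /// ```ignore
    /// let c: &CStr = temp.make_c_str("my debug marker");
    /// // Pass `c.as_ptr()` to Vulkan before touching `temp` again.
    /// ```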
    fn make_c_str(&mut self, name: &str) -> &CStr {
        self.marker.clear();
        self.marker.extend_from_slice(name.as_bytes());
        self.marker.push(0);
        unsafe { CStr::from_bytes_with_nul_unchecked(&self.marker) }
    }
}

pub struct CommandEncoder {
    raw: vk::CommandPool,
    device: Arc<DeviceShared>,

    /// The current command buffer, if `self` is in the ["recording"]
    /// state.
    ///
    /// ["recording"]: crate::CommandEncoder
    ///
    /// If non-`null`, the buffer is in the Vulkan "recording" state.
    active: vk::CommandBuffer,

    /// What kind of pass we are currently within: compute or render.
    bind_point: vk::PipelineBindPoint,

    /// Allocation recycling pool for this encoder.
    temp: Temp,

    /// A pool of available command buffers.
    ///
    /// These are all in the Vulkan "initial" state.
    free: Vec<vk::CommandBuffer>,

    /// A pool of discarded command buffers.
    ///
    /// These could be in any Vulkan state except "pending".
    discarded: Vec<vk::CommandBuffer>,

    /// If this is true, the active render pass enabled a debug span,
    /// which needs to be disabled on render pass close.
    rpass_debug_marker_active: bool,

    /// If set, the end of the next render/compute pass will write a timestamp at
    /// the given pool & location.
    end_of_pass_timer_query: Option<(vk::QueryPool, u32)>,
}

impl CommandEncoder {
    /// # Safety
    ///
    /// - The command buffer handle must not be manually destroyed
    pub unsafe fn raw_handle(&self) -> vk::CommandBuffer {
        self.active
    }
}

impl fmt::Debug for CommandEncoder {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("CommandEncoder")
            .field("raw", &self.raw)
            .finish()
    }
}

#[derive(Debug)]
pub struct CommandBuffer {
    raw: vk::CommandBuffer,
}

#[derive(Debug)]
#[allow(clippy::large_enum_variant)]
pub enum ShaderModule {
    Raw(vk::ShaderModule),
    Intermediate {
        naga_shader: crate::NagaShader,
        runtime_checks: bool,
    },
}

#[derive(Debug)]
pub struct RenderPipeline {
    raw: vk::Pipeline,
}

#[derive(Debug)]
pub struct ComputePipeline {
    raw: vk::Pipeline,
}

#[derive(Debug)]
pub struct QuerySet {
    raw: vk::QueryPool,
}

/// The [`Api::Fence`] type for [`vulkan::Api`].
///
/// This is an `enum` because there are two possible implementations of
/// `wgpu-hal` fences on Vulkan: Vulkan fences, which work on any version of
/// Vulkan, and Vulkan timeline semaphores, which are easier and cheaper but
/// require non-1.0 features.
///
/// [`Device::create_fence`] returns a [`TimelineSemaphore`] if
/// [`VK_KHR_timeline_semaphore`] is available and enabled, and a [`FencePool`]
/// otherwise.
///
/// [`Api::Fence`]: crate::Api::Fence
/// [`vulkan::Api`]: Api
/// [`Device::create_fence`]: crate::Device::create_fence
/// [`TimelineSemaphore`]: Fence::TimelineSemaphore
/// [`VK_KHR_timeline_semaphore`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VK_KHR_timeline_semaphore
/// [`FencePool`]: Fence::FencePool
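///
/// For intuition, a sketch of how the [`FencePool`] variant advances
/// (values are illustrative):
///
/// ```ignore
/// // last_completed = 3, active = [(4, f4), (5, f5)]
/// // If f4 is signalled but f5 is not, the effective fence value is 4.
/// // A later `maintain` call recycles f4 into `free` and sets
/// // last_completed = 4.
/// ```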
#[derive(Debug)]
pub enum Fence {
    /// A Vulkan [timeline semaphore].
    ///
    /// These are simpler to use than Vulkan fences, since timeline semaphores
    /// work exactly the way [`wgpu_hal::Api::Fence`] is specified to work.
    ///
    /// [timeline semaphore]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-semaphores
    /// [`wgpu_hal::Api::Fence`]: crate::Api::Fence
    TimelineSemaphore(vk::Semaphore),

    /// A collection of Vulkan [fence]s, each associated with a [`FenceValue`].
    ///
    /// The effective [`FenceValue`] of this variant is the greater of
    /// `last_completed` and the maximum value associated with a signalled fence
    /// in `active`.
    ///
    /// Fences are available in all versions of Vulkan, but since they only have
    /// two states, "signaled" and "unsignaled", we need to use a separate fence
    /// for each queue submission we might want to wait for, and remember which
    /// [`FenceValue`] each one represents.
    ///
    /// [fence]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-fences
    /// [`FenceValue`]: crate::FenceValue
    FencePool {
        last_completed: crate::FenceValue,
        /// The pending fence values have to be ascending.
        active: Vec<(crate::FenceValue, vk::Fence)>,
        free: Vec<vk::Fence>,
    },
}

impl Fence {
    /// Return the highest [`FenceValue`] among the signalled fences in `active`.
    ///
    /// As an optimization, assume that we already know that the fence has
    /// reached `last_completed`, and don't bother checking fences whose values
    /// are less than that: those fences remain in the `active` array only
    /// because we haven't called `maintain` yet to clean them up.
    ///
    /// [`FenceValue`]: crate::FenceValue
    fn check_active(
        device: &ash::Device,
        mut last_completed: crate::FenceValue,
        active: &[(crate::FenceValue, vk::Fence)],
    ) -> Result<crate::FenceValue, crate::DeviceError> {
        for &(value, raw) in active.iter() {
            unsafe {
                if value > last_completed && device.get_fence_status(raw)? {
                    last_completed = value;
                }
            }
        }
        Ok(last_completed)
    }

    /// Return the highest signalled [`FenceValue`] for `self`.
    ///
    /// [`FenceValue`]: crate::FenceValue
    fn get_latest(
        &self,
        device: &ash::Device,
        extension: Option<&ExtensionFn<khr::TimelineSemaphore>>,
    ) -> Result<crate::FenceValue, crate::DeviceError> {
        match *self {
            Self::TimelineSemaphore(raw) => unsafe {
                Ok(match *extension.unwrap() {
                    ExtensionFn::Extension(ref ext) => ext.get_semaphore_counter_value(raw)?,
                    ExtensionFn::Promoted => device.get_semaphore_counter_value(raw)?,
                })
            },
            Self::FencePool {
                last_completed,
                ref active,
                free: _,
            } => Self::check_active(device, last_completed, active),
        }
    }

    /// Trim the internal state of this [`Fence`].
    ///
    /// This function has no externally visible effect, but you should call it
    /// periodically to keep this fence's resource consumption under control.
    ///
    /// For fences using the [`FencePool`] implementation, this function
    /// recycles fences that have been signaled. If you don't call this,
    /// [`Queue::submit`] will just keep allocating a new Vulkan fence every
    /// time it's called.
    ///
    /// [`FencePool`]: Fence::FencePool
    /// [`Queue::submit`]: crate::Queue::submit
    fn maintain(&mut self, device: &ash::Device) -> Result<(), crate::DeviceError> {
        match *self {
            Self::TimelineSemaphore(_) => {}
            Self::FencePool {
                ref mut last_completed,
                ref mut active,
                ref mut free,
            } => {
                let latest = Self::check_active(device, *last_completed, active)?;
                let base_free = free.len();
                for &(value, raw) in active.iter() {
                    if value <= latest {
                        free.push(raw);
                    }
                }
                if free.len() != base_free {
                    active.retain(|&(value, _)| value > latest);
                    unsafe {
                        device.reset_fences(&free[base_free..])?;
                    }
                }
                *last_completed = latest;
            }
        }
        Ok(())
    }
}

impl crate::Queue for Queue {
    type A = Api;

    unsafe fn submit(
        &self,
        command_buffers: &[&CommandBuffer],
        surface_textures: &[&SurfaceTexture],
        (signal_fence, signal_value): (&mut Fence, crate::FenceValue),
    ) -> Result<(), crate::DeviceError> {
        let mut fence_raw = vk::Fence::null();

        let mut wait_stage_masks = Vec::new();
        let mut wait_semaphores = Vec::new();
        let mut signal_semaphores = Vec::new();
        let mut signal_values = Vec::new();

        // Double check that the same swapchain image isn't being given to us multiple times,
        // as that will deadlock when we try to lock them all.
        debug_assert!(
            {
                let mut check = HashSet::with_capacity(surface_textures.len());
                // We compare the Arcs by pointer, as Eq isn't well defined for
                // SwapchainImageSemaphores.
                for st in surface_textures {
                    check.insert(Arc::as_ptr(&st.surface_semaphores));
                }
                check.len() == surface_textures.len()
            },
            "More than one surface texture is being used from the same swapchain. This will cause a deadlock in release."
        );

        let locked_swapchain_semaphores = surface_textures
            .iter()
            .map(|st| {
                st.surface_semaphores
                    .try_lock()
                    .expect("Failed to lock surface semaphore.")
            })
            .collect::<Vec<_>>();

        for mut swapchain_semaphore in locked_swapchain_semaphores {
            swapchain_semaphore.set_used_fence_value(signal_value);

            // If we're the first submission to operate on this image, wait on
            // its acquire semaphore, to make sure the presentation engine is
            // done with it.
            if let Some(sem) = swapchain_semaphore.get_acquire_wait_semaphore() {
                wait_stage_masks.push(vk::PipelineStageFlags::TOP_OF_PIPE);
                wait_semaphores.push(sem);
            }

            // Get a semaphore to signal when we're done writing to this surface
            // image. Presentation of this image will wait for this.
            let signal_semaphore = swapchain_semaphore.get_submit_signal_semaphore(&self.device)?;
            signal_semaphores.push(signal_semaphore);
            signal_values.push(!0);
        }

        // In order for submissions to be strictly ordered, we encode a dependency between each submission
        // using a pair of semaphores. This adds a wait if it is needed, and signals the next semaphore.
        let semaphore_state = self.relay_semaphores.lock().advance(&self.device)?;

        if let Some(sem) = semaphore_state.wait {
            wait_stage_masks.push(vk::PipelineStageFlags::TOP_OF_PIPE);
            wait_semaphores.push(sem);
        }

        signal_semaphores.push(semaphore_state.signal);
        signal_values.push(!0);

        // We need to signal our wgpu-hal fence if we have one; this adds it to the signal list.
        signal_fence.maintain(&self.device.raw)?;
        match *signal_fence {
            Fence::TimelineSemaphore(raw) => {
                signal_semaphores.push(raw);
                signal_values.push(signal_value);
            }
            Fence::FencePool {
                ref mut active,
                ref mut free,
                ..
            } => {
                fence_raw = match free.pop() {
                    Some(raw) => raw,
                    None => unsafe {
                        self.device
                            .raw
                            .create_fence(&vk::FenceCreateInfo::default(), None)?
                    },
                };
                active.push((signal_value, fence_raw));
            }
        }

        let vk_cmd_buffers = command_buffers
            .iter()
            .map(|cmd| cmd.raw)
            .collect::<Vec<_>>();

        let mut vk_info = vk::SubmitInfo::builder().command_buffers(&vk_cmd_buffers);

        vk_info = vk_info
            .wait_semaphores(&wait_semaphores)
            .wait_dst_stage_mask(&wait_stage_masks)
            .signal_semaphores(&signal_semaphores);

        let mut vk_timeline_info;

        if self.device.private_caps.timeline_semaphores {
            vk_timeline_info =
                vk::TimelineSemaphoreSubmitInfo::builder().signal_semaphore_values(&signal_values);
            vk_info = vk_info.push_next(&mut vk_timeline_info);
        }

        profiling::scope!("vkQueueSubmit");
        unsafe {
            self.device
                .raw
                .queue_submit(self.raw, &[vk_info.build()], fence_raw)?
        };
        Ok(())
    }

    unsafe fn present(
        &self,
        surface: &Surface,
        texture: SurfaceTexture,
    ) -> Result<(), crate::SurfaceError> {
        let mut swapchain = surface.swapchain.write();
        let ssc = swapchain.as_mut().unwrap();
        let mut swapchain_semaphores = texture.surface_semaphores.lock();

        let swapchains = [ssc.raw];
        let image_indices = [texture.index];
        let vk_info = vk::PresentInfoKHR::builder()
            .swapchains(&swapchains)
            .image_indices(&image_indices)
            .wait_semaphores(swapchain_semaphores.get_present_wait_semaphores());

        let suboptimal = {
            profiling::scope!("vkQueuePresentKHR");
            unsafe { self.swapchain_fn.queue_present(self.raw, &vk_info) }.map_err(|error| {
                match error {
                    vk::Result::ERROR_OUT_OF_DATE_KHR => crate::SurfaceError::Outdated,
                    vk::Result::ERROR_SURFACE_LOST_KHR => crate::SurfaceError::Lost,
                    _ => crate::DeviceError::from(error).into(),
                }
            })?
        };
        if suboptimal {
            // We treat `VK_SUBOPTIMAL_KHR` as `VK_SUCCESS` on Android.
            // On Android 10+, libvulkan's `vkQueuePresentKHR` implementation returns
            // `VK_SUBOPTIMAL_KHR` if it is not doing pre-rotation (i.e. if
            // `VkSwapchainCreateInfoKHR::preTransform` is not equal to the current
            // device orientation). This is always the case when the device orientation
            // is anything other than the identity one, as we unconditionally use
            // `VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR`.
            #[cfg(not(target_os = "android"))]
            log::warn!("Suboptimal present of frame {}", texture.index);
        }
        Ok(())
    }

    unsafe fn get_timestamp_period(&self) -> f32 {
        self.device.timestamp_period
    }
}

impl From<vk::Result> for crate::DeviceError {
    fn from(result: vk::Result) -> Self {
        #![allow(unreachable_code)]
        match result {
            vk::Result::ERROR_OUT_OF_HOST_MEMORY | vk::Result::ERROR_OUT_OF_DEVICE_MEMORY => {
                #[cfg(feature = "oom_panic")]
                panic!("Out of memory ({result:?})");

                Self::OutOfMemory
            }
            vk::Result::ERROR_DEVICE_LOST => {
                #[cfg(feature = "device_lost_panic")]
                panic!("Device lost");

                Self::Lost
            }
            _ => {
                #[cfg(feature = "internal_error_panic")]
                panic!("Internal error: {result:?}");

                log::warn!("Unrecognized device error {result:?}");
                Self::Lost
            }
        }
    }
}