Vendor dependencies for 0.3.0 release

commit 82ab7f317b (parent 0c8d39d483)
2025-09-27 10:29:08 -05:00
26803 changed files with 16134934 additions and 0 deletions

vendor/wgpu-core/src/binding_model.rs: vendored, new file, 1029 lines (diff suppressed because it is too large)


@@ -0,0 +1,50 @@
use crate::lock::{rank, Mutex};
/// A pool of free [`wgpu_hal::CommandEncoder`]s, owned by a `Device`.
///
/// Each encoder in this list is in the "closed" state.
///
/// Since a raw [`CommandEncoder`][ce] is itself a pool for allocating
/// raw [`CommandBuffer`][cb]s, this is a pool of pools.
///
/// [`wgpu_hal::CommandEncoder`]: hal::CommandEncoder
/// [ce]: hal::CommandEncoder
/// [cb]: hal::Api::CommandBuffer
pub(crate) struct CommandAllocator {
free_encoders: Mutex<Vec<Box<dyn hal::DynCommandEncoder>>>,
}
impl CommandAllocator {
pub(crate) fn new() -> Self {
Self {
free_encoders: Mutex::new(rank::COMMAND_ALLOCATOR_FREE_ENCODERS, Vec::new()),
}
}
/// Return a fresh [`wgpu_hal::CommandEncoder`] in the "closed" state.
///
/// If we have free encoders in the pool, take one of those. Otherwise,
/// create a new one on `device`.
///
/// [`wgpu_hal::CommandEncoder`]: hal::CommandEncoder
pub(crate) fn acquire_encoder(
&self,
device: &dyn hal::DynDevice,
queue: &dyn hal::DynQueue,
) -> Result<Box<dyn hal::DynCommandEncoder>, hal::DeviceError> {
let mut free_encoders = self.free_encoders.lock();
match free_encoders.pop() {
Some(encoder) => Ok(encoder),
None => unsafe {
let hal_desc = hal::CommandEncoderDescriptor { label: None, queue };
device.create_command_encoder(&hal_desc)
},
}
}
/// Add `encoder` back to the free pool.
pub(crate) fn release_encoder(&self, encoder: Box<dyn hal::DynCommandEncoder>) {
let mut free_encoders = self.free_encoders.lock();
free_encoders.push(encoder);
}
}
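The acquire/release pair above is a simple free-list pool: `acquire_encoder` pops a previously returned encoder if one exists and otherwise creates a fresh one on the device, while `release_encoder` pushes it back for reuse. A minimal standalone sketch of the same pattern using only std types (the `PooledEncoder` type below is a hypothetical stand-in for a closed hal command encoder; it is not part of the vendored file):

use std::sync::Mutex;

struct PooledEncoder; // stand-in for a closed hal command encoder

struct EncoderPool {
    free: Mutex<Vec<PooledEncoder>>,
}

impl EncoderPool {
    fn new() -> Self {
        Self { free: Mutex::new(Vec::new()) }
    }

    // Reuse a pooled encoder if one is free, otherwise create a new one.
    fn acquire(&self) -> PooledEncoder {
        self.free.lock().unwrap().pop().unwrap_or(PooledEncoder)
    }

    // Return an encoder to the free list so later acquires can reuse it.
    fn release(&self, encoder: PooledEncoder) {
        self.free.lock().unwrap().push(encoder);
    }
}

fn main() {
    let pool = EncoderPool::new();
    let encoder = pool.acquire(); // pool is empty, so this creates one
    pool.release(encoder);        // back into the pool
    let _reused = pool.acquire(); // pops the encoder released above
}

The vendored allocator differs mainly in that the miss path creates the encoder on the hal device (and can therefore fail with `hal::DeviceError`), and that it uses the crate's ranked `Mutex` for lock-ordering checks.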

vendor/wgpu-core/src/command/bind.rs: vendored, new file, 521 lines

@@ -0,0 +1,521 @@
use std::sync::Arc;
use crate::{
binding_model::{BindGroup, LateMinBufferBindingSizeMismatch, PipelineLayout},
device::SHADER_STAGE_COUNT,
pipeline::LateSizedBufferGroup,
resource::{Labeled, ResourceErrorIdent},
};
use arrayvec::ArrayVec;
use thiserror::Error;
mod compat {
use arrayvec::ArrayVec;
use thiserror::Error;
use wgt::{BindingType, ShaderStages};
use crate::{
binding_model::BindGroupLayout,
error::MultiError,
resource::{Labeled, ParentDevice, ResourceErrorIdent},
};
use std::{
num::NonZeroU32,
ops::Range,
sync::{Arc, Weak},
};
pub(crate) enum Error {
Incompatible {
expected_bgl: ResourceErrorIdent,
assigned_bgl: ResourceErrorIdent,
inner: MultiError,
},
Missing,
}
#[derive(Debug, Clone)]
struct Entry {
assigned: Option<Arc<BindGroupLayout>>,
expected: Option<Arc<BindGroupLayout>>,
}
impl Entry {
fn empty() -> Self {
Self {
assigned: None,
expected: None,
}
}
fn is_active(&self) -> bool {
self.assigned.is_some() && self.expected.is_some()
}
fn is_valid(&self) -> bool {
if let Some(expected_bgl) = self.expected.as_ref() {
if let Some(assigned_bgl) = self.assigned.as_ref() {
expected_bgl.is_equal(assigned_bgl)
} else {
false
}
} else {
true
}
}
fn is_incompatible(&self) -> bool {
self.expected.is_none() || !self.is_valid()
}
fn check(&self) -> Result<(), Error> {
if let Some(expected_bgl) = self.expected.as_ref() {
if let Some(assigned_bgl) = self.assigned.as_ref() {
if expected_bgl.is_equal(assigned_bgl) {
Ok(())
} else {
#[derive(Clone, Debug, Error)]
#[error(
"Exclusive pipelines don't match: expected {expected}, got {assigned}"
)]
struct IncompatibleExclusivePipelines {
expected: String,
assigned: String,
}
use crate::binding_model::ExclusivePipeline;
match (
expected_bgl.exclusive_pipeline.get().unwrap(),
assigned_bgl.exclusive_pipeline.get().unwrap(),
) {
(ExclusivePipeline::None, ExclusivePipeline::None) => {}
(
ExclusivePipeline::Render(e_pipeline),
ExclusivePipeline::Render(a_pipeline),
) if Weak::ptr_eq(e_pipeline, a_pipeline) => {}
(
ExclusivePipeline::Compute(e_pipeline),
ExclusivePipeline::Compute(a_pipeline),
) if Weak::ptr_eq(e_pipeline, a_pipeline) => {}
(expected, assigned) => {
return Err(Error::Incompatible {
expected_bgl: expected_bgl.error_ident(),
assigned_bgl: assigned_bgl.error_ident(),
inner: MultiError::new(core::iter::once(
IncompatibleExclusivePipelines {
expected: expected.to_string(),
assigned: assigned.to_string(),
},
))
.unwrap(),
});
}
}
#[derive(Clone, Debug, Error)]
enum EntryError {
#[error("Entries with binding {binding} differ in visibility: expected {expected:?}, got {assigned:?}")]
Visibility {
binding: u32,
expected: ShaderStages,
assigned: ShaderStages,
},
#[error("Entries with binding {binding} differ in type: expected {expected:?}, got {assigned:?}")]
Type {
binding: u32,
expected: BindingType,
assigned: BindingType,
},
#[error("Entries with binding {binding} differ in count: expected {expected:?}, got {assigned:?}")]
Count {
binding: u32,
expected: Option<NonZeroU32>,
assigned: Option<NonZeroU32>,
},
#[error("Expected entry with binding {binding} not found in assigned bind group layout")]
ExtraExpected { binding: u32 },
#[error("Assigned entry with binding {binding} not found in expected bind group layout")]
ExtraAssigned { binding: u32 },
}
let mut errors = Vec::new();
for (&binding, expected_entry) in expected_bgl.entries.iter() {
if let Some(assigned_entry) = assigned_bgl.entries.get(binding) {
if assigned_entry.visibility != expected_entry.visibility {
errors.push(EntryError::Visibility {
binding,
expected: expected_entry.visibility,
assigned: assigned_entry.visibility,
});
}
if assigned_entry.ty != expected_entry.ty {
errors.push(EntryError::Type {
binding,
expected: expected_entry.ty,
assigned: assigned_entry.ty,
});
}
if assigned_entry.count != expected_entry.count {
errors.push(EntryError::Count {
binding,
expected: expected_entry.count,
assigned: assigned_entry.count,
});
}
} else {
errors.push(EntryError::ExtraExpected { binding });
}
}
for (&binding, _) in assigned_bgl.entries.iter() {
if !expected_bgl.entries.contains_key(binding) {
errors.push(EntryError::ExtraAssigned { binding });
}
}
Err(Error::Incompatible {
expected_bgl: expected_bgl.error_ident(),
assigned_bgl: assigned_bgl.error_ident(),
inner: MultiError::new(errors.drain(..)).unwrap(),
})
}
} else {
Err(Error::Missing)
}
} else {
Ok(())
}
}
}
#[derive(Debug, Default)]
pub(crate) struct BoundBindGroupLayouts {
entries: ArrayVec<Entry, { hal::MAX_BIND_GROUPS }>,
}
impl BoundBindGroupLayouts {
pub fn new() -> Self {
Self {
entries: (0..hal::MAX_BIND_GROUPS).map(|_| Entry::empty()).collect(),
}
}
pub fn num_valid_entries(&self) -> usize {
// find first incompatible entry
self.entries
.iter()
.position(|e| e.is_incompatible())
.unwrap_or(self.entries.len())
}
fn make_range(&self, start_index: usize) -> Range<usize> {
let end = self.num_valid_entries();
start_index..end.max(start_index)
}
pub fn update_expectations(
&mut self,
expectations: &[Arc<BindGroupLayout>],
) -> Range<usize> {
let start_index = self
.entries
.iter()
.zip(expectations)
.position(|(e, expect)| {
e.expected.is_none() || !e.expected.as_ref().unwrap().is_equal(expect)
})
.unwrap_or(expectations.len());
for (e, expect) in self.entries[start_index..]
.iter_mut()
.zip(expectations[start_index..].iter())
{
e.expected = Some(expect.clone());
}
for e in self.entries[expectations.len()..].iter_mut() {
e.expected = None;
}
self.make_range(start_index)
}
pub fn assign(&mut self, index: usize, value: Arc<BindGroupLayout>) -> Range<usize> {
self.entries[index].assigned = Some(value);
self.make_range(index)
}
pub fn list_active(&self) -> impl Iterator<Item = usize> + '_ {
self.entries
.iter()
.enumerate()
.filter_map(|(i, e)| if e.is_active() { Some(i) } else { None })
}
#[allow(clippy::result_large_err)]
pub fn get_invalid(&self) -> Result<(), (usize, Error)> {
for (index, entry) in self.entries.iter().enumerate() {
entry.check().map_err(|e| (index, e))?;
}
Ok(())
}
}
}
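The compat module's job reduces to a simple rule: bind group slots stay usable only as a prefix, ending at the first slot whose assigned layout does not match what the pipeline expects (or where the pipeline expects nothing at all). A minimal standalone model of `num_valid_entries`, with layouts stood in by plain integer ids (illustrative, not the vendored types):

// An entry is usable only if the pipeline expects a layout there and the
// assigned layout is equal to it; the usable prefix ends at the first mismatch.
fn num_valid_entries(expected: &[Option<u32>], assigned: &[Option<u32>]) -> usize {
    expected
        .iter()
        .zip(assigned)
        .position(|(e, a)| e.is_none() || e != a)
        .unwrap_or(expected.len().min(assigned.len()))
}

fn main() {
    // Slots 0 and 1 match, slot 2 differs, so only slots 0..2 remain valid
    // and everything from slot 2 onward has to be rebound.
    let expected = [Some(10), Some(11), Some(12)];
    let assigned = [Some(10), Some(11), Some(99)];
    assert_eq!(num_valid_entries(&expected, &assigned), 2);
}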
#[derive(Clone, Debug, Error)]
pub enum BinderError {
#[error("The current set {pipeline} expects a BindGroup to be set at index {index}")]
MissingBindGroup {
index: usize,
pipeline: ResourceErrorIdent,
},
#[error("The {assigned_bgl} of current set {assigned_bg} at index {index} is not compatible with the corresponding {expected_bgl} of {pipeline}")]
IncompatibleBindGroup {
expected_bgl: ResourceErrorIdent,
assigned_bgl: ResourceErrorIdent,
assigned_bg: ResourceErrorIdent,
index: usize,
pipeline: ResourceErrorIdent,
#[source]
inner: crate::error::MultiError,
},
}
#[derive(Debug)]
struct LateBufferBinding {
shader_expect_size: wgt::BufferAddress,
bound_size: wgt::BufferAddress,
}
#[derive(Debug, Default)]
pub(super) struct EntryPayload {
pub(super) group: Option<Arc<BindGroup>>,
pub(super) dynamic_offsets: Vec<wgt::DynamicOffset>,
late_buffer_bindings: Vec<LateBufferBinding>,
/// Since `LateBufferBinding` may contain information about the bindings
/// not used by the pipeline, we need to know when to stop validating.
pub(super) late_bindings_effective_count: usize,
}
impl EntryPayload {
fn reset(&mut self) {
self.group = None;
self.dynamic_offsets.clear();
self.late_buffer_bindings.clear();
self.late_bindings_effective_count = 0;
}
}
#[derive(Debug, Default)]
pub(super) struct Binder {
pub(super) pipeline_layout: Option<Arc<PipelineLayout>>,
manager: compat::BoundBindGroupLayouts,
payloads: [EntryPayload; hal::MAX_BIND_GROUPS],
}
impl Binder {
pub(super) fn new() -> Self {
Self {
pipeline_layout: None,
manager: compat::BoundBindGroupLayouts::new(),
payloads: Default::default(),
}
}
pub(super) fn reset(&mut self) {
self.pipeline_layout = None;
self.manager = compat::BoundBindGroupLayouts::new();
for payload in self.payloads.iter_mut() {
payload.reset();
}
}
pub(super) fn change_pipeline_layout<'a>(
&'a mut self,
new: &Arc<PipelineLayout>,
late_sized_buffer_groups: &[LateSizedBufferGroup],
) -> (usize, &'a [EntryPayload]) {
let old_id_opt = self.pipeline_layout.replace(new.clone());
let mut bind_range = self.manager.update_expectations(&new.bind_group_layouts);
// Update the buffer binding sizes that are required by shaders.
for (payload, late_group) in self.payloads.iter_mut().zip(late_sized_buffer_groups) {
payload.late_bindings_effective_count = late_group.shader_sizes.len();
for (late_binding, &shader_expect_size) in payload
.late_buffer_bindings
.iter_mut()
.zip(late_group.shader_sizes.iter())
{
late_binding.shader_expect_size = shader_expect_size;
}
if late_group.shader_sizes.len() > payload.late_buffer_bindings.len() {
for &shader_expect_size in
late_group.shader_sizes[payload.late_buffer_bindings.len()..].iter()
{
payload.late_buffer_bindings.push(LateBufferBinding {
shader_expect_size,
bound_size: 0,
});
}
}
}
if let Some(old) = old_id_opt {
// root constants are the base compatibility property
if old.push_constant_ranges != new.push_constant_ranges {
bind_range.start = 0;
}
}
(bind_range.start, &self.payloads[bind_range])
}
pub(super) fn assign_group<'a>(
&'a mut self,
index: usize,
bind_group: &Arc<BindGroup>,
offsets: &[wgt::DynamicOffset],
) -> &'a [EntryPayload] {
let payload = &mut self.payloads[index];
payload.group = Some(bind_group.clone());
payload.dynamic_offsets.clear();
payload.dynamic_offsets.extend_from_slice(offsets);
// Fill out the actual binding sizes for buffers
// whose layout doesn't specify `min_binding_size`.
for (late_binding, late_size) in payload
.late_buffer_bindings
.iter_mut()
.zip(bind_group.late_buffer_binding_sizes.iter())
{
late_binding.bound_size = late_size.get();
}
if bind_group.late_buffer_binding_sizes.len() > payload.late_buffer_bindings.len() {
for late_size in
bind_group.late_buffer_binding_sizes[payload.late_buffer_bindings.len()..].iter()
{
payload.late_buffer_bindings.push(LateBufferBinding {
shader_expect_size: 0,
bound_size: late_size.get(),
});
}
}
let bind_range = self.manager.assign(index, bind_group.layout.clone());
&self.payloads[bind_range]
}
pub(super) fn list_active<'a>(&'a self) -> impl Iterator<Item = &'a Arc<BindGroup>> + 'a {
let payloads = &self.payloads;
self.manager
.list_active()
.map(move |index| payloads[index].group.as_ref().unwrap())
}
#[cfg(feature = "indirect-validation")]
pub(super) fn list_valid<'a>(&'a self) -> impl Iterator<Item = (usize, &'a EntryPayload)> + 'a {
self.payloads
.iter()
.take(self.manager.num_valid_entries())
.enumerate()
}
pub(super) fn check_compatibility<T: Labeled>(
&self,
pipeline: &T,
) -> Result<(), Box<BinderError>> {
self.manager.get_invalid().map_err(|(index, error)| {
Box::new(match error {
compat::Error::Incompatible {
expected_bgl,
assigned_bgl,
inner,
} => BinderError::IncompatibleBindGroup {
expected_bgl,
assigned_bgl,
assigned_bg: self.payloads[index].group.as_ref().unwrap().error_ident(),
index,
pipeline: pipeline.error_ident(),
inner,
},
compat::Error::Missing => BinderError::MissingBindGroup {
index,
pipeline: pipeline.error_ident(),
},
})
})
}
/// Scan active buffer bindings corresponding to layouts without `min_binding_size` specified.
pub(super) fn check_late_buffer_bindings(
&self,
) -> Result<(), LateMinBufferBindingSizeMismatch> {
for group_index in self.manager.list_active() {
let payload = &self.payloads[group_index];
for (compact_index, late_binding) in payload.late_buffer_bindings
[..payload.late_bindings_effective_count]
.iter()
.enumerate()
{
if late_binding.bound_size < late_binding.shader_expect_size {
return Err(LateMinBufferBindingSizeMismatch {
group_index: group_index as u32,
compact_index,
shader_size: late_binding.shader_expect_size,
bound_size: late_binding.bound_size,
});
}
}
}
Ok(())
}
}
struct PushConstantChange {
stages: wgt::ShaderStages,
offset: u32,
enable: bool,
}
/// Break up possibly overlapping push constant ranges into a set of
/// non-overlapping ranges which contain all the stage flags of the
/// original ranges. This allows us to zero out (or write any value to)
/// every byte covered by the original ranges.
pub fn compute_nonoverlapping_ranges(
ranges: &[wgt::PushConstantRange],
) -> ArrayVec<wgt::PushConstantRange, { SHADER_STAGE_COUNT * 2 }> {
if ranges.is_empty() {
return ArrayVec::new();
}
debug_assert!(ranges.len() <= SHADER_STAGE_COUNT);
let mut breaks: ArrayVec<PushConstantChange, { SHADER_STAGE_COUNT * 2 }> = ArrayVec::new();
for range in ranges {
breaks.push(PushConstantChange {
stages: range.stages,
offset: range.range.start,
enable: true,
});
breaks.push(PushConstantChange {
stages: range.stages,
offset: range.range.end,
enable: false,
});
}
breaks.sort_unstable_by_key(|change| change.offset);
let mut output_ranges = ArrayVec::new();
let mut position = 0_u32;
let mut stages = wgt::ShaderStages::NONE;
for bk in breaks {
if bk.offset - position > 0 && !stages.is_empty() {
output_ranges.push(wgt::PushConstantRange {
stages,
range: position..bk.offset,
})
}
position = bk.offset;
stages.set(bk.stages, bk.enable);
}
output_ranges
}
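A worked example of the split performed by `compute_nonoverlapping_ranges`, written as an illustrative test sketch (not part of the vendored file); it assumes the `wgt::PushConstantRange` and `wgt::ShaderStages` types used elsewhere in this file:

#[cfg(test)]
mod nonoverlapping_ranges_example {
    use super::compute_nonoverlapping_ranges;
    use wgt::{PushConstantRange, ShaderStages};

    #[test]
    fn splits_overlapping_ranges_at_every_boundary() {
        // VERTEX covers bytes 0..8 and FRAGMENT covers bytes 4..12, so the
        // overlap 4..8 must carry both stage flags.
        let input = [
            PushConstantRange { stages: ShaderStages::VERTEX, range: 0..8 },
            PushConstantRange { stages: ShaderStages::FRAGMENT, range: 4..12 },
        ];
        let output = compute_nonoverlapping_ranges(&input);

        assert_eq!(output.len(), 3);
        assert_eq!(output[0].range, 0..4);
        assert_eq!(output[0].stages, ShaderStages::VERTEX);
        assert_eq!(output[1].range, 4..8);
        assert_eq!(output[1].stages, ShaderStages::VERTEX | ShaderStages::FRAGMENT);
        assert_eq!(output[2].range, 8..12);
        assert_eq!(output[2].stages, ShaderStages::FRAGMENT);
    }
}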

vendor/wgpu-core/src/command/bundle.rs: vendored, new file, 1773 lines (diff suppressed because it is too large)

vendor/wgpu-core/src/command/clear.rs: vendored, new file, 499 lines

@@ -0,0 +1,499 @@
use std::{ops::Range, sync::Arc};
#[cfg(feature = "trace")]
use crate::device::trace::Command as TraceCommand;
use crate::{
api_log,
command::CommandEncoderError,
device::DeviceError,
get_lowest_common_denom,
global::Global,
id::{BufferId, CommandEncoderId, TextureId},
init_tracker::{MemoryInitKind, TextureInitRange},
resource::{
DestroyedResourceError, InvalidResourceError, Labeled, MissingBufferUsageError,
ParentDevice, ResourceErrorIdent, Texture, TextureClearMode,
},
snatch::SnatchGuard,
track::{TextureSelector, TextureTrackerSetSingle},
};
use thiserror::Error;
use wgt::{math::align_to, BufferAddress, BufferUsages, ImageSubresourceRange, TextureAspect};
/// Error encountered while attempting a clear.
#[derive(Clone, Debug, Error)]
#[non_exhaustive]
pub enum ClearError {
#[error("To use clear_texture the CLEAR_TEXTURE feature needs to be enabled")]
MissingClearTextureFeature,
#[error(transparent)]
DestroyedResource(#[from] DestroyedResourceError),
#[error("{0} can not be cleared")]
NoValidTextureClearMode(ResourceErrorIdent),
#[error("Buffer clear size {0:?} is not a multiple of `COPY_BUFFER_ALIGNMENT`")]
UnalignedFillSize(BufferAddress),
#[error("Buffer offset {0:?} is not a multiple of `COPY_BUFFER_ALIGNMENT`")]
UnalignedBufferOffset(BufferAddress),
#[error("Clear starts at offset {start_offset} with size of {requested_size}, but these added together exceed `u64::MAX`")]
OffsetPlusSizeExceeds64BitBounds {
start_offset: BufferAddress,
requested_size: BufferAddress,
},
#[error("Clear of {start_offset}..{end_offset} would end up overrunning the bounds of the buffer of size {buffer_size}")]
BufferOverrun {
start_offset: BufferAddress,
end_offset: BufferAddress,
buffer_size: BufferAddress,
},
#[error(transparent)]
MissingBufferUsage(#[from] MissingBufferUsageError),
#[error("Texture lacks the aspects that were specified in the image subresource range. Texture with format {texture_format:?}, specified was {subresource_range_aspects:?}")]
MissingTextureAspect {
texture_format: wgt::TextureFormat,
subresource_range_aspects: TextureAspect,
},
#[error("Image subresource level range is outside of the texture's level range. texture range is {texture_level_range:?}, \
whereas subesource range specified start {subresource_base_mip_level} and count {subresource_mip_level_count:?}")]
InvalidTextureLevelRange {
texture_level_range: Range<u32>,
subresource_base_mip_level: u32,
subresource_mip_level_count: Option<u32>,
},
#[error("Image subresource layer range is outside of the texture's layer range. texture range is {texture_layer_range:?}, \
whereas subesource range specified start {subresource_base_array_layer} and count {subresource_array_layer_count:?}")]
InvalidTextureLayerRange {
texture_layer_range: Range<u32>,
subresource_base_array_layer: u32,
subresource_array_layer_count: Option<u32>,
},
#[error(transparent)]
Device(#[from] DeviceError),
#[error(transparent)]
CommandEncoderError(#[from] CommandEncoderError),
#[error(transparent)]
InvalidResource(#[from] InvalidResourceError),
}
impl Global {
pub fn command_encoder_clear_buffer(
&self,
command_encoder_id: CommandEncoderId,
dst: BufferId,
offset: BufferAddress,
size: Option<BufferAddress>,
) -> Result<(), ClearError> {
profiling::scope!("CommandEncoder::clear_buffer");
api_log!("CommandEncoder::clear_buffer {dst:?}");
let hub = &self.hub;
let cmd_buf = hub
.command_buffers
.get(command_encoder_id.into_command_buffer_id());
let mut cmd_buf_data = cmd_buf.data.lock();
let mut cmd_buf_data_guard = cmd_buf_data.record()?;
let cmd_buf_data = &mut *cmd_buf_data_guard;
#[cfg(feature = "trace")]
if let Some(ref mut list) = cmd_buf_data.commands {
list.push(TraceCommand::ClearBuffer { dst, offset, size });
}
let dst_buffer = hub.buffers.get(dst).get()?;
dst_buffer.same_device_as(cmd_buf.as_ref())?;
let dst_pending = cmd_buf_data
.trackers
.buffers
.set_single(&dst_buffer, hal::BufferUses::COPY_DST);
let snatch_guard = dst_buffer.device.snatchable_lock.read();
let dst_raw = dst_buffer.try_raw(&snatch_guard)?;
dst_buffer.check_usage(BufferUsages::COPY_DST)?;
// Check if offset & size are valid.
if offset % wgt::COPY_BUFFER_ALIGNMENT != 0 {
return Err(ClearError::UnalignedBufferOffset(offset));
}
let size = size.unwrap_or(dst_buffer.size.saturating_sub(offset));
if size % wgt::COPY_BUFFER_ALIGNMENT != 0 {
return Err(ClearError::UnalignedFillSize(size));
}
let end_offset =
offset
.checked_add(size)
.ok_or(ClearError::OffsetPlusSizeExceeds64BitBounds {
start_offset: offset,
requested_size: size,
})?;
if end_offset > dst_buffer.size {
return Err(ClearError::BufferOverrun {
start_offset: offset,
end_offset,
buffer_size: dst_buffer.size,
});
}
if offset == end_offset {
log::trace!("Ignoring fill_buffer of size 0");
cmd_buf_data_guard.mark_successful();
return Ok(());
}
// Mark dest as initialized.
cmd_buf_data.buffer_memory_init_actions.extend(
dst_buffer.initialization_status.read().create_action(
&dst_buffer,
offset..end_offset,
MemoryInitKind::ImplicitlyInitialized,
),
);
// actual hal barrier & operation
let dst_barrier = dst_pending.map(|pending| pending.into_hal(&dst_buffer, &snatch_guard));
let cmd_buf_raw = cmd_buf_data.encoder.open()?;
unsafe {
cmd_buf_raw.transition_buffers(dst_barrier.as_slice());
cmd_buf_raw.clear_buffer(dst_raw, offset..end_offset);
}
cmd_buf_data_guard.mark_successful();
Ok(())
}
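The validation above comes down to a handful of integer checks on `offset` and `size`. A small standalone illustration with assumed numbers (the 256-byte buffer below is hypothetical; not part of the vendored file):

fn main() {
    const COPY_BUFFER_ALIGNMENT: u64 = 4; // the wgt constant is 4 bytes
    let buffer_size: u64 = 256; // assumed destination buffer size

    // `size: None` means "clear from `offset` to the end of the buffer".
    let offset: u64 = 16;
    let size = buffer_size.saturating_sub(offset); // 240

    // Both the offset and the size must be 4-byte aligned.
    assert_eq!(offset % COPY_BUFFER_ALIGNMENT, 0);
    assert_eq!(size % COPY_BUFFER_ALIGNMENT, 0);

    // The end must neither overflow u64 nor run past the buffer, and a
    // zero-length clear (offset == end_offset) is skipped entirely.
    let end_offset = offset.checked_add(size).expect("offset + size overflowed");
    assert!(end_offset <= buffer_size);
    assert_ne!(offset, end_offset);
}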
pub fn command_encoder_clear_texture(
&self,
command_encoder_id: CommandEncoderId,
dst: TextureId,
subresource_range: &ImageSubresourceRange,
) -> Result<(), ClearError> {
profiling::scope!("CommandEncoder::clear_texture");
api_log!("CommandEncoder::clear_texture {dst:?}");
let hub = &self.hub;
let cmd_buf = hub
.command_buffers
.get(command_encoder_id.into_command_buffer_id());
let mut cmd_buf_data = cmd_buf.data.lock();
let mut cmd_buf_data_guard = cmd_buf_data.record()?;
let cmd_buf_data = &mut *cmd_buf_data_guard;
#[cfg(feature = "trace")]
if let Some(ref mut list) = cmd_buf_data.commands {
list.push(TraceCommand::ClearTexture {
dst,
subresource_range: *subresource_range,
});
}
if !cmd_buf.support_clear_texture {
return Err(ClearError::MissingClearTextureFeature);
}
let dst_texture = hub.textures.get(dst).get()?;
dst_texture.same_device_as(cmd_buf.as_ref())?;
// Check if subresource aspects are valid.
let clear_aspects =
hal::FormatAspects::new(dst_texture.desc.format, subresource_range.aspect);
if clear_aspects.is_empty() {
return Err(ClearError::MissingTextureAspect {
texture_format: dst_texture.desc.format,
subresource_range_aspects: subresource_range.aspect,
});
};
// Check if subresource level range is valid
let subresource_mip_range = subresource_range.mip_range(dst_texture.full_range.mips.end);
if dst_texture.full_range.mips.start > subresource_mip_range.start
|| dst_texture.full_range.mips.end < subresource_mip_range.end
{
return Err(ClearError::InvalidTextureLevelRange {
texture_level_range: dst_texture.full_range.mips.clone(),
subresource_base_mip_level: subresource_range.base_mip_level,
subresource_mip_level_count: subresource_range.mip_level_count,
});
}
// Check if subresource layer range is valid
let subresource_layer_range =
subresource_range.layer_range(dst_texture.full_range.layers.end);
if dst_texture.full_range.layers.start > subresource_layer_range.start
|| dst_texture.full_range.layers.end < subresource_layer_range.end
{
return Err(ClearError::InvalidTextureLayerRange {
texture_layer_range: dst_texture.full_range.layers.clone(),
subresource_base_array_layer: subresource_range.base_array_layer,
subresource_array_layer_count: subresource_range.array_layer_count,
});
}
let device = &cmd_buf.device;
device.check_is_valid()?;
let (encoder, tracker) = cmd_buf_data.open_encoder_and_tracker()?;
let snatch_guard = device.snatchable_lock.read();
clear_texture(
&dst_texture,
TextureInitRange {
mip_range: subresource_mip_range,
layer_range: subresource_layer_range,
},
encoder,
&mut tracker.textures,
&device.alignments,
device.zero_buffer.as_ref(),
&snatch_guard,
)?;
cmd_buf_data_guard.mark_successful();
Ok(())
}
}
pub(crate) fn clear_texture<T: TextureTrackerSetSingle>(
dst_texture: &Arc<Texture>,
range: TextureInitRange,
encoder: &mut dyn hal::DynCommandEncoder,
texture_tracker: &mut T,
alignments: &hal::Alignments,
zero_buffer: &dyn hal::DynBuffer,
snatch_guard: &SnatchGuard<'_>,
) -> Result<(), ClearError> {
let dst_raw = dst_texture.try_raw(snatch_guard)?;
// Issue the right barrier.
let clear_usage = match dst_texture.clear_mode {
TextureClearMode::BufferCopy => hal::TextureUses::COPY_DST,
TextureClearMode::RenderPass {
is_color: false, ..
} => hal::TextureUses::DEPTH_STENCIL_WRITE,
TextureClearMode::Surface { .. } | TextureClearMode::RenderPass { is_color: true, .. } => {
hal::TextureUses::COLOR_TARGET
}
TextureClearMode::None => {
return Err(ClearError::NoValidTextureClearMode(
dst_texture.error_ident(),
));
}
};
let selector = TextureSelector {
mips: range.mip_range.clone(),
layers: range.layer_range.clone(),
};
// If we're in a texture-init use case, we know that the texture is already
// tracked, since whatever caused the init requirement will have made the
// usage tracker aware of the texture. This means it is safe to call
// change_replace_tracked if the life_guard is already gone (i.e.
// the user no longer holds on to this texture).
//
// On the other hand, when coming via command_encoder_clear_texture, the
// life_guard is still there since in order to call it a texture object is
// needed.
//
// We could in theory distinguish these two scenarios in the internal
// clear_texture api in order to remove this check and call the cheaper
// change_replace_tracked whenever possible.
let dst_barrier = texture_tracker
.set_single(dst_texture, selector, clear_usage)
.map(|pending| pending.into_hal(dst_raw))
.collect::<Vec<_>>();
unsafe {
encoder.transition_textures(&dst_barrier);
}
// Record actual clearing
match dst_texture.clear_mode {
TextureClearMode::BufferCopy => clear_texture_via_buffer_copies(
&dst_texture.desc,
alignments,
zero_buffer,
range,
encoder,
dst_raw,
),
TextureClearMode::Surface { .. } => {
clear_texture_via_render_passes(dst_texture, range, true, encoder)
}
TextureClearMode::RenderPass { is_color, .. } => {
clear_texture_via_render_passes(dst_texture, range, is_color, encoder)
}
TextureClearMode::None => {
return Err(ClearError::NoValidTextureClearMode(
dst_texture.error_ident(),
));
}
}
Ok(())
}
fn clear_texture_via_buffer_copies(
texture_desc: &wgt::TextureDescriptor<(), Vec<wgt::TextureFormat>>,
alignments: &hal::Alignments,
zero_buffer: &dyn hal::DynBuffer, // Buffer of size device::ZERO_BUFFER_SIZE
range: TextureInitRange,
encoder: &mut dyn hal::DynCommandEncoder,
dst_raw: &dyn hal::DynTexture,
) {
assert!(!texture_desc.format.is_depth_stencil_format());
if texture_desc.format == wgt::TextureFormat::NV12 {
// TODO: Currently COPY_DST for NV12 textures is unsupported.
return;
}
// Gather the list of zero_buffer copies, then issue a single command to perform them
let mut zero_buffer_copy_regions = Vec::new();
let buffer_copy_pitch = alignments.buffer_copy_pitch.get() as u32;
let (block_width, block_height) = texture_desc.format.block_dimensions();
let block_size = texture_desc.format.block_copy_size(None).unwrap();
let bytes_per_row_alignment = get_lowest_common_denom(buffer_copy_pitch, block_size);
for mip_level in range.mip_range {
let mut mip_size = texture_desc.mip_level_size(mip_level).unwrap();
// Round to multiple of block size
mip_size.width = align_to(mip_size.width, block_width);
mip_size.height = align_to(mip_size.height, block_height);
let bytes_per_row = align_to(
mip_size.width / block_width * block_size,
bytes_per_row_alignment,
);
let max_rows_per_copy = crate::device::ZERO_BUFFER_SIZE as u32 / bytes_per_row;
// round down to a multiple of rows needed by the texture format
let max_rows_per_copy = max_rows_per_copy / block_height * block_height;
assert!(
max_rows_per_copy > 0,
"Zero buffer size is too small to fill a single row \
of a texture with format {:?} and desc {:?}",
texture_desc.format,
texture_desc.size
);
let z_range = 0..(if texture_desc.dimension == wgt::TextureDimension::D3 {
mip_size.depth_or_array_layers
} else {
1
});
for array_layer in range.layer_range.clone() {
// TODO: Only doing one layer at a time for volume textures right now.
for z in z_range.clone() {
// May need multiple copies for each subresource! However, we
// assume that we never need to split a row.
let mut num_rows_left = mip_size.height;
while num_rows_left > 0 {
let num_rows = num_rows_left.min(max_rows_per_copy);
zero_buffer_copy_regions.push(hal::BufferTextureCopy {
buffer_layout: wgt::TexelCopyBufferLayout {
offset: 0,
bytes_per_row: Some(bytes_per_row),
rows_per_image: None,
},
texture_base: hal::TextureCopyBase {
mip_level,
array_layer,
origin: wgt::Origin3d {
x: 0, // Always full rows
y: mip_size.height - num_rows_left,
z,
},
aspect: hal::FormatAspects::COLOR,
},
size: hal::CopyExtent {
width: mip_size.width, // full row
height: num_rows,
depth: 1, // Only single slice of volume texture at a time right now
},
});
num_rows_left -= num_rows;
}
}
}
}
unsafe {
encoder.copy_buffer_to_texture(zero_buffer, dst_raw, &zero_buffer_copy_regions);
}
}
fn clear_texture_via_render_passes(
dst_texture: &Texture,
range: TextureInitRange,
is_color: bool,
encoder: &mut dyn hal::DynCommandEncoder,
) {
assert_eq!(dst_texture.desc.dimension, wgt::TextureDimension::D2);
let extent_base = wgt::Extent3d {
width: dst_texture.desc.size.width,
height: dst_texture.desc.size.height,
depth_or_array_layers: 1, // Only one layer is cleared at a time.
};
for mip_level in range.mip_range {
let extent = extent_base.mip_level_size(mip_level, dst_texture.desc.dimension);
for depth_or_layer in range.layer_range.clone() {
let color_attachments_tmp;
let (color_attachments, depth_stencil_attachment) = if is_color {
color_attachments_tmp = [Some(hal::ColorAttachment {
target: hal::Attachment {
view: Texture::get_clear_view(
&dst_texture.clear_mode,
&dst_texture.desc,
mip_level,
depth_or_layer,
),
usage: hal::TextureUses::COLOR_TARGET,
},
resolve_target: None,
ops: hal::AttachmentOps::STORE,
clear_value: wgt::Color::TRANSPARENT,
})];
(&color_attachments_tmp[..], None)
} else {
(
&[][..],
Some(hal::DepthStencilAttachment {
target: hal::Attachment {
view: Texture::get_clear_view(
&dst_texture.clear_mode,
&dst_texture.desc,
mip_level,
depth_or_layer,
),
usage: hal::TextureUses::DEPTH_STENCIL_WRITE,
},
depth_ops: hal::AttachmentOps::STORE,
stencil_ops: hal::AttachmentOps::STORE,
clear_value: (0.0, 0),
}),
)
};
unsafe {
encoder.begin_render_pass(&hal::RenderPassDescriptor {
label: Some("(wgpu internal) clear_texture clear pass"),
extent,
sample_count: dst_texture.desc.sample_count,
color_attachments,
depth_stencil_attachment,
multiview: None,
timestamp_writes: None,
occlusion_query_set: None,
});
encoder.end_render_pass();
}
}
}
}
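`clear_texture_via_buffer_copies` sizes each copy with the arithmetic above: `bytes_per_row` is one row of blocks rounded up to the buffer-copy pitch, and `max_rows_per_copy` is how many such rows fit in the device's zero buffer. A worked example with assumed numbers (the pitch, texture width, and zero-buffer size below are illustrative, not values taken from a real device; `align_to` is mirrored locally):

// Local mirror of the round-up-to-alignment helper used above.
fn align_to(value: u32, alignment: u32) -> u32 {
    ((value + alignment - 1) / alignment) * alignment
}

fn main() {
    // Assumed RGBA8-like format: 1x1 blocks, 4 bytes per block, mip width 1000,
    // and a buffer_copy_pitch of 256 bytes, so the row alignment (their least
    // common multiple) is 256.
    let (block_width, block_size) = (1u32, 4u32);
    let bytes_per_row_alignment = 256u32;
    let mip_width = 1000u32;

    let bytes_per_row = align_to(mip_width / block_width * block_size, bytes_per_row_alignment);
    assert_eq!(bytes_per_row, 4096); // 4000 bytes rounded up to a multiple of 256

    // With an assumed 512 KiB zero buffer, one copy can cover at most 128 rows,
    // so taller mips are filled with several stacked copies.
    let max_rows_per_copy = (512 * 1024u32) / bytes_per_row;
    assert_eq!(max_rows_per_copy, 128);
}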

vendor/wgpu-core/src/command/compute.rs: vendored, new file, 1341 lines (diff suppressed because it is too large)


@@ -0,0 +1,255 @@
use std::sync::Arc;
use crate::{
binding_model::BindGroup,
id,
pipeline::ComputePipeline,
resource::{Buffer, QuerySet},
};
#[derive(Clone, Copy, Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum ComputeCommand {
SetBindGroup {
index: u32,
num_dynamic_offsets: usize,
bind_group_id: Option<id::BindGroupId>,
},
SetPipeline(id::ComputePipelineId),
/// Set a range of push constants to values stored in `push_constant_data`.
SetPushConstant {
/// The byte offset within the push constant storage to write to. This
/// must be a multiple of four.
offset: u32,
/// The number of bytes to write. This must be a multiple of four.
size_bytes: u32,
/// Index in `push_constant_data` of the start of the data
/// to be written.
///
/// Note: this is not a byte offset like `offset`. Rather, it is the
/// index of the first `u32` element in `push_constant_data` to read.
values_offset: u32,
},
Dispatch([u32; 3]),
DispatchIndirect {
buffer_id: id::BufferId,
offset: wgt::BufferAddress,
},
PushDebugGroup {
color: u32,
len: usize,
},
PopDebugGroup,
InsertDebugMarker {
color: u32,
len: usize,
},
WriteTimestamp {
query_set_id: id::QuerySetId,
query_index: u32,
},
BeginPipelineStatisticsQuery {
query_set_id: id::QuerySetId,
query_index: u32,
},
EndPipelineStatisticsQuery,
}
impl ComputeCommand {
/// Resolves all ids in a list of commands into the corresponding resource Arc.
#[cfg(any(feature = "serde", feature = "replay"))]
pub fn resolve_compute_command_ids(
hub: &crate::hub::Hub,
commands: &[ComputeCommand],
) -> Result<Vec<ArcComputeCommand>, super::ComputePassError> {
use super::{ComputePassError, PassErrorScope};
let buffers_guard = hub.buffers.read();
let bind_group_guard = hub.bind_groups.read();
let query_set_guard = hub.query_sets.read();
let pipelines_guard = hub.compute_pipelines.read();
let resolved_commands: Vec<ArcComputeCommand> = commands
.iter()
.map(|c| -> Result<ArcComputeCommand, ComputePassError> {
Ok(match *c {
ComputeCommand::SetBindGroup {
index,
num_dynamic_offsets,
bind_group_id,
} => {
if bind_group_id.is_none() {
return Ok(ArcComputeCommand::SetBindGroup {
index,
num_dynamic_offsets,
bind_group: None,
});
}
let bind_group_id = bind_group_id.unwrap();
let bg = bind_group_guard.get(bind_group_id).get().map_err(|e| {
ComputePassError {
scope: PassErrorScope::SetBindGroup,
inner: e.into(),
}
})?;
ArcComputeCommand::SetBindGroup {
index,
num_dynamic_offsets,
bind_group: Some(bg),
}
}
ComputeCommand::SetPipeline(pipeline_id) => ArcComputeCommand::SetPipeline(
pipelines_guard
.get(pipeline_id)
.get()
.map_err(|e| ComputePassError {
scope: PassErrorScope::SetPipelineCompute,
inner: e.into(),
})?,
),
ComputeCommand::SetPushConstant {
offset,
size_bytes,
values_offset,
} => ArcComputeCommand::SetPushConstant {
offset,
size_bytes,
values_offset,
},
ComputeCommand::Dispatch(dim) => ArcComputeCommand::Dispatch(dim),
ComputeCommand::DispatchIndirect { buffer_id, offset } => {
ArcComputeCommand::DispatchIndirect {
buffer: buffers_guard.get(buffer_id).get().map_err(|e| {
ComputePassError {
scope: PassErrorScope::Dispatch { indirect: true },
inner: e.into(),
}
})?,
offset,
}
}
ComputeCommand::PushDebugGroup { color, len } => {
ArcComputeCommand::PushDebugGroup { color, len }
}
ComputeCommand::PopDebugGroup => ArcComputeCommand::PopDebugGroup,
ComputeCommand::InsertDebugMarker { color, len } => {
ArcComputeCommand::InsertDebugMarker { color, len }
}
ComputeCommand::WriteTimestamp {
query_set_id,
query_index,
} => ArcComputeCommand::WriteTimestamp {
query_set: query_set_guard.get(query_set_id).get().map_err(|e| {
ComputePassError {
scope: PassErrorScope::WriteTimestamp,
inner: e.into(),
}
})?,
query_index,
},
ComputeCommand::BeginPipelineStatisticsQuery {
query_set_id,
query_index,
} => ArcComputeCommand::BeginPipelineStatisticsQuery {
query_set: query_set_guard.get(query_set_id).get().map_err(|e| {
ComputePassError {
scope: PassErrorScope::BeginPipelineStatisticsQuery,
inner: e.into(),
}
})?,
query_index,
},
ComputeCommand::EndPipelineStatisticsQuery => {
ArcComputeCommand::EndPipelineStatisticsQuery
}
})
})
.collect::<Result<Vec<_>, ComputePassError>>()?;
Ok(resolved_commands)
}
}
/// Equivalent to `ComputeCommand` but with the ids resolved into resource Arcs.
#[derive(Clone, Debug)]
pub enum ArcComputeCommand {
SetBindGroup {
index: u32,
num_dynamic_offsets: usize,
bind_group: Option<Arc<BindGroup>>,
},
SetPipeline(Arc<ComputePipeline>),
/// Set a range of push constants to values stored in `push_constant_data`.
SetPushConstant {
/// The byte offset within the push constant storage to write to. This
/// must be a multiple of four.
offset: u32,
/// The number of bytes to write. This must be a multiple of four.
size_bytes: u32,
/// Index in `push_constant_data` of the start of the data
/// to be written.
///
/// Note: this is not a byte offset like `offset`. Rather, it is the
/// index of the first `u32` element in `push_constant_data` to read.
values_offset: u32,
},
Dispatch([u32; 3]),
DispatchIndirect {
buffer: Arc<Buffer>,
offset: wgt::BufferAddress,
},
PushDebugGroup {
#[cfg_attr(target_os = "emscripten", allow(dead_code))]
color: u32,
len: usize,
},
PopDebugGroup,
InsertDebugMarker {
#[cfg_attr(target_os = "emscripten", allow(dead_code))]
color: u32,
len: usize,
},
WriteTimestamp {
query_set: Arc<QuerySet>,
query_index: u32,
},
BeginPipelineStatisticsQuery {
query_set: Arc<QuerySet>,
query_index: u32,
},
EndPipelineStatisticsQuery,
}
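Both `ComputeCommand::SetPushConstant` and its resolved form use the same convention documented above: `offset` and `size_bytes` are byte quantities, while `values_offset` indexes whole `u32` words in the pass's push-constant storage. A small illustration (the `push_constant_data` vector below is a hypothetical stand-in for that storage; not part of the vendored file):

fn main() {
    // Hypothetical push-constant storage, one u32 word per element.
    let push_constant_data: Vec<u32> = vec![0xAAAA_AAAA, 0xBBBB_BBBB, 0xCCCC_CCCC, 0xDDDD_DDDD];

    // "Write 8 bytes at byte offset 4, taking data starting at word index 2."
    let offset: u32 = 4; // byte offset, must be a multiple of four
    let size_bytes: u32 = 8; // must be a multiple of four
    let values_offset: u32 = 2; // word index, not a byte offset

    assert_eq!(offset % 4, 0);
    assert_eq!(size_bytes % 4, 0);

    let start = values_offset as usize;
    let end = start + (size_bytes / 4) as usize;
    assert_eq!(
        &push_constant_data[start..end],
        &[0xCCCC_CCCC, 0xDDDD_DDDD]
    );
}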

vendor/wgpu-core/src/command/draw.rs: vendored, new file, 107 lines

@@ -0,0 +1,107 @@
use crate::{
binding_model::{LateMinBufferBindingSizeMismatch, PushConstantUploadError},
resource::{
DestroyedResourceError, MissingBufferUsageError, MissingTextureUsageError,
ResourceErrorIdent,
},
track::ResourceUsageCompatibilityError,
};
use wgt::VertexStepMode;
use thiserror::Error;
use super::bind::BinderError;
/// Error validating a draw call.
#[derive(Clone, Debug, Error)]
#[non_exhaustive]
pub enum DrawError {
#[error("Blend constant needs to be set")]
MissingBlendConstant,
#[error("Render pipeline must be set")]
MissingPipeline,
#[error("Currently set {pipeline} requires vertex buffer {index} to be set")]
MissingVertexBuffer {
pipeline: ResourceErrorIdent,
index: u32,
},
#[error("Index buffer must be set")]
MissingIndexBuffer,
#[error(transparent)]
IncompatibleBindGroup(#[from] Box<BinderError>),
#[error("Vertex {last_vertex} extends beyond limit {vertex_limit} imposed by the buffer in slot {slot}. Did you bind the correct `Vertex` step-rate vertex buffer?")]
VertexBeyondLimit {
last_vertex: u64,
vertex_limit: u64,
slot: u32,
},
#[error("{step_mode:?} buffer out of bounds at slot {slot}. Offset {offset} beyond limit {limit}. Did you bind the correct `Vertex` step-rate vertex buffer?")]
VertexOutOfBounds {
step_mode: VertexStepMode,
offset: u64,
limit: u64,
slot: u32,
},
#[error("Instance {last_instance} extends beyond limit {instance_limit} imposed by the buffer in slot {slot}. Did you bind the correct `Instance` step-rate vertex buffer?")]
InstanceBeyondLimit {
last_instance: u64,
instance_limit: u64,
slot: u32,
},
#[error("Index {last_index} extends beyond limit {index_limit}. Did you bind the correct index buffer?")]
IndexBeyondLimit { last_index: u64, index_limit: u64 },
#[error(
"Index buffer format {buffer_format:?} doesn't match {pipeline}'s index format {pipeline_format:?}"
)]
UnmatchedIndexFormats {
pipeline: ResourceErrorIdent,
pipeline_format: wgt::IndexFormat,
buffer_format: wgt::IndexFormat,
},
#[error(transparent)]
BindingSizeTooSmall(#[from] LateMinBufferBindingSizeMismatch),
}
/// Error encountered when encoding a render command.
/// This is the shared error set between render bundles and passes.
#[derive(Clone, Debug, Error)]
#[non_exhaustive]
pub enum RenderCommandError {
#[error("Bind group index {index} is greater than the device's requested `max_bind_group` limit {max}")]
BindGroupIndexOutOfRange { index: u32, max: u32 },
#[error("Vertex buffer index {index} is greater than the device's requested `max_vertex_buffers` limit {max}")]
VertexBufferIndexOutOfRange { index: u32, max: u32 },
#[error("Render pipeline targets are incompatible with render pass")]
IncompatiblePipelineTargets(#[from] crate::device::RenderPassCompatibilityError),
#[error("{0} writes to depth, while the pass has read-only depth access")]
IncompatibleDepthAccess(ResourceErrorIdent),
#[error("{0} writes to stencil, while the pass has read-only stencil access")]
IncompatibleStencilAccess(ResourceErrorIdent),
#[error(transparent)]
ResourceUsageCompatibility(#[from] ResourceUsageCompatibilityError),
#[error(transparent)]
DestroyedResource(#[from] DestroyedResourceError),
#[error(transparent)]
MissingBufferUsage(#[from] MissingBufferUsageError),
#[error(transparent)]
MissingTextureUsage(#[from] MissingTextureUsageError),
#[error(transparent)]
PushConstants(#[from] PushConstantUploadError),
#[error("Viewport has invalid rect {0:?}; origin and/or size is less than or equal to 0, and/or is not contained in the render target {1:?}")]
InvalidViewportRect(Rect<f32>, wgt::Extent3d),
#[error("Viewport minDepth {0} and/or maxDepth {1} are not in [0, 1]")]
InvalidViewportDepth(f32, f32),
#[error("Scissor {0:?} is not contained in the render target {1:?}")]
InvalidScissorRect(Rect<u32>, wgt::Extent3d),
#[error("Support for {0} is not implemented yet")]
Unimplemented(&'static str),
}
#[derive(Clone, Copy, Debug, Default)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Rect<T> {
pub x: T,
pub y: T,
pub w: T,
pub h: T,
}


@@ -0,0 +1,330 @@
use std::{collections::hash_map::Entry, ops::Range, sync::Arc, vec::Drain};
use crate::{
device::Device,
init_tracker::*,
resource::{DestroyedResourceError, ParentDevice, Texture, Trackable},
snatch::SnatchGuard,
track::{DeviceTracker, TextureTracker},
FastHashMap,
};
use super::{clear::clear_texture, BakedCommands, ClearError};
/// Surface that was discarded by `StoreOp::Discard` of a preceding renderpass.
/// Any read access to this surface needs to be preceded by a texture initialization.
#[derive(Clone)]
pub(crate) struct TextureSurfaceDiscard {
pub texture: Arc<Texture>,
pub mip_level: u32,
pub layer: u32,
}
pub(crate) type SurfacesInDiscardState = Vec<TextureSurfaceDiscard>;
#[derive(Default)]
pub(crate) struct CommandBufferTextureMemoryActions {
/// The tracker actions that need to be executed before the command
/// buffer is executed.
init_actions: Vec<TextureInitTrackerAction>,
/// All the discards that haven't been followed by init again within the
/// command buffer, i.e. everything in this list resets the texture init
/// state *after* the command buffer execution.
discards: Vec<TextureSurfaceDiscard>,
}
impl CommandBufferTextureMemoryActions {
pub(crate) fn drain_init_actions(&mut self) -> Drain<TextureInitTrackerAction> {
self.init_actions.drain(..)
}
pub(crate) fn discard(&mut self, discard: TextureSurfaceDiscard) {
self.discards.push(discard);
}
// Registers a TextureInitTrackerAction.
// Returns previously discarded surfaces that need to be initialized *immediately* now.
// Only returns a non-empty list if action is MemoryInitKind::NeedsInitializedMemory.
#[must_use]
pub(crate) fn register_init_action(
&mut self,
action: &TextureInitTrackerAction,
) -> SurfacesInDiscardState {
let mut immediately_necessary_clears = SurfacesInDiscardState::new();
// Note that within a command buffer we may stack arbitrary memory init
// actions on the same texture. Since we react to them in sequence, they
// are going to be dropped again at queue submit.
//
// We don't need to add MemoryInitKind::NeedsInitializedMemory to
// init_actions if a surface is part of the discard list. But that would
// mean splitting up the action which is more than we'd win here.
self.init_actions.extend(
action
.texture
.initialization_status
.read()
.check_action(action),
);
// We expect very few discarded surfaces at any point in time which is
// why a simple linear search is likely best. (i.e. most of the time
// self.discards is empty!)
let init_actions = &mut self.init_actions;
self.discards.retain(|discarded_surface| {
if discarded_surface.texture.is_equal(&action.texture)
&& action.range.layer_range.contains(&discarded_surface.layer)
&& action
.range
.mip_range
.contains(&discarded_surface.mip_level)
{
if let MemoryInitKind::NeedsInitializedMemory = action.kind {
immediately_necessary_clears.push(discarded_surface.clone());
// Mark surface as implicitly initialized (this is relevant
// because it might have been uninitialized prior to
// discarding).
init_actions.push(TextureInitTrackerAction {
texture: discarded_surface.texture.clone(),
range: TextureInitRange {
mip_range: discarded_surface.mip_level
..(discarded_surface.mip_level + 1),
layer_range: discarded_surface.layer..(discarded_surface.layer + 1),
},
kind: MemoryInitKind::ImplicitlyInitialized,
});
}
false
} else {
true
}
});
immediately_necessary_clears
}
// Shortcut for register_init_action when it is known that the action is an
// implicit init, not requiring any immediate resource init.
pub(crate) fn register_implicit_init(
&mut self,
texture: &Arc<Texture>,
range: TextureInitRange,
) {
let must_be_empty = self.register_init_action(&TextureInitTrackerAction {
texture: texture.clone(),
range,
kind: MemoryInitKind::ImplicitlyInitialized,
});
assert!(must_be_empty.is_empty());
}
}
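`register_init_action` interacts with the discard list as follows: touching a discarded surface always removes it from the list, and if the touch needs initialized memory the surface is returned so the caller can clear it immediately. A minimal standalone model of that bookkeeping (illustrative, not the vendored types):

#[derive(Clone, Copy, PartialEq, Debug)]
struct Surface {
    mip: u32,
    layer: u32,
}

enum Kind {
    NeedsInitializedMemory,
    ImplicitlyInitialized,
}

// Touching a discarded surface removes it from the discard list; a read
// (NeedsInitializedMemory) additionally demands an immediate clear.
fn register(discards: &mut Vec<Surface>, touched: Surface, kind: Kind) -> Vec<Surface> {
    let mut immediate_clears = Vec::new();
    discards.retain(|d| {
        if *d == touched {
            if matches!(kind, Kind::NeedsInitializedMemory) {
                immediate_clears.push(*d);
            }
            false
        } else {
            true
        }
    });
    immediate_clears
}

fn main() {
    // Reading a discarded surface forces an immediate clear...
    let mut discards = vec![Surface { mip: 0, layer: 0 }];
    let clears = register(&mut discards, Surface { mip: 0, layer: 0 }, Kind::NeedsInitializedMemory);
    assert_eq!(clears, vec![Surface { mip: 0, layer: 0 }]);
    assert!(discards.is_empty());

    // ...while writing to one just drops it from the list, no clear needed.
    let mut discards = vec![Surface { mip: 1, layer: 0 }];
    let clears = register(&mut discards, Surface { mip: 1, layer: 0 }, Kind::ImplicitlyInitialized);
    assert!(clears.is_empty() && discards.is_empty());
}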
// Utility function that takes discarded surfaces from (several calls to)
// register_init_action and initializes them on the spot.
//
// Takes care of barriers as well!
pub(crate) fn fixup_discarded_surfaces<InitIter: Iterator<Item = TextureSurfaceDiscard>>(
inits: InitIter,
encoder: &mut dyn hal::DynCommandEncoder,
texture_tracker: &mut TextureTracker,
device: &Device,
snatch_guard: &SnatchGuard<'_>,
) {
for init in inits {
clear_texture(
&init.texture,
TextureInitRange {
mip_range: init.mip_level..(init.mip_level + 1),
layer_range: init.layer..(init.layer + 1),
},
encoder,
texture_tracker,
&device.alignments,
device.zero_buffer.as_ref(),
snatch_guard,
)
.unwrap();
}
}
impl BakedCommands {
// inserts all buffer initializations that are going to be needed for
// executing the commands and updates resource init states accordingly
pub(crate) fn initialize_buffer_memory(
&mut self,
device_tracker: &mut DeviceTracker,
snatch_guard: &SnatchGuard<'_>,
) -> Result<(), DestroyedResourceError> {
profiling::scope!("initialize_buffer_memory");
// Gather init ranges for each buffer so we can collapse them.
// It is not possible to do this at an earlier point since previously
// executed command buffers change the resource init state.
let mut uninitialized_ranges_per_buffer = FastHashMap::default();
for buffer_use in self.buffer_memory_init_actions.drain(..) {
let mut initialization_status = buffer_use.buffer.initialization_status.write();
// align the end to 4
let end_remainder = buffer_use.range.end % wgt::COPY_BUFFER_ALIGNMENT;
let end = if end_remainder == 0 {
buffer_use.range.end
} else {
buffer_use.range.end + wgt::COPY_BUFFER_ALIGNMENT - end_remainder
};
let uninitialized_ranges = initialization_status.drain(buffer_use.range.start..end);
match buffer_use.kind {
MemoryInitKind::ImplicitlyInitialized => {}
MemoryInitKind::NeedsInitializedMemory => {
match uninitialized_ranges_per_buffer.entry(buffer_use.buffer.tracker_index()) {
Entry::Vacant(e) => {
e.insert((
buffer_use.buffer.clone(),
uninitialized_ranges.collect::<Vec<Range<wgt::BufferAddress>>>(),
));
}
Entry::Occupied(mut e) => {
e.get_mut().1.extend(uninitialized_ranges);
}
}
}
}
}
for (buffer, mut ranges) in uninitialized_ranges_per_buffer.into_values() {
// Collapse touching ranges.
ranges.sort_by_key(|r| r.start);
for i in (1..ranges.len()).rev() {
// The memory init tracker made sure of this!
assert!(ranges[i - 1].end <= ranges[i].start);
if ranges[i].start == ranges[i - 1].end {
ranges[i - 1].end = ranges[i].end;
ranges.swap_remove(i); // Ordering not important at this point
}
}
// Don't do use_replace since the buffer may already no longer have
// a ref_count.
//
// However, we *know* that it is currently in use, so the tracker
// must already know about it.
let transition = device_tracker
.buffers
.set_single(&buffer, hal::BufferUses::COPY_DST);
let raw_buf = buffer.try_raw(snatch_guard)?;
unsafe {
self.encoder.raw.transition_buffers(
transition
.map(|pending| pending.into_hal(&buffer, snatch_guard))
.as_slice(),
);
}
for range in ranges.iter() {
assert!(
range.start % wgt::COPY_BUFFER_ALIGNMENT == 0,
"Buffer {:?} has an uninitialized range with a start \
not aligned to 4 (start was {})",
raw_buf,
range.start
);
assert!(
range.end % wgt::COPY_BUFFER_ALIGNMENT == 0,
"Buffer {:?} has an uninitialized range with an end \
not aligned to 4 (end was {})",
raw_buf,
range.end
);
unsafe {
self.encoder.raw.clear_buffer(raw_buf, range.clone());
}
}
}
Ok(())
}
// inserts all texture initializations that are going to be needed for
// executing the commands and updates resource init states accordingly. Any
// textures that are left discarded by this command buffer will be marked as
// uninitialized
pub(crate) fn initialize_texture_memory(
&mut self,
device_tracker: &mut DeviceTracker,
device: &Device,
snatch_guard: &SnatchGuard<'_>,
) -> Result<(), DestroyedResourceError> {
profiling::scope!("initialize_texture_memory");
let mut ranges: Vec<TextureInitRange> = Vec::new();
for texture_use in self.texture_memory_actions.drain_init_actions() {
let mut initialization_status = texture_use.texture.initialization_status.write();
let use_range = texture_use.range;
let affected_mip_trackers = initialization_status
.mips
.iter_mut()
.enumerate()
.skip(use_range.mip_range.start as usize)
.take((use_range.mip_range.end - use_range.mip_range.start) as usize);
match texture_use.kind {
MemoryInitKind::ImplicitlyInitialized => {
for (_, mip_tracker) in affected_mip_trackers {
mip_tracker.drain(use_range.layer_range.clone());
}
}
MemoryInitKind::NeedsInitializedMemory => {
for (mip_level, mip_tracker) in affected_mip_trackers {
for layer_range in mip_tracker.drain(use_range.layer_range.clone()) {
ranges.push(TextureInitRange {
mip_range: (mip_level as u32)..(mip_level as u32 + 1),
layer_range,
});
}
}
}
}
// TODO: Could we attempt some range collapsing here?
for range in ranges.drain(..) {
let clear_result = clear_texture(
&texture_use.texture,
range,
self.encoder.raw.as_mut(),
&mut device_tracker.textures,
&device.alignments,
device.zero_buffer.as_ref(),
snatch_guard,
);
// A Texture can be destroyed between the command recording
// and now; this is out of our control, so we have to handle
// it gracefully.
if let Err(ClearError::DestroyedResource(e)) = clear_result {
return Err(e);
}
// Other errors are unexpected.
if let Err(error) = clear_result {
panic!("{error}");
}
}
}
// Now that all buffers/textures have the proper init state as of before
// the command buffer starts, we discard init states for any textures this
// command buffer left discarded after its execution.
for surface_discard in self.texture_memory_actions.discards.iter() {
surface_discard
.texture
.initialization_status
.write()
.discard(surface_discard.mip_level, surface_discard.layer);
}
Ok(())
}
}
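`initialize_buffer_memory` sorts each buffer's uninitialized ranges and then merges ranges that touch, so every buffer is cleared with as few `clear_buffer` calls as possible. A standalone sketch of just that merge step (illustrative, not part of the vendored file):

use std::ops::Range;

// Merge touching (but non-overlapping) ranges, mirroring the loop above.
fn collapse_touching(mut ranges: Vec<Range<u64>>) -> Vec<Range<u64>> {
    ranges.sort_by_key(|r| r.start);
    for i in (1..ranges.len()).rev() {
        // The ranges are already non-overlapping; only merge those that touch.
        if ranges[i].start == ranges[i - 1].end {
            ranges[i - 1].end = ranges[i].end;
            ranges.swap_remove(i); // ordering no longer matters here
        }
    }
    ranges
}

fn main() {
    // 0..4, 4..8 and 8..12 all touch, so a single clear of 0..12 suffices.
    let collapsed = collapse_touching(vec![8..12, 0..4, 4..8]);
    assert_eq!(collapsed, vec![0..12]);
}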

vendor/wgpu-core/src/command/mod.rs: vendored, new file, 1075 lines (diff suppressed because it is too large)

vendor/wgpu-core/src/command/query.rs: vendored, new file, 467 lines

@@ -0,0 +1,467 @@
#[cfg(feature = "trace")]
use crate::device::trace::Command as TraceCommand;
use crate::{
command::{CommandBuffer, CommandEncoderError},
device::{DeviceError, MissingFeatures},
global::Global,
id,
init_tracker::MemoryInitKind,
resource::{
DestroyedResourceError, InvalidResourceError, MissingBufferUsageError, ParentDevice,
QuerySet, Trackable,
},
track::{StatelessTracker, TrackerIndex},
FastHashMap,
};
use std::{iter, sync::Arc};
use thiserror::Error;
use wgt::BufferAddress;
#[derive(Debug)]
pub(crate) struct QueryResetMap {
map: FastHashMap<TrackerIndex, (Vec<bool>, Arc<QuerySet>)>,
}
impl QueryResetMap {
pub fn new() -> Self {
Self {
map: FastHashMap::default(),
}
}
pub fn use_query_set(&mut self, query_set: &Arc<QuerySet>, query: u32) -> bool {
let vec_pair = self
.map
.entry(query_set.tracker_index())
.or_insert_with(|| {
(
vec![false; query_set.desc.count as usize],
query_set.clone(),
)
});
std::mem::replace(&mut vec_pair.0[query as usize], true)
}
pub fn reset_queries(&mut self, raw_encoder: &mut dyn hal::DynCommandEncoder) {
for (_, (state, query_set)) in self.map.drain() {
debug_assert_eq!(state.len(), query_set.desc.count as usize);
// Need to find all "runs" of values which need resets. If the state vector is:
// [false, true, true, false, true], we want to reset [1..3, 4..5]. This minimizes
// the amount of resets needed.
let mut run_start: Option<u32> = None;
for (idx, value) in state.into_iter().chain(iter::once(false)).enumerate() {
match (run_start, value) {
// We're inside of a run, do nothing
(Some(..), true) => {}
// We've hit the end of a run, dispatch a reset
(Some(start), false) => {
run_start = None;
unsafe { raw_encoder.reset_queries(query_set.raw(), start..idx as u32) };
}
// We're starting a run
(None, true) => {
run_start = Some(idx as u32);
}
// We're in a run of falses, do nothing.
(None, false) => {}
}
}
}
}
}
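The comment inside `reset_queries` describes compressing the per-query `bool` state vector into as few reset calls as possible by finding runs of `true`. A standalone model of that scan which just collects the ranges it would reset (illustrative, not part of the vendored file):

use std::ops::Range;

// Same run-finding state machine as above, but returning the ranges instead
// of issuing reset_queries calls on an encoder.
fn reset_runs(state: &[bool]) -> Vec<Range<u32>> {
    let mut runs = Vec::new();
    let mut run_start: Option<u32> = None;
    // The trailing `false` flushes a run that reaches the end of the vector,
    // mirroring the `.chain(iter::once(false))` in the vendored code.
    for (idx, value) in state.iter().copied().chain(std::iter::once(false)).enumerate() {
        match (run_start, value) {
            // Inside a run: keep going.
            (Some(_), true) => {}
            // A run just ended: record it.
            (Some(start), false) => {
                run_start = None;
                runs.push(start..idx as u32);
            }
            // A run just started.
            (None, true) => run_start = Some(idx as u32),
            // Between runs: nothing to do.
            (None, false) => {}
        }
    }
    runs
}

fn main() {
    // Matches the example in the comment above: [false, true, true, false, true]
    // needs exactly two resets, covering 1..3 and 4..5.
    assert_eq!(reset_runs(&[false, true, true, false, true]), vec![1..3, 4..5]);
}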
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum SimplifiedQueryType {
Occlusion,
Timestamp,
PipelineStatistics,
}
impl From<wgt::QueryType> for SimplifiedQueryType {
fn from(q: wgt::QueryType) -> Self {
match q {
wgt::QueryType::Occlusion => SimplifiedQueryType::Occlusion,
wgt::QueryType::Timestamp => SimplifiedQueryType::Timestamp,
wgt::QueryType::PipelineStatistics(..) => SimplifiedQueryType::PipelineStatistics,
}
}
}
/// Error encountered when dealing with queries
#[derive(Clone, Debug, Error)]
#[non_exhaustive]
pub enum QueryError {
#[error(transparent)]
Device(#[from] DeviceError),
#[error(transparent)]
Encoder(#[from] CommandEncoderError),
#[error(transparent)]
MissingFeature(#[from] MissingFeatures),
#[error("Error encountered while trying to use queries")]
Use(#[from] QueryUseError),
#[error("Error encountered while trying to resolve a query")]
Resolve(#[from] ResolveError),
#[error(transparent)]
DestroyedResource(#[from] DestroyedResourceError),
#[error(transparent)]
InvalidResource(#[from] InvalidResourceError),
}
/// Error encountered while trying to use queries
#[derive(Clone, Debug, Error)]
#[non_exhaustive]
pub enum QueryUseError {
#[error(transparent)]
Device(#[from] DeviceError),
#[error("Query {query_index} is out of bounds for a query set of size {query_set_size}")]
OutOfBounds {
query_index: u32,
query_set_size: u32,
},
#[error("Query {query_index} has already been used within the same renderpass. Queries must only be used once per renderpass")]
UsedTwiceInsideRenderpass { query_index: u32 },
#[error("Query {new_query_index} was started while query {active_query_index} was already active. No more than one statistic or occlusion query may be active at once")]
AlreadyStarted {
active_query_index: u32,
new_query_index: u32,
},
#[error("Query was stopped while there was no active query")]
AlreadyStopped,
#[error("A query of type {query_type:?} was started using a query set of type {set_type:?}")]
IncompatibleType {
set_type: SimplifiedQueryType,
query_type: SimplifiedQueryType,
},
}
/// Error encountered while trying to resolve a query.
#[derive(Clone, Debug, Error)]
#[non_exhaustive]
pub enum ResolveError {
#[error(transparent)]
MissingBufferUsage(#[from] MissingBufferUsageError),
#[error("Resolve buffer offset has to be aligned to `QUERY_RESOLVE_BUFFER_ALIGNMENT")]
BufferOffsetAlignment,
#[error("Resolving queries {start_query}..{end_query} would overrun the query set of size {query_set_size}")]
QueryOverrun {
start_query: u32,
end_query: u32,
query_set_size: u32,
},
#[error("Resolving queries {start_query}..{end_query} ({stride} byte queries) will end up overrunning the bounds of the destination buffer of size {buffer_size} using offsets {buffer_start_offset}..{buffer_end_offset}")]
BufferOverrun {
start_query: u32,
end_query: u32,
stride: u32,
buffer_size: BufferAddress,
buffer_start_offset: BufferAddress,
buffer_end_offset: BufferAddress,
},
}
impl QuerySet {
pub(crate) fn validate_query(
self: &Arc<Self>,
query_type: SimplifiedQueryType,
query_index: u32,
reset_state: Option<&mut QueryResetMap>,
) -> Result<(), QueryUseError> {
// If we need to defer our resets because we are in a renderpass,
// record the usage in the reset map.
if let Some(reset) = reset_state {
let used = reset.use_query_set(self, query_index);
if used {
return Err(QueryUseError::UsedTwiceInsideRenderpass { query_index });
}
}
let simple_set_type = SimplifiedQueryType::from(self.desc.ty);
if simple_set_type != query_type {
return Err(QueryUseError::IncompatibleType {
query_type,
set_type: simple_set_type,
});
}
if query_index >= self.desc.count {
return Err(QueryUseError::OutOfBounds {
query_index,
query_set_size: self.desc.count,
});
}
Ok(())
}
pub(super) fn validate_and_write_timestamp(
self: &Arc<Self>,
raw_encoder: &mut dyn hal::DynCommandEncoder,
query_index: u32,
reset_state: Option<&mut QueryResetMap>,
) -> Result<(), QueryUseError> {
let needs_reset = reset_state.is_none();
self.validate_query(SimplifiedQueryType::Timestamp, query_index, reset_state)?;
unsafe {
// If we don't have a reset state tracker which can defer resets, we must reset now.
if needs_reset {
raw_encoder.reset_queries(self.raw(), query_index..(query_index + 1));
}
raw_encoder.write_timestamp(self.raw(), query_index);
}
Ok(())
}
}
pub(super) fn validate_and_begin_occlusion_query(
query_set: Arc<QuerySet>,
raw_encoder: &mut dyn hal::DynCommandEncoder,
tracker: &mut StatelessTracker<QuerySet>,
query_index: u32,
reset_state: Option<&mut QueryResetMap>,
active_query: &mut Option<(Arc<QuerySet>, u32)>,
) -> Result<(), QueryUseError> {
let needs_reset = reset_state.is_none();
query_set.validate_query(SimplifiedQueryType::Occlusion, query_index, reset_state)?;
tracker.insert_single(query_set.clone());
if let Some((_old, old_idx)) = active_query.take() {
return Err(QueryUseError::AlreadyStarted {
active_query_index: old_idx,
new_query_index: query_index,
});
}
let (query_set, _) = &active_query.insert((query_set, query_index));
unsafe {
// If we don't have a reset state tracker which can defer resets, we must reset now.
if needs_reset {
raw_encoder.reset_queries(query_set.raw(), query_index..(query_index + 1));
}
raw_encoder.begin_query(query_set.raw(), query_index);
}
Ok(())
}
pub(super) fn end_occlusion_query(
raw_encoder: &mut dyn hal::DynCommandEncoder,
active_query: &mut Option<(Arc<QuerySet>, u32)>,
) -> Result<(), QueryUseError> {
if let Some((query_set, query_index)) = active_query.take() {
unsafe { raw_encoder.end_query(query_set.raw(), query_index) };
Ok(())
} else {
Err(QueryUseError::AlreadyStopped)
}
}
pub(super) fn validate_and_begin_pipeline_statistics_query(
query_set: Arc<QuerySet>,
raw_encoder: &mut dyn hal::DynCommandEncoder,
tracker: &mut StatelessTracker<QuerySet>,
cmd_buf: &CommandBuffer,
query_index: u32,
reset_state: Option<&mut QueryResetMap>,
active_query: &mut Option<(Arc<QuerySet>, u32)>,
) -> Result<(), QueryUseError> {
query_set.same_device_as(cmd_buf)?;
let needs_reset = reset_state.is_none();
query_set.validate_query(
SimplifiedQueryType::PipelineStatistics,
query_index,
reset_state,
)?;
tracker.insert_single(query_set.clone());
if let Some((_old, old_idx)) = active_query.take() {
return Err(QueryUseError::AlreadyStarted {
active_query_index: old_idx,
new_query_index: query_index,
});
}
let (query_set, _) = &active_query.insert((query_set, query_index));
unsafe {
// If we don't have a reset state tracker which can defer resets, we must reset now.
if needs_reset {
raw_encoder.reset_queries(query_set.raw(), query_index..(query_index + 1));
}
raw_encoder.begin_query(query_set.raw(), query_index);
}
Ok(())
}
pub(super) fn end_pipeline_statistics_query(
raw_encoder: &mut dyn hal::DynCommandEncoder,
active_query: &mut Option<(Arc<QuerySet>, u32)>,
) -> Result<(), QueryUseError> {
if let Some((query_set, query_index)) = active_query.take() {
unsafe { raw_encoder.end_query(query_set.raw(), query_index) };
Ok(())
} else {
Err(QueryUseError::AlreadyStopped)
}
}
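// Illustrative sketch, not part of the vendored crate: the begin/end pairing above
// reduces to an `Option` used as a guard. Beginning a query fails if one is already
// active, and ending fails if none is. The `u32` stands in for the query index, and
// the name `ActiveQueryGuard` is hypothetical.
struct ActiveQueryGuard(Option<u32>);

impl ActiveQueryGuard {
    fn begin(&mut self, index: u32) -> Result<(), String> {
        if let Some(old) = self.0.take() {
            return Err(format!("query {old} already started, cannot start {index}"));
        }
        self.0 = Some(index);
        Ok(())
    }

    fn end(&mut self) -> Result<u32, String> {
        self.0.take().ok_or_else(|| "no query active".to_string())
    }
}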
impl Global {
pub fn command_encoder_write_timestamp(
&self,
command_encoder_id: id::CommandEncoderId,
query_set_id: id::QuerySetId,
query_index: u32,
) -> Result<(), QueryError> {
let hub = &self.hub;
let cmd_buf = hub
.command_buffers
.get(command_encoder_id.into_command_buffer_id());
let mut cmd_buf_data = cmd_buf.data.lock();
let mut cmd_buf_data_guard = cmd_buf_data.record()?;
let cmd_buf_data = &mut *cmd_buf_data_guard;
cmd_buf
.device
.require_features(wgt::Features::TIMESTAMP_QUERY_INSIDE_ENCODERS)?;
#[cfg(feature = "trace")]
if let Some(ref mut list) = cmd_buf_data.commands {
list.push(TraceCommand::WriteTimestamp {
query_set_id,
query_index,
});
}
let raw_encoder = cmd_buf_data.encoder.open()?;
let query_set = hub.query_sets.get(query_set_id).get()?;
query_set.validate_and_write_timestamp(raw_encoder, query_index, None)?;
cmd_buf_data.trackers.query_sets.insert_single(query_set);
cmd_buf_data_guard.mark_successful();
Ok(())
}
pub fn command_encoder_resolve_query_set(
&self,
command_encoder_id: id::CommandEncoderId,
query_set_id: id::QuerySetId,
start_query: u32,
query_count: u32,
destination: id::BufferId,
destination_offset: BufferAddress,
) -> Result<(), QueryError> {
let hub = &self.hub;
let cmd_buf = hub
.command_buffers
.get(command_encoder_id.into_command_buffer_id());
let mut cmd_buf_data = cmd_buf.data.lock();
let mut cmd_buf_data_guard = cmd_buf_data.record()?;
let cmd_buf_data = &mut *cmd_buf_data_guard;
#[cfg(feature = "trace")]
if let Some(ref mut list) = cmd_buf_data.commands {
list.push(TraceCommand::ResolveQuerySet {
query_set_id,
start_query,
query_count,
destination,
destination_offset,
});
}
if destination_offset % wgt::QUERY_RESOLVE_BUFFER_ALIGNMENT != 0 {
return Err(QueryError::Resolve(ResolveError::BufferOffsetAlignment));
}
let query_set = hub.query_sets.get(query_set_id).get()?;
query_set.same_device_as(cmd_buf.as_ref())?;
let dst_buffer = hub.buffers.get(destination).get()?;
dst_buffer.same_device_as(cmd_buf.as_ref())?;
let snatch_guard = dst_buffer.device.snatchable_lock.read();
dst_buffer.check_destroyed(&snatch_guard)?;
let dst_pending = cmd_buf_data
.trackers
.buffers
.set_single(&dst_buffer, hal::BufferUses::COPY_DST);
let dst_barrier = dst_pending.map(|pending| pending.into_hal(&dst_buffer, &snatch_guard));
dst_buffer
.check_usage(wgt::BufferUsages::QUERY_RESOLVE)
.map_err(ResolveError::MissingBufferUsage)?;
let end_query = start_query + query_count;
if end_query > query_set.desc.count {
return Err(ResolveError::QueryOverrun {
start_query,
end_query,
query_set_size: query_set.desc.count,
}
.into());
}
let elements_per_query = match query_set.desc.ty {
wgt::QueryType::Occlusion => 1,
wgt::QueryType::PipelineStatistics(ps) => ps.bits().count_ones(),
wgt::QueryType::Timestamp => 1,
};
let stride = elements_per_query * wgt::QUERY_SIZE;
let bytes_used = (stride * query_count) as BufferAddress;
let buffer_start_offset = destination_offset;
let buffer_end_offset = buffer_start_offset + bytes_used;
if buffer_end_offset > dst_buffer.size {
return Err(ResolveError::BufferOverrun {
start_query,
end_query,
stride,
buffer_size: dst_buffer.size,
buffer_start_offset,
buffer_end_offset,
}
.into());
}
// TODO(https://github.com/gfx-rs/wgpu/issues/3993): Need to track initialization state.
cmd_buf_data.buffer_memory_init_actions.extend(
dst_buffer.initialization_status.read().create_action(
&dst_buffer,
buffer_start_offset..buffer_end_offset,
MemoryInitKind::ImplicitlyInitialized,
),
);
let raw_dst_buffer = dst_buffer.try_raw(&snatch_guard)?;
let raw_encoder = cmd_buf_data.encoder.open()?;
unsafe {
raw_encoder.transition_buffers(dst_barrier.as_slice());
raw_encoder.copy_query_results(
query_set.raw(),
start_query..end_query,
raw_dst_buffer,
destination_offset,
wgt::BufferSize::new_unchecked(stride as u64),
);
}
cmd_buf_data.trackers.query_sets.insert_single(query_set);
cmd_buf_data_guard.mark_successful();
Ok(())
}
}
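// Illustrative sketch, not part of the vendored crate: the destination-size math used
// by `command_encoder_resolve_query_set` above, written out as a standalone helper.
// `counters_per_query` is 1 for occlusion and timestamp queries, and the number of
// selected pipeline-statistics counters otherwise; `wgt::QUERY_SIZE` is 8 bytes.
fn required_resolve_bytes(counters_per_query: u32, query_count: u32) -> BufferAddress {
    let stride = counters_per_query * wgt::QUERY_SIZE; // bytes written per query
    (stride * query_count) as BufferAddress
}
// Resolving 4 pipeline-statistics queries that track 5 counters each therefore needs
// 4 * 5 * 8 = 160 bytes, starting at an offset aligned to
// `wgt::QUERY_RESOLVE_BUFFER_ALIGNMENT` (256 bytes).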

File diff suppressed because it is too large Load Diff

3379
vendor/wgpu-core/src/command/render.rs vendored Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,500 @@
use crate::{
binding_model::BindGroup,
id,
pipeline::RenderPipeline,
resource::{Buffer, QuerySet},
};
use wgt::{BufferAddress, BufferSize, Color};
use std::sync::Arc;
use super::{Rect, RenderBundle};
#[doc(hidden)]
#[derive(Clone, Copy, Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum RenderCommand {
SetBindGroup {
index: u32,
num_dynamic_offsets: usize,
bind_group_id: Option<id::BindGroupId>,
},
SetPipeline(id::RenderPipelineId),
SetIndexBuffer {
buffer_id: id::BufferId,
index_format: wgt::IndexFormat,
offset: BufferAddress,
size: Option<BufferSize>,
},
SetVertexBuffer {
slot: u32,
buffer_id: id::BufferId,
offset: BufferAddress,
size: Option<BufferSize>,
},
SetBlendConstant(Color),
SetStencilReference(u32),
SetViewport {
rect: Rect<f32>,
//TODO: use half-float to reduce the size?
depth_min: f32,
depth_max: f32,
},
SetScissor(Rect<u32>),
/// Set a range of push constants to values stored in [`BasePass::push_constant_data`].
///
/// See [`wgpu::RenderPass::set_push_constants`] for a detailed explanation
/// of the restrictions these commands must satisfy.
SetPushConstant {
/// Which stages we are setting push constant values for.
stages: wgt::ShaderStages,
/// The byte offset within the push constant storage to write to. This
/// must be a multiple of four.
offset: u32,
/// The number of bytes to write. This must be a multiple of four.
size_bytes: u32,
/// Index in [`BasePass::push_constant_data`] of the start of the data
/// to be written.
///
/// Note: this is not a byte offset like `offset`. Rather, it is the
/// index of the first `u32` element in `push_constant_data` to read.
///
/// `None` means zeros should be written to the destination range, and
/// there is no corresponding data in `push_constant_data`. This is used
/// by render bundles, which explicitly clear out any state that
/// post-bundle code might see.
values_offset: Option<u32>,
},
Draw {
vertex_count: u32,
instance_count: u32,
first_vertex: u32,
first_instance: u32,
},
DrawIndexed {
index_count: u32,
instance_count: u32,
first_index: u32,
base_vertex: i32,
first_instance: u32,
},
DrawIndirect {
buffer_id: id::BufferId,
offset: BufferAddress,
count: u32,
indexed: bool,
},
MultiDrawIndirectCount {
buffer_id: id::BufferId,
offset: BufferAddress,
count_buffer_id: id::BufferId,
count_buffer_offset: BufferAddress,
max_count: u32,
indexed: bool,
},
PushDebugGroup {
color: u32,
len: usize,
},
PopDebugGroup,
InsertDebugMarker {
color: u32,
len: usize,
},
WriteTimestamp {
query_set_id: id::QuerySetId,
query_index: u32,
},
BeginOcclusionQuery {
query_index: u32,
},
EndOcclusionQuery,
BeginPipelineStatisticsQuery {
query_set_id: id::QuerySetId,
query_index: u32,
},
EndPipelineStatisticsQuery,
ExecuteBundle(id::RenderBundleId),
}
impl RenderCommand {
/// Resolves all ids in a list of commands into the corresponding resource Arc.
#[cfg(any(feature = "serde", feature = "replay"))]
pub fn resolve_render_command_ids(
hub: &crate::hub::Hub,
commands: &[RenderCommand],
) -> Result<Vec<ArcRenderCommand>, super::RenderPassError> {
use super::{DrawKind, PassErrorScope, RenderPassError};
let buffers_guard = hub.buffers.read();
let bind_group_guard = hub.bind_groups.read();
let query_set_guard = hub.query_sets.read();
let pipelines_guard = hub.render_pipelines.read();
let render_bundles_guard = hub.render_bundles.read();
let resolved_commands: Vec<ArcRenderCommand> =
commands
.iter()
.map(|c| -> Result<ArcRenderCommand, RenderPassError> {
Ok(match *c {
RenderCommand::SetBindGroup {
index,
num_dynamic_offsets,
bind_group_id,
} => {
if bind_group_id.is_none() {
return Ok(ArcRenderCommand::SetBindGroup {
index,
num_dynamic_offsets,
bind_group: None,
});
}
let bind_group_id = bind_group_id.unwrap();
let bg = bind_group_guard.get(bind_group_id).get().map_err(|e| {
RenderPassError {
scope: PassErrorScope::SetBindGroup,
inner: e.into(),
}
})?;
ArcRenderCommand::SetBindGroup {
index,
num_dynamic_offsets,
bind_group: Some(bg),
}
}
RenderCommand::SetPipeline(pipeline_id) => ArcRenderCommand::SetPipeline(
pipelines_guard.get(pipeline_id).get().map_err(|e| {
RenderPassError {
scope: PassErrorScope::SetPipelineRender,
inner: e.into(),
}
})?,
),
RenderCommand::SetPushConstant {
offset,
size_bytes,
values_offset,
stages,
} => ArcRenderCommand::SetPushConstant {
offset,
size_bytes,
values_offset,
stages,
},
RenderCommand::PushDebugGroup { color, len } => {
ArcRenderCommand::PushDebugGroup { color, len }
}
RenderCommand::PopDebugGroup => ArcRenderCommand::PopDebugGroup,
RenderCommand::InsertDebugMarker { color, len } => {
ArcRenderCommand::InsertDebugMarker { color, len }
}
RenderCommand::WriteTimestamp {
query_set_id,
query_index,
} => ArcRenderCommand::WriteTimestamp {
query_set: query_set_guard.get(query_set_id).get().map_err(|e| {
RenderPassError {
scope: PassErrorScope::WriteTimestamp,
inner: e.into(),
}
})?,
query_index,
},
RenderCommand::BeginPipelineStatisticsQuery {
query_set_id,
query_index,
} => ArcRenderCommand::BeginPipelineStatisticsQuery {
query_set: query_set_guard.get(query_set_id).get().map_err(|e| {
RenderPassError {
scope: PassErrorScope::BeginPipelineStatisticsQuery,
inner: e.into(),
}
})?,
query_index,
},
RenderCommand::EndPipelineStatisticsQuery => {
ArcRenderCommand::EndPipelineStatisticsQuery
}
RenderCommand::SetIndexBuffer {
buffer_id,
index_format,
offset,
size,
} => ArcRenderCommand::SetIndexBuffer {
buffer: buffers_guard.get(buffer_id).get().map_err(|e| {
RenderPassError {
scope: PassErrorScope::SetIndexBuffer,
inner: e.into(),
}
})?,
index_format,
offset,
size,
},
RenderCommand::SetVertexBuffer {
slot,
buffer_id,
offset,
size,
} => ArcRenderCommand::SetVertexBuffer {
slot,
buffer: buffers_guard.get(buffer_id).get().map_err(|e| {
RenderPassError {
scope: PassErrorScope::SetVertexBuffer,
inner: e.into(),
}
})?,
offset,
size,
},
RenderCommand::SetBlendConstant(color) => {
ArcRenderCommand::SetBlendConstant(color)
}
RenderCommand::SetStencilReference(reference) => {
ArcRenderCommand::SetStencilReference(reference)
}
RenderCommand::SetViewport {
rect,
depth_min,
depth_max,
} => ArcRenderCommand::SetViewport {
rect,
depth_min,
depth_max,
},
RenderCommand::SetScissor(scissor) => ArcRenderCommand::SetScissor(scissor),
RenderCommand::Draw {
vertex_count,
instance_count,
first_vertex,
first_instance,
} => ArcRenderCommand::Draw {
vertex_count,
instance_count,
first_vertex,
first_instance,
},
RenderCommand::DrawIndexed {
index_count,
instance_count,
first_index,
base_vertex,
first_instance,
} => ArcRenderCommand::DrawIndexed {
index_count,
instance_count,
first_index,
base_vertex,
first_instance,
},
RenderCommand::DrawIndirect {
buffer_id,
offset,
count,
indexed,
} => ArcRenderCommand::DrawIndirect {
buffer: buffers_guard.get(buffer_id).get().map_err(|e| {
RenderPassError {
scope: PassErrorScope::Draw {
kind: if count != 1 {
DrawKind::MultiDrawIndirect
} else {
DrawKind::DrawIndirect
},
indexed,
},
inner: e.into(),
}
})?,
offset,
count,
indexed,
},
RenderCommand::MultiDrawIndirectCount {
buffer_id,
offset,
count_buffer_id,
count_buffer_offset,
max_count,
indexed,
} => {
let scope = PassErrorScope::Draw {
kind: DrawKind::MultiDrawIndirectCount,
indexed,
};
ArcRenderCommand::MultiDrawIndirectCount {
buffer: buffers_guard.get(buffer_id).get().map_err(|e| {
RenderPassError {
scope,
inner: e.into(),
}
})?,
offset,
count_buffer: buffers_guard.get(count_buffer_id).get().map_err(
|e| RenderPassError {
scope,
inner: e.into(),
},
)?,
count_buffer_offset,
max_count,
indexed,
}
}
RenderCommand::BeginOcclusionQuery { query_index } => {
ArcRenderCommand::BeginOcclusionQuery { query_index }
}
RenderCommand::EndOcclusionQuery => ArcRenderCommand::EndOcclusionQuery,
RenderCommand::ExecuteBundle(bundle) => ArcRenderCommand::ExecuteBundle(
render_bundles_guard.get(bundle).get().map_err(|e| {
RenderPassError {
scope: PassErrorScope::ExecuteBundle,
inner: e.into(),
}
})?,
),
})
})
.collect::<Result<Vec<_>, RenderPassError>>()?;
Ok(resolved_commands)
}
}
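// Illustrative sketch, not part of the vendored crate: every arm above follows the
// same lookup-and-wrap shape. Here it is reduced to its essence, with a plain
// `HashMap` standing in for the hub's registries and a `String` for the error scope;
// `resolve_one` is a hypothetical name.
fn resolve_one<T>(
    registry: &std::collections::HashMap<u32, std::sync::Arc<T>>,
    raw_id: u32,
    scope: &str,
) -> Result<std::sync::Arc<T>, String> {
    registry
        .get(&raw_id)
        .cloned()
        .ok_or_else(|| format!("{scope}: id {raw_id} is not a valid resource"))
}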
/// Equivalent to `RenderCommand` with the Ids resolved into resource Arcs.
#[doc(hidden)]
#[derive(Clone, Debug)]
pub enum ArcRenderCommand {
SetBindGroup {
index: u32,
num_dynamic_offsets: usize,
bind_group: Option<Arc<BindGroup>>,
},
SetPipeline(Arc<RenderPipeline>),
SetIndexBuffer {
buffer: Arc<Buffer>,
index_format: wgt::IndexFormat,
offset: BufferAddress,
size: Option<BufferSize>,
},
SetVertexBuffer {
slot: u32,
buffer: Arc<Buffer>,
offset: BufferAddress,
size: Option<BufferSize>,
},
SetBlendConstant(Color),
SetStencilReference(u32),
SetViewport {
rect: Rect<f32>,
depth_min: f32,
depth_max: f32,
},
SetScissor(Rect<u32>),
/// Set a range of push constants to values stored in [`BasePass::push_constant_data`].
///
/// See [`wgpu::RenderPass::set_push_constants`] for a detailed explanation
/// of the restrictions these commands must satisfy.
SetPushConstant {
/// Which stages we are setting push constant values for.
stages: wgt::ShaderStages,
/// The byte offset within the push constant storage to write to. This
/// must be a multiple of four.
offset: u32,
/// The number of bytes to write. This must be a multiple of four.
size_bytes: u32,
/// Index in [`BasePass::push_constant_data`] of the start of the data
/// to be written.
///
/// Note: this is not a byte offset like `offset`. Rather, it is the
/// index of the first `u32` element in `push_constant_data` to read.
///
/// `None` means zeros should be written to the destination range, and
/// there is no corresponding data in `push_constant_data`. This is used
/// by render bundles, which explicitly clear out any state that
/// post-bundle code might see.
values_offset: Option<u32>,
},
Draw {
vertex_count: u32,
instance_count: u32,
first_vertex: u32,
first_instance: u32,
},
DrawIndexed {
index_count: u32,
instance_count: u32,
first_index: u32,
base_vertex: i32,
first_instance: u32,
},
DrawIndirect {
buffer: Arc<Buffer>,
offset: BufferAddress,
count: u32,
indexed: bool,
},
MultiDrawIndirectCount {
buffer: Arc<Buffer>,
offset: BufferAddress,
count_buffer: Arc<Buffer>,
count_buffer_offset: BufferAddress,
max_count: u32,
indexed: bool,
},
PushDebugGroup {
#[cfg_attr(target_os = "emscripten", allow(dead_code))]
color: u32,
len: usize,
},
PopDebugGroup,
InsertDebugMarker {
#[cfg_attr(target_os = "emscripten", allow(dead_code))]
color: u32,
len: usize,
},
WriteTimestamp {
query_set: Arc<QuerySet>,
query_index: u32,
},
BeginOcclusionQuery {
query_index: u32,
},
EndOcclusionQuery,
BeginPipelineStatisticsQuery {
query_set: Arc<QuerySet>,
query_index: u32,
},
EndPipelineStatisticsQuery,
ExecuteBundle(Arc<RenderBundle>),
}

View File

@@ -0,0 +1,25 @@
use std::sync::Arc;
use crate::id;
/// Describes the writing of timestamp values in a render or compute pass.
#[derive(Clone, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct PassTimestampWrites {
/// The query set to write the timestamps to.
pub query_set: id::QuerySetId,
/// The index of the query set at which a start timestamp of this pass is written, if any.
pub beginning_of_pass_write_index: Option<u32>,
/// The index of the query set at which an end timestamp of this pass is written, if any.
pub end_of_pass_write_index: Option<u32>,
}
/// Describes the writing of timestamp values in a render or compute pass with the query set resolved.
pub struct ArcPassTimestampWrites {
/// The query set to write the timestamps to.
pub query_set: Arc<crate::resource::QuerySet>,
/// The index of the query set at which a start timestamp of this pass is written, if any.
pub beginning_of_pass_write_index: Option<u32>,
/// The index of the query set at which an end timestamp of this pass is written, if any.
pub end_of_pass_write_index: Option<u32>,
}
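// Illustrative sketch, not part of the vendored crate: a pass that records one
// timestamp at its start and one at its end into slots 0 and 1 of a query set would
// be described roughly like this. `query_set` is assumed to be a valid
// `id::QuerySetId` obtained elsewhere.
fn example_timestamp_writes(query_set: id::QuerySetId) -> PassTimestampWrites {
    PassTimestampWrites {
        query_set,
        beginning_of_pass_write_index: Some(0),
        end_of_pass_write_index: Some(1),
    }
}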

1185
vendor/wgpu-core/src/command/transfer.rs vendored Normal file

File diff suppressed because it is too large Load Diff

273
vendor/wgpu-core/src/conv.rs vendored Normal file
View File

@@ -0,0 +1,273 @@
use wgt::TextureFormatFeatures;
use crate::resource::{self, TextureDescriptor};
pub fn is_valid_copy_src_texture_format(
format: wgt::TextureFormat,
aspect: wgt::TextureAspect,
) -> bool {
use wgt::TextureAspect as Ta;
use wgt::TextureFormat as Tf;
match (format, aspect) {
(Tf::Depth24Plus, _) | (Tf::Depth24PlusStencil8, Ta::DepthOnly) => false,
_ => true,
}
}
pub fn is_valid_copy_dst_texture_format(
format: wgt::TextureFormat,
aspect: wgt::TextureAspect,
) -> bool {
use wgt::TextureAspect as Ta;
use wgt::TextureFormat as Tf;
match (format, aspect) {
(Tf::Depth24Plus | Tf::Depth32Float, _)
| (Tf::Depth24PlusStencil8 | Tf::Depth32FloatStencil8, Ta::DepthOnly) => false,
_ => true,
}
}
#[cfg_attr(
any(not(target_arch = "wasm32"), target_os = "emscripten"),
allow(unused)
)]
pub fn is_valid_external_image_copy_dst_texture_format(format: wgt::TextureFormat) -> bool {
use wgt::TextureFormat as Tf;
match format {
Tf::R8Unorm
| Tf::R16Float
| Tf::R32Float
| Tf::Rg8Unorm
| Tf::Rg16Float
| Tf::Rg32Float
| Tf::Rgba8Unorm
| Tf::Rgba8UnormSrgb
| Tf::Bgra8Unorm
| Tf::Bgra8UnormSrgb
| Tf::Rgb10a2Unorm
| Tf::Rgba16Float
| Tf::Rgba32Float => true,
_ => false,
}
}
pub fn map_buffer_usage(usage: wgt::BufferUsages) -> hal::BufferUses {
let mut u = hal::BufferUses::empty();
u.set(
hal::BufferUses::MAP_READ,
usage.contains(wgt::BufferUsages::MAP_READ),
);
u.set(
hal::BufferUses::MAP_WRITE,
usage.contains(wgt::BufferUsages::MAP_WRITE),
);
u.set(
hal::BufferUses::COPY_SRC,
usage.contains(wgt::BufferUsages::COPY_SRC),
);
u.set(
hal::BufferUses::COPY_DST,
usage.contains(wgt::BufferUsages::COPY_DST),
);
u.set(
hal::BufferUses::INDEX,
usage.contains(wgt::BufferUsages::INDEX),
);
u.set(
hal::BufferUses::VERTEX,
usage.contains(wgt::BufferUsages::VERTEX),
);
u.set(
hal::BufferUses::UNIFORM,
usage.contains(wgt::BufferUsages::UNIFORM),
);
u.set(
hal::BufferUses::STORAGE_READ_ONLY | hal::BufferUses::STORAGE_READ_WRITE,
usage.contains(wgt::BufferUsages::STORAGE),
);
u.set(
hal::BufferUses::INDIRECT,
usage.contains(wgt::BufferUsages::INDIRECT),
);
u.set(
hal::BufferUses::QUERY_RESOLVE,
usage.contains(wgt::BufferUsages::QUERY_RESOLVE),
);
u.set(
hal::BufferUses::BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT,
usage.contains(wgt::BufferUsages::BLAS_INPUT),
);
u.set(
hal::BufferUses::TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT,
usage.contains(wgt::BufferUsages::TLAS_INPUT),
);
u
}
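// Illustrative sketch, not part of the vendored crate: each wgt usage flag above is
// translated independently, so a buffer created with VERTEX | COPY_DST maps to the
// union of the corresponding hal uses. `example_vertex_upload_uses` is a hypothetical name.
fn example_vertex_upload_uses() -> hal::BufferUses {
    let uses = map_buffer_usage(wgt::BufferUsages::VERTEX | wgt::BufferUsages::COPY_DST);
    debug_assert!(uses.contains(hal::BufferUses::VERTEX | hal::BufferUses::COPY_DST));
    uses
}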
pub fn map_texture_usage(
usage: wgt::TextureUsages,
aspect: hal::FormatAspects,
flags: wgt::TextureFormatFeatureFlags,
) -> hal::TextureUses {
let mut u = hal::TextureUses::empty();
u.set(
hal::TextureUses::COPY_SRC,
usage.contains(wgt::TextureUsages::COPY_SRC),
);
u.set(
hal::TextureUses::COPY_DST,
usage.contains(wgt::TextureUsages::COPY_DST),
);
u.set(
hal::TextureUses::RESOURCE,
usage.contains(wgt::TextureUsages::TEXTURE_BINDING),
);
if usage.contains(wgt::TextureUsages::STORAGE_BINDING) {
u.set(
hal::TextureUses::STORAGE_READ_ONLY,
flags.contains(wgt::TextureFormatFeatureFlags::STORAGE_READ_ONLY),
);
u.set(
hal::TextureUses::STORAGE_WRITE_ONLY,
flags.contains(wgt::TextureFormatFeatureFlags::STORAGE_WRITE_ONLY),
);
u.set(
hal::TextureUses::STORAGE_READ_WRITE,
flags.contains(wgt::TextureFormatFeatureFlags::STORAGE_READ_WRITE),
);
}
let is_color = aspect.contains(hal::FormatAspects::COLOR);
u.set(
hal::TextureUses::COLOR_TARGET,
usage.contains(wgt::TextureUsages::RENDER_ATTACHMENT) && is_color,
);
u.set(
hal::TextureUses::DEPTH_STENCIL_READ | hal::TextureUses::DEPTH_STENCIL_WRITE,
usage.contains(wgt::TextureUsages::RENDER_ATTACHMENT) && !is_color,
);
u.set(
hal::TextureUses::STORAGE_ATOMIC,
usage.contains(wgt::TextureUsages::STORAGE_ATOMIC),
);
u
}
pub fn map_texture_usage_for_texture(
desc: &TextureDescriptor,
format_features: &TextureFormatFeatures,
) -> hal::TextureUses {
// Enforce having COPY_DST/DEPTH_STENCIL_WRITE/COLOR_TARGET, otherwise we
// wouldn't be able to initialize the texture.
map_texture_usage(desc.usage, desc.format.into(), format_features.flags)
| if desc.format.is_depth_stencil_format() {
hal::TextureUses::DEPTH_STENCIL_WRITE
} else if desc.usage.contains(wgt::TextureUsages::COPY_DST) {
hal::TextureUses::COPY_DST // (set already)
} else {
// Use COPY_DST only if we can't use COLOR_TARGET
if format_features
.allowed_usages
.contains(wgt::TextureUsages::RENDER_ATTACHMENT)
&& desc.dimension == wgt::TextureDimension::D2
// Render targets dimension must be 2d
{
hal::TextureUses::COLOR_TARGET
} else {
hal::TextureUses::COPY_DST
}
}
}
pub fn map_texture_usage_from_hal(uses: hal::TextureUses) -> wgt::TextureUsages {
let mut u = wgt::TextureUsages::empty();
u.set(
wgt::TextureUsages::COPY_SRC,
uses.contains(hal::TextureUses::COPY_SRC),
);
u.set(
wgt::TextureUsages::COPY_DST,
uses.contains(hal::TextureUses::COPY_DST),
);
u.set(
wgt::TextureUsages::TEXTURE_BINDING,
uses.contains(hal::TextureUses::RESOURCE),
);
u.set(
wgt::TextureUsages::STORAGE_BINDING,
uses.intersects(
hal::TextureUses::STORAGE_READ_ONLY
| hal::TextureUses::STORAGE_WRITE_ONLY
| hal::TextureUses::STORAGE_READ_WRITE,
),
);
u.set(
wgt::TextureUsages::RENDER_ATTACHMENT,
uses.contains(hal::TextureUses::COLOR_TARGET),
);
u.set(
wgt::TextureUsages::STORAGE_ATOMIC,
uses.contains(hal::TextureUses::STORAGE_ATOMIC),
);
u
}
pub fn check_texture_dimension_size(
dimension: wgt::TextureDimension,
wgt::Extent3d {
width,
height,
depth_or_array_layers,
}: wgt::Extent3d,
sample_size: u32,
limits: &wgt::Limits,
) -> Result<(), resource::TextureDimensionError> {
use resource::{TextureDimensionError as Tde, TextureErrorDimension as Ted};
use wgt::TextureDimension::*;
let (extent_limits, sample_limit) = match dimension {
D1 => ([limits.max_texture_dimension_1d, 1, 1], 1),
D2 => (
[
limits.max_texture_dimension_2d,
limits.max_texture_dimension_2d,
limits.max_texture_array_layers,
],
32,
),
D3 => (
[
limits.max_texture_dimension_3d,
limits.max_texture_dimension_3d,
limits.max_texture_dimension_3d,
],
1,
),
};
for (&dim, (&given, &limit)) in [Ted::X, Ted::Y, Ted::Z].iter().zip(
[width, height, depth_or_array_layers]
.iter()
.zip(extent_limits.iter()),
) {
if given == 0 {
return Err(Tde::Zero(dim));
}
if given > limit {
return Err(Tde::LimitExceeded { dim, given, limit });
}
}
if sample_size == 0 || sample_size > sample_limit || !sample_size.is_power_of_two() {
return Err(Tde::InvalidSampleCount(sample_size));
}
Ok(())
}
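// Illustrative sketch, not part of the vendored crate: a 4096x4096 2D texture with
// 4x MSAA passes as long as `max_texture_dimension_2d` is at least 4096; a sample
// count of 3 would be rejected because it is not a power of two.
fn example_dimension_check(limits: &wgt::Limits) -> Result<(), resource::TextureDimensionError> {
    check_texture_dimension_size(
        wgt::TextureDimension::D2,
        wgt::Extent3d {
            width: 4096,
            height: 4096,
            depth_or_array_layers: 1,
        },
        4, // sample count: must be a power of two and no larger than 32 for D2
        limits,
    )
}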
pub fn bind_group_layout_flags(features: wgt::Features) -> hal::BindGroupLayoutFlags {
let mut flags = hal::BindGroupLayoutFlags::empty();
flags.set(
hal::BindGroupLayoutFlags::PARTIALLY_BOUND,
features.contains(wgt::Features::PARTIALLY_BOUND_BINDING_ARRAY),
);
flags
}

134
vendor/wgpu-core/src/device/bgl.rs vendored Normal file
View File

@@ -0,0 +1,134 @@
use std::hash::{Hash, Hasher};
use crate::{
binding_model::{self},
FastIndexMap,
};
/// Where a given BGL came from.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum Origin {
/// The bind group layout was created by the user and is present in the BGL resource pool.
Pool,
/// The bind group layout was derived and is not present in the BGL resource pool.
Derived,
}
/// A HashMap-like structure that stores a BindGroupLayout's [`wgt::BindGroupLayoutEntry`]s.
///
/// It is hashable, so bind group layouts can be deduplicated.
#[derive(Debug, Default, Clone, Eq)]
pub struct EntryMap {
/// We use an IndexMap here so that we can sort the entries by their binding index,
/// guaranteeing that the hash of equivalent layouts will be the same.
inner: FastIndexMap<u32, wgt::BindGroupLayoutEntry>,
/// We keep track of whether the map is sorted, so that we can assert it is
/// sorted before it is hashed or compared, keeping PartialEq and Hash stable.
///
/// Sortedness only matters for Hash and PartialEq, so we never need to
/// actively keep the map sorted.
sorted: bool,
}
impl PartialEq for EntryMap {
fn eq(&self, other: &Self) -> bool {
self.assert_sorted();
other.assert_sorted();
self.inner == other.inner
}
}
impl Hash for EntryMap {
fn hash<H: Hasher>(&self, state: &mut H) {
self.assert_sorted();
// We don't need to hash the keys, since they are just extracted from the values.
//
// We know this is stable and will match the behavior of PartialEq as we ensure
// that the array is sorted.
for entry in self.inner.values() {
entry.hash(state);
}
}
}
impl EntryMap {
fn assert_sorted(&self) {
assert!(self.sorted);
}
/// Create a new [`EntryMap`] from a slice of [`wgt::BindGroupLayoutEntry`]s.
///
/// Errors if there are duplicate bindings or if any binding index is not below
/// the device's `max_bindings_per_bind_group` limit.
pub fn from_entries(
device_limits: &wgt::Limits,
entries: &[wgt::BindGroupLayoutEntry],
) -> Result<Self, binding_model::CreateBindGroupLayoutError> {
let mut inner = FastIndexMap::with_capacity_and_hasher(entries.len(), Default::default());
for entry in entries {
if entry.binding >= device_limits.max_bindings_per_bind_group {
return Err(
binding_model::CreateBindGroupLayoutError::InvalidBindingIndex {
binding: entry.binding,
maximum: device_limits.max_bindings_per_bind_group,
},
);
}
if inner.insert(entry.binding, *entry).is_some() {
return Err(binding_model::CreateBindGroupLayoutError::ConflictBinding(
entry.binding,
));
}
}
inner.sort_unstable_keys();
Ok(Self {
inner,
sorted: true,
})
}
/// Get the count of [`wgt::BindGroupLayoutEntry`]s in this map.
pub fn len(&self) -> usize {
self.inner.len()
}
/// Get the [`wgt::BindGroupLayoutEntry`] for the given binding index.
pub fn get(&self, binding: u32) -> Option<&wgt::BindGroupLayoutEntry> {
self.inner.get(&binding)
}
/// Iterator over all the binding indices in this map.
pub fn indices(&self) -> impl ExactSizeIterator<Item = u32> + '_ {
self.inner.keys().copied()
}
/// Iterator over all the [`wgt::BindGroupLayoutEntry`]s in this map.
pub fn values(&self) -> impl ExactSizeIterator<Item = &wgt::BindGroupLayoutEntry> + '_ {
self.inner.values()
}
pub fn iter(&self) -> impl ExactSizeIterator<Item = (&u32, &wgt::BindGroupLayoutEntry)> + '_ {
self.inner.iter()
}
pub fn is_empty(&self) -> bool {
self.inner.is_empty()
}
pub fn contains_key(&self, key: u32) -> bool {
self.inner.contains_key(&key)
}
pub fn entry(&mut self, key: u32) -> indexmap::map::Entry<'_, u32, wgt::BindGroupLayoutEntry> {
self.sorted = false;
self.inner.entry(key)
}
pub fn sort(&mut self) {
self.inner.sort_unstable_keys();
self.sorted = true;
}
}
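// Illustrative sketch, not part of the vendored crate: two layouts that list the same
// bindings in a different order hash and compare as equal, because `from_entries`
// sorts by binding index before the map is ever hashed or compared.
fn example_entry_dedup(
    limits: &wgt::Limits,
    entries: &[wgt::BindGroupLayoutEntry],
) -> Result<bool, binding_model::CreateBindGroupLayoutError> {
    let mut reversed = entries.to_vec();
    reversed.reverse();
    let left = EntryMap::from_entries(limits, entries)?;
    let right = EntryMap::from_entries(limits, &reversed)?;
    Ok(left == right) // always true when `entries` has no duplicate binding indices
}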

2272
vendor/wgpu-core/src/device/global.rs vendored Normal file

File diff suppressed because it is too large Load Diff

334
vendor/wgpu-core/src/device/life.rs vendored Normal file
View File

@@ -0,0 +1,334 @@
use crate::{
device::{
queue::{EncoderInFlight, SubmittedWorkDoneClosure, TempResource},
DeviceError,
},
resource::{Buffer, Texture, Trackable},
snatch::SnatchGuard,
SubmissionIndex,
};
use smallvec::SmallVec;
use std::sync::Arc;
use thiserror::Error;
/// A command submitted to the GPU for execution.
///
/// ## Keeping resources alive while the GPU is using them
///
/// [`wgpu_hal`] requires that, when a command is submitted to a queue, all the
/// resources it uses must remain alive until it has finished executing.
///
/// [`wgpu_hal`]: hal
/// [`ResourceInfo::submission_index`]: crate::resource::ResourceInfo
struct ActiveSubmission {
/// The index of the submission we track.
///
/// When `Device::fence`'s value is greater than or equal to this, our queue
/// submission has completed.
index: SubmissionIndex,
/// Buffers to be mapped once this submission has completed.
mapped: Vec<Arc<Buffer>>,
/// Command buffers used by this submission, and the encoder that owns them.
///
/// [`wgpu_hal::Queue::submit`] requires the submitted command buffers to
/// remain alive until the submission has completed execution. Command
/// encoders double as allocation pools for command buffers, so holding them
/// here and cleaning them up in [`LifetimeTracker::triage_submissions`]
/// satisfies that requirement.
///
/// Once this submission has completed, the command buffers are reset and
/// the command encoder is recycled.
///
/// [`wgpu_hal::Queue::submit`]: hal::Queue::submit
encoders: Vec<EncoderInFlight>,
/// List of queue "on_submitted_work_done" closures to be called once this
/// submission has completed.
work_done_closures: SmallVec<[SubmittedWorkDoneClosure; 1]>,
}
impl ActiveSubmission {
/// Returns true if this submission contains the given buffer.
///
/// This only uses constant-time operations.
pub fn contains_buffer(&self, buffer: &Buffer) -> bool {
for encoder in &self.encoders {
// The ownership location of buffers depends on where the command encoder
// came from. If it is the staging command encoder on the queue, it is
// in the pending buffer list. If it came from a user command encoder,
// it is in the tracker.
if encoder.trackers.buffers.contains(buffer) {
return true;
}
if encoder
.pending_buffers
.contains_key(&buffer.tracker_index())
{
return true;
}
}
false
}
/// Returns true if this submission contains the given texture.
///
/// This only uses constant-time operations.
pub fn contains_texture(&self, texture: &Texture) -> bool {
for encoder in &self.encoders {
// The ownership location of textures depends on where the command encoder
// came from. If it is the staging command encoder on the queue, it is
// in the pending buffer list. If it came from a user command encoder,
// it is in the tracker.
if encoder.trackers.textures.contains(texture) {
return true;
}
if encoder
.pending_textures
.contains_key(&texture.tracker_index())
{
return true;
}
}
false
}
}
#[derive(Clone, Debug, Error)]
#[non_exhaustive]
pub enum WaitIdleError {
#[error(transparent)]
Device(#[from] DeviceError),
#[error("Tried to wait using a submission index ({0}) that has not been returned by a successful submission (last successful submission: {1})")]
WrongSubmissionIndex(SubmissionIndex, SubmissionIndex),
}
/// Resource tracking for a device.
///
/// ## Host mapping buffers
///
/// A buffer cannot be mapped until all active queue submissions that use it
/// have completed. To that end:
///
/// - Each buffer's `ResourceInfo::submission_index` records the index of the
/// most recent queue submission that uses that buffer.
///
/// - When the device is polled, the following `LifetimeTracker` methods decide
/// what should happen next:
///
/// 1) `triage_submissions` moves entries in `self.active[i]` for completed
/// submissions to `self.ready_to_map`. At this point, both
/// `self.active` and `self.ready_to_map` are up to date with the given
/// submission index.
///
/// 2) `handle_mapping` drains `self.ready_to_map` and actually maps the
/// buffers, collecting a list of notification closures to call.
///
/// Only calling `Global::buffer_map_async` clones a new `Arc` for the
/// buffer. This new `Arc` is only dropped by `handle_mapping`.
pub(crate) struct LifetimeTracker {
/// Resources used by queue submissions still in flight. One entry per
/// submission, with older submissions appearing before younger.
///
/// Entries are added by `track_submission` and drained by
/// `LifetimeTracker::triage_submissions`. Lots of methods contribute data
/// to particular entries.
active: Vec<ActiveSubmission>,
/// Buffers the user has asked us to map, and which are not used by any
/// queue submission still in flight.
ready_to_map: Vec<Arc<Buffer>>,
/// Queue "on_submitted_work_done" closures that were initiated for while there is no
/// currently pending submissions. These cannot be immediately invoked as they
/// must happen _after_ all mapped buffer callbacks are mapped, so we defer them
/// here until the next time the device is maintained.
work_done_closures: SmallVec<[SubmittedWorkDoneClosure; 1]>,
}
impl LifetimeTracker {
pub fn new() -> Self {
Self {
active: Vec::new(),
ready_to_map: Vec::new(),
work_done_closures: SmallVec::new(),
}
}
/// Return true if there are no queue submissions still in flight.
pub fn queue_empty(&self) -> bool {
self.active.is_empty()
}
/// Start tracking resources associated with a new queue submission.
pub fn track_submission(&mut self, index: SubmissionIndex, encoders: Vec<EncoderInFlight>) {
self.active.push(ActiveSubmission {
index,
mapped: Vec::new(),
encoders,
work_done_closures: SmallVec::new(),
});
}
pub(crate) fn map(&mut self, buffer: &Arc<Buffer>) -> Option<SubmissionIndex> {
// Determine which buffers are ready to map, and which must wait for the GPU.
let submission = self
.active
.iter_mut()
.rev()
.find(|a| a.contains_buffer(buffer));
let maybe_submission_index = submission.as_ref().map(|s| s.index);
submission
.map_or(&mut self.ready_to_map, |a| &mut a.mapped)
.push(buffer.clone());
maybe_submission_index
}
/// Returns the submission index of the most recent submission that uses the
/// given buffer.
pub fn get_buffer_latest_submission_index(&self, buffer: &Buffer) -> Option<SubmissionIndex> {
// We iterate in reverse order, so that we can bail out early as soon
// as we find a hit.
self.active.iter().rev().find_map(|submission| {
if submission.contains_buffer(buffer) {
Some(submission.index)
} else {
None
}
})
}
/// Returns the submission index of the most recent submission that uses the
/// given texture.
pub fn get_texture_latest_submission_index(
&self,
texture: &Texture,
) -> Option<SubmissionIndex> {
// We iterate in reverse order, so that we can bail out early as soon
// as we find a hit.
self.active.iter().rev().find_map(|submission| {
if submission.contains_texture(texture) {
Some(submission.index)
} else {
None
}
})
}
/// Sort out the consequences of completed submissions.
///
/// Assume that all submissions up through `last_done` have completed.
///
/// - Buffers used by those submissions are now ready to map, if requested.
/// Add any buffers in the submission's [`mapped`] list to
/// [`self.ready_to_map`], where [`LifetimeTracker::handle_mapping`]
/// will find them.
///
/// Return a list of [`SubmittedWorkDoneClosure`]s to run.
///
/// [`mapped`]: ActiveSubmission::mapped
/// [`self.ready_to_map`]: LifetimeTracker::ready_to_map
/// [`SubmittedWorkDoneClosure`]: crate::device::queue::SubmittedWorkDoneClosure
#[must_use]
pub fn triage_submissions(
&mut self,
last_done: SubmissionIndex,
) -> SmallVec<[SubmittedWorkDoneClosure; 1]> {
profiling::scope!("triage_submissions");
//TODO: enable when `is_sorted_by_key` is stable
//debug_assert!(self.active.is_sorted_by_key(|a| a.index));
let done_count = self
.active
.iter()
.position(|a| a.index > last_done)
.unwrap_or(self.active.len());
let mut work_done_closures: SmallVec<_> = self.work_done_closures.drain(..).collect();
for a in self.active.drain(..done_count) {
self.ready_to_map.extend(a.mapped);
for encoder in a.encoders {
// This involves actually decrementing the ref count of all command buffer
// resources, so can be _very_ expensive.
profiling::scope!("drop command buffer trackers");
drop(encoder);
}
work_done_closures.extend(a.work_done_closures);
}
work_done_closures
}
pub fn schedule_resource_destruction(
&mut self,
temp_resource: TempResource,
last_submit_index: SubmissionIndex,
) {
let resources = self
.active
.iter_mut()
.find(|a| a.index == last_submit_index)
.map(|a| {
// Because this resource's `last_submit_index` matches `a.index`,
// we know that we must have done something with the resource,
// so `a.encoders` should not be empty.
&mut a.encoders.last_mut().unwrap().temp_resources
});
if let Some(resources) = resources {
resources.push(temp_resource);
}
}
pub fn add_work_done_closure(
&mut self,
closure: SubmittedWorkDoneClosure,
) -> Option<SubmissionIndex> {
match self.active.last_mut() {
Some(active) => {
active.work_done_closures.push(closure);
Some(active.index)
}
// We must defer the closure until all previously occurring map_async closures
// have fired. This is required by the spec.
None => {
self.work_done_closures.push(closure);
None
}
}
}
/// Map the buffers in `self.ready_to_map`.
///
/// Return a list of mapping notifications to send.
///
/// See the documentation for [`LifetimeTracker`] for details.
#[must_use]
pub(crate) fn handle_mapping(
&mut self,
snatch_guard: &SnatchGuard,
) -> Vec<super::BufferMapPendingClosure> {
if self.ready_to_map.is_empty() {
return Vec::new();
}
let mut pending_callbacks: Vec<super::BufferMapPendingClosure> =
Vec::with_capacity(self.ready_to_map.len());
for buffer in self.ready_to_map.drain(..) {
match buffer.map(snatch_guard) {
Some(cb) => pending_callbacks.push(cb),
None => continue,
}
}
pending_callbacks
}
}
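// Illustrative sketch, not part of the vendored crate: `triage_submissions` relies on
// `active` being sorted by submission index, so the completed prefix is simply the
// position of the first entry whose index is still above `last_done`.
fn completed_prefix_len(active_indices: &[SubmissionIndex], last_done: SubmissionIndex) -> usize {
    active_indices
        .iter()
        .position(|&index| index > last_done)
        .unwrap_or(active_indices.len())
}
// With active submissions [3, 5, 7] and `last_done = 5`, the first two entries are
// drained: their mapped buffers move to `ready_to_map` and their encoders are recycled.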

470
vendor/wgpu-core/src/device/mod.rs vendored Normal file
View File

@@ -0,0 +1,470 @@
use crate::{
binding_model,
hub::Hub,
id::{BindGroupLayoutId, PipelineLayoutId},
resource::{
Buffer, BufferAccessError, BufferAccessResult, BufferMapOperation, Labeled,
ResourceErrorIdent,
},
snatch::SnatchGuard,
Label, DOWNLEVEL_ERROR_MESSAGE,
};
use arrayvec::ArrayVec;
use smallvec::SmallVec;
use thiserror::Error;
use wgt::{BufferAddress, DeviceLostReason, TextureFormat};
use std::num::NonZeroU32;
pub(crate) mod bgl;
pub mod global;
mod life;
pub mod queue;
pub mod ray_tracing;
pub mod resource;
#[cfg(any(feature = "trace", feature = "replay"))]
pub mod trace;
pub use {life::WaitIdleError, resource::Device};
pub const SHADER_STAGE_COUNT: usize = hal::MAX_CONCURRENT_SHADER_STAGES;
// Should be large enough for the largest possible texture row. This
// value is enough for a 16k texture with float4 format.
pub(crate) const ZERO_BUFFER_SIZE: BufferAddress = 512 << 10;
// If a submission is not completed within this time, we go off into UB land.
// See https://github.com/gfx-rs/wgpu/issues/4589. 60s is chosen to reduce the chances of this.
const CLEANUP_WAIT_MS: u32 = 60000;
pub(crate) const ENTRYPOINT_FAILURE_ERROR: &str = "The given EntryPoint is Invalid";
pub type DeviceDescriptor<'a> = wgt::DeviceDescriptor<Label<'a>>;
#[repr(C)]
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum HostMap {
Read,
Write,
}
#[derive(Clone, Debug, Hash, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
pub(crate) struct AttachmentData<T> {
pub colors: ArrayVec<Option<T>, { hal::MAX_COLOR_ATTACHMENTS }>,
pub resolves: ArrayVec<T, { hal::MAX_COLOR_ATTACHMENTS }>,
pub depth_stencil: Option<T>,
}
impl<T: PartialEq> Eq for AttachmentData<T> {}
#[derive(Clone, Debug, Hash, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
pub(crate) struct RenderPassContext {
pub attachments: AttachmentData<TextureFormat>,
pub sample_count: u32,
pub multiview: Option<NonZeroU32>,
}
#[derive(Clone, Debug, Error)]
#[non_exhaustive]
pub enum RenderPassCompatibilityError {
#[error(
"Incompatible color attachments at indices {indices:?}: the RenderPass uses textures with formats {expected:?} but the {res} uses attachments with formats {actual:?}",
)]
IncompatibleColorAttachment {
indices: Vec<usize>,
expected: Vec<Option<TextureFormat>>,
actual: Vec<Option<TextureFormat>>,
res: ResourceErrorIdent,
},
#[error(
"Incompatible depth-stencil attachment format: the RenderPass uses a texture with format {expected:?} but the {res} uses an attachment with format {actual:?}",
)]
IncompatibleDepthStencilAttachment {
expected: Option<TextureFormat>,
actual: Option<TextureFormat>,
res: ResourceErrorIdent,
},
#[error(
"Incompatible sample count: the RenderPass uses textures with sample count {expected:?} but the {res} uses attachments with format {actual:?}",
)]
IncompatibleSampleCount {
expected: u32,
actual: u32,
res: ResourceErrorIdent,
},
#[error("Incompatible multiview setting: the RenderPass uses setting {expected:?} but the {res} uses setting {actual:?}")]
IncompatibleMultiview {
expected: Option<NonZeroU32>,
actual: Option<NonZeroU32>,
res: ResourceErrorIdent,
},
}
impl RenderPassContext {
// Assumes the renderpass only contains one subpass
pub(crate) fn check_compatible<T: Labeled>(
&self,
other: &Self,
res: &T,
) -> Result<(), RenderPassCompatibilityError> {
if self.attachments.colors != other.attachments.colors {
let indices = self
.attachments
.colors
.iter()
.zip(&other.attachments.colors)
.enumerate()
.filter_map(|(idx, (left, right))| (left != right).then_some(idx))
.collect();
return Err(RenderPassCompatibilityError::IncompatibleColorAttachment {
indices,
expected: self.attachments.colors.iter().cloned().collect(),
actual: other.attachments.colors.iter().cloned().collect(),
res: res.error_ident(),
});
}
if self.attachments.depth_stencil != other.attachments.depth_stencil {
return Err(
RenderPassCompatibilityError::IncompatibleDepthStencilAttachment {
expected: self.attachments.depth_stencil,
actual: other.attachments.depth_stencil,
res: res.error_ident(),
},
);
}
if self.sample_count != other.sample_count {
return Err(RenderPassCompatibilityError::IncompatibleSampleCount {
expected: self.sample_count,
actual: other.sample_count,
res: res.error_ident(),
});
}
if self.multiview != other.multiview {
return Err(RenderPassCompatibilityError::IncompatibleMultiview {
expected: self.multiview,
actual: other.multiview,
res: res.error_ident(),
});
}
Ok(())
}
}
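// Illustrative sketch, not part of the vendored crate: the color-attachment check
// above reports only the indices that actually differ. This standalone version of the
// same zip/filter_map pattern makes that explicit; `mismatched_color_indices` is a
// hypothetical helper.
fn mismatched_color_indices(
    pass: &[Option<TextureFormat>],
    other: &[Option<TextureFormat>],
) -> Vec<usize> {
    pass.iter()
        .zip(other)
        .enumerate()
        .filter_map(|(idx, (left, right))| (left != right).then_some(idx))
        .collect()
}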
pub type BufferMapPendingClosure = (BufferMapOperation, BufferAccessResult);
#[derive(Default)]
pub struct UserClosures {
pub mappings: Vec<BufferMapPendingClosure>,
pub submissions: SmallVec<[queue::SubmittedWorkDoneClosure; 1]>,
pub device_lost_invocations: SmallVec<[DeviceLostInvocation; 1]>,
}
impl UserClosures {
fn extend(&mut self, other: Self) {
self.mappings.extend(other.mappings);
self.submissions.extend(other.submissions);
self.device_lost_invocations
.extend(other.device_lost_invocations);
}
fn fire(self) {
// Note: this logic is specifically moved out of `handle_mapping()` so that
// nothing is locked by the time we execute the user's callback code.
// Mapping callbacks _must_ be fired before submission callbacks, as the spec requires
// every mapping callback registered before an on_submitted_work_done callback to fire
// before that on_submitted_work_done callback.
for (mut operation, status) in self.mappings {
if let Some(callback) = operation.callback.take() {
callback(status);
}
}
for closure in self.submissions {
closure();
}
for invocation in self.device_lost_invocations {
(invocation.closure)(invocation.reason, invocation.message);
}
}
}
#[cfg(send_sync)]
pub type DeviceLostClosure = Box<dyn FnOnce(DeviceLostReason, String) + Send + 'static>;
#[cfg(not(send_sync))]
pub type DeviceLostClosure = Box<dyn FnOnce(DeviceLostReason, String) + 'static>;
pub struct DeviceLostInvocation {
closure: DeviceLostClosure,
reason: DeviceLostReason,
message: String,
}
pub(crate) fn map_buffer(
buffer: &Buffer,
offset: BufferAddress,
size: BufferAddress,
kind: HostMap,
snatch_guard: &SnatchGuard,
) -> Result<hal::BufferMapping, BufferAccessError> {
let raw_device = buffer.device.raw();
let raw_buffer = buffer.try_raw(snatch_guard)?;
let mapping = unsafe {
raw_device
.map_buffer(raw_buffer, offset..offset + size)
.map_err(|e| buffer.device.handle_hal_error(e))?
};
if !mapping.is_coherent && kind == HostMap::Read {
#[allow(clippy::single_range_in_vec_init)]
unsafe {
raw_device.invalidate_mapped_ranges(raw_buffer, &[offset..offset + size]);
}
}
assert_eq!(offset % wgt::COPY_BUFFER_ALIGNMENT, 0);
assert_eq!(size % wgt::COPY_BUFFER_ALIGNMENT, 0);
// Zero out uninitialized parts of the mapping. (Spec dictates all resources
// behave as if they were initialized with zero)
//
// If this is a read mapping, ideally we would use a `clear_buffer` command
// before reading the data from GPU (i.e. `invalidate_range`). However, this
// would require us to kick off and wait for a command buffer or piggy back
// on an existing one (the latter is likely the only worthwhile option). As
// reading uninitialized memory isn't a particularly important path to
// support, we instead just initialize the memory here and make sure it is
// GPU visible, so this happens at most once for every buffer region.
//
// If this is a write mapping zeroing out the memory here is the only
// reasonable way as all data is pushed to GPU anyways.
let mapped = unsafe { std::slice::from_raw_parts_mut(mapping.ptr.as_ptr(), size as usize) };
// We can't call flush_mapped_ranges in this case, so we can't drain the uninitialized ranges either
if !mapping.is_coherent
&& kind == HostMap::Read
&& !buffer.usage.contains(wgt::BufferUsages::MAP_WRITE)
{
for uninitialized in buffer
.initialization_status
.write()
.uninitialized(offset..(size + offset))
{
// The mapping's pointer is already offset, however we track the
// uninitialized range relative to the buffer's start.
let fill_range =
(uninitialized.start - offset) as usize..(uninitialized.end - offset) as usize;
mapped[fill_range].fill(0);
}
} else {
for uninitialized in buffer
.initialization_status
.write()
.drain(offset..(size + offset))
{
// The mapping's pointer is already offset, however we track the
// uninitialized range relative to the buffer's start.
let fill_range =
(uninitialized.start - offset) as usize..(uninitialized.end - offset) as usize;
mapped[fill_range].fill(0);
// NOTE: This is only possible when MAPPABLE_PRIMARY_BUFFERS is enabled.
if !mapping.is_coherent
&& kind == HostMap::Read
&& buffer.usage.contains(wgt::BufferUsages::MAP_WRITE)
{
unsafe { raw_device.flush_mapped_ranges(raw_buffer, &[uninitialized]) };
}
}
}
Ok(mapping)
}
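// Illustrative sketch, not part of the vendored crate: uninitialized ranges are
// tracked relative to the start of the buffer, while the mapped slice starts at
// `offset`, so each range is rebased before it is zeroed. `rebase_to_mapping` is a
// hypothetical helper.
fn rebase_to_mapping(
    uninitialized: std::ops::Range<BufferAddress>,
    offset: BufferAddress,
) -> std::ops::Range<usize> {
    (uninitialized.start - offset) as usize..(uninitialized.end - offset) as usize
}
// Mapping offset 256..512 of a buffer whose bytes 300..400 are uninitialized zeroes
// `mapped[44..144]` before the pointer is handed back to the caller.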
#[derive(Clone, Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct DeviceMismatch {
pub(super) res: ResourceErrorIdent,
pub(super) res_device: ResourceErrorIdent,
pub(super) target: Option<ResourceErrorIdent>,
pub(super) target_device: ResourceErrorIdent,
}
impl std::fmt::Display for DeviceMismatch {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
write!(
f,
"{} of {} doesn't match {}",
self.res_device, self.res, self.target_device
)?;
if let Some(target) = self.target.as_ref() {
write!(f, " of {target}")?;
}
Ok(())
}
}
impl std::error::Error for DeviceMismatch {}
#[derive(Clone, Debug, Error)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[non_exhaustive]
pub enum DeviceError {
#[error("{0} is invalid.")]
Invalid(ResourceErrorIdent),
#[error("Parent device is lost")]
Lost,
#[error("Not enough memory left.")]
OutOfMemory,
#[error("Creation of a resource failed for a reason other than running out of memory.")]
ResourceCreationFailed,
#[error(transparent)]
DeviceMismatch(#[from] Box<DeviceMismatch>),
}
impl DeviceError {
/// Only use this function in contexts where there is no `Device`.
///
/// Use [`Device::handle_hal_error`] otherwise.
pub fn from_hal(error: hal::DeviceError) -> Self {
match error {
hal::DeviceError::Lost => Self::Lost,
hal::DeviceError::OutOfMemory => Self::OutOfMemory,
hal::DeviceError::ResourceCreationFailed => Self::ResourceCreationFailed,
hal::DeviceError::Unexpected => Self::Lost,
}
}
}
#[derive(Clone, Debug, Error)]
#[error("Features {0:?} are required but not enabled on the device")]
pub struct MissingFeatures(pub wgt::Features);
#[derive(Clone, Debug, Error)]
#[error(
"Downlevel flags {0:?} are required but not supported on the device.\n{DOWNLEVEL_ERROR_MESSAGE}",
)]
pub struct MissingDownlevelFlags(pub wgt::DownlevelFlags);
#[derive(Clone, Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct ImplicitPipelineContext {
pub root_id: PipelineLayoutId,
pub group_ids: ArrayVec<BindGroupLayoutId, { hal::MAX_BIND_GROUPS }>,
}
pub struct ImplicitPipelineIds<'a> {
pub root_id: PipelineLayoutId,
pub group_ids: &'a [BindGroupLayoutId],
}
impl ImplicitPipelineIds<'_> {
fn prepare(self, hub: &Hub) -> ImplicitPipelineContext {
ImplicitPipelineContext {
root_id: hub.pipeline_layouts.prepare(Some(self.root_id)).id(),
group_ids: self
.group_ids
.iter()
.map(|id_in| hub.bind_group_layouts.prepare(Some(*id_in)).id())
.collect(),
}
}
}
/// Create a validator with the given validation flags.
pub fn create_validator(
features: wgt::Features,
downlevel: wgt::DownlevelFlags,
flags: naga::valid::ValidationFlags,
) -> naga::valid::Validator {
use naga::valid::Capabilities as Caps;
let mut caps = Caps::empty();
caps.set(
Caps::PUSH_CONSTANT,
features.contains(wgt::Features::PUSH_CONSTANTS),
);
caps.set(Caps::FLOAT64, features.contains(wgt::Features::SHADER_F64));
caps.set(
Caps::PRIMITIVE_INDEX,
features.contains(wgt::Features::SHADER_PRIMITIVE_INDEX),
);
caps.set(
Caps::SAMPLED_TEXTURE_AND_STORAGE_BUFFER_ARRAY_NON_UNIFORM_INDEXING,
features
.contains(wgt::Features::SAMPLED_TEXTURE_AND_STORAGE_BUFFER_ARRAY_NON_UNIFORM_INDEXING),
);
caps.set(
Caps::UNIFORM_BUFFER_AND_STORAGE_TEXTURE_ARRAY_NON_UNIFORM_INDEXING,
features
.contains(wgt::Features::UNIFORM_BUFFER_AND_STORAGE_TEXTURE_ARRAY_NON_UNIFORM_INDEXING),
);
// TODO: This needs a proper wgpu feature
caps.set(
Caps::SAMPLER_NON_UNIFORM_INDEXING,
features
.contains(wgt::Features::SAMPLED_TEXTURE_AND_STORAGE_BUFFER_ARRAY_NON_UNIFORM_INDEXING),
);
caps.set(
Caps::STORAGE_TEXTURE_16BIT_NORM_FORMATS,
features.contains(wgt::Features::TEXTURE_FORMAT_16BIT_NORM),
);
caps.set(Caps::MULTIVIEW, features.contains(wgt::Features::MULTIVIEW));
caps.set(
Caps::EARLY_DEPTH_TEST,
features.contains(wgt::Features::SHADER_EARLY_DEPTH_TEST),
);
caps.set(
Caps::SHADER_INT64,
features.contains(wgt::Features::SHADER_INT64),
);
caps.set(
Caps::SHADER_INT64_ATOMIC_MIN_MAX,
features.intersects(
wgt::Features::SHADER_INT64_ATOMIC_MIN_MAX | wgt::Features::SHADER_INT64_ATOMIC_ALL_OPS,
),
);
caps.set(
Caps::SHADER_INT64_ATOMIC_ALL_OPS,
features.contains(wgt::Features::SHADER_INT64_ATOMIC_ALL_OPS),
);
caps.set(
Caps::TEXTURE_ATOMIC,
features.contains(wgt::Features::TEXTURE_ATOMIC),
);
caps.set(
Caps::TEXTURE_INT64_ATOMIC,
features.contains(wgt::Features::TEXTURE_INT64_ATOMIC),
);
caps.set(
Caps::SHADER_FLOAT32_ATOMIC,
features.contains(wgt::Features::SHADER_FLOAT32_ATOMIC),
);
caps.set(
Caps::MULTISAMPLED_SHADING,
downlevel.contains(wgt::DownlevelFlags::MULTISAMPLED_SHADING),
);
caps.set(
Caps::DUAL_SOURCE_BLENDING,
features.contains(wgt::Features::DUAL_SOURCE_BLENDING),
);
caps.set(
Caps::CUBE_ARRAY_TEXTURES,
downlevel.contains(wgt::DownlevelFlags::CUBE_ARRAY_TEXTURES),
);
caps.set(
Caps::SUBGROUP,
features.intersects(wgt::Features::SUBGROUP | wgt::Features::SUBGROUP_VERTEX),
);
caps.set(
Caps::SUBGROUP_BARRIER,
features.intersects(wgt::Features::SUBGROUP_BARRIER),
);
caps.set(
Caps::RAY_QUERY,
features.intersects(wgt::Features::EXPERIMENTAL_RAY_QUERY),
);
caps.set(
Caps::SUBGROUP_VERTEX_STAGE,
features.contains(wgt::Features::SUBGROUP_VERTEX),
);
naga::valid::Validator::new(flags, caps)
}
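// Illustrative sketch, not part of the vendored crate: callers build a validator from
// the device's features and downlevel flags and then run it over a naga module; an
// empty module validates trivially. `example_validate_empty_module` is a hypothetical name.
fn example_validate_empty_module(features: wgt::Features, downlevel: wgt::DownlevelFlags) -> bool {
    let mut validator = create_validator(features, downlevel, naga::valid::ValidationFlags::all());
    validator.validate(&naga::Module::default()).is_ok()
}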

1555
vendor/wgpu-core/src/device/queue.rs vendored Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,275 @@
use std::mem::ManuallyDrop;
use std::sync::Arc;
use crate::api_log;
#[cfg(feature = "trace")]
use crate::device::trace;
use crate::lock::rank;
use crate::resource::{Fallible, TrackingData};
use crate::snatch::Snatchable;
use crate::{
device::{Device, DeviceError},
global::Global,
id::{self, BlasId, TlasId},
lock::RwLock,
ray_tracing::{CreateBlasError, CreateTlasError},
resource, LabelHelpers,
};
use hal::AccelerationStructureTriangleIndices;
use wgt::Features;
impl Device {
fn create_blas(
self: &Arc<Self>,
blas_desc: &resource::BlasDescriptor,
sizes: wgt::BlasGeometrySizeDescriptors,
) -> Result<Arc<resource::Blas>, CreateBlasError> {
self.check_is_valid()?;
self.require_features(Features::EXPERIMENTAL_RAY_TRACING_ACCELERATION_STRUCTURE)?;
let size_info = match &sizes {
wgt::BlasGeometrySizeDescriptors::Triangles { descriptors } => {
let mut entries =
Vec::<hal::AccelerationStructureTriangles<dyn hal::DynBuffer>>::with_capacity(
descriptors.len(),
);
for desc in descriptors {
if desc.index_count.is_some() != desc.index_format.is_some() {
return Err(CreateBlasError::MissingIndexData);
}
let indices =
desc.index_count
.map(|count| AccelerationStructureTriangleIndices::<
dyn hal::DynBuffer,
> {
format: desc.index_format.unwrap(),
buffer: None,
offset: 0,
count,
});
if !self
.features
.allowed_vertex_formats_for_blas()
.contains(&desc.vertex_format)
{
return Err(CreateBlasError::InvalidVertexFormat(
desc.vertex_format,
self.features.allowed_vertex_formats_for_blas(),
));
}
entries.push(hal::AccelerationStructureTriangles::<dyn hal::DynBuffer> {
vertex_buffer: None,
vertex_format: desc.vertex_format,
first_vertex: 0,
vertex_count: desc.vertex_count,
vertex_stride: 0,
indices,
transform: None,
flags: desc.flags,
});
}
unsafe {
self.raw().get_acceleration_structure_build_sizes(
&hal::GetAccelerationStructureBuildSizesDescriptor {
entries: &hal::AccelerationStructureEntries::Triangles(entries),
flags: blas_desc.flags,
},
)
}
}
};
let raw = unsafe {
self.raw()
.create_acceleration_structure(&hal::AccelerationStructureDescriptor {
label: blas_desc.label.as_deref(),
size: size_info.acceleration_structure_size,
format: hal::AccelerationStructureFormat::BottomLevel,
})
}
.map_err(DeviceError::from_hal)?;
let handle = unsafe {
self.raw()
.get_acceleration_structure_device_address(raw.as_ref())
};
Ok(Arc::new(resource::Blas {
raw: Snatchable::new(raw),
device: self.clone(),
size_info,
sizes,
flags: blas_desc.flags,
update_mode: blas_desc.update_mode,
handle,
label: blas_desc.label.to_string(),
built_index: RwLock::new(rank::BLAS_BUILT_INDEX, None),
tracking_data: TrackingData::new(self.tracker_indices.blas_s.clone()),
}))
}
fn create_tlas(
self: &Arc<Self>,
desc: &resource::TlasDescriptor,
) -> Result<Arc<resource::Tlas>, CreateTlasError> {
self.check_is_valid()?;
self.require_features(Features::EXPERIMENTAL_RAY_TRACING_ACCELERATION_STRUCTURE)?;
let size_info = unsafe {
self.raw().get_acceleration_structure_build_sizes(
&hal::GetAccelerationStructureBuildSizesDescriptor {
entries: &hal::AccelerationStructureEntries::Instances(
hal::AccelerationStructureInstances {
buffer: None,
offset: 0,
count: desc.max_instances,
},
),
flags: desc.flags,
},
)
};
let raw = unsafe {
self.raw()
.create_acceleration_structure(&hal::AccelerationStructureDescriptor {
label: desc.label.as_deref(),
size: size_info.acceleration_structure_size,
format: hal::AccelerationStructureFormat::TopLevel,
})
}
.map_err(DeviceError::from_hal)?;
let instance_buffer_size =
self.alignments.raw_tlas_instance_size * desc.max_instances.max(1) as usize;
let instance_buffer = unsafe {
self.raw().create_buffer(&hal::BufferDescriptor {
label: Some("(wgpu-core) instances_buffer"),
size: instance_buffer_size as u64,
usage: hal::BufferUses::COPY_DST
| hal::BufferUses::TOP_LEVEL_ACCELERATION_STRUCTURE_INPUT,
memory_flags: hal::MemoryFlags::PREFER_COHERENT,
})
}
.map_err(DeviceError::from_hal)?;
Ok(Arc::new(resource::Tlas {
raw: Snatchable::new(raw),
device: self.clone(),
size_info,
flags: desc.flags,
update_mode: desc.update_mode,
built_index: RwLock::new(rank::TLAS_BUILT_INDEX, None),
dependencies: RwLock::new(rank::TLAS_DEPENDENCIES, Vec::new()),
instance_buffer: ManuallyDrop::new(instance_buffer),
label: desc.label.to_string(),
max_instance_count: desc.max_instances,
tracking_data: TrackingData::new(self.tracker_indices.tlas_s.clone()),
}))
}
}
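// Illustrative sketch, not part of the vendored crate: the instance buffer backing a
// TLAS is sized for at least one instance even when `max_instances` is zero, using
// the backend-reported per-instance size, as in `create_tlas` above.
fn example_instance_buffer_size(raw_tlas_instance_size: usize, max_instances: u32) -> u64 {
    (raw_tlas_instance_size * max_instances.max(1) as usize) as u64
}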
impl Global {
pub fn device_create_blas(
&self,
device_id: id::DeviceId,
desc: &resource::BlasDescriptor,
sizes: wgt::BlasGeometrySizeDescriptors,
id_in: Option<BlasId>,
) -> (BlasId, Option<u64>, Option<CreateBlasError>) {
profiling::scope!("Device::create_blas");
let fid = self.hub.blas_s.prepare(id_in);
let error = 'error: {
let device = self.hub.devices.get(device_id);
#[cfg(feature = "trace")]
if let Some(trace) = device.trace.lock().as_mut() {
trace.add(trace::Action::CreateBlas {
id: fid.id(),
desc: desc.clone(),
sizes: sizes.clone(),
});
}
let blas = match device.create_blas(desc, sizes) {
Ok(blas) => blas,
Err(e) => break 'error e,
};
let handle = blas.handle;
let id = fid.assign(Fallible::Valid(blas));
api_log!("Device::create_blas -> {id:?}");
return (id, Some(handle), None);
};
let id = fid.assign(Fallible::Invalid(Arc::new(error.to_string())));
(id, None, Some(error))
}
pub fn device_create_tlas(
&self,
device_id: id::DeviceId,
desc: &resource::TlasDescriptor,
id_in: Option<TlasId>,
) -> (TlasId, Option<CreateTlasError>) {
profiling::scope!("Device::create_tlas");
let fid = self.hub.tlas_s.prepare(id_in);
let error = 'error: {
let device = self.hub.devices.get(device_id);
#[cfg(feature = "trace")]
if let Some(trace) = device.trace.lock().as_mut() {
trace.add(trace::Action::CreateTlas {
id: fid.id(),
desc: desc.clone(),
});
}
let tlas = match device.create_tlas(desc) {
Ok(tlas) => tlas,
Err(e) => break 'error e,
};
let id = fid.assign(Fallible::Valid(tlas));
api_log!("Device::create_tlas -> {id:?}");
return (id, None);
};
let id = fid.assign(Fallible::Invalid(Arc::new(error.to_string())));
(id, Some(error))
}
pub fn blas_drop(&self, blas_id: BlasId) {
profiling::scope!("Blas::drop");
api_log!("Blas::drop {blas_id:?}");
let _blas = self.hub.blas_s.remove(blas_id);
#[cfg(feature = "trace")]
if let Ok(blas) = _blas.get() {
if let Some(t) = blas.device.trace.lock().as_mut() {
t.add(trace::Action::DestroyBlas(blas_id));
}
}
}
pub fn tlas_drop(&self, tlas_id: TlasId) {
profiling::scope!("Tlas::drop");
api_log!("Tlas::drop {tlas_id:?}");
let _tlas = self.hub.tlas_s.remove(tlas_id);
#[cfg(feature = "trace")]
if let Ok(tlas) = _tlas.get() {
if let Some(t) = tlas.device.trace.lock().as_mut() {
t.add(trace::Action::DestroyTlas(tlas_id));
}
}
}
}

3760
vendor/wgpu-core/src/device/resource.rs vendored Normal file

File diff suppressed because it is too large Load Diff

259
vendor/wgpu-core/src/device/trace.rs vendored Normal file
View File

@@ -0,0 +1,259 @@
use crate::id;
use std::ops::Range;
#[cfg(feature = "trace")]
use std::{borrow::Cow, io::Write as _};
//TODO: consider a readable Id that doesn't include the backend
type FileName = String;
pub const FILE_NAME: &str = "trace.ron";
#[cfg(feature = "trace")]
pub(crate) fn new_render_bundle_encoder_descriptor<'a>(
label: crate::Label<'a>,
context: &'a super::RenderPassContext,
depth_read_only: bool,
stencil_read_only: bool,
) -> crate::command::RenderBundleEncoderDescriptor<'a> {
crate::command::RenderBundleEncoderDescriptor {
label,
color_formats: Cow::Borrowed(&context.attachments.colors),
depth_stencil: context.attachments.depth_stencil.map(|format| {
wgt::RenderBundleDepthStencil {
format,
depth_read_only,
stencil_read_only,
}
}),
sample_count: context.sample_count,
multiview: context.multiview,
}
}
#[allow(clippy::large_enum_variant)]
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum Action<'a> {
Init {
desc: crate::device::DeviceDescriptor<'a>,
backend: wgt::Backend,
},
ConfigureSurface(
id::SurfaceId,
wgt::SurfaceConfiguration<Vec<wgt::TextureFormat>>,
),
CreateBuffer(id::BufferId, crate::resource::BufferDescriptor<'a>),
FreeBuffer(id::BufferId),
DestroyBuffer(id::BufferId),
CreateTexture(id::TextureId, crate::resource::TextureDescriptor<'a>),
FreeTexture(id::TextureId),
DestroyTexture(id::TextureId),
CreateTextureView {
id: id::TextureViewId,
parent_id: id::TextureId,
desc: crate::resource::TextureViewDescriptor<'a>,
},
DestroyTextureView(id::TextureViewId),
CreateSampler(id::SamplerId, crate::resource::SamplerDescriptor<'a>),
DestroySampler(id::SamplerId),
GetSurfaceTexture {
id: id::TextureId,
parent_id: id::SurfaceId,
},
Present(id::SurfaceId),
DiscardSurfaceTexture(id::SurfaceId),
CreateBindGroupLayout(
id::BindGroupLayoutId,
crate::binding_model::BindGroupLayoutDescriptor<'a>,
),
DestroyBindGroupLayout(id::BindGroupLayoutId),
CreatePipelineLayout(
id::PipelineLayoutId,
crate::binding_model::PipelineLayoutDescriptor<'a>,
),
DestroyPipelineLayout(id::PipelineLayoutId),
CreateBindGroup(
id::BindGroupId,
crate::binding_model::BindGroupDescriptor<'a>,
),
DestroyBindGroup(id::BindGroupId),
CreateShaderModule {
id: id::ShaderModuleId,
desc: crate::pipeline::ShaderModuleDescriptor<'a>,
data: FileName,
},
DestroyShaderModule(id::ShaderModuleId),
CreateComputePipeline {
id: id::ComputePipelineId,
desc: crate::pipeline::ComputePipelineDescriptor<'a>,
#[cfg_attr(feature = "replay", serde(default))]
implicit_context: Option<super::ImplicitPipelineContext>,
},
DestroyComputePipeline(id::ComputePipelineId),
CreateRenderPipeline {
id: id::RenderPipelineId,
desc: crate::pipeline::RenderPipelineDescriptor<'a>,
#[cfg_attr(feature = "replay", serde(default))]
implicit_context: Option<super::ImplicitPipelineContext>,
},
DestroyRenderPipeline(id::RenderPipelineId),
CreatePipelineCache {
id: id::PipelineCacheId,
desc: crate::pipeline::PipelineCacheDescriptor<'a>,
},
DestroyPipelineCache(id::PipelineCacheId),
CreateRenderBundle {
id: id::RenderBundleId,
desc: crate::command::RenderBundleEncoderDescriptor<'a>,
base: crate::command::BasePass<crate::command::RenderCommand>,
},
DestroyRenderBundle(id::RenderBundleId),
CreateQuerySet {
id: id::QuerySetId,
desc: crate::resource::QuerySetDescriptor<'a>,
},
DestroyQuerySet(id::QuerySetId),
WriteBuffer {
id: id::BufferId,
data: FileName,
range: Range<wgt::BufferAddress>,
queued: bool,
},
WriteTexture {
to: crate::command::TexelCopyTextureInfo,
data: FileName,
layout: wgt::TexelCopyBufferLayout,
size: wgt::Extent3d,
},
Submit(crate::SubmissionIndex, Vec<Command>),
CreateBlas {
id: id::BlasId,
desc: crate::resource::BlasDescriptor<'a>,
sizes: wgt::BlasGeometrySizeDescriptors,
},
DestroyBlas(id::BlasId),
CreateTlas {
id: id::TlasId,
desc: crate::resource::TlasDescriptor<'a>,
},
DestroyTlas(id::TlasId),
}
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum Command {
CopyBufferToBuffer {
src: id::BufferId,
src_offset: wgt::BufferAddress,
dst: id::BufferId,
dst_offset: wgt::BufferAddress,
size: wgt::BufferAddress,
},
CopyBufferToTexture {
src: crate::command::TexelCopyBufferInfo,
dst: crate::command::TexelCopyTextureInfo,
size: wgt::Extent3d,
},
CopyTextureToBuffer {
src: crate::command::TexelCopyTextureInfo,
dst: crate::command::TexelCopyBufferInfo,
size: wgt::Extent3d,
},
CopyTextureToTexture {
src: crate::command::TexelCopyTextureInfo,
dst: crate::command::TexelCopyTextureInfo,
size: wgt::Extent3d,
},
ClearBuffer {
dst: id::BufferId,
offset: wgt::BufferAddress,
size: Option<wgt::BufferAddress>,
},
ClearTexture {
dst: id::TextureId,
subresource_range: wgt::ImageSubresourceRange,
},
WriteTimestamp {
query_set_id: id::QuerySetId,
query_index: u32,
},
ResolveQuerySet {
query_set_id: id::QuerySetId,
start_query: u32,
query_count: u32,
destination: id::BufferId,
destination_offset: wgt::BufferAddress,
},
PushDebugGroup(String),
PopDebugGroup,
InsertDebugMarker(String),
RunComputePass {
base: crate::command::BasePass<crate::command::ComputeCommand>,
timestamp_writes: Option<crate::command::PassTimestampWrites>,
},
RunRenderPass {
base: crate::command::BasePass<crate::command::RenderCommand>,
target_colors: Vec<Option<crate::command::RenderPassColorAttachment>>,
target_depth_stencil: Option<crate::command::RenderPassDepthStencilAttachment>,
timestamp_writes: Option<crate::command::PassTimestampWrites>,
occlusion_query_set_id: Option<id::QuerySetId>,
},
BuildAccelerationStructuresUnsafeTlas {
blas: Vec<crate::ray_tracing::TraceBlasBuildEntry>,
tlas: Vec<crate::ray_tracing::TlasBuildEntry>,
},
BuildAccelerationStructures {
blas: Vec<crate::ray_tracing::TraceBlasBuildEntry>,
tlas: Vec<crate::ray_tracing::TraceTlasPackage>,
},
}
#[cfg(feature = "trace")]
#[derive(Debug)]
pub struct Trace {
path: std::path::PathBuf,
file: std::fs::File,
config: ron::ser::PrettyConfig,
binary_id: usize,
}
#[cfg(feature = "trace")]
impl Trace {
pub fn new(path: &std::path::Path) -> Result<Self, std::io::Error> {
log::info!("Tracing into '{:?}'", path);
let mut file = std::fs::File::create(path.join(FILE_NAME))?;
file.write_all(b"[\n")?;
Ok(Self {
path: path.to_path_buf(),
file,
config: ron::ser::PrettyConfig::default(),
binary_id: 0,
})
}
pub fn make_binary(&mut self, kind: &str, data: &[u8]) -> String {
self.binary_id += 1;
let name = format!("data{}.{}", self.binary_id, kind);
let _ = std::fs::write(self.path.join(&name), data);
name
}
pub(crate) fn add(&mut self, action: Action) {
match ron::ser::to_string_pretty(&action, self.config.clone()) {
Ok(string) => {
let _ = writeln!(self.file, "{},", string);
}
Err(e) => {
log::warn!("RON serialization failure: {:?}", e);
}
}
}
}
#[cfg(feature = "trace")]
impl Drop for Trace {
fn drop(&mut self) {
let _ = self.file.write_all(b"]");
}
}

64
vendor/wgpu-core/src/error.rs vendored Normal file
View File

@@ -0,0 +1,64 @@
use core::fmt;
use std::{error::Error, sync::Arc};
use thiserror::Error;
#[cfg(send_sync)]
pub type ContextErrorSource = Box<dyn Error + Send + Sync + 'static>;
#[cfg(not(send_sync))]
pub type ContextErrorSource = Box<dyn Error + 'static>;
#[derive(Debug, Error)]
#[error(
"In {fn_ident}{}{}{}",
if self.label.is_empty() { "" } else { ", label = '" },
self.label,
if self.label.is_empty() { "" } else { "'" }
)]
pub struct ContextError {
pub fn_ident: &'static str,
#[source]
pub source: ContextErrorSource,
pub label: String,
}
/// Don't use this error type with thiserror's #[error(transparent)]
#[derive(Clone)]
pub struct MultiError {
inner: Vec<Arc<dyn Error + Send + Sync + 'static>>,
}
impl MultiError {
pub fn new<T: Error + Send + Sync + 'static>(
iter: impl ExactSizeIterator<Item = T>,
) -> Option<Self> {
if iter.len() == 0 {
return None;
}
Some(Self {
inner: iter.map(Box::from).map(Arc::from).collect(),
})
}
pub fn errors(&self) -> Box<dyn Iterator<Item = &(dyn Error + Send + Sync + 'static)> + '_> {
Box::new(self.inner.iter().map(|e| e.as_ref()))
}
}
impl fmt::Debug for MultiError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
fmt::Debug::fmt(&self.inner[0], f)
}
}
impl fmt::Display for MultiError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
fmt::Display::fmt(&self.inner[0], f)
}
}
impl Error for MultiError {
fn source(&self) -> Option<&(dyn Error + 'static)> {
self.inner[0].source()
}
}

105
vendor/wgpu-core/src/global.rs vendored Normal file
View File

@@ -0,0 +1,105 @@
use std::{fmt, sync::Arc};
use crate::{
hal_api::HalApi,
hub::{Hub, HubReport},
instance::{Instance, Surface},
registry::{Registry, RegistryReport},
resource_log,
};
#[derive(Debug, PartialEq, Eq)]
pub struct GlobalReport {
pub surfaces: RegistryReport,
pub hub: HubReport,
}
impl GlobalReport {
pub fn surfaces(&self) -> &RegistryReport {
&self.surfaces
}
pub fn hub_report(&self) -> &HubReport {
&self.hub
}
}
pub struct Global {
pub(crate) surfaces: Registry<Arc<Surface>>,
pub(crate) hub: Hub,
// the instance must be dropped last
pub instance: Instance,
}
impl Global {
pub fn new(name: &str, instance_desc: &wgt::InstanceDescriptor) -> Self {
profiling::scope!("Global::new");
Self {
instance: Instance::new(name, instance_desc),
surfaces: Registry::new(),
hub: Hub::new(),
}
}
/// # Safety
///
/// Refer to the creation of wgpu-hal Instance for every backend.
pub unsafe fn from_hal_instance<A: HalApi>(name: &str, hal_instance: A::Instance) -> Self {
profiling::scope!("Global::new");
let dyn_instance: Box<dyn hal::DynInstance> = Box::new(hal_instance);
Self {
instance: Instance {
name: name.to_owned(),
instance_per_backend: std::iter::once((A::VARIANT, dyn_instance)).collect(),
..Default::default()
},
surfaces: Registry::new(),
hub: Hub::new(),
}
}
/// # Safety
///
/// - The raw instance handle returned must not be manually destroyed.
pub unsafe fn instance_as_hal<A: HalApi>(&self) -> Option<&A::Instance> {
unsafe { self.instance.as_hal::<A>() }
}
/// # Safety
///
/// - The raw handles obtained from the Instance must not be manually destroyed
pub unsafe fn from_instance(instance: Instance) -> Self {
profiling::scope!("Global::new");
Self {
instance,
surfaces: Registry::new(),
hub: Hub::new(),
}
}
pub fn generate_report(&self) -> GlobalReport {
GlobalReport {
surfaces: self.surfaces.generate_report(),
hub: self.hub.generate_report(),
}
}
}
impl fmt::Debug for Global {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("Global").finish()
}
}
impl Drop for Global {
fn drop(&mut self) {
profiling::scope!("Global::drop");
resource_log!("Global::drop");
}
}
#[cfg(send_sync)]
fn _test_send_sync(global: &Global) {
fn test_internal<T: Send + Sync>(_: T) {}
test_internal(global)
}

29
vendor/wgpu-core/src/hal_api.rs vendored Normal file
View File

@@ -0,0 +1,29 @@
use wgt::{Backend, WasmNotSendSync};
pub trait HalApi: hal::Api + 'static + WasmNotSendSync {
const VARIANT: Backend;
}
impl HalApi for hal::api::Empty {
const VARIANT: Backend = Backend::Empty;
}
#[cfg(vulkan)]
impl HalApi for hal::api::Vulkan {
const VARIANT: Backend = Backend::Vulkan;
}
#[cfg(metal)]
impl HalApi for hal::api::Metal {
const VARIANT: Backend = Backend::Metal;
}
#[cfg(dx12)]
impl HalApi for hal::api::Dx12 {
const VARIANT: Backend = Backend::Dx12;
}
#[cfg(gles)]
impl HalApi for hal::api::Gles {
const VARIANT: Backend = Backend::Gl;
}

14
vendor/wgpu-core/src/hash_utils.rs vendored Normal file
View File

@@ -0,0 +1,14 @@
//! Module for hashing utilities.
//!
//! Named hash_utils to prevent clashing with the std::hash module.
/// HashMap using a fast, non-cryptographic hash algorithm.
pub type FastHashMap<K, V> =
std::collections::HashMap<K, V, std::hash::BuildHasherDefault<rustc_hash::FxHasher>>;
/// HashSet using a fast, non-cryptographic hash algorithm.
pub type FastHashSet<K> =
std::collections::HashSet<K, std::hash::BuildHasherDefault<rustc_hash::FxHasher>>;
/// IndexMap using a fast, non-cryptographic hash algorithm.
pub type FastIndexMap<K, V> =
indexmap::IndexMap<K, V, std::hash::BuildHasherDefault<rustc_hash::FxHasher>>;

234
vendor/wgpu-core/src/hub.rs vendored Normal file
View File

@@ -0,0 +1,234 @@
/*! Allocating resource ids, and tracking the resources they refer to.
The `wgpu_core` API uses identifiers of type [`Id<R>`] to refer to
resources of type `R`. For example, [`id::DeviceId`] is an alias for
`Id<markers::Device>`, and [`id::BufferId`] is an alias for
`Id<markers::Buffer>`. `Id` implements `Copy`, `Hash`, `Eq`, `Ord`, and
of course `Debug`.
[`id::DeviceId`]: crate::id::DeviceId
[`id::BufferId`]: crate::id::BufferId
Each `Id` contains an index identifying the resource it denotes, plus a
generation number (epoch) for additional validation.
The resources to which identifiers refer are freed explicitly.
Attempting to use an identifier for a resource that has been freed
elicits an error result.
## Assigning ids to resources
The users of `wgpu_core` generally want resource ids to be assigned
in one of two ways:
- Users like `wgpu` want `wgpu_core` to assign ids to resources itself.
For example, `wgpu` expects to call `Global::device_create_buffer`
and have the return value indicate the newly created buffer's id.
- Users like `player` and Firefox want to allocate ids themselves, and
pass `Global::device_create_buffer` and friends the id to assign the
new resource.
To accommodate either pattern, `wgpu_core` methods that create
resources all expect an `id_in` argument that the caller can use to
specify the id, and they all return the id used. For example, the
declaration of `Global::device_create_buffer` looks like this:
```ignore
impl Global {
/* ... */
pub fn device_create_buffer(
&self,
device_id: id::DeviceId,
desc: &resource::BufferDescriptor,
id_in: Option<id::BufferId>,
) -> (id::BufferId, Option<resource::CreateBufferError>) {
/* ... */
}
/* ... */
}
```
Users that want to assign resource ids themselves pass in the id they
want as the `id_in` argument, whereas users that want `wgpu_core`
itself to choose ids pass `None`. In either case, the id ultimately
assigned is returned as the first element of the tuple.
Producing true identifiers from `id_in` values is the job of
[`crate::identity::IdentityManager`]; alternatively, ids may be supplied
from outside through the `Option<Id>` arguments.
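For example (an illustrative sketch, not code from this crate; `global`,
`device_id`, `desc`, and `my_buffer_id` are assumed to already be in scope):
```ignore
// Let wgpu_core allocate the id:
let (buffer_id, error) = global.device_create_buffer(device_id, &desc, None);
// Or provide an externally allocated id (the Firefox/`player` style):
let (same_id, error) = global.device_create_buffer(device_id, &desc, Some(my_buffer_id));
```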
## Id allocation and streaming
Perhaps surprisingly, allowing users to assign resource ids themselves
enables major performance improvements in some applications.
The `wgpu_core` API is designed for use by Firefox's [WebGPU]
implementation. For security, web content and GPU use must be kept
segregated in separate processes, with all interaction between them
mediated by an inter-process communication protocol. As web content uses
the WebGPU API, the content process sends messages to the GPU process,
which interacts with the platform's GPU APIs on content's behalf,
occasionally sending results back.
In a classic Rust API, a resource allocation function takes parameters
describing the resource to create, and if creation succeeds, it returns
the resource id in a `Result::Ok` value. However, this design is a poor
fit for the split-process design described above: content must wait for
the reply to its buffer-creation message (say) before it can know which
id it can use in the next message that uses that buffer. On a common
usage pattern, the classic Rust design imposes the latency of a full
cross-process round trip.
We can avoid incurring these round-trip latencies simply by letting the
content process assign resource ids itself. With this approach, content
can choose an id for the new buffer, send a message to create the
buffer, and then immediately send the next message operating on that
buffer, since it already knows its id. Allowing content and GPU process
activity to be pipelined greatly improves throughput.
To help propagate errors correctly in this style of usage, when resource
creation fails, the id supplied for that resource is marked to indicate
as much, allowing subsequent operations using that id to be properly
flagged as errors as well.
[`process`]: crate::identity::IdentityManager::process
[`Id<R>`]: crate::id::Id
[wrapped in a mutex]: trait.IdentityHandler.html#impl-IdentityHandler%3CI%3E-for-Mutex%3CIdentityManager%3E
[WebGPU]: https://www.w3.org/TR/webgpu/
*/
use crate::{
binding_model::{BindGroup, BindGroupLayout, PipelineLayout},
command::{CommandBuffer, RenderBundle},
device::{queue::Queue, Device},
instance::Adapter,
pipeline::{ComputePipeline, PipelineCache, RenderPipeline, ShaderModule},
registry::{Registry, RegistryReport},
resource::{
Blas, Buffer, Fallible, QuerySet, Sampler, StagingBuffer, Texture, TextureView, Tlas,
},
};
use std::{fmt::Debug, sync::Arc};
#[derive(Debug, PartialEq, Eq)]
pub struct HubReport {
pub adapters: RegistryReport,
pub devices: RegistryReport,
pub queues: RegistryReport,
pub pipeline_layouts: RegistryReport,
pub shader_modules: RegistryReport,
pub bind_group_layouts: RegistryReport,
pub bind_groups: RegistryReport,
pub command_buffers: RegistryReport,
pub render_bundles: RegistryReport,
pub render_pipelines: RegistryReport,
pub compute_pipelines: RegistryReport,
pub pipeline_caches: RegistryReport,
pub query_sets: RegistryReport,
pub buffers: RegistryReport,
pub textures: RegistryReport,
pub texture_views: RegistryReport,
pub samplers: RegistryReport,
}
impl HubReport {
pub fn is_empty(&self) -> bool {
self.adapters.is_empty()
}
}
#[allow(rustdoc::private_intra_doc_links)]
/// All the resources tracked by a [`crate::global::Global`].
///
/// ## Locking
///
/// Each field in `Hub` is a [`Registry`] holding all the values of a
/// particular type of resource, all protected by a single RwLock.
/// So for example, to access any [`Buffer`], you must acquire a read
/// lock on the `Hub`'s entire buffers registry. The lock guard
/// gives you access to the `Registry`'s [`Storage`], which you can
/// then index with the buffer's id. (Yes, this design causes
/// contention; see [#2272].)
///
/// But most `wgpu` operations require access to several different
/// kinds of resource, so you often need to hold locks on several
/// different fields of your [`Hub`] simultaneously.
///
/// Inside the `Registry`, resources are stored as `Arc<T>`, where `T` is a
/// resource type; the `Registry`'s lock is held only while looking up a
/// specific resource.
///
/// [`Storage`]: crate::storage::Storage
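// Access sketch (illustrative only): `hub.buffers.get(buffer_id)` takes the
// buffers registry's lock just long enough to clone out the stored entry;
// calling `.get()` on that entry yields a `Result`, so operations using an id
// whose creation failed are reported as errors.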
pub struct Hub {
pub(crate) adapters: Registry<Arc<Adapter>>,
pub(crate) devices: Registry<Arc<Device>>,
pub(crate) queues: Registry<Arc<Queue>>,
pub(crate) pipeline_layouts: Registry<Fallible<PipelineLayout>>,
pub(crate) shader_modules: Registry<Fallible<ShaderModule>>,
pub(crate) bind_group_layouts: Registry<Fallible<BindGroupLayout>>,
pub(crate) bind_groups: Registry<Fallible<BindGroup>>,
pub(crate) command_buffers: Registry<Arc<CommandBuffer>>,
pub(crate) render_bundles: Registry<Fallible<RenderBundle>>,
pub(crate) render_pipelines: Registry<Fallible<RenderPipeline>>,
pub(crate) compute_pipelines: Registry<Fallible<ComputePipeline>>,
pub(crate) pipeline_caches: Registry<Fallible<PipelineCache>>,
pub(crate) query_sets: Registry<Fallible<QuerySet>>,
pub(crate) buffers: Registry<Fallible<Buffer>>,
pub(crate) staging_buffers: Registry<StagingBuffer>,
pub(crate) textures: Registry<Fallible<Texture>>,
pub(crate) texture_views: Registry<Fallible<TextureView>>,
pub(crate) samplers: Registry<Fallible<Sampler>>,
pub(crate) blas_s: Registry<Fallible<Blas>>,
pub(crate) tlas_s: Registry<Fallible<Tlas>>,
}
impl Hub {
pub(crate) fn new() -> Self {
Self {
adapters: Registry::new(),
devices: Registry::new(),
queues: Registry::new(),
pipeline_layouts: Registry::new(),
shader_modules: Registry::new(),
bind_group_layouts: Registry::new(),
bind_groups: Registry::new(),
command_buffers: Registry::new(),
render_bundles: Registry::new(),
render_pipelines: Registry::new(),
compute_pipelines: Registry::new(),
pipeline_caches: Registry::new(),
query_sets: Registry::new(),
buffers: Registry::new(),
staging_buffers: Registry::new(),
textures: Registry::new(),
texture_views: Registry::new(),
samplers: Registry::new(),
blas_s: Registry::new(),
tlas_s: Registry::new(),
}
}
pub fn generate_report(&self) -> HubReport {
HubReport {
adapters: self.adapters.generate_report(),
devices: self.devices.generate_report(),
queues: self.queues.generate_report(),
pipeline_layouts: self.pipeline_layouts.generate_report(),
shader_modules: self.shader_modules.generate_report(),
bind_group_layouts: self.bind_group_layouts.generate_report(),
bind_groups: self.bind_groups.generate_report(),
command_buffers: self.command_buffers.generate_report(),
render_bundles: self.render_bundles.generate_report(),
render_pipelines: self.render_pipelines.generate_report(),
compute_pipelines: self.compute_pipelines.generate_report(),
pipeline_caches: self.pipeline_caches.generate_report(),
query_sets: self.query_sets.generate_report(),
buffers: self.buffers.generate_report(),
textures: self.textures.generate_report(),
texture_views: self.texture_views.generate_report(),
samplers: self.samplers.generate_report(),
}
}
}

295
vendor/wgpu-core/src/id.rs vendored Normal file
View File

@@ -0,0 +1,295 @@
use crate::{Epoch, Index};
use std::{
cmp::Ordering,
fmt::{self, Debug},
hash::Hash,
marker::PhantomData,
mem::size_of,
num::NonZeroU64,
};
use wgt::WasmNotSendSync;
const _: () = {
if size_of::<Index>() != 4 {
panic!()
}
};
const _: () = {
if size_of::<Epoch>() != 4 {
panic!()
}
};
const _: () = {
if size_of::<RawId>() != 8 {
panic!()
}
};
/// The raw underlying representation of an identifier.
#[repr(transparent)]
#[cfg_attr(
any(feature = "serde", feature = "trace"),
derive(serde::Serialize),
serde(into = "SerialId")
)]
#[cfg_attr(
any(feature = "serde", feature = "replay"),
derive(serde::Deserialize),
serde(from = "SerialId")
)]
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct RawId(NonZeroU64);
impl RawId {
/// Zip together an identifier and return its raw underlying representation.
pub fn zip(index: Index, epoch: Epoch) -> RawId {
let v = (index as u64) | ((epoch as u64) << 32);
Self(NonZeroU64::new(v).unwrap())
}
/// Unzip a raw identifier into its components.
pub fn unzip(self) -> (Index, Epoch) {
(self.0.get() as Index, (self.0.get() >> 32) as Epoch)
}
}
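// For example (a sketch): `RawId::zip(5, 2)` stores index 5 in the low 32 bits
// and epoch 2 in the high 32 bits of the `NonZeroU64`, so `unzip()` yields
// `(5, 2)` again.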
/// An identifier for a wgpu object.
///
/// An `Id<T>` value identifies a value stored in a [`Global`]'s [`Hub`].
///
/// ## Note on `Id` typing
///
/// You might assume that an `Id<T>` can only be used to retrieve a resource of
/// type `T`, but `T` here is not a resource type at all: it is a marker type
/// from the [`markers`] module. For example, [`TextureId`] is an alias for
/// `Id<markers::Texture>`.
///
/// These marker types are never instantiated; they exist only to make sure you
/// access each `Storage` with the right kind of identifier, while the id
/// itself can refer to a resource created on any backend.
///
/// [`Global`]: crate::global::Global
/// [`Hub`]: crate::hub::Hub
/// [`Hub<A>`]: crate::hub::Hub
/// [`Texture<A>`]: crate::resource::Texture
/// [`Registry`]: crate::hub::Registry
/// [`Empty`]: hal::api::Empty
#[repr(transparent)]
#[cfg_attr(any(feature = "serde", feature = "trace"), derive(serde::Serialize))]
#[cfg_attr(any(feature = "serde", feature = "replay"), derive(serde::Deserialize))]
#[cfg_attr(
any(feature = "serde", feature = "trace", feature = "replay"),
serde(transparent)
)]
pub struct Id<T: Marker>(RawId, PhantomData<T>);
// This type represents Id in a more readable (and editable) way.
#[allow(dead_code)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
enum SerialId {
// The only variant forces RON to not ignore "Id"
Id(Index, Epoch),
}
impl From<RawId> for SerialId {
fn from(id: RawId) -> Self {
let (index, epoch) = id.unzip();
Self::Id(index, epoch)
}
}
impl From<SerialId> for RawId {
fn from(id: SerialId) -> Self {
match id {
SerialId::Id(index, epoch) => RawId::zip(index, epoch),
}
}
}
impl<T> Id<T>
where
T: Marker,
{
/// # Safety
///
/// The raw id must be valid for the type.
pub unsafe fn from_raw(raw: RawId) -> Self {
Self(raw, PhantomData)
}
/// Coerce the identifiers into its raw underlying representation.
pub fn into_raw(self) -> RawId {
self.0
}
#[inline]
pub fn zip(index: Index, epoch: Epoch) -> Self {
Id(RawId::zip(index, epoch), PhantomData)
}
#[inline]
pub fn unzip(self) -> (Index, Epoch) {
self.0.unzip()
}
}
impl<T> Copy for Id<T> where T: Marker {}
impl<T> Clone for Id<T>
where
T: Marker,
{
#[inline]
fn clone(&self) -> Self {
*self
}
}
impl<T> Debug for Id<T>
where
T: Marker,
{
fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
let (index, epoch) = self.unzip();
write!(formatter, "Id({index},{epoch})")?;
Ok(())
}
}
impl<T> Hash for Id<T>
where
T: Marker,
{
#[inline]
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
self.0.hash(state);
}
}
impl<T> PartialEq for Id<T>
where
T: Marker,
{
#[inline]
fn eq(&self, other: &Self) -> bool {
self.0 == other.0
}
}
impl<T> Eq for Id<T> where T: Marker {}
impl<T> PartialOrd for Id<T>
where
T: Marker,
{
#[inline]
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
Some(self.cmp(other))
}
}
impl<T> Ord for Id<T>
where
T: Marker,
{
#[inline]
fn cmp(&self, other: &Self) -> Ordering {
self.0.cmp(&other.0)
}
}
/// Marker trait used to determine which types uniquely identify a resource.
///
/// For example, `Device<A>` will have the same type of identifier as
/// `Device<B>` because `Device<T>` for any `T` defines the same marker type.
pub trait Marker: 'static + WasmNotSendSync {}
// This allows `()` to be used as a marker type for tests.
//
// We don't want these in production code, since they essentially remove type
// safety, like how identifiers across different types can be compared.
#[cfg(test)]
impl Marker for () {}
/// Define identifiers for each resource.
macro_rules! ids {
($(
$(#[$($meta:meta)*])*
pub type $name:ident $marker:ident;
)*) => {
/// Marker types for each resource.
pub mod markers {
$(
#[derive(Debug)]
pub enum $marker {}
impl super::Marker for $marker {}
)*
}
$(
$(#[$($meta)*])*
pub type $name = Id<self::markers::$marker>;
)*
}
}
ids! {
pub type AdapterId Adapter;
pub type SurfaceId Surface;
pub type DeviceId Device;
pub type QueueId Queue;
pub type BufferId Buffer;
pub type StagingBufferId StagingBuffer;
pub type TextureViewId TextureView;
pub type TextureId Texture;
pub type SamplerId Sampler;
pub type BindGroupLayoutId BindGroupLayout;
pub type PipelineLayoutId PipelineLayout;
pub type BindGroupId BindGroup;
pub type ShaderModuleId ShaderModule;
pub type RenderPipelineId RenderPipeline;
pub type ComputePipelineId ComputePipeline;
pub type PipelineCacheId PipelineCache;
pub type CommandEncoderId CommandEncoder;
pub type CommandBufferId CommandBuffer;
pub type RenderPassEncoderId RenderPassEncoder;
pub type ComputePassEncoderId ComputePassEncoder;
pub type RenderBundleEncoderId RenderBundleEncoder;
pub type RenderBundleId RenderBundle;
pub type QuerySetId QuerySet;
pub type BlasId Blas;
pub type TlasId Tlas;
}
// The CommandBuffer type serves both as encoder and
// buffer, which is why the 2 functions below exist.
impl CommandEncoderId {
pub fn into_command_buffer_id(self) -> CommandBufferId {
Id(self.0, PhantomData)
}
}
impl CommandBufferId {
pub fn into_command_encoder_id(self) -> CommandEncoderId {
Id(self.0, PhantomData)
}
}
#[test]
fn test_id() {
let indexes = [0, Index::MAX / 2 - 1, Index::MAX / 2 + 1, Index::MAX];
let epochs = [1, Epoch::MAX / 2 - 1, Epoch::MAX / 2 + 1, Epoch::MAX];
for &i in &indexes {
for &e in &epochs {
let id = Id::<()>::zip(i, e);
let (index, epoch) = id.unzip();
assert_eq!(index, i);
assert_eq!(epoch, e);
}
}
}

142
vendor/wgpu-core/src/identity.rs vendored Normal file
View File

@@ -0,0 +1,142 @@
use crate::{
id::{Id, Marker},
lock::{rank, Mutex},
Epoch, Index,
};
use std::{fmt::Debug, marker::PhantomData};
#[derive(Copy, Clone, Debug, PartialEq)]
enum IdSource {
External,
Allocated,
None,
}
/// A simple structure to allocate [`Id`] identifiers.
///
/// Calling [`alloc`] returns a fresh, never-before-seen id. Calling [`release`]
/// marks an id as dead; it will never be returned again by `alloc`.
///
/// `IdentityValues` returns `Id`s whose index values are suitable for use as
/// indices into a `Vec<T>` that holds those ids' referents:
///
/// - Every live id has a distinct index value. Every live id's index
/// selects a distinct element in the vector.
///
/// - `IdentityValues` prefers low index numbers. If you size your vector to
/// accommodate the indices produced here, the vector's length will reflect
/// the highwater mark of actual occupancy.
///
/// - `IdentityValues` reuses the index values of freed ids before returning
/// ids with new index values. Freed vector entries get reused.
///
/// [`Id`]: crate::id::Id
/// [`Backend`]: wgt::Backend
/// [`alloc`]: IdentityValues::alloc
/// [`release`]: IdentityValues::release
#[derive(Debug)]
pub(super) struct IdentityValues {
free: Vec<(Index, Epoch)>,
next_index: Index,
count: usize,
// Sanity check: The allocation logic works under the assumption that we don't
// do a mix of allocating ids from here and providing ids manually for the same
// storage container.
id_source: IdSource,
}
impl IdentityValues {
/// Allocate a fresh, never-before-seen id.
///
/// If a previously released index is available it is reused, but with a
/// bumped epoch, so the new id never compares equal to the released one.
pub fn alloc<T: Marker>(&mut self) -> Id<T> {
assert!(
self.id_source != IdSource::External,
"Mix of internally allocated and externally provided IDs"
);
self.id_source = IdSource::Allocated;
self.count += 1;
match self.free.pop() {
Some((index, epoch)) => Id::zip(index, epoch + 1),
None => {
let index = self.next_index;
self.next_index += 1;
let epoch = 1;
Id::zip(index, epoch)
}
}
}
pub fn mark_as_used<T: Marker>(&mut self, id: Id<T>) -> Id<T> {
assert!(
self.id_source != IdSource::Allocated,
"Mix of internally allocated and externally provided IDs"
);
self.id_source = IdSource::External;
self.count += 1;
id
}
/// Free `id`. It will never be returned from `alloc` again.
pub fn release<T: Marker>(&mut self, id: Id<T>) {
if let IdSource::Allocated = self.id_source {
let (index, epoch) = id.unzip();
self.free.push((index, epoch));
}
self.count -= 1;
}
pub fn count(&self) -> usize {
self.count
}
}
#[derive(Debug)]
pub struct IdentityManager<T: Marker> {
pub(super) values: Mutex<IdentityValues>,
_phantom: PhantomData<T>,
}
impl<T: Marker> IdentityManager<T> {
pub fn process(&self) -> Id<T> {
self.values.lock().alloc()
}
pub fn mark_as_used(&self, id: Id<T>) -> Id<T> {
self.values.lock().mark_as_used(id)
}
pub fn free(&self, id: Id<T>) {
self.values.lock().release(id)
}
}
impl<T: Marker> IdentityManager<T> {
pub fn new() -> Self {
Self {
values: Mutex::new(
rank::IDENTITY_MANAGER_VALUES,
IdentityValues {
free: Vec::new(),
next_index: 0,
count: 0,
id_source: IdSource::None,
},
),
_phantom: PhantomData,
}
}
}
#[test]
fn test_epoch_end_of_life() {
use crate::id;
let man = IdentityManager::<id::markers::Buffer>::new();
let id1 = man.process();
assert_eq!(id1.unzip(), (0, 1));
man.free(id1);
let id2 = man.process();
// confirm that epoch 1 is not reused: the index comes back with a bumped epoch
assert_eq!(id2.unzip(), (0, 2));
}

View File

@@ -0,0 +1,397 @@
use std::mem::size_of;
use std::num::NonZeroU64;
use thiserror::Error;
use crate::{
device::DeviceError,
pipeline::{CreateComputePipelineError, CreateShaderModuleError},
};
#[derive(Clone, Debug, Error)]
#[non_exhaustive]
pub enum CreateDispatchIndirectValidationPipelineError {
#[error(transparent)]
DeviceError(#[from] DeviceError),
#[error(transparent)]
ShaderModule(#[from] CreateShaderModuleError),
#[error(transparent)]
ComputePipeline(#[from] CreateComputePipelineError),
}
/// This machinery requires the following limits:
///
/// - max_bind_groups: 2,
/// - max_dynamic_storage_buffers_per_pipeline_layout: 1,
/// - max_storage_buffers_per_shader_stage: 2,
/// - max_storage_buffer_binding_size: 3 * min_storage_buffer_offset_alignment,
/// - max_push_constant_size: 4,
/// - max_compute_invocations_per_workgroup 1
///
/// These are all indirectly satisfied by `DownlevelFlags::INDIRECT_EXECUTION`, which is also
/// required for this module's functionality to work.
#[derive(Debug)]
pub struct IndirectValidation {
module: Box<dyn hal::DynShaderModule>,
dst_bind_group_layout: Box<dyn hal::DynBindGroupLayout>,
src_bind_group_layout: Box<dyn hal::DynBindGroupLayout>,
pipeline_layout: Box<dyn hal::DynPipelineLayout>,
pipeline: Box<dyn hal::DynComputePipeline>,
dst_buffer: Box<dyn hal::DynBuffer>,
dst_bind_group: Box<dyn hal::DynBindGroup>,
}
pub struct Params<'a> {
pub pipeline_layout: &'a dyn hal::DynPipelineLayout,
pub pipeline: &'a dyn hal::DynComputePipeline,
pub dst_buffer: &'a dyn hal::DynBuffer,
pub dst_bind_group: &'a dyn hal::DynBindGroup,
pub aligned_offset: u64,
pub offset_remainder: u64,
}
impl IndirectValidation {
pub fn new(
device: &dyn hal::DynDevice,
limits: &wgt::Limits,
) -> Result<Self, CreateDispatchIndirectValidationPipelineError> {
let max_compute_workgroups_per_dimension = limits.max_compute_workgroups_per_dimension;
let src = format!(
"
@group(0) @binding(0)
var<storage, read_write> dst: array<u32, 6>;
@group(1) @binding(0)
var<storage, read> src: array<u32>;
struct OffsetPc {{
inner: u32,
}}
var<push_constant> offset: OffsetPc;
@compute @workgroup_size(1)
fn main() {{
let src = vec3(src[offset.inner], src[offset.inner + 1], src[offset.inner + 2]);
let max_compute_workgroups_per_dimension = {max_compute_workgroups_per_dimension}u;
if (
src.x > max_compute_workgroups_per_dimension ||
src.y > max_compute_workgroups_per_dimension ||
src.z > max_compute_workgroups_per_dimension
) {{
dst = array(0u, 0u, 0u, 0u, 0u, 0u);
}} else {{
dst = array(src.x, src.y, src.z, src.x, src.y, src.z);
}}
}}
"
);
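// Illustrative example (not part of the upstream sources): if the indirect
// buffer holds the dispatch (70_000, 1, 1) and
// `max_compute_workgroups_per_dimension` is 65_535, the shader above writes
// six zeros into `dst`, so the validated dispatch becomes (0, 0, 0); an
// in-bounds (8, 8, 1) is copied through unchanged, duplicated into both
// halves of `dst`.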
// SAFETY: The value we are passing to `new_unchecked` is not zero, so this is safe.
const SRC_BUFFER_SIZE: NonZeroU64 =
unsafe { NonZeroU64::new_unchecked(size_of::<u32>() as u64 * 3) };
// SAFETY: The value we are passing to `new_unchecked` is not zero, so this is safe.
const DST_BUFFER_SIZE: NonZeroU64 = unsafe {
NonZeroU64::new_unchecked(
SRC_BUFFER_SIZE.get() * 2, // From above: `dst: array<u32, 6>`
)
};
let module = naga::front::wgsl::parse_str(&src).map_err(|inner| {
CreateShaderModuleError::Parsing(naga::error::ShaderError {
source: src.clone(),
label: None,
inner: Box::new(inner),
})
})?;
let info = crate::device::create_validator(
wgt::Features::PUSH_CONSTANTS,
wgt::DownlevelFlags::empty(),
naga::valid::ValidationFlags::all(),
)
.validate(&module)
.map_err(|inner| {
CreateShaderModuleError::Validation(naga::error::ShaderError {
source: src,
label: None,
inner: Box::new(inner),
})
})?;
let hal_shader = hal::ShaderInput::Naga(hal::NagaShader {
module: std::borrow::Cow::Owned(module),
info,
debug_source: None,
});
let hal_desc = hal::ShaderModuleDescriptor {
label: None,
runtime_checks: wgt::ShaderRuntimeChecks::unchecked(),
};
let module =
unsafe { device.create_shader_module(&hal_desc, hal_shader) }.map_err(|error| {
match error {
hal::ShaderError::Device(error) => {
CreateShaderModuleError::Device(DeviceError::from_hal(error))
}
hal::ShaderError::Compilation(ref msg) => {
log::error!("Shader error: {}", msg);
CreateShaderModuleError::Generation
}
}
})?;
let dst_bind_group_layout_desc = hal::BindGroupLayoutDescriptor {
label: None,
flags: hal::BindGroupLayoutFlags::empty(),
entries: &[wgt::BindGroupLayoutEntry {
binding: 0,
visibility: wgt::ShaderStages::COMPUTE,
ty: wgt::BindingType::Buffer {
ty: wgt::BufferBindingType::Storage { read_only: false },
has_dynamic_offset: false,
min_binding_size: Some(DST_BUFFER_SIZE),
},
count: None,
}],
};
let dst_bind_group_layout = unsafe {
device
.create_bind_group_layout(&dst_bind_group_layout_desc)
.map_err(DeviceError::from_hal)?
};
let src_bind_group_layout_desc = hal::BindGroupLayoutDescriptor {
label: None,
flags: hal::BindGroupLayoutFlags::empty(),
entries: &[wgt::BindGroupLayoutEntry {
binding: 0,
visibility: wgt::ShaderStages::COMPUTE,
ty: wgt::BindingType::Buffer {
ty: wgt::BufferBindingType::Storage { read_only: true },
has_dynamic_offset: true,
min_binding_size: Some(SRC_BUFFER_SIZE),
},
count: None,
}],
};
let src_bind_group_layout = unsafe {
device
.create_bind_group_layout(&src_bind_group_layout_desc)
.map_err(DeviceError::from_hal)?
};
let pipeline_layout_desc = hal::PipelineLayoutDescriptor {
label: None,
flags: hal::PipelineLayoutFlags::empty(),
bind_group_layouts: &[
dst_bind_group_layout.as_ref(),
src_bind_group_layout.as_ref(),
],
push_constant_ranges: &[wgt::PushConstantRange {
stages: wgt::ShaderStages::COMPUTE,
range: 0..4,
}],
};
let pipeline_layout = unsafe {
device
.create_pipeline_layout(&pipeline_layout_desc)
.map_err(DeviceError::from_hal)?
};
let pipeline_desc = hal::ComputePipelineDescriptor {
label: None,
layout: pipeline_layout.as_ref(),
stage: hal::ProgrammableStage {
module: module.as_ref(),
entry_point: "main",
constants: &Default::default(),
zero_initialize_workgroup_memory: false,
},
cache: None,
};
let pipeline =
unsafe { device.create_compute_pipeline(&pipeline_desc) }.map_err(|err| match err {
hal::PipelineError::Device(error) => {
CreateComputePipelineError::Device(DeviceError::from_hal(error))
}
hal::PipelineError::Linkage(_stages, msg) => {
CreateComputePipelineError::Internal(msg)
}
hal::PipelineError::EntryPoint(_stage) => CreateComputePipelineError::Internal(
crate::device::ENTRYPOINT_FAILURE_ERROR.to_string(),
),
hal::PipelineError::PipelineConstants(_, error) => {
CreateComputePipelineError::PipelineConstants(error)
}
})?;
let dst_buffer_desc = hal::BufferDescriptor {
label: None,
size: DST_BUFFER_SIZE.get(),
usage: hal::BufferUses::INDIRECT | hal::BufferUses::STORAGE_READ_WRITE,
memory_flags: hal::MemoryFlags::empty(),
};
let dst_buffer =
unsafe { device.create_buffer(&dst_buffer_desc) }.map_err(DeviceError::from_hal)?;
let dst_bind_group_desc = hal::BindGroupDescriptor {
label: None,
layout: dst_bind_group_layout.as_ref(),
entries: &[hal::BindGroupEntry {
binding: 0,
resource_index: 0,
count: 1,
}],
buffers: &[hal::BufferBinding {
buffer: dst_buffer.as_ref(),
offset: 0,
size: Some(DST_BUFFER_SIZE),
}],
samplers: &[],
textures: &[],
acceleration_structures: &[],
};
let dst_bind_group = unsafe {
device
.create_bind_group(&dst_bind_group_desc)
.map_err(DeviceError::from_hal)
}?;
Ok(Self {
module,
dst_bind_group_layout,
src_bind_group_layout,
pipeline_layout,
pipeline,
dst_buffer,
dst_bind_group,
})
}
/// `Ok(None)` will only be returned if `buffer_size` is `0`.
pub fn create_src_bind_group(
&self,
device: &dyn hal::DynDevice,
limits: &wgt::Limits,
buffer_size: u64,
buffer: &dyn hal::DynBuffer,
) -> Result<Option<Box<dyn hal::DynBindGroup>>, DeviceError> {
let binding_size = calculate_src_buffer_binding_size(buffer_size, limits);
let Some(binding_size) = NonZeroU64::new(binding_size) else {
return Ok(None);
};
let hal_desc = hal::BindGroupDescriptor {
label: None,
layout: self.src_bind_group_layout.as_ref(),
entries: &[hal::BindGroupEntry {
binding: 0,
resource_index: 0,
count: 1,
}],
buffers: &[hal::BufferBinding {
buffer,
offset: 0,
size: Some(binding_size),
}],
samplers: &[],
textures: &[],
acceleration_structures: &[],
};
unsafe {
device
.create_bind_group(&hal_desc)
.map(Some)
.map_err(DeviceError::from_hal)
}
}
pub fn params<'a>(&'a self, limits: &wgt::Limits, offset: u64, buffer_size: u64) -> Params<'a> {
// The offset we receive is only required to be aligned to 4 bytes.
//
// Binding offsets and dynamic offsets are required to be aligned to
// min_storage_buffer_offset_alignment (256 bytes by default).
//
// So, we work around this limitation by calculating an aligned offset
// and pass the remainder through a push constant.
//
// We could bind the whole buffer and only have to pass the offset
// through a push constant but we might run into the
// max_storage_buffer_binding_size limit.
//
// See the inner docs of `calculate_src_buffer_binding_size` to
// see how we get the appropriate `binding_size`.
let alignment = limits.min_storage_buffer_offset_alignment as u64;
let binding_size = calculate_src_buffer_binding_size(buffer_size, limits);
let aligned_offset = offset - offset % alignment;
// This works because `binding_size` is either `buffer_size` or `alignment * 2 + buffer_size % alignment`.
let max_aligned_offset = buffer_size - binding_size;
let aligned_offset = aligned_offset.min(max_aligned_offset);
let offset_remainder = offset - aligned_offset;
Params {
pipeline_layout: self.pipeline_layout.as_ref(),
pipeline: self.pipeline.as_ref(),
dst_buffer: self.dst_buffer.as_ref(),
dst_bind_group: self.dst_bind_group.as_ref(),
aligned_offset,
offset_remainder,
}
}
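// Worked example (a sketch, assuming the default
// `min_storage_buffer_offset_alignment` of 256): with `offset = 260` and
// `buffer_size = 1024`, the binding size is 512, `aligned_offset` becomes
// `260 - 260 % 256 = 256`, and `offset_remainder` is 4; the caller binds the
// source buffer at dynamic offset 256 and accounts for the 4-byte remainder
// via the push constant.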
pub fn dispose(self, device: &dyn hal::DynDevice) {
let IndirectValidation {
module,
dst_bind_group_layout,
src_bind_group_layout,
pipeline_layout,
pipeline,
dst_buffer,
dst_bind_group,
} = self;
unsafe {
device.destroy_bind_group(dst_bind_group);
device.destroy_buffer(dst_buffer);
device.destroy_compute_pipeline(pipeline);
device.destroy_pipeline_layout(pipeline_layout);
device.destroy_bind_group_layout(src_bind_group_layout);
device.destroy_bind_group_layout(dst_bind_group_layout);
device.destroy_shader_module(module);
}
}
}
fn calculate_src_buffer_binding_size(buffer_size: u64, limits: &wgt::Limits) -> u64 {
let alignment = limits.min_storage_buffer_offset_alignment as u64;
// We need to choose a binding size that can address all possible sets of 12 contiguous bytes in the buffer taking
// into account that the dynamic offset needs to be a multiple of `min_storage_buffer_offset_alignment`.
// Given the known variables: `offset`, `buffer_size`, `alignment` and the rule `offset + 12 <= buffer_size`.
// Let `chunks = floor(buffer_size / alignment)`.
// Let `chunk` be the interval `[0, chunks]`.
// Let `offset = alignment * chunk + r` where `r` is the interval [0, alignment - 4].
// Let `binding` be the interval `[offset, offset + 12]`.
// Let `aligned_offset = alignment * chunk`.
// Let `aligned_binding` be the interval `[aligned_offset, aligned_offset + r + 12]`.
// Let `aligned_binding_size = r + 12 = [12, alignment + 8]`.
// Let `min_aligned_binding_size = alignment + 8`.
// `min_aligned_binding_size` is the minimum binding size required to address all 12 contiguous bytes in the buffer
// but the last aligned_offset + min_aligned_binding_size might overflow the buffer. In order to avoid this we must
// pick a larger `binding_size` that satisfies: `last_aligned_offset + binding_size = buffer_size` and
// `binding_size >= min_aligned_binding_size`.
// Let `buffer_size = alignment * chunks + sr` where `sr` is the interval [0, alignment - 4].
// Let `last_aligned_offset = alignment * (chunks - u)` where `u` is the interval [0, chunks].
// => `binding_size = buffer_size - last_aligned_offset`
// => `binding_size = alignment * chunks + sr - alignment * (chunks - u)`
// => `binding_size = alignment * chunks + sr - alignment * chunks + alignment * u`
// => `binding_size = sr + alignment * u`
// => `min_aligned_binding_size <= sr + alignment * u`
// => `alignment + 8 <= sr + alignment * u`
// => `u` must be at least 2
// => `binding_size = sr + alignment * 2`
let binding_size = 2 * alignment + (buffer_size % alignment);
binding_size.min(buffer_size)
}
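// Worked example (a sketch): with `buffer_size = 1000` and an alignment of
// 256, `binding_size = 2 * 256 + (1000 % 256) = 744`. Any 12-byte indirect
// argument in the buffer then fits inside a 744-byte binding that starts at
// some multiple of 256, clamped (in `params` above) so the binding never runs
// past the end of the buffer.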

View File

@@ -0,0 +1,39 @@
use super::{InitTracker, MemoryInitKind};
use crate::resource::Buffer;
use std::{ops::Range, sync::Arc};
#[derive(Debug, Clone)]
pub(crate) struct BufferInitTrackerAction {
pub buffer: Arc<Buffer>,
pub range: Range<wgt::BufferAddress>,
pub kind: MemoryInitKind,
}
pub(crate) type BufferInitTracker = InitTracker<wgt::BufferAddress>;
impl BufferInitTracker {
/// Checks if an action has/requires any effect on the initialization status
/// and shrinks its range if possible.
pub(crate) fn check_action(
&self,
action: &BufferInitTrackerAction,
) -> Option<BufferInitTrackerAction> {
self.create_action(&action.buffer, action.range.clone(), action.kind)
}
/// Creates an action if it would have any effect on the initialization
/// status and shrinks the range if possible.
pub(crate) fn create_action(
&self,
buffer: &Arc<Buffer>,
query_range: Range<wgt::BufferAddress>,
kind: MemoryInitKind,
) -> Option<BufferInitTrackerAction> {
self.check(query_range)
.map(|range| BufferInitTrackerAction {
buffer: buffer.clone(),
range,
kind,
})
}
}

425
vendor/wgpu-core/src/init_tracker/mod.rs vendored Normal file
View File

@@ -0,0 +1,425 @@
/*! Lazy initialization of texture and buffer memory.
The WebGPU specification requires all texture & buffer memory to be
zero initialized on first read. To avoid unnecessary inits, we track
the initialization status of every resource and perform inits lazily.
The granularity is different for buffers and textures:
- Buffer: Byte granularity to support use cases with large, partially
bound buffers well.
- Texture: Mip-level per layer. That is, a 2D surface is either
completely initialized or not, subrects are not tracked.
Every use of a buffer/texture generates an `InitTrackerAction`, which is
recorded and later resolved at queue submit by merging it with the
current state and the other recorded actions, in execution order.
It is important to note that from the point of view of the memory init
system there are two kinds of writes:
- **Full writes**: Any kind of memcpy operation. These cause a
`MemoryInitKind.ImplicitlyInitialized` action.
- **(Potentially) partial writes**: For example, a write access in a
shader. The system cannot determine whether the resource is fully
initialized afterwards, but it is also no longer allowed to perform any
clears, therefore this leads to a
`MemoryInitKind.NeedsInitializedMemory` action, exactly like a read
would.
*/
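// A minimal usage sketch (illustrative only; `InitTracker` is crate-internal):
//
//     let mut tracker = InitTracker::<u32>::new(1024); // all bytes start uninitialized
//     assert_eq!(tracker.check(0..64), Some(0..64));   // a read here needs a clear first
//     for range in tracker.drain(0..64) { /* zero-fill `range` */ }
//     assert_eq!(tracker.check(0..64), None);          // now tracked as initialized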
use smallvec::SmallVec;
use std::{fmt, iter, ops::Range};
mod buffer;
mod texture;
pub(crate) use buffer::{BufferInitTracker, BufferInitTrackerAction};
pub(crate) use texture::{
has_copy_partial_init_tracker_coverage, TextureInitRange, TextureInitTracker,
TextureInitTrackerAction,
};
#[derive(Debug, Clone, Copy)]
pub(crate) enum MemoryInitKind {
// The memory range is going to be written by an already initialized source,
// thus doesn't need extra attention other than marking as initialized.
ImplicitlyInitialized,
// The memory range is going to be read, therefore needs to ensure prior
// initialization.
NeedsInitializedMemory,
}
// Most of the time a resource is either fully uninitialized (one element) or
// initialized (zero elements).
type UninitializedRangeVec<Idx> = SmallVec<[Range<Idx>; 1]>;
/// Tracks initialization status of a linear range from 0..size
#[derive(Debug, Clone)]
pub(crate) struct InitTracker<Idx: Ord + Copy + Default> {
/// Non-overlapping list of all uninitialized ranges, sorted by
/// range end.
uninitialized_ranges: UninitializedRangeVec<Idx>,
}
pub(crate) struct UninitializedIter<'a, Idx: fmt::Debug + Ord + Copy> {
uninitialized_ranges: &'a UninitializedRangeVec<Idx>,
drain_range: Range<Idx>,
next_index: usize,
}
impl<'a, Idx> Iterator for UninitializedIter<'a, Idx>
where
Idx: fmt::Debug + Ord + Copy,
{
type Item = Range<Idx>;
fn next(&mut self) -> Option<Self::Item> {
self.uninitialized_ranges
.get(self.next_index)
.and_then(|range| {
if range.start < self.drain_range.end {
self.next_index += 1;
Some(
range.start.max(self.drain_range.start)
..range.end.min(self.drain_range.end),
)
} else {
None
}
})
}
}
pub(crate) struct InitTrackerDrain<'a, Idx: fmt::Debug + Ord + Copy> {
uninitialized_ranges: &'a mut UninitializedRangeVec<Idx>,
drain_range: Range<Idx>,
first_index: usize,
next_index: usize,
}
impl<'a, Idx> Iterator for InitTrackerDrain<'a, Idx>
where
Idx: fmt::Debug + Ord + Copy,
{
type Item = Range<Idx>;
fn next(&mut self) -> Option<Self::Item> {
if let Some(r) = self
.uninitialized_ranges
.get(self.next_index)
.and_then(|range| {
if range.start < self.drain_range.end {
Some(range.clone())
} else {
None
}
})
{
self.next_index += 1;
Some(r.start.max(self.drain_range.start)..r.end.min(self.drain_range.end))
} else {
let num_affected = self.next_index - self.first_index;
if num_affected == 0 {
return None;
}
let first_range = &mut self.uninitialized_ranges[self.first_index];
// Split one "big" uninitialized range?
if num_affected == 1
&& first_range.start < self.drain_range.start
&& first_range.end > self.drain_range.end
{
let old_start = first_range.start;
first_range.start = self.drain_range.end;
self.uninitialized_ranges
.insert(self.first_index, old_start..self.drain_range.start);
}
// Adjust border ranges and delete everything in-between.
else {
let remove_start = if first_range.start >= self.drain_range.start {
self.first_index
} else {
first_range.end = self.drain_range.start;
self.first_index + 1
};
let last_range = &mut self.uninitialized_ranges[self.next_index - 1];
let remove_end = if last_range.end <= self.drain_range.end {
self.next_index
} else {
last_range.start = self.drain_range.end;
self.next_index - 1
};
self.uninitialized_ranges.drain(remove_start..remove_end);
}
None
}
}
}
impl<'a, Idx> Drop for InitTrackerDrain<'a, Idx>
where
Idx: fmt::Debug + Ord + Copy,
{
fn drop(&mut self) {
if self.next_index <= self.first_index {
for _ in self {}
}
}
}
impl<Idx> InitTracker<Idx>
where
Idx: fmt::Debug + Ord + Copy + Default,
{
pub(crate) fn new(size: Idx) -> Self {
Self {
uninitialized_ranges: iter::once(Idx::default()..size).collect(),
}
}
/// Checks for uninitialized ranges within a given query range.
///
/// If `query_range` includes any uninitialized portions of this init
/// tracker's resource, return the smallest subrange of `query_range` that
/// covers all uninitialized regions.
///
/// The returned range may be larger than necessary, to keep this function
/// O(log n).
pub(crate) fn check(&self, query_range: Range<Idx>) -> Option<Range<Idx>> {
let index = self
.uninitialized_ranges
.partition_point(|r| r.end <= query_range.start);
self.uninitialized_ranges
.get(index)
.and_then(|start_range| {
if start_range.start < query_range.end {
let start = start_range.start.max(query_range.start);
match self.uninitialized_ranges.get(index + 1) {
Some(next_range) => {
if next_range.start < query_range.end {
// Would need to keep iterating for more
// accurate upper bound. Don't do that here.
Some(start..query_range.end)
} else {
Some(start..start_range.end.min(query_range.end))
}
}
None => Some(start..start_range.end.min(query_range.end)),
}
} else {
None
}
})
}
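// For example (taken from the tests at the bottom of this file): with
// uninitialized ranges `[5..10, 15..20]`, `check(8..22)` returns `Some(8..22)`
// even though `Some(8..20)` would be tight, because only the first two
// candidate ranges are inspected.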
// Returns an iterator over the uninitialized ranges in a query range.
pub(crate) fn uninitialized(&mut self, drain_range: Range<Idx>) -> UninitializedIter<Idx> {
let index = self
.uninitialized_ranges
.partition_point(|r| r.end <= drain_range.start);
UninitializedIter {
drain_range,
uninitialized_ranges: &self.uninitialized_ranges,
next_index: index,
}
}
// Drains uninitialized ranges in a query range.
pub(crate) fn drain(&mut self, drain_range: Range<Idx>) -> InitTrackerDrain<Idx> {
let index = self
.uninitialized_ranges
.partition_point(|r| r.end <= drain_range.start);
InitTrackerDrain {
drain_range,
uninitialized_ranges: &mut self.uninitialized_ranges,
first_index: index,
next_index: index,
}
}
}
impl InitTracker<u32> {
// Makes a single entry uninitialized if not already uninitialized
#[allow(dead_code)]
pub(crate) fn discard(&mut self, pos: u32) {
// first range where end>=idx
let r_idx = self.uninitialized_ranges.partition_point(|r| r.end < pos);
if let Some(r) = self.uninitialized_ranges.get(r_idx) {
// Extend range at end
if r.end == pos {
// merge with next?
if let Some(right) = self.uninitialized_ranges.get(r_idx + 1) {
if right.start == pos + 1 {
self.uninitialized_ranges[r_idx] = r.start..right.end;
self.uninitialized_ranges.remove(r_idx + 1);
return;
}
}
self.uninitialized_ranges[r_idx] = r.start..(pos + 1);
} else if r.start > pos {
// may still extend range at beginning
if r.start == pos + 1 {
self.uninitialized_ranges[r_idx] = pos..r.end;
} else {
// previous range end must be smaller than idx, therefore no merge possible
self.uninitialized_ranges.push(pos..(pos + 1));
}
}
} else {
self.uninitialized_ranges.push(pos..(pos + 1));
}
}
}
#[cfg(test)]
mod test {
use std::ops::Range;
type Tracker = super::InitTracker<u32>;
#[test]
fn check_for_newly_created_tracker() {
let tracker = Tracker::new(10);
assert_eq!(tracker.check(0..10), Some(0..10));
assert_eq!(tracker.check(0..3), Some(0..3));
assert_eq!(tracker.check(3..4), Some(3..4));
assert_eq!(tracker.check(4..10), Some(4..10));
}
#[test]
fn check_for_drained_tracker() {
let mut tracker = Tracker::new(10);
tracker.drain(0..10);
assert_eq!(tracker.check(0..10), None);
assert_eq!(tracker.check(0..3), None);
assert_eq!(tracker.check(3..4), None);
assert_eq!(tracker.check(4..10), None);
}
#[test]
fn check_for_partially_filled_tracker() {
let mut tracker = Tracker::new(25);
// Two regions of uninitialized memory
tracker.drain(0..5);
tracker.drain(10..15);
tracker.drain(20..25);
assert_eq!(tracker.check(0..25), Some(5..25)); // entire range
assert_eq!(tracker.check(0..5), None); // left non-overlapping
assert_eq!(tracker.check(3..8), Some(5..8)); // left overlapping region
assert_eq!(tracker.check(3..17), Some(5..17)); // left overlapping region + contained region
// right overlapping region + contained region (yes, doesn't fix range end!)
assert_eq!(tracker.check(8..22), Some(8..22));
// right overlapping region
assert_eq!(tracker.check(17..22), Some(17..20));
// right non-overlapping
assert_eq!(tracker.check(20..25), None);
}
#[test]
fn drain_already_drained() {
let mut tracker = Tracker::new(30);
tracker.drain(10..20);
// Overlapping with non-cleared
tracker.drain(5..15); // Left overlap
tracker.drain(15..25); // Right overlap
tracker.drain(0..30); // Inner overlap
// Clear fully cleared
tracker.drain(0..30);
assert_eq!(tracker.check(0..30), None);
}
#[test]
fn drain_never_returns_ranges_twice_for_same_range() {
let mut tracker = Tracker::new(19);
assert_eq!(tracker.drain(0..19).count(), 1);
assert_eq!(tracker.drain(0..19).count(), 0);
let mut tracker = Tracker::new(17);
assert_eq!(tracker.drain(5..8).count(), 1);
assert_eq!(tracker.drain(5..8).count(), 0);
assert_eq!(tracker.drain(1..3).count(), 1);
assert_eq!(tracker.drain(1..3).count(), 0);
assert_eq!(tracker.drain(7..13).count(), 1);
assert_eq!(tracker.drain(7..13).count(), 0);
}
#[test]
fn drain_splits_ranges_correctly() {
let mut tracker = Tracker::new(1337);
assert_eq!(
tracker.drain(21..42).collect::<Vec<Range<u32>>>(),
vec![21..42]
);
assert_eq!(
tracker.drain(900..1000).collect::<Vec<Range<u32>>>(),
vec![900..1000]
);
// Split ranges.
assert_eq!(
tracker.drain(5..1003).collect::<Vec<Range<u32>>>(),
vec![5..21, 42..900, 1000..1003]
);
assert_eq!(
tracker.drain(0..1337).collect::<Vec<Range<u32>>>(),
vec![0..5, 1003..1337]
);
}
#[test]
fn discard_adds_range_on_cleared() {
let mut tracker = Tracker::new(10);
tracker.drain(0..10);
tracker.discard(0);
tracker.discard(5);
tracker.discard(9);
assert_eq!(tracker.check(0..1), Some(0..1));
assert_eq!(tracker.check(1..5), None);
assert_eq!(tracker.check(5..6), Some(5..6));
assert_eq!(tracker.check(6..9), None);
assert_eq!(tracker.check(9..10), Some(9..10));
}
#[test]
fn discard_does_nothing_on_uncleared() {
let mut tracker = Tracker::new(10);
tracker.discard(0);
tracker.discard(5);
tracker.discard(9);
assert_eq!(tracker.uninitialized_ranges.len(), 1);
assert_eq!(tracker.uninitialized_ranges[0], 0..10);
}
#[test]
fn discard_extends_ranges() {
let mut tracker = Tracker::new(10);
tracker.drain(3..7);
tracker.discard(2);
tracker.discard(7);
assert_eq!(tracker.uninitialized_ranges.len(), 2);
assert_eq!(tracker.uninitialized_ranges[0], 0..3);
assert_eq!(tracker.uninitialized_ranges[1], 7..10);
}
#[test]
fn discard_merges_ranges() {
let mut tracker = Tracker::new(10);
tracker.drain(3..4);
tracker.discard(3);
assert_eq!(tracker.uninitialized_ranges.len(), 1);
assert_eq!(tracker.uninitialized_ranges[0], 0..10);
}
}

103
vendor/wgpu-core/src/init_tracker/texture.rs vendored Normal file
View File

@@ -0,0 +1,103 @@
use super::{InitTracker, MemoryInitKind};
use crate::{resource::Texture, track::TextureSelector};
use arrayvec::ArrayVec;
use std::{ops::Range, sync::Arc};
#[derive(Debug, Clone)]
pub(crate) struct TextureInitRange {
pub(crate) mip_range: Range<u32>,
// Strictly array layers. We do *not* track volume slices separately.
pub(crate) layer_range: Range<u32>,
}
// Returns true if a copy operation doesn't fully cover the texture init
// tracking granularity. That is, if this function returns true for a pending
// copy operation, the target texture must be initialized first.
pub(crate) fn has_copy_partial_init_tracker_coverage(
copy_size: &wgt::Extent3d,
mip_level: u32,
desc: &wgt::TextureDescriptor<(), Vec<wgt::TextureFormat>>,
) -> bool {
let target_size = desc.mip_level_size(mip_level).unwrap();
copy_size.width != target_size.width
|| copy_size.height != target_size.height
|| (desc.dimension == wgt::TextureDimension::D3
&& copy_size.depth_or_array_layers != target_size.depth_or_array_layers)
}
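// Illustrative sketch (hypothetical `desc` describing a 256x256 2D texture):
// copying a 128x128 extent into mip level 0 covers only part of that mip, so
//
//     has_copy_partial_init_tracker_coverage(
//         &wgt::Extent3d { width: 128, height: 128, depth_or_array_layers: 1 },
//         0,
//         &desc,
//     )
//
// returns `true` and the destination must be initialized first, while copying
// the full 256x256 extent returns `false`.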
impl From<TextureSelector> for TextureInitRange {
fn from(selector: TextureSelector) -> Self {
TextureInitRange {
mip_range: selector.mips,
layer_range: selector.layers,
}
}
}
#[derive(Debug, Clone)]
pub(crate) struct TextureInitTrackerAction {
pub(crate) texture: Arc<Texture>,
pub(crate) range: TextureInitRange,
pub(crate) kind: MemoryInitKind,
}
pub(crate) type TextureLayerInitTracker = InitTracker<u32>;
#[derive(Debug)]
pub(crate) struct TextureInitTracker {
pub mips: ArrayVec<TextureLayerInitTracker, { hal::MAX_MIP_LEVELS as usize }>,
}
impl TextureInitTracker {
pub(crate) fn new(mip_level_count: u32, depth_or_array_layers: u32) -> Self {
TextureInitTracker {
mips: std::iter::repeat(TextureLayerInitTracker::new(depth_or_array_layers))
.take(mip_level_count as usize)
.collect(),
}
}
pub(crate) fn check_action(
&self,
action: &TextureInitTrackerAction,
) -> Option<TextureInitTrackerAction> {
let mut mip_range_start = usize::MAX;
let mut mip_range_end = usize::MIN;
let mut layer_range_start = u32::MAX;
let mut layer_range_end = u32::MIN;
for (i, mip_tracker) in self
.mips
.iter()
.enumerate()
.take(action.range.mip_range.end as usize)
.skip(action.range.mip_range.start as usize)
{
if let Some(uninitialized_layer_range) =
mip_tracker.check(action.range.layer_range.clone())
{
mip_range_start = mip_range_start.min(i);
mip_range_end = i + 1;
layer_range_start = layer_range_start.min(uninitialized_layer_range.start);
layer_range_end = layer_range_end.max(uninitialized_layer_range.end);
};
}
if mip_range_start < mip_range_end && layer_range_start < layer_range_end {
Some(TextureInitTrackerAction {
texture: action.texture.clone(),
range: TextureInitRange {
mip_range: mip_range_start as u32..mip_range_end as u32,
layer_range: layer_range_start..layer_range_end,
},
kind: action.kind,
})
} else {
None
}
}
pub(crate) fn discard(&mut self, mip_level: u32, layer: u32) {
self.mips[mip_level as usize].discard(layer);
}
}
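// Illustrative behavior sketch (hypothetical values): if only mip 1, layers 2..4
// of a texture are still uninitialized, then `check_action` on an action covering
// mips 0..3 and layers 0..6 returns a trimmed action with `mip_range: 1..2` and
// `layer_range: 2..4`; for a fully initialized texture it returns `None`.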

987
vendor/wgpu-core/src/instance.rs vendored Normal file
View File

@@ -0,0 +1,987 @@
use std::sync::Arc;
use std::{borrow::Cow, collections::HashMap};
use crate::{
api_log, api_log_debug,
device::{queue::Queue, resource::Device, DeviceDescriptor, DeviceError},
global::Global,
hal_api::HalApi,
id::{markers, AdapterId, DeviceId, QueueId, SurfaceId},
lock::{rank, Mutex},
present::Presentation,
resource::ResourceType,
resource_log, DOWNLEVEL_WARNING_MESSAGE,
};
use wgt::{Backend, Backends, PowerPreference};
use thiserror::Error;
pub type RequestAdapterOptions = wgt::RequestAdapterOptions<SurfaceId>;
#[derive(Clone, Debug, Error)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[error("Limit '{name}' value {requested} is better than allowed {allowed}")]
pub struct FailedLimit {
name: Cow<'static, str>,
requested: u64,
allowed: u64,
}
fn check_limits(requested: &wgt::Limits, allowed: &wgt::Limits) -> Vec<FailedLimit> {
let mut failed = Vec::new();
requested.check_limits_with_fail_fn(allowed, false, |name, requested, allowed| {
failed.push(FailedLimit {
name: Cow::Borrowed(name),
requested,
allowed,
})
});
failed
}
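// Illustrative sketch (hypothetical bindings): raising a single limit above
// what the adapter allows yields exactly one `FailedLimit` describing it.
//
//     let allowed = wgt::Limits::default();
//     let mut requested = allowed.clone();
//     requested.max_bind_groups = allowed.max_bind_groups + 1;
//     assert_eq!(check_limits(&requested, &allowed).len(), 1);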
#[test]
fn downlevel_default_limits_less_than_default_limits() {
let res = check_limits(&wgt::Limits::downlevel_defaults(), &wgt::Limits::default());
assert!(
res.is_empty(),
"Downlevel limits are greater than default limits",
)
}
#[derive(Default)]
pub struct Instance {
#[allow(dead_code)]
pub name: String,
/// List of instances per backend.
///
/// The ordering in this list implies prioritization and needs to be preserved.
pub instance_per_backend: Vec<(Backend, Box<dyn hal::DynInstance>)>,
pub flags: wgt::InstanceFlags,
}
impl Instance {
pub fn new(name: &str, instance_desc: &wgt::InstanceDescriptor) -> Self {
fn init<A: HalApi>(
_: A,
instance_desc: &wgt::InstanceDescriptor,
instance_per_backend: &mut Vec<(Backend, Box<dyn hal::DynInstance>)>,
) {
if instance_desc.backends.contains(A::VARIANT.into()) {
let hal_desc = hal::InstanceDescriptor {
name: "wgpu",
flags: instance_desc.flags,
dx12_shader_compiler: instance_desc
.backend_options
.dx12
.shader_compiler
.clone(),
gles_minor_version: instance_desc.backend_options.gl.gles_minor_version,
};
use hal::Instance as _;
match unsafe { A::Instance::init(&hal_desc) } {
Ok(instance) => {
log::debug!("Instance::new: created {:?} backend", A::VARIANT);
instance_per_backend.push((A::VARIANT, Box::new(instance)));
}
Err(err) => {
log::debug!(
"Instance::new: failed to create {:?} backend: {:?}",
A::VARIANT,
err
);
}
}
} else {
log::trace!("Instance::new: backend {:?} not requested", A::VARIANT);
}
}
let mut instance_per_backend = Vec::new();
#[cfg(vulkan)]
init(hal::api::Vulkan, instance_desc, &mut instance_per_backend);
#[cfg(metal)]
init(hal::api::Metal, instance_desc, &mut instance_per_backend);
#[cfg(dx12)]
init(hal::api::Dx12, instance_desc, &mut instance_per_backend);
#[cfg(gles)]
init(hal::api::Gles, instance_desc, &mut instance_per_backend);
Self {
name: name.to_string(),
instance_per_backend,
flags: instance_desc.flags,
}
}
pub fn raw(&self, backend: Backend) -> Option<&dyn hal::DynInstance> {
self.instance_per_backend
.iter()
.find_map(|(instance_backend, instance)| {
(*instance_backend == backend).then(|| instance.as_ref())
})
}
/// # Safety
///
/// - The raw instance handle returned must not be manually destroyed.
pub unsafe fn as_hal<A: HalApi>(&self) -> Option<&A::Instance> {
self.raw(A::VARIANT).map(|instance| {
instance
.as_any()
.downcast_ref()
// This should be impossible. It would mean that backend instance and enum type are mismatching.
.expect("Stored instance is not of the correct type")
})
}
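// An illustrative (hypothetical) use of `as_hal`, assuming the Vulkan backend
// was compiled in and requested:
//
//     let raw_vk: Option<&hal::vulkan::Instance> =
//         unsafe { instance.as_hal::<hal::api::Vulkan>() };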
/// Creates a new surface targeting the given display/window handles.
///
/// Internally attempts to create hal surfaces for all enabled backends.
///
/// Fails only if surface creation fails for all enabled backends, in which case
/// the error for each enabled backend is listed.
/// Conversely, if creation succeeds for any backend, success is returned.
/// Surface creation errors are logged to the debug log in any case.
///
/// # Safety
///
/// - `display_handle` must be a valid object to create a surface upon.
/// - `window_handle` must remain valid as long as the returned
/// [`SurfaceId`] is being used.
#[cfg(feature = "raw-window-handle")]
pub unsafe fn create_surface(
&self,
display_handle: raw_window_handle::RawDisplayHandle,
window_handle: raw_window_handle::RawWindowHandle,
) -> Result<Surface, CreateSurfaceError> {
profiling::scope!("Instance::create_surface");
let mut errors = HashMap::default();
let mut surface_per_backend = HashMap::default();
for (backend, instance) in &self.instance_per_backend {
match unsafe {
instance
.as_ref()
.create_surface(display_handle, window_handle)
} {
Ok(raw) => {
surface_per_backend.insert(*backend, raw);
}
Err(err) => {
log::debug!(
"Instance::create_surface: failed to create surface for {:?}: {:?}",
backend,
err
);
errors.insert(*backend, err);
}
}
}
if surface_per_backend.is_empty() {
Err(CreateSurfaceError::FailedToCreateSurfaceForAnyBackend(
errors,
))
} else {
let surface = Surface {
presentation: Mutex::new(rank::SURFACE_PRESENTATION, None),
surface_per_backend,
};
Ok(surface)
}
}
/// # Safety
///
/// `layer` must be a valid pointer.
#[cfg(metal)]
pub unsafe fn create_surface_metal(
&self,
layer: *mut std::ffi::c_void,
) -> Result<Surface, CreateSurfaceError> {
profiling::scope!("Instance::create_surface_metal");
let instance = unsafe { self.as_hal::<hal::api::Metal>() }
.ok_or(CreateSurfaceError::BackendNotEnabled(Backend::Metal))?;
let layer = layer.cast();
// SAFETY: We do this cast and deref (rather than using `metal` to get the
// object we want) to avoid direct coupling on the `metal` crate.
//
// To wit, this pointer…
//
// - …is properly aligned.
// - …is dereferenceable to a `MetalLayerRef` as an invariant of the `metal`
// field.
// - …points to an _initialized_ `MetalLayerRef`.
// - …is only ever aliased via an immutable reference that lives within this
// lexical scope.
let layer = unsafe { &*layer };
let raw_surface: Box<dyn hal::DynSurface> =
Box::new(instance.create_surface_from_layer(layer));
let surface = Surface {
presentation: Mutex::new(rank::SURFACE_PRESENTATION, None),
surface_per_backend: std::iter::once((Backend::Metal, raw_surface)).collect(),
};
Ok(surface)
}
#[cfg(dx12)]
fn create_surface_dx12(
&self,
create_surface_func: impl FnOnce(&hal::dx12::Instance) -> hal::dx12::Surface,
) -> Result<Surface, CreateSurfaceError> {
let instance = unsafe { self.as_hal::<hal::api::Dx12>() }
.ok_or(CreateSurfaceError::BackendNotEnabled(Backend::Dx12))?;
let surface: Box<dyn hal::DynSurface> = Box::new(create_surface_func(instance));
let surface = Surface {
presentation: Mutex::new(rank::SURFACE_PRESENTATION, None),
surface_per_backend: std::iter::once((Backend::Dx12, surface)).collect(),
};
Ok(surface)
}
#[cfg(dx12)]
/// # Safety
///
/// The visual must be valid and able to be used to make a swapchain with.
pub unsafe fn create_surface_from_visual(
&self,
visual: *mut std::ffi::c_void,
) -> Result<Surface, CreateSurfaceError> {
profiling::scope!("Instance::instance_create_surface_from_visual");
self.create_surface_dx12(|inst| unsafe { inst.create_surface_from_visual(visual) })
}
#[cfg(dx12)]
/// # Safety
///
/// The surface_handle must be valid and able to be used to make a swapchain with.
pub unsafe fn create_surface_from_surface_handle(
&self,
surface_handle: *mut std::ffi::c_void,
) -> Result<Surface, CreateSurfaceError> {
profiling::scope!("Instance::instance_create_surface_from_surface_handle");
self.create_surface_dx12(|inst| unsafe {
inst.create_surface_from_surface_handle(surface_handle)
})
}
#[cfg(dx12)]
/// # Safety
///
/// The swap_chain_panel must be valid and able to be used to make a swapchain with.
pub unsafe fn create_surface_from_swap_chain_panel(
&self,
swap_chain_panel: *mut std::ffi::c_void,
) -> Result<Surface, CreateSurfaceError> {
profiling::scope!("Instance::instance_create_surface_from_swap_chain_panel");
self.create_surface_dx12(|inst| unsafe {
inst.create_surface_from_swap_chain_panel(swap_chain_panel)
})
}
pub fn enumerate_adapters(&self, backends: Backends) -> Vec<Adapter> {
profiling::scope!("Instance::enumerate_adapters");
api_log!("Instance::enumerate_adapters");
let mut adapters = Vec::new();
for (_backend, instance) in self
.instance_per_backend
.iter()
.filter(|(backend, _)| backends.contains(Backends::from(*backend)))
{
// NOTE: We might be using `profiling` without any features. The empty backend of this
// macro emits no code, so unused code linting changes depending on the backend.
profiling::scope!("enumerating", &*format!("{:?}", _backend));
let hal_adapters = unsafe { instance.enumerate_adapters(None) };
for raw in hal_adapters {
let adapter = Adapter::new(raw);
api_log_debug!("Adapter {:?}", adapter.raw.info);
adapters.push(adapter);
}
}
adapters
}
pub fn request_adapter(
&self,
desc: &wgt::RequestAdapterOptions<&Surface>,
backends: Backends,
) -> Result<Adapter, RequestAdapterError> {
profiling::scope!("Instance::request_adapter");
api_log!("Instance::request_adapter");
let mut adapters = Vec::new();
for (backend, instance) in self
.instance_per_backend
.iter()
.filter(|(backend, _)| backends.contains(Backends::from(*backend)))
{
let compatible_hal_surface = desc
.compatible_surface
.and_then(|surface| surface.raw(*backend));
let mut backend_adapters =
unsafe { instance.enumerate_adapters(compatible_hal_surface) };
if desc.force_fallback_adapter {
backend_adapters.retain(|exposed| exposed.info.device_type == wgt::DeviceType::Cpu);
}
if let Some(surface) = desc.compatible_surface {
backend_adapters.retain(|exposed| {
let capabilities = surface.get_capabilities_with_raw(exposed);
if let Err(err) = capabilities {
log::debug!(
"Adapter {:?} not compatible with surface: {}",
exposed.info,
err
);
false
} else {
true
}
});
}
adapters.extend(backend_adapters);
}
match desc.power_preference {
PowerPreference::LowPower => {
sort(&mut adapters, true);
}
PowerPreference::HighPerformance => {
sort(&mut adapters, false);
}
PowerPreference::None => {}
};
fn sort(adapters: &mut [hal::DynExposedAdapter], prefer_integrated_gpu: bool) {
adapters.sort_by(|a, b| {
get_order(a.info.device_type, prefer_integrated_gpu)
.cmp(&get_order(b.info.device_type, prefer_integrated_gpu))
});
}
fn get_order(device_type: wgt::DeviceType, prefer_integrated_gpu: bool) -> u8 {
// Since devices of type "Other" might really be "Unknown" and come
// from APIs like OpenGL that don't specify device type, prefer more
// specific types over "Other".
//
// This means that backends which do provide accurate device types
// will be preferred if their device type indicates an actual
// hardware GPU (integrated or discrete).
match device_type {
wgt::DeviceType::DiscreteGpu if prefer_integrated_gpu => 2,
wgt::DeviceType::IntegratedGpu if prefer_integrated_gpu => 1,
wgt::DeviceType::DiscreteGpu => 1,
wgt::DeviceType::IntegratedGpu => 2,
wgt::DeviceType::Other => 3,
wgt::DeviceType::VirtualGpu => 4,
wgt::DeviceType::Cpu => 5,
}
}
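// For example, with `prefer_integrated_gpu == false` (HighPerformance), the
// ascending sort above places discrete GPUs (1) before integrated GPUs (2),
// then Other (3), virtual GPUs (4), and CPU adapters (5).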
// `request_adapter` can be a bit of a black box.
// Shine some light on its decision in debug log.
if adapters.is_empty() {
log::debug!("Request adapter didn't find compatible adapters.");
} else {
log::debug!(
"Found {} compatible adapters. Sorted by preference:",
adapters.len()
);
for adapter in &adapters {
log::debug!("* {:?}", adapter.info);
}
}
if let Some(adapter) = adapters.into_iter().next() {
api_log_debug!("Request adapter result {:?}", adapter.info);
let adapter = Adapter::new(adapter);
Ok(adapter)
} else {
Err(RequestAdapterError::NotFound)
}
}
}
pub struct Surface {
pub(crate) presentation: Mutex<Option<Presentation>>,
pub surface_per_backend: HashMap<Backend, Box<dyn hal::DynSurface>>,
}
impl ResourceType for Surface {
const TYPE: &'static str = "Surface";
}
impl crate::storage::StorageItem for Surface {
type Marker = markers::Surface;
}
impl Surface {
pub fn get_capabilities(
&self,
adapter: &Adapter,
) -> Result<hal::SurfaceCapabilities, GetSurfaceSupportError> {
self.get_capabilities_with_raw(&adapter.raw)
}
pub fn get_capabilities_with_raw(
&self,
adapter: &hal::DynExposedAdapter,
) -> Result<hal::SurfaceCapabilities, GetSurfaceSupportError> {
let backend = adapter.backend();
let suf = self
.raw(backend)
.ok_or(GetSurfaceSupportError::NotSupportedByBackend(backend))?;
profiling::scope!("surface_capabilities");
let caps = unsafe { adapter.adapter.surface_capabilities(suf) }
.ok_or(GetSurfaceSupportError::FailedToRetrieveSurfaceCapabilitiesForAdapter)?;
Ok(caps)
}
pub fn raw(&self, backend: Backend) -> Option<&dyn hal::DynSurface> {
self.surface_per_backend
.get(&backend)
.map(|surface| surface.as_ref())
}
}
impl Drop for Surface {
fn drop(&mut self) {
if let Some(present) = self.presentation.lock().take() {
for (&backend, surface) in &self.surface_per_backend {
if backend == present.device.backend() {
unsafe { surface.unconfigure(present.device.raw()) };
}
}
}
}
}
pub struct Adapter {
pub(crate) raw: hal::DynExposedAdapter,
}
impl Adapter {
pub fn new(mut raw: hal::DynExposedAdapter) -> Self {
// WebGPU requires this offset alignment as a lower bound on all adapters.
const MIN_BUFFER_OFFSET_ALIGNMENT_LOWER_BOUND: u32 = 32;
let limits = &mut raw.capabilities.limits;
limits.min_uniform_buffer_offset_alignment = limits
.min_uniform_buffer_offset_alignment
.max(MIN_BUFFER_OFFSET_ALIGNMENT_LOWER_BOUND);
limits.min_storage_buffer_offset_alignment = limits
.min_storage_buffer_offset_alignment
.max(MIN_BUFFER_OFFSET_ALIGNMENT_LOWER_BOUND);
Self { raw }
}
/// Returns the backend this adapter is using.
pub fn backend(&self) -> Backend {
self.raw.backend()
}
pub fn is_surface_supported(&self, surface: &Surface) -> bool {
// If get_capabilities returns Err, then the API does not advertise support for the surface.
//
// This could occur if the user is running their app on Wayland but Vulkan does not support
// VK_KHR_wayland_surface.
surface.get_capabilities(self).is_ok()
}
pub fn get_info(&self) -> wgt::AdapterInfo {
self.raw.info.clone()
}
pub fn features(&self) -> wgt::Features {
self.raw.features
}
pub fn limits(&self) -> wgt::Limits {
self.raw.capabilities.limits.clone()
}
pub fn downlevel_capabilities(&self) -> wgt::DownlevelCapabilities {
self.raw.capabilities.downlevel.clone()
}
pub fn get_presentation_timestamp(&self) -> wgt::PresentationTimestamp {
unsafe { self.raw.adapter.get_presentation_timestamp() }
}
pub fn get_texture_format_features(
&self,
format: wgt::TextureFormat,
) -> wgt::TextureFormatFeatures {
use hal::TextureFormatCapabilities as Tfc;
let caps = unsafe { self.raw.adapter.texture_format_capabilities(format) };
let mut allowed_usages = wgt::TextureUsages::empty();
allowed_usages.set(wgt::TextureUsages::COPY_SRC, caps.contains(Tfc::COPY_SRC));
allowed_usages.set(wgt::TextureUsages::COPY_DST, caps.contains(Tfc::COPY_DST));
allowed_usages.set(
wgt::TextureUsages::TEXTURE_BINDING,
caps.contains(Tfc::SAMPLED),
);
allowed_usages.set(
wgt::TextureUsages::STORAGE_BINDING,
caps.intersects(
Tfc::STORAGE_WRITE_ONLY
| Tfc::STORAGE_READ_ONLY
| Tfc::STORAGE_READ_WRITE
| Tfc::STORAGE_ATOMIC,
),
);
allowed_usages.set(
wgt::TextureUsages::RENDER_ATTACHMENT,
caps.intersects(Tfc::COLOR_ATTACHMENT | Tfc::DEPTH_STENCIL_ATTACHMENT),
);
allowed_usages.set(
wgt::TextureUsages::STORAGE_ATOMIC,
caps.contains(Tfc::STORAGE_ATOMIC),
);
let mut flags = wgt::TextureFormatFeatureFlags::empty();
flags.set(
wgt::TextureFormatFeatureFlags::STORAGE_READ_ONLY,
caps.contains(Tfc::STORAGE_READ_ONLY),
);
flags.set(
wgt::TextureFormatFeatureFlags::STORAGE_WRITE_ONLY,
caps.contains(Tfc::STORAGE_WRITE_ONLY),
);
flags.set(
wgt::TextureFormatFeatureFlags::STORAGE_READ_WRITE,
caps.contains(Tfc::STORAGE_READ_WRITE),
);
flags.set(
wgt::TextureFormatFeatureFlags::STORAGE_ATOMIC,
caps.contains(Tfc::STORAGE_ATOMIC),
);
flags.set(
wgt::TextureFormatFeatureFlags::FILTERABLE,
caps.contains(Tfc::SAMPLED_LINEAR),
);
flags.set(
wgt::TextureFormatFeatureFlags::BLENDABLE,
caps.contains(Tfc::COLOR_ATTACHMENT_BLEND),
);
flags.set(
wgt::TextureFormatFeatureFlags::MULTISAMPLE_X2,
caps.contains(Tfc::MULTISAMPLE_X2),
);
flags.set(
wgt::TextureFormatFeatureFlags::MULTISAMPLE_X4,
caps.contains(Tfc::MULTISAMPLE_X4),
);
flags.set(
wgt::TextureFormatFeatureFlags::MULTISAMPLE_X8,
caps.contains(Tfc::MULTISAMPLE_X8),
);
flags.set(
wgt::TextureFormatFeatureFlags::MULTISAMPLE_X16,
caps.contains(Tfc::MULTISAMPLE_X16),
);
flags.set(
wgt::TextureFormatFeatureFlags::MULTISAMPLE_RESOLVE,
caps.contains(Tfc::MULTISAMPLE_RESOLVE),
);
wgt::TextureFormatFeatures {
allowed_usages,
flags,
}
}
#[allow(clippy::type_complexity)]
fn create_device_and_queue_from_hal(
self: &Arc<Self>,
hal_device: hal::DynOpenDevice,
desc: &DeviceDescriptor,
instance_flags: wgt::InstanceFlags,
trace_path: Option<&std::path::Path>,
) -> Result<(Arc<Device>, Arc<Queue>), RequestDeviceError> {
api_log!("Adapter::create_device");
let device = Device::new(hal_device.device, self, desc, trace_path, instance_flags)?;
let device = Arc::new(device);
let queue = Queue::new(device.clone(), hal_device.queue)?;
let queue = Arc::new(queue);
device.set_queue(&queue);
Ok((device, queue))
}
pub fn create_device_and_queue(
self: &Arc<Self>,
desc: &DeviceDescriptor,
instance_flags: wgt::InstanceFlags,
trace_path: Option<&std::path::Path>,
) -> Result<(Arc<Device>, Arc<Queue>), RequestDeviceError> {
// Verify all features were exposed by the adapter
if !self.raw.features.contains(desc.required_features) {
return Err(RequestDeviceError::UnsupportedFeature(
desc.required_features - self.raw.features,
));
}
let caps = &self.raw.capabilities;
if Backends::PRIMARY.contains(Backends::from(self.backend()))
&& !caps.downlevel.is_webgpu_compliant()
{
let missing_flags = wgt::DownlevelFlags::compliant() - caps.downlevel.flags;
log::warn!(
"Missing downlevel flags: {:?}\n{}",
missing_flags,
DOWNLEVEL_WARNING_MESSAGE
);
log::warn!("{:#?}", caps.downlevel);
}
// Verify feature preconditions
if desc
.required_features
.contains(wgt::Features::MAPPABLE_PRIMARY_BUFFERS)
&& self.raw.info.device_type == wgt::DeviceType::DiscreteGpu
{
log::warn!(
"Feature MAPPABLE_PRIMARY_BUFFERS enabled on a discrete gpu. \
This is a massive performance footgun and likely not what you wanted"
);
}
if let Some(failed) = check_limits(&desc.required_limits, &caps.limits).pop() {
return Err(RequestDeviceError::LimitsExceeded(failed));
}
let open = unsafe {
self.raw.adapter.open(
desc.required_features,
&desc.required_limits,
&desc.memory_hints,
)
}
.map_err(DeviceError::from_hal)?;
self.create_device_and_queue_from_hal(open, desc, instance_flags, trace_path)
}
}
crate::impl_resource_type!(Adapter);
crate::impl_storage_item!(Adapter);
#[derive(Clone, Debug, Error)]
#[non_exhaustive]
pub enum GetSurfaceSupportError {
#[error("Surface is not supported for the specified backend {0}")]
NotSupportedByBackend(Backend),
#[error("Failed to retrieve surface capabilities for the specified adapter.")]
FailedToRetrieveSurfaceCapabilitiesForAdapter,
}
#[derive(Clone, Debug, Error)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
/// Error when requesting a device from the adapter
#[non_exhaustive]
pub enum RequestDeviceError {
#[error(transparent)]
Device(#[from] DeviceError),
#[error(transparent)]
LimitsExceeded(#[from] FailedLimit),
#[error("Unsupported features were requested: {0:?}")]
UnsupportedFeature(wgt::Features),
}
#[derive(Clone, Debug, Error)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[non_exhaustive]
pub enum RequestAdapterError {
#[error("No suitable adapter found")]
NotFound,
}
#[derive(Clone, Debug, Error)]
#[non_exhaustive]
pub enum CreateSurfaceError {
#[error("The backend {0} was not enabled on the instance.")]
BackendNotEnabled(Backend),
#[error("Failed to create surface for any enabled backend: {0:?}")]
FailedToCreateSurfaceForAnyBackend(HashMap<Backend, hal::InstanceError>),
}
impl Global {
/// Creates a new surface targeting the given display/window handles.
///
/// Internally attempts to create hal surfaces for all enabled backends.
///
/// Fails only if surface creation fails for all enabled backends, in which case
/// the error for each enabled backend is listed.
/// Conversely, if creation succeeds for any backend, success is returned.
/// Surface creation errors are logged to the debug log in any case.
///
/// id_in:
/// - If `Some`, the id to assign to the surface. A new one will be generated otherwise.
///
/// # Safety
///
/// - `display_handle` must be a valid object to create a surface upon.
/// - `window_handle` must remain valid as long as the returned
/// [`SurfaceId`] is being used.
#[cfg(feature = "raw-window-handle")]
pub unsafe fn instance_create_surface(
&self,
display_handle: raw_window_handle::RawDisplayHandle,
window_handle: raw_window_handle::RawWindowHandle,
id_in: Option<SurfaceId>,
) -> Result<SurfaceId, CreateSurfaceError> {
let surface = unsafe { self.instance.create_surface(display_handle, window_handle) }?;
let id = self.surfaces.prepare(id_in).assign(Arc::new(surface));
Ok(id)
}
/// # Safety
///
/// `layer` must be a valid pointer.
#[cfg(metal)]
pub unsafe fn instance_create_surface_metal(
&self,
layer: *mut std::ffi::c_void,
id_in: Option<SurfaceId>,
) -> Result<SurfaceId, CreateSurfaceError> {
let surface = unsafe { self.instance.create_surface_metal(layer) }?;
let id = self.surfaces.prepare(id_in).assign(Arc::new(surface));
Ok(id)
}
#[cfg(dx12)]
/// # Safety
///
/// The visual must be valid and able to be used to make a swapchain with.
pub unsafe fn instance_create_surface_from_visual(
&self,
visual: *mut std::ffi::c_void,
id_in: Option<SurfaceId>,
) -> Result<SurfaceId, CreateSurfaceError> {
let surface = unsafe { self.instance.create_surface_from_visual(visual) }?;
let id = self.surfaces.prepare(id_in).assign(Arc::new(surface));
Ok(id)
}
#[cfg(dx12)]
/// # Safety
///
/// The surface_handle must be valid and able to be used to make a swapchain with.
pub unsafe fn instance_create_surface_from_surface_handle(
&self,
surface_handle: *mut std::ffi::c_void,
id_in: Option<SurfaceId>,
) -> Result<SurfaceId, CreateSurfaceError> {
let surface = unsafe {
self.instance
.create_surface_from_surface_handle(surface_handle)
}?;
let id = self.surfaces.prepare(id_in).assign(Arc::new(surface));
Ok(id)
}
#[cfg(dx12)]
/// # Safety
///
/// The swap_chain_panel must be valid and able to be used to make a swapchain with.
pub unsafe fn instance_create_surface_from_swap_chain_panel(
&self,
swap_chain_panel: *mut std::ffi::c_void,
id_in: Option<SurfaceId>,
) -> Result<SurfaceId, CreateSurfaceError> {
let surface = unsafe {
self.instance
.create_surface_from_swap_chain_panel(swap_chain_panel)
}?;
let id = self.surfaces.prepare(id_in).assign(Arc::new(surface));
Ok(id)
}
pub fn surface_drop(&self, id: SurfaceId) {
profiling::scope!("Surface::drop");
api_log!("Surface::drop {id:?}");
self.surfaces.remove(id);
}
pub fn enumerate_adapters(&self, backends: Backends) -> Vec<AdapterId> {
let adapters = self.instance.enumerate_adapters(backends);
adapters
.into_iter()
.map(|adapter| self.hub.adapters.prepare(None).assign(Arc::new(adapter)))
.collect()
}
pub fn request_adapter(
&self,
desc: &RequestAdapterOptions,
backends: Backends,
id_in: Option<AdapterId>,
) -> Result<AdapterId, RequestAdapterError> {
let compatible_surface = desc.compatible_surface.map(|id| self.surfaces.get(id));
let desc = wgt::RequestAdapterOptions {
power_preference: desc.power_preference,
force_fallback_adapter: desc.force_fallback_adapter,
compatible_surface: compatible_surface.as_deref(),
};
let adapter = self.instance.request_adapter(&desc, backends)?;
let id = self.hub.adapters.prepare(id_in).assign(Arc::new(adapter));
Ok(id)
}
/// # Safety
///
/// `hal_adapter` must be created from this global internal instance handle.
pub unsafe fn create_adapter_from_hal(
&self,
hal_adapter: hal::DynExposedAdapter,
input: Option<AdapterId>,
) -> AdapterId {
profiling::scope!("Instance::create_adapter_from_hal");
let fid = self.hub.adapters.prepare(input);
let id = fid.assign(Arc::new(Adapter::new(hal_adapter)));
resource_log!("Created Adapter {:?}", id);
id
}
pub fn adapter_get_info(&self, adapter_id: AdapterId) -> wgt::AdapterInfo {
let adapter = self.hub.adapters.get(adapter_id);
adapter.get_info()
}
pub fn adapter_get_texture_format_features(
&self,
adapter_id: AdapterId,
format: wgt::TextureFormat,
) -> wgt::TextureFormatFeatures {
let adapter = self.hub.adapters.get(adapter_id);
adapter.get_texture_format_features(format)
}
pub fn adapter_features(&self, adapter_id: AdapterId) -> wgt::Features {
let adapter = self.hub.adapters.get(adapter_id);
adapter.features()
}
pub fn adapter_limits(&self, adapter_id: AdapterId) -> wgt::Limits {
let adapter = self.hub.adapters.get(adapter_id);
adapter.limits()
}
pub fn adapter_downlevel_capabilities(
&self,
adapter_id: AdapterId,
) -> wgt::DownlevelCapabilities {
let adapter = self.hub.adapters.get(adapter_id);
adapter.downlevel_capabilities()
}
pub fn adapter_get_presentation_timestamp(
&self,
adapter_id: AdapterId,
) -> wgt::PresentationTimestamp {
let adapter = self.hub.adapters.get(adapter_id);
adapter.get_presentation_timestamp()
}
pub fn adapter_drop(&self, adapter_id: AdapterId) {
profiling::scope!("Adapter::drop");
api_log!("Adapter::drop {adapter_id:?}");
self.hub.adapters.remove(adapter_id);
}
}
impl Global {
pub fn adapter_request_device(
&self,
adapter_id: AdapterId,
desc: &DeviceDescriptor,
trace_path: Option<&std::path::Path>,
device_id_in: Option<DeviceId>,
queue_id_in: Option<QueueId>,
) -> Result<(DeviceId, QueueId), RequestDeviceError> {
profiling::scope!("Adapter::request_device");
api_log!("Adapter::request_device");
let device_fid = self.hub.devices.prepare(device_id_in);
let queue_fid = self.hub.queues.prepare(queue_id_in);
let adapter = self.hub.adapters.get(adapter_id);
let (device, queue) =
adapter.create_device_and_queue(desc, self.instance.flags, trace_path)?;
let device_id = device_fid.assign(device);
resource_log!("Created Device {:?}", device_id);
let queue_id = queue_fid.assign(queue);
resource_log!("Created Queue {:?}", queue_id);
Ok((device_id, queue_id))
}
/// # Safety
///
/// - `hal_device` must be created from `adapter_id` or its internal handle.
/// - `desc` must be a subset of `hal_device` features and limits.
pub unsafe fn create_device_from_hal(
&self,
adapter_id: AdapterId,
hal_device: hal::DynOpenDevice,
desc: &DeviceDescriptor,
trace_path: Option<&std::path::Path>,
device_id_in: Option<DeviceId>,
queue_id_in: Option<QueueId>,
) -> Result<(DeviceId, QueueId), RequestDeviceError> {
profiling::scope!("Global::create_device_from_hal");
let devices_fid = self.hub.devices.prepare(device_id_in);
let queues_fid = self.hub.queues.prepare(queue_id_in);
let adapter = self.hub.adapters.get(adapter_id);
let (device, queue) = adapter.create_device_and_queue_from_hal(
hal_device,
desc,
self.instance.flags,
trace_path,
)?;
let device_id = devices_fid.assign(device);
resource_log!("Created Device {:?}", device_id);
let queue_id = queues_fid.assign(queue);
resource_log!("Created Queue {:?}", queue_id);
Ok((device_id, queue_id))
}
}

227
vendor/wgpu-core/src/lib.rs vendored Normal file
View File

@@ -0,0 +1,227 @@
//! This library safely implements WebGPU on native platforms.
//! It is designed for integration into browsers, as well as wrapping
//! into other language-specific user-friendly libraries.
//!
//! ## Feature flags
#![doc = document_features::document_features!()]
//!
// When we have no backends, we end up with a lot of dead or otherwise unreachable code.
#![cfg_attr(
all(
not(all(feature = "vulkan", not(target_arch = "wasm32"))),
not(all(feature = "metal", any(target_vendor = "apple"))),
not(all(feature = "dx12", windows)),
not(feature = "gles"),
),
allow(unused, clippy::let_and_return)
)]
#![cfg_attr(docsrs, feature(doc_cfg, doc_auto_cfg))]
#![allow(
// It is much clearer to assert negative conditions with eq! false
clippy::bool_assert_comparison,
// We don't use syntax sugar where it's not necessary.
clippy::match_like_matches_macro,
// Redundant matching is more explicit.
clippy::redundant_pattern_matching,
// Explicit lifetimes are often easier to reason about.
clippy::needless_lifetimes,
// No need for defaults in the internal types.
clippy::new_without_default,
// Needless updates are more scalable, easier to play with features.
clippy::needless_update,
// Need many arguments for some core functions to be able to re-use code in many situations.
clippy::too_many_arguments,
// For some reason `rustc` can warn about these in const generics even
// though they are required.
unused_braces,
// It gets in the way a lot and does not prevent bugs in practice.
clippy::pattern_type_mismatch,
// `wgpu-core` isn't entirely user-facing, so it's useful to document internal items.
rustdoc::private_intra_doc_links
)]
#![warn(
clippy::ptr_as_ptr,
trivial_casts,
trivial_numeric_casts,
unsafe_op_in_unsafe_fn,
unused_extern_crates,
unused_qualifications
)]
// We use `Arc` in wgpu-core, but on wasm (unless opted out via `fragile-send-sync-non-atomic-wasm`)
// wgpu-hal resources are not Send/Sync, causing a clippy warning for unnecessary `Arc`s.
// We could use `Rc`s in this case as recommended, but unless atomics are enabled
// this doesn't make a difference.
// Therefore, this is only really a concern for users targeting WebGL
// (the only reason to use wgpu-core on the web in the first place) that have atomics enabled.
#![cfg_attr(not(send_sync), allow(clippy::arc_with_non_send_sync))]
pub mod binding_model;
pub mod command;
mod conv;
pub mod device;
pub mod error;
pub mod global;
pub mod hal_api;
mod hash_utils;
pub mod hub;
pub mod id;
pub mod identity;
#[cfg(feature = "indirect-validation")]
mod indirect_validation;
mod init_tracker;
pub mod instance;
mod lock;
pub mod pipeline;
mod pipeline_cache;
mod pool;
pub mod present;
pub mod ray_tracing;
pub mod registry;
pub mod resource;
mod snatch;
pub mod storage;
mod track;
mod weak_vec;
mod scratch;
// This is public for users who pre-compile shaders while still wanting to
// preserve all run-time checks that `wgpu-core` does.
// See <https://github.com/gfx-rs/wgpu/issues/3103>, after which this can be
// made private again.
pub mod validation;
pub use validation::{map_storage_format_from_naga, map_storage_format_to_naga};
pub use hal::{api, MAX_BIND_GROUPS, MAX_COLOR_ATTACHMENTS, MAX_VERTEX_BUFFERS};
pub use naga;
use std::{borrow::Cow, os::raw::c_char};
pub(crate) use hash_utils::*;
/// The index of a queue submission.
///
/// These are the values stored in `Device::fence`.
pub type SubmissionIndex = hal::FenceValue;
type Index = u32;
type Epoch = u32;
pub type RawString = *const c_char;
pub type Label<'a> = Option<Cow<'a, str>>;
trait LabelHelpers<'a> {
fn to_hal(&'a self, flags: wgt::InstanceFlags) -> Option<&'a str>;
fn to_string(&self) -> String;
}
impl<'a> LabelHelpers<'a> for Label<'a> {
fn to_hal(&'a self, flags: wgt::InstanceFlags) -> Option<&'a str> {
if flags.contains(wgt::InstanceFlags::DISCARD_HAL_LABELS) {
return None;
}
self.as_ref().map(|cow| cow.as_ref())
}
fn to_string(&self) -> String {
self.as_ref().map(|cow| cow.to_string()).unwrap_or_default()
}
}
pub fn hal_label(opt: Option<&str>, flags: wgt::InstanceFlags) -> Option<&str> {
if flags.contains(wgt::InstanceFlags::DISCARD_HAL_LABELS) {
return None;
}
opt
}
const DOWNLEVEL_WARNING_MESSAGE: &str = concat!(
"The underlying API or device in use does not ",
"support enough features to be a fully compliant implementation of WebGPU. ",
"A subset of the features can still be used. ",
"If you are running this program on native and not in a browser and wish to limit ",
"the features you use to the supported subset, ",
"call Adapter::downlevel_properties or Device::downlevel_properties to get ",
"a listing of the features the current ",
"platform supports."
);
const DOWNLEVEL_ERROR_MESSAGE: &str = concat!(
"This is not an invalid use of WebGPU: the underlying API or device does not ",
"support enough features to be a fully compliant implementation. ",
"A subset of the features can still be used. ",
"If you are running this program on native and not in a browser ",
"and wish to work around this issue, call ",
"Adapter::downlevel_properties or Device::downlevel_properties ",
"to get a listing of the features the current platform supports."
);
#[cfg(feature = "api_log_info")]
macro_rules! api_log {
($($arg:tt)+) => (log::info!($($arg)+))
}
#[cfg(not(feature = "api_log_info"))]
macro_rules! api_log {
($($arg:tt)+) => (log::trace!($($arg)+))
}
#[cfg(feature = "api_log_info")]
macro_rules! api_log_debug {
($($arg:tt)+) => (log::info!($($arg)+))
}
#[cfg(not(feature = "api_log_info"))]
macro_rules! api_log_debug {
($($arg:tt)+) => (log::debug!($($arg)+))
}
pub(crate) use api_log;
pub(crate) use api_log_debug;
#[cfg(feature = "resource_log_info")]
macro_rules! resource_log {
($($arg:tt)+) => (log::info!($($arg)+))
}
#[cfg(not(feature = "resource_log_info"))]
macro_rules! resource_log {
($($arg:tt)+) => (log::trace!($($arg)+))
}
pub(crate) use resource_log;
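/// Returns the least common multiple of `a` and `b`.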
#[inline]
pub(crate) fn get_lowest_common_denom(a: u32, b: u32) -> u32 {
let gcd = if a >= b {
get_greatest_common_divisor(a, b)
} else {
get_greatest_common_divisor(b, a)
};
a * b / gcd
}
#[inline]
pub(crate) fn get_greatest_common_divisor(mut a: u32, mut b: u32) -> u32 {
assert!(a >= b);
loop {
let c = a % b;
if c == 0 {
return b;
} else {
a = b;
b = c;
}
}
}
#[test]
fn test_lcd() {
assert_eq!(get_lowest_common_denom(2, 2), 2);
assert_eq!(get_lowest_common_denom(2, 3), 6);
assert_eq!(get_lowest_common_denom(6, 4), 12);
}
#[test]
fn test_gcd() {
assert_eq!(get_greatest_common_divisor(5, 1), 1);
assert_eq!(get_greatest_common_divisor(4, 2), 2);
assert_eq!(get_greatest_common_divisor(6, 4), 2);
assert_eq!(get_greatest_common_divisor(7, 7), 7);
}

54
vendor/wgpu-core/src/lock/mod.rs vendored Normal file
View File

@@ -0,0 +1,54 @@
//! Instrumented lock types.
//!
//! This module defines a set of instrumented wrappers for the lock
//! types used in `wgpu-core` ([`Mutex`], [`RwLock`], and
//! [`SnatchLock`]) that help us understand and validate `wgpu-core`
//! synchronization.
//!
//! - The [`ranked`] module defines lock types that perform run-time
//! checks to ensure that each thread acquires locks only in a
//! specific order, to prevent deadlocks.
//!
//! - The [`observing`] module defines lock types that record
//! `wgpu-core`'s lock acquisition activity to disk, for later
//! analysis by the `lock-analyzer` binary.
//!
//! - The [`vanilla`] module defines lock types that are
//! uninstrumented, no-overhead wrappers around the standard lock
//! types.
//!
//! If the `wgpu_validate_locks` config is set (for example, with
//! `RUSTFLAGS='--cfg wgpu_validate_locks'`), `wgpu-core` uses the
//! [`ranked`] module's locks. We hope to make this the default for
//! debug builds soon.
//!
//! If the `observe_locks` feature is enabled, `wgpu-core` uses the
//! [`observing`] module's locks.
//!
//! Otherwise, `wgpu-core` uses the [`vanilla`] module's locks.
//!
//! [`Mutex`]: parking_lot::Mutex
//! [`RwLock`]: parking_lot::RwLock
//! [`SnatchLock`]: crate::snatch::SnatchLock
pub mod rank;
#[cfg_attr(not(wgpu_validate_locks), allow(dead_code))]
mod ranked;
#[cfg(feature = "observe_locks")]
mod observing;
#[cfg_attr(any(wgpu_validate_locks, feature = "observe_locks"), allow(dead_code))]
mod vanilla;
#[cfg(wgpu_validate_locks)]
use ranked as chosen;
#[cfg(feature = "observe_locks")]
use observing as chosen;
#[cfg(not(any(wgpu_validate_locks, feature = "observe_locks")))]
use vanilla as chosen;
pub use chosen::{Mutex, MutexGuard, RwLock, RwLockReadGuard, RwLockWriteGuard};

480
vendor/wgpu-core/src/lock/observing.rs vendored Normal file
View File

@@ -0,0 +1,480 @@
//! Lock types that observe lock acquisition order.
//!
//! This module's [`Mutex`] type is instrumented to observe the
//! nesting of `wgpu-core` lock acquisitions. Whenever `wgpu-core`
//! acquires one lock while it is already holding another, we note
//! that nesting pair. This tells us what the [`LockRank::followers`]
//! set for each lock would need to include to accommodate
//! `wgpu-core`'s observed behavior.
//!
//! When `wgpu-core`'s `observe_locks` feature is enabled, if the
//! `WGPU_CORE_LOCK_OBSERVE_DIR` environment variable is set to the
//! path of an existing directory, then every thread that acquires a
//! lock in `wgpu-core` will write its own log file to that directory.
//! You can then run the `wgpu` workspace's `lock-analyzer` binary to
//! read those files and summarize the results. The output from
//! `lock-analyzer` has the same form as the lock ranks given in
//! [`lock/rank.rs`].
//!
//! If the `WGPU_CORE_LOCK_OBSERVE_DIR` environment variable is not
//! set, then no instrumentation takes place, and the locks behave
//! normally.
//!
//! To make sure we capture all acquisitions regardless of when the
//! program exits, each thread writes events directly to its log file
//! as they occur. A `write` system call is generally just a copy from
//! userspace into the kernel's buffer, so hopefully this approach
//! will still have tolerable performance.
//!
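//! For example (path hypothetical), running an application with
//! `WGPU_CORE_LOCK_OBSERVE_DIR=/tmp/wgpu-lock-logs` set produces one
//! `locks-<pid>.<thread-id>.ron` file per lock-acquiring thread in that
//! directory, ready for `lock-analyzer` to consume.
//!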
//! [`lock/rank.rs`]: ../../../src/wgpu_core/lock/rank.rs.html
use crate::FastHashSet;
use super::rank::{LockRank, LockRankSet};
use std::{
cell::RefCell,
fs::File,
panic::Location,
path::{Path, PathBuf},
};
/// A `Mutex` instrumented for lock acquisition order observation.
///
/// This is just a wrapper around a [`parking_lot::Mutex`], along with
/// its rank in the `wgpu_core` lock ordering.
///
/// For details, see [the module documentation][self].
pub struct Mutex<T> {
inner: parking_lot::Mutex<T>,
rank: LockRank,
}
/// A guard produced by locking [`Mutex`].
///
/// This is just a wrapper around a [`parking_lot::MutexGuard`], along
/// with the state needed to track lock acquisition.
///
/// For details, see [the module documentation][self].
pub struct MutexGuard<'a, T> {
inner: parking_lot::MutexGuard<'a, T>,
_state: LockStateGuard,
}
impl<T> Mutex<T> {
pub fn new(rank: LockRank, value: T) -> Mutex<T> {
Mutex {
inner: parking_lot::Mutex::new(value),
rank,
}
}
#[track_caller]
pub fn lock(&self) -> MutexGuard<T> {
let saved = acquire(self.rank, Location::caller());
MutexGuard {
inner: self.inner.lock(),
_state: LockStateGuard { saved },
}
}
}
impl<'a, T> std::ops::Deref for MutexGuard<'a, T> {
type Target = T;
fn deref(&self) -> &Self::Target {
self.inner.deref()
}
}
impl<'a, T> std::ops::DerefMut for MutexGuard<'a, T> {
fn deref_mut(&mut self) -> &mut Self::Target {
self.inner.deref_mut()
}
}
impl<T: std::fmt::Debug> std::fmt::Debug for Mutex<T> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
self.inner.fmt(f)
}
}
/// An `RwLock` instrumented for lock acquisition order observation.
///
/// This is just a wrapper around a [`parking_lot::RwLock`], along with
/// its rank in the `wgpu_core` lock ordering.
///
/// For details, see [the module documentation][self].
pub struct RwLock<T> {
inner: parking_lot::RwLock<T>,
rank: LockRank,
}
/// A read guard produced by locking [`RwLock`] for reading.
///
/// This is just a wrapper around a [`parking_lot::RwLockReadGuard`], along with
/// the state needed to track lock acquisition.
///
/// For details, see [the module documentation][self].
pub struct RwLockReadGuard<'a, T> {
inner: parking_lot::RwLockReadGuard<'a, T>,
_state: LockStateGuard,
}
/// A write guard produced by locking [`RwLock`] for writing.
///
/// This is just a wrapper around a [`parking_lot::RwLockWriteGuard`], along
/// with the state needed to track lock acquisition.
///
/// For details, see [the module documentation][self].
pub struct RwLockWriteGuard<'a, T> {
inner: parking_lot::RwLockWriteGuard<'a, T>,
_state: LockStateGuard,
}
impl<T> RwLock<T> {
pub fn new(rank: LockRank, value: T) -> RwLock<T> {
RwLock {
inner: parking_lot::RwLock::new(value),
rank,
}
}
#[track_caller]
pub fn read(&self) -> RwLockReadGuard<T> {
let saved = acquire(self.rank, Location::caller());
RwLockReadGuard {
inner: self.inner.read(),
_state: LockStateGuard { saved },
}
}
#[track_caller]
pub fn write(&self) -> RwLockWriteGuard<T> {
let saved = acquire(self.rank, Location::caller());
RwLockWriteGuard {
inner: self.inner.write(),
_state: LockStateGuard { saved },
}
}
}
impl<'a, T> RwLockWriteGuard<'a, T> {
pub fn downgrade(this: Self) -> RwLockReadGuard<'a, T> {
RwLockReadGuard {
inner: parking_lot::RwLockWriteGuard::downgrade(this.inner),
_state: this._state,
}
}
}
impl<T: std::fmt::Debug> std::fmt::Debug for RwLock<T> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
self.inner.fmt(f)
}
}
impl<'a, T> std::ops::Deref for RwLockReadGuard<'a, T> {
type Target = T;
fn deref(&self) -> &Self::Target {
self.inner.deref()
}
}
impl<'a, T> std::ops::Deref for RwLockWriteGuard<'a, T> {
type Target = T;
fn deref(&self) -> &Self::Target {
self.inner.deref()
}
}
impl<'a, T> std::ops::DerefMut for RwLockWriteGuard<'a, T> {
fn deref_mut(&mut self) -> &mut Self::Target {
self.inner.deref_mut()
}
}
/// A container that restores a prior per-thread lock state when dropped.
///
/// This type serves two purposes:
///
/// - Operations like `RwLockWriteGuard::downgrade` would like to be able to
/// destructure lock guards and reassemble their pieces into new guards, but
/// if the guard type itself implements `Drop`, we can't destructure it
/// without unsafe code or pointless `Option`s whose state is almost always
/// statically known.
///
/// - We can just implement `Drop` for this type once, and then use it in lock
/// guards, rather than implementing `Drop` separately for each guard type.
struct LockStateGuard {
/// The youngest lock that was already held when we acquired this
/// one, if any.
saved: Option<HeldLock>,
}
impl Drop for LockStateGuard {
fn drop(&mut self) {
release(self.saved)
}
}
/// Check and record the acquisition of a lock with `new_rank`.
///
/// Log the acquisition of a lock with `new_rank`, and
/// update the per-thread state accordingly.
///
/// Return the `Option<HeldLock>` state that must be restored when this lock is
/// released.
fn acquire(new_rank: LockRank, location: &'static Location<'static>) -> Option<HeldLock> {
LOCK_STATE.with_borrow_mut(|state| match *state {
ThreadState::Disabled => None,
ThreadState::Initial => {
let Ok(dir) = std::env::var("WGPU_CORE_LOCK_OBSERVE_DIR") else {
*state = ThreadState::Disabled;
return None;
};
// Create the observation log file.
let mut log = ObservationLog::create(dir)
.expect("Failed to open lock observation file (does the dir exist?)");
// Log the full set of lock ranks, so that the analysis can even see
// locks that are only acquired in isolation.
for rank in LockRankSet::all().iter() {
log.write_rank(rank);
}
// Update our state to reflect that we are logging acquisitions, and
// that we have acquired this lock.
*state = ThreadState::Enabled {
held_lock: Some(HeldLock {
rank: new_rank,
location,
}),
log,
};
// Since this is the first acquisition on this thread, we know that
// there is no prior lock held, and thus nothing to log yet.
None
}
ThreadState::Enabled {
ref mut held_lock,
ref mut log,
} => {
if let Some(ref held_lock) = held_lock {
log.write_acquisition(held_lock, new_rank, location);
}
std::mem::replace(
held_lock,
Some(HeldLock {
rank: new_rank,
location,
}),
)
}
})
}
/// Record the release of a lock whose saved state was `saved`.
fn release(saved: Option<HeldLock>) {
LOCK_STATE.with_borrow_mut(|state| {
if let ThreadState::Enabled {
ref mut held_lock, ..
} = *state
{
*held_lock = saved;
}
});
}
thread_local! {
static LOCK_STATE: RefCell<ThreadState> = const { RefCell::new(ThreadState::Initial) };
}
/// Thread-local state for lock observation.
enum ThreadState {
/// This thread hasn't yet checked the environment variable.
Initial,
/// This thread checked the environment variable, and it was
/// unset, so this thread is not observing lock acquisitions.
Disabled,
/// Lock observation is enabled for this thread.
Enabled {
held_lock: Option<HeldLock>,
log: ObservationLog,
},
}
/// Information about a currently held lock.
#[derive(Debug, Copy, Clone)]
struct HeldLock {
/// The lock's rank.
rank: LockRank,
/// Where we acquired the lock.
location: &'static Location<'static>,
}
/// A log to which we can write observations of lock activity.
struct ObservationLog {
/// The file to which we are logging lock observations.
log_file: File,
/// [`Location`]s we've seen so far.
///
/// This is a hashset of raw pointers because raw pointers have
/// the [`Eq`] and [`Hash`] relations we want: the pointer value, not
/// the contents. There's no unsafe code in this module.
locations_seen: FastHashSet<*const Location<'static>>,
/// Buffer for serializing events, retained for allocation reuse.
buffer: Vec<u8>,
}
#[allow(trivial_casts)]
impl ObservationLog {
/// Create an observation log in `dir` for the current pid and thread.
fn create(dir: impl AsRef<Path>) -> Result<Self, std::io::Error> {
let mut path = PathBuf::from(dir.as_ref());
path.push(format!(
"locks-{}.{:?}.ron",
std::process::id(),
std::thread::current().id()
));
let log_file = File::create(&path)?;
Ok(ObservationLog {
log_file,
locations_seen: FastHashSet::default(),
buffer: Vec::new(),
})
}
/// Record the acquisition of one lock while holding another.
///
/// Log that we acquired a lock of `new_rank` at `new_location` while still
/// holding other locks, the most recently acquired of which has
/// `older_rank`.
fn write_acquisition(
&mut self,
older_lock: &HeldLock,
new_rank: LockRank,
new_location: &'static Location<'static>,
) {
self.write_location(older_lock.location);
self.write_location(new_location);
self.write_action(&Action::Acquisition {
older_rank: older_lock.rank.bit.number(),
older_location: addr(older_lock.location),
newer_rank: new_rank.bit.number(),
newer_location: addr(new_location),
});
}
fn write_location(&mut self, location: &'static Location<'static>) {
if self.locations_seen.insert(location) {
self.write_action(&Action::Location {
address: addr(location),
file: location.file(),
line: location.line(),
column: location.column(),
});
}
}
fn write_rank(&mut self, rank: LockRankSet) {
self.write_action(&Action::Rank {
bit: rank.number(),
member_name: rank.member_name(),
const_name: rank.const_name(),
});
}
fn write_action(&mut self, action: &Action) {
use std::io::Write;
self.buffer.clear();
ron::ser::to_writer(&mut self.buffer, &action)
.expect("error serializing `lock::observing::Action`");
self.buffer.push(b'\n');
self.log_file
.write_all(&self.buffer)
.expect("error writing `lock::observing::Action`");
}
}
/// An action logged by a thread that is observing lock acquisition order.
///
/// Each thread's log file is a sequence of these enums, serialized
/// using the [`ron`] crate, one action per line.
///
/// Lock observation cannot assume that there will be any convenient
/// finalization point before the program exits, so in practice,
/// actions must be written immediately when they occur. This means we
/// can't, say, accumulate tables and write them out when they're
/// complete. The `lock-analyzer` binary is then responsible for
/// consolidating the data into a single table of observed transitions.
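///
/// For example, a `Rank` line for the first-defined rank would look roughly
/// like `Rank(bit: 0, member_name: "CommandBuffer::data", const_name:
/// "COMMAND_BUFFER_DATA")`; the exact spacing depends on the RON serializer.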
#[derive(serde::Serialize)]
enum Action {
/// A location that we will refer to in later actions.
///
/// We write one of these events the first time we see a
/// particular `Location`. Treating this as a separate action
/// simply lets us avoid repeating the content over and over
/// again in every [`Acquisition`] action.
///
/// [`Acquisition`]: Action::Acquisition
Location {
address: usize,
file: &'static str,
line: u32,
column: u32,
},
/// A lock rank that we will refer to in later actions.
///
/// We write out one these events for every lock rank at the
/// beginning of each thread's log file. Treating this as a
/// separate action simply lets us avoid repeating the names over
/// and over again in every [`Acquisition`] action.
///
/// [`Acquisition`]: Action::Acquisition
Rank {
bit: u32,
member_name: &'static str,
const_name: &'static str,
},
/// An attempt to acquire a lock while holding another lock.
Acquisition {
/// The number of the already acquired lock's rank.
older_rank: u32,
/// The source position at which we acquired it. Specifically,
/// its `Location`'s address, as an integer.
older_location: usize,
/// The number of the rank of the lock we are acquiring.
newer_rank: u32,
/// The source position at which we are acquiring it.
/// Specifically, its `Location`'s address, as an integer.
newer_location: usize,
},
}
impl LockRankSet {
/// Return the number of this rank's first member.
fn number(self) -> u32 {
self.bits().trailing_zeros()
}
}
/// Convenience for `std::ptr::from_ref(t) as usize`.
fn addr<T>(t: &T) -> usize {
std::ptr::from_ref(t) as usize
}

160
vendor/wgpu-core/src/lock/rank.rs vendored Normal file
View File

@@ -0,0 +1,160 @@
//! Ranks for `wgpu-core` locks, restricting acquisition order.
//!
//! See [`LockRank`].
/// The rank of a lock.
///
/// Each [`Mutex`], [`RwLock`], and [`SnatchLock`] in `wgpu-core` has been
/// assigned a *rank*: a node in the DAG defined at the bottom of
/// `wgpu-core/src/lock/rank.rs`. The rank of the most recently
/// acquired lock you are still holding determines which locks you may
/// attempt to acquire next.
///
/// When you create a lock in `wgpu-core`, you must specify its rank
/// by passing in a [`LockRank`] value. This module declares a
/// pre-defined set of ranks to cover everything in `wgpu-core`, named
/// after the type in which they occur, and the name of the type's
/// field that is a lock. For example, [`CommandBuffer::data`] is a
/// `Mutex`, and its rank here is the constant
/// [`COMMAND_BUFFER_DATA`].
///
/// [`Mutex`]: parking_lot::Mutex
/// [`RwLock`]: parking_lot::RwLock
/// [`SnatchLock`]: crate::snatch::SnatchLock
/// [`CommandBuffer::data`]: crate::command::CommandBuffer::data
#[derive(Debug, Copy, Clone)]
pub struct LockRank {
/// The bit representing this lock.
///
/// There should only be a single bit set in this value.
pub(super) bit: LockRankSet,
/// A bitmask of permitted successor ranks.
///
/// If `rank` is the rank of the most recently acquired lock we
/// are still holding, then `rank.followers` is the mask of
/// locks we are allowed to acquire next.
///
/// The `define_lock_ranks!` macro ensures that there are no
/// cycles in the graph of lock ranks and their followers.
pub(super) followers: LockRankSet,
}
/// Define a set of lock ranks, and each rank's permitted successors.
macro_rules! define_lock_ranks {
{
$(
$( #[ $attr:meta ] )*
rank $name:ident $member:literal followed by { $( $follower:ident ),* $(,)? }
)*
} => {
// An enum that assigns a unique number to each rank.
#[allow(non_camel_case_types, clippy::upper_case_acronyms)]
enum LockRankNumber { $( $name, )* }
bitflags::bitflags! {
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
/// A bitflags type representing a set of lock ranks.
pub struct LockRankSet: u64 {
$(
const $name = 1 << (LockRankNumber:: $name as u64);
)*
}
}
impl LockRankSet {
pub fn member_name(self) -> &'static str {
match self {
$(
LockRankSet:: $name => $member,
)*
_ => "<unrecognized LockRankSet bit>",
}
}
#[cfg_attr(not(feature = "observe_locks"), allow(dead_code))]
pub fn const_name(self) -> &'static str {
match self {
$(
LockRankSet:: $name => stringify!($name),
)*
_ => "<unrecognized LockRankSet bit>",
}
}
}
$(
// If there is any cycle in the ranking, the initializers
// for `followers` will be cyclic, and rustc will give us
// an error message explaining the cycle.
$( #[ $attr ] )*
pub const $name: LockRank = LockRank {
bit: LockRankSet:: $name,
followers: LockRankSet::empty() $( .union($follower.bit) )*,
};
)*
}
}
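// As an illustration, a definition like `rank PAWN "pawn" followed by { ROOK, BISHOP }`
// (see the test-only ranks below) expands to roughly:
//
//     pub const PAWN: LockRank = LockRank {
//         bit: LockRankSet::PAWN,
//         followers: LockRankSet::empty().union(ROOK.bit).union(BISHOP.bit),
//     };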
define_lock_ranks! {
rank COMMAND_BUFFER_DATA "CommandBuffer::data" followed by {
DEVICE_SNATCHABLE_LOCK,
DEVICE_USAGE_SCOPES,
SHARED_TRACKER_INDEX_ALLOCATOR_INNER,
BUFFER_MAP_STATE,
}
rank DEVICE_SNATCHABLE_LOCK "Device::snatchable_lock" followed by {
SHARED_TRACKER_INDEX_ALLOCATOR_INNER,
DEVICE_TRACE,
BUFFER_MAP_STATE,
// Uncomment this to see an interesting cycle.
// COMMAND_BUFFER_DATA,
}
rank BUFFER_MAP_STATE "Buffer::map_state" followed by {
QUEUE_PENDING_WRITES,
SHARED_TRACKER_INDEX_ALLOCATOR_INNER,
DEVICE_TRACE,
}
rank QUEUE_PENDING_WRITES "Queue::pending_writes" followed by {
COMMAND_ALLOCATOR_FREE_ENCODERS,
SHARED_TRACKER_INDEX_ALLOCATOR_INNER,
QUEUE_LIFE_TRACKER,
}
rank QUEUE_LIFE_TRACKER "Queue::life_tracker" followed by {
COMMAND_ALLOCATOR_FREE_ENCODERS,
DEVICE_TRACE,
}
rank COMMAND_ALLOCATOR_FREE_ENCODERS "CommandAllocator::free_encoders" followed by {
SHARED_TRACKER_INDEX_ALLOCATOR_INNER,
}
rank BUFFER_BIND_GROUPS "Buffer::bind_groups" followed by { }
rank BUFFER_INITIALIZATION_STATUS "Buffer::initialization_status" followed by { }
rank DEVICE_DEFERRED_DESTROY "Device::deferred_destroy" followed by { }
rank DEVICE_FENCE "Device::fence" followed by { }
#[allow(dead_code)]
rank DEVICE_TRACE "Device::trace" followed by { }
rank DEVICE_TRACKERS "Device::trackers" followed by { }
rank DEVICE_LOST_CLOSURE "Device::device_lost_closure" followed by { }
rank DEVICE_USAGE_SCOPES "Device::usage_scopes" followed by { }
rank IDENTITY_MANAGER_VALUES "IdentityManager::values" followed by { }
rank REGISTRY_STORAGE "Registry::storage" followed by { }
rank RESOURCE_POOL_INNER "ResourcePool::inner" followed by { }
rank SHARED_TRACKER_INDEX_ALLOCATOR_INNER "SharedTrackerIndexAllocator::inner" followed by { }
rank SURFACE_PRESENTATION "Surface::presentation" followed by { }
rank TEXTURE_BIND_GROUPS "Texture::bind_groups" followed by { }
rank TEXTURE_INITIALIZATION_STATUS "Texture::initialization_status" followed by { }
rank TEXTURE_VIEWS "Texture::views" followed by { }
rank BLAS_BUILT_INDEX "Blas::built_index" followed by { }
rank TLAS_BUILT_INDEX "Tlas::built_index" followed by { }
rank TLAS_DEPENDENCIES "Tlas::dependencies" followed by { }
#[cfg(test)]
rank PAWN "pawn" followed by { ROOK, BISHOP }
#[cfg(test)]
rank ROOK "rook" followed by { KNIGHT }
#[cfg(test)]
rank KNIGHT "knight" followed by { }
#[cfg(test)]
rank BISHOP "bishop" followed by { }
}
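// Illustrative check added for this document (not part of upstream wgpu): the
// `define_lock_ranks!` invocation above turns each "followed by" list into the
// `followers` bitmask, which is what the instrumented lock wrappers consult when
// deciding whether a nested acquisition is permitted. The test-only chess ranks
// are used so this compiles without touching the real ranks.
#[cfg(test)]
mod follower_example {
    use super::*;

    #[test]
    fn followers_reflect_the_declared_edges() {
        // PAWN is declared as "followed by { ROOK, BISHOP }" above.
        assert!(PAWN.followers.contains(ROOK.bit));
        assert!(PAWN.followers.contains(BISHOP.bit));
        // KNIGHT is reachable only through ROOK, so it is not a direct follower.
        assert!(!PAWN.followers.contains(KNIGHT.bit));
    }
}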

387
vendor/wgpu-core/src/lock/ranked.rs vendored Normal file
View File

@@ -0,0 +1,387 @@
//! Lock types that enforce well-ranked lock acquisition order.
//!
//! This module's [`Mutex`] and [`RwLock`] types are instrumented to check that
//! `wgpu-core` acquires locks according to their rank, to prevent deadlocks. To
//! use it, put `--cfg wgpu_validate_locks` in `RUSTFLAGS`.
//!
//! The [`LockRank`] constants in the [`lock::rank`] module describe edges in a
//! directed graph of lock acquisitions: each lock's rank says, if this is the most
//! recently acquired lock that you are still holding, then these are the locks you
//! are allowed to acquire next.
//!
//! As long as this graph doesn't have cycles, any number of threads can acquire
//! locks along paths through the graph without deadlock:
//!
//! - Assume that if a thread is holding a lock, then it will either release it,
//! or block trying to acquire another one. No thread just sits on its locks
//! forever for unrelated reasons. If it did, then that would be a source of
//! deadlock "outside the system" that we can't do anything about.
//!
//! - This module asserts that threads acquire and release locks in a stack-like
//! order: a lock is dropped only when it is the *most recently acquired* lock
//! *still held* - call this the "youngest" lock. This stack-like ordering
//! isn't a Rust requirement; Rust lets you drop guards in any order you like.
//! This is a restriction we impose.
//!
//! - Consider the directed graph whose nodes are locks, and whose edges go from
//! each lock to its permitted followers, the locks in its [`LockRank::followers`]
//! set. The definition of the [`lock::rank`] module's [`LockRank`] constants
//! ensures that this graph has no cycles, including trivial cycles from a node to
//! itself.
//!
//! - This module then asserts that each thread attempts to acquire a lock only if
//! it is among its youngest lock's permitted followers. Thus, as a thread
//! acquires locks, it must be traversing a path through the graph along its
//! edges.
//!
//! - Because there are no cycles in the graph, whenever one thread is blocked
//! waiting to acquire a lock, that lock must be held by a different thread: if
//! you were allowed to acquire a lock you already hold, that would be a cycle in
//! the graph.
//!
//! - Furthermore, because the graph has no cycles, as we work our way from each
//! thread to the thread it is blocked waiting for, we must eventually reach an
//! end point: there must be some thread that is able to acquire its next lock, or
//! that is about to release a lock.
//!
//! Thus, the system as a whole is always able to make progress: it is free of
//! deadlocks.
//!
//! Note that this validation only monitors each thread's behavior in isolation:
//! there's only thread-local state, nothing communicated between threads. So we
//! don't detect deadlocks, per se, only the potential to cause deadlocks. This
//! means that the validation is conservative, but more reproducible, since it's not
//! dependent on any particular interleaving of execution.
//!
//! [`lock::rank`]: crate::lock::rank
use super::rank::LockRank;
use std::{cell::Cell, panic::Location};
/// A `Mutex` instrumented for deadlock prevention.
///
/// This is just a wrapper around a [`parking_lot::Mutex`], along with
/// its rank in the `wgpu_core` lock ordering.
///
/// For details, see [the module documentation][self].
pub struct Mutex<T> {
inner: parking_lot::Mutex<T>,
rank: LockRank,
}
/// A guard produced by locking [`Mutex`].
///
/// This is just a wrapper around a [`parking_lot::MutexGuard`], along
/// with the state needed to track lock acquisition.
///
/// For details, see [the module documentation][self].
pub struct MutexGuard<'a, T> {
inner: parking_lot::MutexGuard<'a, T>,
saved: LockStateGuard,
}
thread_local! {
static LOCK_STATE: Cell<LockState> = const { Cell::new(LockState::INITIAL) };
}
/// Per-thread state for the deadlock checker.
#[derive(Debug, Copy, Clone)]
struct LockState {
/// The last lock we acquired, and where.
last_acquired: Option<(LockRank, &'static Location<'static>)>,
/// The number of locks currently held.
///
/// This is used to enforce stack-like lock acquisition and release.
depth: u32,
}
impl LockState {
const INITIAL: LockState = LockState {
last_acquired: None,
depth: 0,
};
}
/// A container that restores a [`LockState`] when dropped.
///
/// This type serves two purposes:
///
/// - Operations like `RwLockWriteGuard::downgrade` would like to be able to
/// destructure lock guards and reassemble their pieces into new guards, but
/// if the guard type itself implements `Drop`, we can't destructure it
/// without unsafe code or pointless `Option`s whose state is almost always
/// statically known.
///
/// - We can just implement `Drop` for this type once, and then use it in lock
/// guards, rather than implementing `Drop` separately for each guard type.
struct LockStateGuard(LockState);
impl Drop for LockStateGuard {
fn drop(&mut self) {
release(self.0)
}
}
/// Check and record the acquisition of a lock with `new_rank`.
///
/// Check that acquiring a lock with `new_rank` is permitted at this point, and
/// update the per-thread state accordingly.
///
/// Return the `LockState` that must be restored when this lock is released.
fn acquire(new_rank: LockRank, location: &'static Location<'static>) -> LockState {
let state = LOCK_STATE.get();
// Initially, it's fine to acquire any lock. So we only
// need to check when `last_acquired` is `Some`.
if let Some((ref last_rank, ref last_location)) = state.last_acquired {
assert!(
last_rank.followers.contains(new_rank.bit),
"Attempt to acquire nested mutexes in wrong order:\n\
last locked {:<35} at {}\n\
now locking {:<35} at {}\n\
Locking {} after locking {} is not permitted.",
last_rank.bit.member_name(),
last_location,
new_rank.bit.member_name(),
location,
new_rank.bit.member_name(),
last_rank.bit.member_name(),
);
}
LOCK_STATE.set(LockState {
last_acquired: Some((new_rank, location)),
depth: state.depth + 1,
});
state
}
/// Record the release of a lock whose saved state was `saved`.
///
/// Check that locks are being released in stacking order, and update the
/// per-thread state accordingly.
fn release(saved: LockState) {
let prior = LOCK_STATE.replace(saved);
// Although Rust allows mutex guards to be dropped in any
// order, this analysis requires that locks be acquired and
// released in stack order: the next lock to be released must be
// the most recently acquired lock still held.
assert_eq!(
prior.depth,
saved.depth + 1,
"Lock not released in stacking order"
);
}
impl<T> Mutex<T> {
pub fn new(rank: LockRank, value: T) -> Mutex<T> {
Mutex {
inner: parking_lot::Mutex::new(value),
rank,
}
}
#[track_caller]
pub fn lock(&self) -> MutexGuard<T> {
let saved = acquire(self.rank, Location::caller());
MutexGuard {
inner: self.inner.lock(),
saved: LockStateGuard(saved),
}
}
}
impl<'a, T> std::ops::Deref for MutexGuard<'a, T> {
type Target = T;
fn deref(&self) -> &Self::Target {
self.inner.deref()
}
}
impl<'a, T> std::ops::DerefMut for MutexGuard<'a, T> {
fn deref_mut(&mut self) -> &mut Self::Target {
self.inner.deref_mut()
}
}
impl<T: std::fmt::Debug> std::fmt::Debug for Mutex<T> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
self.inner.fmt(f)
}
}
/// An `RwLock` instrumented for deadlock prevention.
///
/// This is just a wrapper around a [`parking_lot::RwLock`], along with
/// its rank in the `wgpu_core` lock ordering.
///
/// For details, see [the module documentation][self].
pub struct RwLock<T> {
inner: parking_lot::RwLock<T>,
rank: LockRank,
}
/// A read guard produced by locking [`RwLock`] for reading.
///
/// This is just a wrapper around a [`parking_lot::RwLockReadGuard`], along with
/// the state needed to track lock acquisition.
///
/// For details, see [the module documentation][self].
pub struct RwLockReadGuard<'a, T> {
inner: parking_lot::RwLockReadGuard<'a, T>,
saved: LockStateGuard,
}
/// A write guard produced by locking [`RwLock`] for writing.
///
/// This is just a wrapper around a [`parking_lot::RwLockWriteGuard`], along
/// with the state needed to track lock acquisition.
///
/// For details, see [the module documentation][self].
pub struct RwLockWriteGuard<'a, T> {
inner: parking_lot::RwLockWriteGuard<'a, T>,
saved: LockStateGuard,
}
impl<T> RwLock<T> {
pub fn new(rank: LockRank, value: T) -> RwLock<T> {
RwLock {
inner: parking_lot::RwLock::new(value),
rank,
}
}
#[track_caller]
pub fn read(&self) -> RwLockReadGuard<T> {
let saved = acquire(self.rank, Location::caller());
RwLockReadGuard {
inner: self.inner.read(),
saved: LockStateGuard(saved),
}
}
#[track_caller]
pub fn write(&self) -> RwLockWriteGuard<T> {
let saved = acquire(self.rank, Location::caller());
RwLockWriteGuard {
inner: self.inner.write(),
saved: LockStateGuard(saved),
}
}
}
impl<'a, T> RwLockWriteGuard<'a, T> {
pub fn downgrade(this: Self) -> RwLockReadGuard<'a, T> {
RwLockReadGuard {
inner: parking_lot::RwLockWriteGuard::downgrade(this.inner),
saved: this.saved,
}
}
}
impl<T: std::fmt::Debug> std::fmt::Debug for RwLock<T> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
self.inner.fmt(f)
}
}
impl<'a, T> std::ops::Deref for RwLockReadGuard<'a, T> {
type Target = T;
fn deref(&self) -> &Self::Target {
self.inner.deref()
}
}
impl<'a, T> std::ops::Deref for RwLockWriteGuard<'a, T> {
type Target = T;
fn deref(&self) -> &Self::Target {
self.inner.deref()
}
}
impl<'a, T> std::ops::DerefMut for RwLockWriteGuard<'a, T> {
fn deref_mut(&mut self) -> &mut Self::Target {
self.inner.deref_mut()
}
}
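/// Downgrading a write guard to a read guard keeps the saved lock state, so
/// the rank bookkeeping is unaffected. (Illustrative test added for this
/// document, not part of upstream wgpu.)
#[test]
fn downgrade_preserves_lock_state() {
    use super::rank;
    let lock1 = RwLock::new(rank::PAWN, ());
    let lock2 = Mutex::new(rank::ROOK, ());
    let write_guard = lock1.write();
    let read_guard = RwLockWriteGuard::downgrade(write_guard);
    // PAWN is still the most recently acquired lock, so acquiring one of its
    // followers remains permitted.
    let guard2 = lock2.lock();
    drop(guard2);
    drop(read_guard);
}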
/// Locks can be acquired in the order indicated by their ranks.
#[test]
fn permitted() {
use super::rank;
let lock1 = Mutex::new(rank::PAWN, ());
let lock2 = Mutex::new(rank::ROOK, ());
let _guard1 = lock1.lock();
let _guard2 = lock2.lock();
}
/// Locks can only be acquired in the order indicated by their ranks.
#[test]
#[should_panic(expected = "Locking pawn after locking rook")]
fn forbidden_unrelated() {
use super::rank;
let lock1 = Mutex::new(rank::ROOK, ());
let lock2 = Mutex::new(rank::PAWN, ());
let _guard1 = lock1.lock();
let _guard2 = lock2.lock();
}
/// Lock acquisitions can't skip ranks.
///
/// These two locks *could* be acquired in this order, but only if other locks
/// are acquired in between them. Skipping ranks isn't allowed.
#[test]
#[should_panic(expected = "Locking knight after locking pawn")]
fn forbidden_skip() {
use super::rank;
let lock1 = Mutex::new(rank::PAWN, ());
let lock2 = Mutex::new(rank::KNIGHT, ());
let _guard1 = lock1.lock();
let _guard2 = lock2.lock();
}
/// Locks can be acquired and released in a stack-like order.
#[test]
fn stack_like() {
use super::rank;
let lock1 = Mutex::new(rank::PAWN, ());
let lock2 = Mutex::new(rank::ROOK, ());
let lock3 = Mutex::new(rank::BISHOP, ());
let guard1 = lock1.lock();
let guard2 = lock2.lock();
drop(guard2);
let guard3 = lock3.lock();
drop(guard3);
drop(guard1);
}
/// Locks can only be acquired and released in a stack-like order.
#[test]
#[should_panic(expected = "Lock not released in stacking order")]
fn non_stack_like() {
use super::rank;
let lock1 = Mutex::new(rank::PAWN, ());
let lock2 = Mutex::new(rank::ROOK, ());
let guard1 = lock1.lock();
let guard2 = lock2.lock();
// Avoid a double panic from dropping this while unwinding due to the panic
// we're testing for.
std::mem::forget(guard2);
drop(guard1);
}

121
vendor/wgpu-core/src/lock/vanilla.rs vendored Normal file
View File

@@ -0,0 +1,121 @@
//! Plain, uninstrumented wrappers around [`parking_lot`] lock types.
//!
//! These definitions are used when no particular lock instrumentation
//! Cargo feature is selected.
/// A plain wrapper around [`parking_lot::Mutex`].
///
/// This is just like [`parking_lot::Mutex`], except that our [`new`]
/// method takes a rank, indicating where the new mutex should sit in
/// `wgpu-core`'s lock ordering. The rank is ignored.
///
/// See the [`lock`] module documentation for other wrappers.
///
/// [`new`]: Mutex::new
/// [`lock`]: crate::lock
pub struct Mutex<T>(parking_lot::Mutex<T>);
/// A guard produced by locking [`Mutex`].
///
/// This is just a wrapper around a [`parking_lot::MutexGuard`].
pub struct MutexGuard<'a, T>(parking_lot::MutexGuard<'a, T>);
impl<T> Mutex<T> {
pub fn new(_rank: super::rank::LockRank, value: T) -> Mutex<T> {
Mutex(parking_lot::Mutex::new(value))
}
pub fn lock(&self) -> MutexGuard<T> {
MutexGuard(self.0.lock())
}
}
impl<'a, T> std::ops::Deref for MutexGuard<'a, T> {
type Target = T;
fn deref(&self) -> &Self::Target {
self.0.deref()
}
}
impl<'a, T> std::ops::DerefMut for MutexGuard<'a, T> {
fn deref_mut(&mut self) -> &mut Self::Target {
self.0.deref_mut()
}
}
impl<T: std::fmt::Debug> std::fmt::Debug for Mutex<T> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
self.0.fmt(f)
}
}
/// A plain wrapper around [`parking_lot::RwLock`].
///
/// This is just like [`parking_lot::RwLock`], except that our [`new`]
/// method takes a rank, indicating where the new lock should sit in
/// `wgpu-core`'s lock ordering. The rank is ignored.
///
/// See the [`lock`] module documentation for other wrappers.
///
/// [`new`]: RwLock::new
/// [`lock`]: crate::lock
pub struct RwLock<T>(parking_lot::RwLock<T>);
/// A read guard produced by locking [`RwLock`] as a reader.
///
/// This is just a wrapper around a [`parking_lot::RwLockReadGuard`].
pub struct RwLockReadGuard<'a, T>(parking_lot::RwLockReadGuard<'a, T>);
/// A write guard produced by locking [`RwLock`] as a writer.
///
/// This is just a wrapper around a [`parking_lot::RwLockWriteGuard`].
pub struct RwLockWriteGuard<'a, T>(parking_lot::RwLockWriteGuard<'a, T>);
impl<T> RwLock<T> {
pub fn new(_rank: super::rank::LockRank, value: T) -> RwLock<T> {
RwLock(parking_lot::RwLock::new(value))
}
pub fn read(&self) -> RwLockReadGuard<T> {
RwLockReadGuard(self.0.read())
}
pub fn write(&self) -> RwLockWriteGuard<T> {
RwLockWriteGuard(self.0.write())
}
}
impl<'a, T> RwLockWriteGuard<'a, T> {
pub fn downgrade(this: Self) -> RwLockReadGuard<'a, T> {
RwLockReadGuard(parking_lot::RwLockWriteGuard::downgrade(this.0))
}
}
impl<T: std::fmt::Debug> std::fmt::Debug for RwLock<T> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
self.0.fmt(f)
}
}
impl<'a, T> std::ops::Deref for RwLockReadGuard<'a, T> {
type Target = T;
fn deref(&self) -> &Self::Target {
self.0.deref()
}
}
impl<'a, T> std::ops::Deref for RwLockWriteGuard<'a, T> {
type Target = T;
fn deref(&self) -> &Self::Target {
self.0.deref()
}
}
impl<'a, T> std::ops::DerefMut for RwLockWriteGuard<'a, T> {
fn deref_mut(&mut self) -> &mut Self::Target {
self.0.deref_mut()
}
}
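/// The uninstrumented wrappers take a rank only for API parity with the ranked
/// wrappers; it is ignored, so no ordering is enforced here. (Illustrative test
/// added for this document, not part of upstream wgpu.)
#[test]
fn rank_is_ignored() {
    use super::rank;
    // This nesting would panic under `--cfg wgpu_validate_locks`; the plain
    // wrappers accept it silently.
    let lock1 = Mutex::new(rank::KNIGHT, 0u32);
    let lock2 = Mutex::new(rank::PAWN, 1u32);
    let guard1 = lock1.lock();
    let guard2 = lock2.lock();
    assert_eq!(*guard1 + *guard2, 1);
}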

614
vendor/wgpu-core/src/pipeline.rs vendored Normal file
View File

@@ -0,0 +1,614 @@
pub use crate::pipeline_cache::PipelineCacheValidationError;
use crate::{
binding_model::{CreateBindGroupLayoutError, CreatePipelineLayoutError, PipelineLayout},
command::ColorAttachmentError,
device::{Device, DeviceError, MissingDownlevelFlags, MissingFeatures, RenderPassContext},
id::{PipelineCacheId, PipelineLayoutId, ShaderModuleId},
resource::{InvalidResourceError, Labeled, TrackingData},
resource_log, validation, Label,
};
use arrayvec::ArrayVec;
use naga::error::ShaderError;
use std::{borrow::Cow, marker::PhantomData, mem::ManuallyDrop, num::NonZeroU32, sync::Arc};
use thiserror::Error;
/// Information about buffer bindings, which
/// is validated against the shader (and pipeline)
/// at draw time as opposed to initialization time.
#[derive(Debug)]
pub(crate) struct LateSizedBufferGroup {
// The order has to match `BindGroup::late_buffer_binding_sizes`.
pub(crate) shader_sizes: Vec<wgt::BufferAddress>,
}
#[allow(clippy::large_enum_variant)]
pub enum ShaderModuleSource<'a> {
#[cfg(feature = "wgsl")]
Wgsl(Cow<'a, str>),
#[cfg(feature = "glsl")]
Glsl(Cow<'a, str>, naga::front::glsl::Options),
#[cfg(feature = "spirv")]
SpirV(Cow<'a, [u32]>, naga::front::spv::Options),
Naga(Cow<'static, naga::Module>),
/// Dummy variant because `Naga` doesn't have a lifetime and without enough active features it
/// could be the last one active.
#[doc(hidden)]
Dummy(PhantomData<&'a ()>),
}
#[derive(Clone, Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct ShaderModuleDescriptor<'a> {
pub label: Label<'a>,
#[cfg_attr(feature = "serde", serde(default))]
pub runtime_checks: wgt::ShaderRuntimeChecks,
}
#[derive(Debug)]
pub struct ShaderModule {
pub(crate) raw: ManuallyDrop<Box<dyn hal::DynShaderModule>>,
pub(crate) device: Arc<Device>,
pub(crate) interface: Option<validation::Interface>,
/// The `label` from the descriptor used to create the resource.
pub(crate) label: String,
}
impl Drop for ShaderModule {
fn drop(&mut self) {
resource_log!("Destroy raw {}", self.error_ident());
// SAFETY: We are in the Drop impl and we don't use self.raw anymore after this point.
let raw = unsafe { ManuallyDrop::take(&mut self.raw) };
unsafe {
self.device.raw().destroy_shader_module(raw);
}
}
}
crate::impl_resource_type!(ShaderModule);
crate::impl_labeled!(ShaderModule);
crate::impl_parent_device!(ShaderModule);
crate::impl_storage_item!(ShaderModule);
impl ShaderModule {
pub(crate) fn raw(&self) -> &dyn hal::DynShaderModule {
self.raw.as_ref()
}
pub(crate) fn finalize_entry_point_name(
&self,
stage_bit: wgt::ShaderStages,
entry_point: Option<&str>,
) -> Result<String, validation::StageError> {
match &self.interface {
Some(interface) => interface.finalize_entry_point_name(stage_bit, entry_point),
None => entry_point
.map(|ep| ep.to_string())
.ok_or(validation::StageError::NoEntryPointFound),
}
}
}
//Note: `Clone` would require `WithSpan: Clone`.
#[derive(Clone, Debug, Error)]
#[non_exhaustive]
pub enum CreateShaderModuleError {
#[cfg(any(feature = "wgsl", feature = "indirect-validation"))]
#[error(transparent)]
Parsing(#[from] ShaderError<naga::front::wgsl::ParseError>),
#[cfg(feature = "glsl")]
#[error(transparent)]
ParsingGlsl(#[from] ShaderError<naga::front::glsl::ParseErrors>),
#[cfg(feature = "spirv")]
#[error(transparent)]
ParsingSpirV(#[from] ShaderError<naga::front::spv::Error>),
#[error("Failed to generate the backend-specific code")]
Generation,
#[error(transparent)]
Device(#[from] DeviceError),
#[error(transparent)]
Validation(#[from] ShaderError<naga::WithSpan<naga::valid::ValidationError>>),
#[error(transparent)]
MissingFeatures(#[from] MissingFeatures),
#[error(
"Shader global {bind:?} uses a group index {group} that exceeds the max_bind_groups limit of {limit}."
)]
InvalidGroupIndex {
bind: naga::ResourceBinding,
group: u32,
limit: u32,
},
}
/// Describes a programmable pipeline stage.
#[derive(Clone, Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct ProgrammableStageDescriptor<'a> {
/// The compiled shader module for this stage.
pub module: ShaderModuleId,
/// The name of the entry point in the compiled shader. The name is selected using the
/// following logic:
///
/// * If `Some(name)` is specified, there must be a function with this name in the shader.
/// * If `None` is specified, there must be exactly one entry point associated with this stage
///   in the shader; proceed as if `Some(…)` was specified with that entry point's name.
pub entry_point: Option<Cow<'a, str>>,
/// Specifies the values of pipeline-overridable constants in the shader module.
///
/// If an `@id` attribute was specified on the declaration,
/// the key must be the pipeline constant ID as a decimal ASCII number; if not,
/// the key must be the constant's identifier name.
///
/// The value may represent any of WGSL's concrete scalar types.
pub constants: Cow<'a, naga::back::PipelineConstants>,
/// Whether workgroup scoped memory will be initialized with zero values for this stage.
///
/// This is required by the WebGPU spec, but it adds overhead that applications
/// which do not need strict WebGPU conformance can avoid by disabling it.
pub zero_initialize_workgroup_memory: bool,
}
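// Illustrative sketch added for this document (not part of upstream wgpu): how a
// caller might populate the `constants` map above for a WGSL module declaring
// `@id(42) override scale: f32;` and `override gain: f32;`. Overrides with an
// `@id` are keyed by the decimal id, the rest by their identifier name.
#[cfg(test)]
#[allow(dead_code)]
fn example_pipeline_constants() -> naga::back::PipelineConstants {
    let mut constants = naga::back::PipelineConstants::default();
    constants.insert("42".to_string(), 2.0); // `@id(42) override scale: f32;`
    constants.insert("gain".to_string(), 0.5); // `override gain: f32;`
    constants
}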
/// Describes a programmable pipeline stage.
#[derive(Clone, Debug)]
pub struct ResolvedProgrammableStageDescriptor<'a> {
/// The compiled shader module for this stage.
pub module: Arc<ShaderModule>,
/// The name of the entry point in the compiled shader. The name is selected using the
/// following logic:
///
/// * If `Some(name)` is specified, there must be a function with this name in the shader.
/// * If `None` is specified, there must be exactly one entry point associated with this stage
///   in the shader; proceed as if `Some(…)` was specified with that entry point's name.
pub entry_point: Option<Cow<'a, str>>,
/// Specifies the values of pipeline-overridable constants in the shader module.
///
/// If an `@id` attribute was specified on the declaration,
/// the key must be the pipeline constant ID as a decimal ASCII number; if not,
/// the key must be the constant's identifier name.
///
/// The value may represent any of WGSL's concrete scalar types.
pub constants: Cow<'a, naga::back::PipelineConstants>,
/// Whether workgroup scoped memory will be initialized with zero values for this stage.
///
/// This is required by the WebGPU spec, but it adds overhead that applications
/// which do not need strict WebGPU conformance can avoid by disabling it.
pub zero_initialize_workgroup_memory: bool,
}
/// Number of implicit bind groups derived at pipeline creation.
pub type ImplicitBindGroupCount = u8;
#[derive(Clone, Debug, Error)]
#[non_exhaustive]
pub enum ImplicitLayoutError {
#[error("The implicit_pipeline_ids arg is required")]
MissingImplicitPipelineIds,
#[error("Missing IDs for deriving {0} bind groups")]
MissingIds(ImplicitBindGroupCount),
#[error("Unable to reflect the shader {0:?} interface")]
ReflectionError(wgt::ShaderStages),
#[error(transparent)]
BindGroup(#[from] CreateBindGroupLayoutError),
#[error(transparent)]
Pipeline(#[from] CreatePipelineLayoutError),
}
/// Describes a compute pipeline.
#[derive(Clone, Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct ComputePipelineDescriptor<'a> {
pub label: Label<'a>,
/// The layout of bind groups for this pipeline.
pub layout: Option<PipelineLayoutId>,
/// The compiled compute stage and its entry point.
pub stage: ProgrammableStageDescriptor<'a>,
/// The pipeline cache to use when creating this pipeline.
pub cache: Option<PipelineCacheId>,
}
/// Describes a compute pipeline.
#[derive(Clone, Debug)]
pub struct ResolvedComputePipelineDescriptor<'a> {
pub label: Label<'a>,
/// The layout of bind groups for this pipeline.
pub layout: Option<Arc<PipelineLayout>>,
/// The compiled compute stage and its entry point.
pub stage: ResolvedProgrammableStageDescriptor<'a>,
/// The pipeline cache to use when creating this pipeline.
pub cache: Option<Arc<PipelineCache>>,
}
#[derive(Clone, Debug, Error)]
#[non_exhaustive]
pub enum CreateComputePipelineError {
#[error(transparent)]
Device(#[from] DeviceError),
#[error("Unable to derive an implicit layout")]
Implicit(#[from] ImplicitLayoutError),
#[error("Error matching shader requirements against the pipeline")]
Stage(#[from] validation::StageError),
#[error("Internal error: {0}")]
Internal(String),
#[error("Pipeline constant error: {0}")]
PipelineConstants(String),
#[error(transparent)]
MissingDownlevelFlags(#[from] MissingDownlevelFlags),
#[error(transparent)]
InvalidResource(#[from] InvalidResourceError),
}
#[derive(Debug)]
pub struct ComputePipeline {
pub(crate) raw: ManuallyDrop<Box<dyn hal::DynComputePipeline>>,
pub(crate) layout: Arc<PipelineLayout>,
pub(crate) device: Arc<Device>,
pub(crate) _shader_module: Arc<ShaderModule>,
pub(crate) late_sized_buffer_groups: ArrayVec<LateSizedBufferGroup, { hal::MAX_BIND_GROUPS }>,
/// The `label` from the descriptor used to create the resource.
pub(crate) label: String,
pub(crate) tracking_data: TrackingData,
}
impl Drop for ComputePipeline {
fn drop(&mut self) {
resource_log!("Destroy raw {}", self.error_ident());
// SAFETY: We are in the Drop impl and we don't use self.raw anymore after this point.
let raw = unsafe { ManuallyDrop::take(&mut self.raw) };
unsafe {
self.device.raw().destroy_compute_pipeline(raw);
}
}
}
crate::impl_resource_type!(ComputePipeline);
crate::impl_labeled!(ComputePipeline);
crate::impl_parent_device!(ComputePipeline);
crate::impl_storage_item!(ComputePipeline);
crate::impl_trackable!(ComputePipeline);
impl ComputePipeline {
pub(crate) fn raw(&self) -> &dyn hal::DynComputePipeline {
self.raw.as_ref()
}
}
#[derive(Clone, Debug, Error)]
#[non_exhaustive]
pub enum CreatePipelineCacheError {
#[error(transparent)]
Device(#[from] DeviceError),
#[error("Pipeline cache validation failed")]
Validation(#[from] PipelineCacheValidationError),
#[error(transparent)]
MissingFeatures(#[from] MissingFeatures),
}
#[derive(Debug)]
pub struct PipelineCache {
pub(crate) raw: ManuallyDrop<Box<dyn hal::DynPipelineCache>>,
pub(crate) device: Arc<Device>,
/// The `label` from the descriptor used to create the resource.
pub(crate) label: String,
}
impl Drop for PipelineCache {
fn drop(&mut self) {
resource_log!("Destroy raw {}", self.error_ident());
// SAFETY: We are in the Drop impl and we don't use self.raw anymore after this point.
let raw = unsafe { ManuallyDrop::take(&mut self.raw) };
unsafe {
self.device.raw().destroy_pipeline_cache(raw);
}
}
}
crate::impl_resource_type!(PipelineCache);
crate::impl_labeled!(PipelineCache);
crate::impl_parent_device!(PipelineCache);
crate::impl_storage_item!(PipelineCache);
impl PipelineCache {
pub(crate) fn raw(&self) -> &dyn hal::DynPipelineCache {
self.raw.as_ref()
}
}
/// Describes how the vertex buffer is interpreted.
#[derive(Clone, Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "camelCase"))]
pub struct VertexBufferLayout<'a> {
/// The stride, in bytes, between elements of this buffer.
pub array_stride: wgt::BufferAddress,
/// How often this vertex buffer is "stepped" forward.
pub step_mode: wgt::VertexStepMode,
/// The list of attributes which comprise a single vertex.
pub attributes: Cow<'a, [wgt::VertexAttribute]>,
}
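// Illustrative sketch added for this document (not part of upstream wgpu): a
// `VertexBufferLayout` for a vertex consisting of a `vec3<f32>` position at
// shader location 0 followed by a `vec2<f32>` uv at location 1, stepped per
// vertex. The stride is the total size of one vertex: 3 * 4 + 2 * 4 = 20 bytes.
#[cfg(test)]
#[allow(dead_code)]
fn example_vertex_buffer_layout() -> VertexBufferLayout<'static> {
    VertexBufferLayout {
        array_stride: 20,
        step_mode: wgt::VertexStepMode::Vertex,
        attributes: Cow::Owned(vec![
            wgt::VertexAttribute {
                format: wgt::VertexFormat::Float32x3,
                offset: 0,
                shader_location: 0,
            },
            wgt::VertexAttribute {
                format: wgt::VertexFormat::Float32x2,
                offset: 12,
                shader_location: 1,
            },
        ]),
    }
}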
/// Describes the vertex process in a render pipeline.
#[derive(Clone, Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct VertexState<'a> {
/// The compiled vertex stage and its entry point.
pub stage: ProgrammableStageDescriptor<'a>,
/// The format of any vertex buffers used with this pipeline.
pub buffers: Cow<'a, [VertexBufferLayout<'a>]>,
}
/// Describes the vertex process in a render pipeline.
#[derive(Clone, Debug)]
pub struct ResolvedVertexState<'a> {
/// The compiled vertex stage and its entry point.
pub stage: ResolvedProgrammableStageDescriptor<'a>,
/// The format of any vertex buffers used with this pipeline.
pub buffers: Cow<'a, [VertexBufferLayout<'a>]>,
}
/// Describes fragment processing in a render pipeline.
#[derive(Clone, Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct FragmentState<'a> {
/// The compiled fragment stage and its entry point.
pub stage: ProgrammableStageDescriptor<'a>,
/// The effect of draw calls on the color aspect of the output target.
pub targets: Cow<'a, [Option<wgt::ColorTargetState>]>,
}
/// Describes fragment processing in a render pipeline.
#[derive(Clone, Debug)]
pub struct ResolvedFragmentState<'a> {
/// The compiled fragment stage and its entry point.
pub stage: ResolvedProgrammableStageDescriptor<'a>,
/// The effect of draw calls on the color aspect of the output target.
pub targets: Cow<'a, [Option<wgt::ColorTargetState>]>,
}
/// Describes a render (graphics) pipeline.
#[derive(Clone, Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct RenderPipelineDescriptor<'a> {
pub label: Label<'a>,
/// The layout of bind groups for this pipeline.
pub layout: Option<PipelineLayoutId>,
/// The vertex processing state for this pipeline.
pub vertex: VertexState<'a>,
/// The properties of the pipeline at the primitive assembly and rasterization level.
#[cfg_attr(feature = "serde", serde(default))]
pub primitive: wgt::PrimitiveState,
/// The effect of draw calls on the depth and stencil aspects of the output target, if any.
#[cfg_attr(feature = "serde", serde(default))]
pub depth_stencil: Option<wgt::DepthStencilState>,
/// The multi-sampling properties of the pipeline.
#[cfg_attr(feature = "serde", serde(default))]
pub multisample: wgt::MultisampleState,
/// The fragment processing state for this pipeline.
pub fragment: Option<FragmentState<'a>>,
/// If the pipeline will be used with a multiview render pass, this indicates how many array
/// layers the attachments will have.
pub multiview: Option<NonZeroU32>,
/// The pipeline cache to use when creating this pipeline.
pub cache: Option<PipelineCacheId>,
}
/// Describes a render (graphics) pipeline.
#[derive(Clone, Debug)]
pub struct ResolvedRenderPipelineDescriptor<'a> {
pub label: Label<'a>,
/// The layout of bind groups for this pipeline.
pub layout: Option<Arc<PipelineLayout>>,
/// The vertex processing state for this pipeline.
pub vertex: ResolvedVertexState<'a>,
/// The properties of the pipeline at the primitive assembly and rasterization level.
pub primitive: wgt::PrimitiveState,
/// The effect of draw calls on the depth and stencil aspects of the output target, if any.
pub depth_stencil: Option<wgt::DepthStencilState>,
/// The multi-sampling properties of the pipeline.
pub multisample: wgt::MultisampleState,
/// The fragment processing state for this pipeline.
pub fragment: Option<ResolvedFragmentState<'a>>,
/// If the pipeline will be used with a multiview render pass, this indicates how many array
/// layers the attachments will have.
pub multiview: Option<NonZeroU32>,
/// The pipeline cache to use when creating this pipeline.
pub cache: Option<Arc<PipelineCache>>,
}
#[derive(Clone, Debug)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct PipelineCacheDescriptor<'a> {
pub label: Label<'a>,
pub data: Option<Cow<'a, [u8]>>,
pub fallback: bool,
}
#[derive(Clone, Debug, Error)]
#[non_exhaustive]
pub enum ColorStateError {
#[error("Format {0:?} is not renderable")]
FormatNotRenderable(wgt::TextureFormat),
#[error("Format {0:?} is not blendable")]
FormatNotBlendable(wgt::TextureFormat),
#[error("Format {0:?} does not have a color aspect")]
FormatNotColor(wgt::TextureFormat),
#[error("Sample count {0} is not supported by format {1:?} on this device. The WebGPU spec guarantees {2:?} samples are supported by this format. With the TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES feature your device supports {3:?}.")]
InvalidSampleCount(u32, wgt::TextureFormat, Vec<u32>, Vec<u32>),
#[error("Output format {pipeline} is incompatible with the shader {shader}")]
IncompatibleFormat {
pipeline: validation::NumericType,
shader: validation::NumericType,
},
#[error("Invalid write mask {0:?}")]
InvalidWriteMask(wgt::ColorWrites),
}
#[derive(Clone, Debug, Error)]
#[non_exhaustive]
pub enum DepthStencilStateError {
#[error("Format {0:?} is not renderable")]
FormatNotRenderable(wgt::TextureFormat),
#[error("Format {0:?} does not have a depth aspect, but depth test/write is enabled")]
FormatNotDepth(wgt::TextureFormat),
#[error("Format {0:?} does not have a stencil aspect, but stencil test/write is enabled")]
FormatNotStencil(wgt::TextureFormat),
#[error("Sample count {0} is not supported by format {1:?} on this device. The WebGPU spec guarantees {2:?} samples are supported by this format. With the TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES feature your device supports {3:?}.")]
InvalidSampleCount(u32, wgt::TextureFormat, Vec<u32>, Vec<u32>),
}
#[derive(Clone, Debug, Error)]
#[non_exhaustive]
pub enum CreateRenderPipelineError {
#[error(transparent)]
ColorAttachment(#[from] ColorAttachmentError),
#[error(transparent)]
Device(#[from] DeviceError),
#[error("Unable to derive an implicit layout")]
Implicit(#[from] ImplicitLayoutError),
#[error("Color state [{0}] is invalid")]
ColorState(u8, #[source] ColorStateError),
#[error("Depth/stencil state is invalid")]
DepthStencilState(#[from] DepthStencilStateError),
#[error("Invalid sample count {0}")]
InvalidSampleCount(u32),
#[error("The number of vertex buffers {given} exceeds the limit {limit}")]
TooManyVertexBuffers { given: u32, limit: u32 },
#[error("The total number of vertex attributes {given} exceeds the limit {limit}")]
TooManyVertexAttributes { given: u32, limit: u32 },
#[error("Vertex buffer {index} stride {given} exceeds the limit {limit}")]
VertexStrideTooLarge { index: u32, given: u32, limit: u32 },
#[error("Vertex attribute at location {location} stride {given} exceeds the limit {limit}")]
VertexAttributeStrideTooLarge {
location: wgt::ShaderLocation,
given: u32,
limit: u32,
},
#[error("Vertex buffer {index} stride {stride} does not respect `VERTEX_STRIDE_ALIGNMENT`")]
UnalignedVertexStride {
index: u32,
stride: wgt::BufferAddress,
},
#[error("Vertex attribute at location {location} has invalid offset {offset}")]
InvalidVertexAttributeOffset {
location: wgt::ShaderLocation,
offset: wgt::BufferAddress,
},
#[error("Two or more vertex attributes were assigned to the same location in the shader: {0}")]
ShaderLocationClash(u32),
#[error("Strip index format was not set to None but to {strip_index_format:?} while using the non-strip topology {topology:?}")]
StripIndexFormatForNonStripTopology {
strip_index_format: Option<wgt::IndexFormat>,
topology: wgt::PrimitiveTopology,
},
#[error("Conservative Rasterization is only supported for wgt::PolygonMode::Fill")]
ConservativeRasterizationNonFillPolygonMode,
#[error(transparent)]
MissingFeatures(#[from] MissingFeatures),
#[error(transparent)]
MissingDownlevelFlags(#[from] MissingDownlevelFlags),
#[error("Error matching {stage:?} shader requirements against the pipeline")]
Stage {
stage: wgt::ShaderStages,
#[source]
error: validation::StageError,
},
#[error("Internal error in {stage:?} shader: {error}")]
Internal {
stage: wgt::ShaderStages,
error: String,
},
#[error("Pipeline constant error in {stage:?} shader: {error}")]
PipelineConstants {
stage: wgt::ShaderStages,
error: String,
},
#[error("In the provided shader, the type given for group {group} binding {binding} has a size of {size}. As the device does not support `DownlevelFlags::BUFFER_BINDINGS_NOT_16_BYTE_ALIGNED`, the type must have a size that is a multiple of 16 bytes.")]
UnalignedShader { group: u32, binding: u32, size: u64 },
#[error("Using the blend factor {factor:?} for render target {target} is not possible. Only the first render target may be used when dual-source blending.")]
BlendFactorOnUnsupportedTarget {
factor: wgt::BlendFactor,
target: u32,
},
#[error("Pipeline expects the shader entry point to make use of dual-source blending.")]
PipelineExpectsShaderToUseDualSourceBlending,
#[error("Shader entry point expects the pipeline to make use of dual-source blending.")]
ShaderExpectsPipelineToUseDualSourceBlending,
#[error("{}", concat!(
"At least one color attachment or depth-stencil attachment was expected, ",
"but no render target for the pipeline was specified."
))]
NoTargetSpecified,
#[error(transparent)]
InvalidResource(#[from] InvalidResourceError),
}
bitflags::bitflags! {
#[repr(transparent)]
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub struct PipelineFlags: u32 {
const BLEND_CONSTANT = 1 << 0;
const STENCIL_REFERENCE = 1 << 1;
const WRITES_DEPTH = 1 << 2;
const WRITES_STENCIL = 1 << 3;
}
}
/// How a render pipeline will retrieve attributes from a particular vertex buffer.
#[derive(Clone, Copy, Debug)]
pub struct VertexStep {
/// The byte stride in the buffer between one attribute value and the next.
pub stride: wgt::BufferAddress,
/// The byte size required to fit the last vertex in the stream.
pub last_stride: wgt::BufferAddress,
/// Whether the buffer is indexed by vertex number or instance number.
pub mode: wgt::VertexStepMode,
}
impl Default for VertexStep {
fn default() -> Self {
Self {
stride: 0,
last_stride: 0,
mode: wgt::VertexStepMode::Vertex,
}
}
}
#[derive(Debug)]
pub struct RenderPipeline {
pub(crate) raw: ManuallyDrop<Box<dyn hal::DynRenderPipeline>>,
pub(crate) device: Arc<Device>,
pub(crate) layout: Arc<PipelineLayout>,
pub(crate) _shader_modules: ArrayVec<Arc<ShaderModule>, { hal::MAX_CONCURRENT_SHADER_STAGES }>,
pub(crate) pass_context: RenderPassContext,
pub(crate) flags: PipelineFlags,
pub(crate) strip_index_format: Option<wgt::IndexFormat>,
pub(crate) vertex_steps: Vec<VertexStep>,
pub(crate) late_sized_buffer_groups: ArrayVec<LateSizedBufferGroup, { hal::MAX_BIND_GROUPS }>,
/// The `label` from the descriptor used to create the resource.
pub(crate) label: String,
pub(crate) tracking_data: TrackingData,
}
impl Drop for RenderPipeline {
fn drop(&mut self) {
resource_log!("Destroy raw {}", self.error_ident());
// SAFETY: We are in the Drop impl and we don't use self.raw anymore after this point.
let raw = unsafe { ManuallyDrop::take(&mut self.raw) };
unsafe {
self.device.raw().destroy_render_pipeline(raw);
}
}
}
crate::impl_resource_type!(RenderPipeline);
crate::impl_labeled!(RenderPipeline);
crate::impl_parent_device!(RenderPipeline);
crate::impl_storage_item!(RenderPipeline);
crate::impl_trackable!(RenderPipeline);
impl RenderPipeline {
pub(crate) fn raw(&self) -> &dyn hal::DynRenderPipeline {
self.raw.as_ref()
}
}

529
vendor/wgpu-core/src/pipeline_cache.rs vendored Normal file
View File

@@ -0,0 +1,529 @@
use std::mem::size_of;
use thiserror::Error;
use wgt::AdapterInfo;
pub const HEADER_LENGTH: usize = size_of::<PipelineCacheHeader>();
#[derive(Debug, PartialEq, Eq, Clone, Error)]
#[non_exhaustive]
pub enum PipelineCacheValidationError {
#[error("The pipeline cache data was truncated")]
Truncated,
#[error("The pipeline cache data was longer than recorded")]
// TODO: Is it plausible that this would happen?
Extended,
#[error("The pipeline cache data was corrupted (e.g. the hash didn't match)")]
Corrupted,
#[error("The pipeline cacha data was out of date and so cannot be safely used")]
Outdated,
#[error("The cache data was created for a different device")]
DeviceMismatch,
#[error("Pipeline cacha data was created for a future version of wgpu")]
Unsupported,
}
impl PipelineCacheValidationError {
/// Could the error have been avoided?
/// That is, is there a mistake in user code interacting with the cache?
pub fn was_avoidable(&self) -> bool {
match self {
PipelineCacheValidationError::DeviceMismatch => true,
PipelineCacheValidationError::Truncated
| PipelineCacheValidationError::Unsupported
| PipelineCacheValidationError::Extended
// It's unusual, but not implausible, to be downgrading wgpu
| PipelineCacheValidationError::Outdated
| PipelineCacheValidationError::Corrupted => false,
}
}
}
/// Validate the data in a pipeline cache
pub fn validate_pipeline_cache<'d>(
cache_data: &'d [u8],
adapter: &AdapterInfo,
validation_key: [u8; 16],
) -> Result<&'d [u8], PipelineCacheValidationError> {
let adapter_key = adapter_key(adapter)?;
let Some((header, remaining_data)) = PipelineCacheHeader::read(cache_data) else {
return Err(PipelineCacheValidationError::Truncated);
};
if header.magic != MAGIC {
return Err(PipelineCacheValidationError::Corrupted);
}
if header.header_version != HEADER_VERSION {
return Err(PipelineCacheValidationError::Outdated);
}
if header.cache_abi != ABI {
return Err(PipelineCacheValidationError::Outdated);
}
if header.backend != adapter.backend as u8 {
return Err(PipelineCacheValidationError::DeviceMismatch);
}
if header.adapter_key != adapter_key {
return Err(PipelineCacheValidationError::DeviceMismatch);
}
if header.validation_key != validation_key {
// If the validation key is wrong, that means that this device has changed
// in a way where the cache won't be compatible since the cache was made,
// so it is outdated
return Err(PipelineCacheValidationError::Outdated);
}
let data_size: usize = header
.data_size
.try_into()
// If the data was previously more than 4GiB, and we're still on a 32 bit system (ABI check, above)
// Then the data must be corrupted
.map_err(|_| PipelineCacheValidationError::Corrupted)?;
if remaining_data.len() < data_size {
return Err(PipelineCacheValidationError::Truncated);
}
if remaining_data.len() > data_size {
return Err(PipelineCacheValidationError::Extended);
}
if header.hash_space != HASH_SPACE_VALUE {
return Err(PipelineCacheValidationError::Corrupted);
}
Ok(remaining_data)
}
pub fn add_cache_header(
in_region: &mut [u8],
data: &[u8],
adapter: &AdapterInfo,
validation_key: [u8; 16],
) {
assert_eq!(in_region.len(), HEADER_LENGTH);
let header = PipelineCacheHeader {
adapter_key: adapter_key(adapter)
.expect("Called add_cache_header for an adapter which doesn't support cache data. This is a wgpu internal bug"),
backend: adapter.backend as u8,
cache_abi: ABI,
magic: MAGIC,
header_version: HEADER_VERSION,
validation_key,
hash_space: HASH_SPACE_VALUE,
data_size: data
.len()
.try_into()
.expect("Cache larger than u64::MAX bytes"),
};
header.write(in_region);
}
const MAGIC: [u8; 8] = *b"WGPUPLCH";
const HEADER_VERSION: u32 = 1;
const ABI: u32 = size_of::<*const ()>() as u32;
/// The value used to fill [`PipelineCacheHeader::hash_space`]
///
/// If we receive reports of pipeline cache data corruption which is not otherwise caught
/// on a real device, it would be worth modifying this
///
/// Note that wgpu does not protect against malicious writes to e.g. a file used
/// to store a pipeline cache.
/// That is the responsibility of the end application, such as by using a
/// private space.
const HASH_SPACE_VALUE: u64 = 0xFEDCBA9_876543210;
#[repr(C)]
#[derive(PartialEq, Eq)]
struct PipelineCacheHeader {
/// The magic header to ensure that we have the right file format
/// Has a value of MAGIC, as above
magic: [u8; 8],
// /// The total size of this header, in bytes
// header_size: u32,
/// The version of this wgpu header
/// Should be equal to HEADER_VERSION above
///
/// This must always be the second item, after the value above
header_version: u32,
/// The number of bytes in the pointers of this ABI, because some drivers
/// have previously not distinguished between their 32-bit and 64-bit drivers,
/// leading to Vulkan data corruption.
cache_abi: u32,
/// The id for the backend in use, from [wgt::Backend]
backend: u8,
/// The key which identifies the device/adapter.
/// This is used to validate that this pipeline cache (probably) was produced for
/// the expected device.
/// On Vulkan: it is a combination of vendor ID and device ID
adapter_key: [u8; 15],
/// A key used to validate that this device is still compatible with the cache
///
/// This should e.g. contain driver version and/or intermediate compiler versions
validation_key: [u8; 16],
/// The length of the data which is sent to/received from the backend
data_size: u64,
/// Space reserved for a hash of the data in future
///
/// We assume that your cache storage system will be relatively robust, and so
/// do not validate this hash
///
/// Therefore, this will always have a value of [`HASH_SPACE_VALUE`]
hash_space: u64,
}
impl PipelineCacheHeader {
fn read(data: &[u8]) -> Option<(PipelineCacheHeader, &[u8])> {
let mut reader = Reader {
data,
total_read: 0,
};
let magic = reader.read_array()?;
let header_version = reader.read_u32()?;
let cache_abi = reader.read_u32()?;
let backend = reader.read_byte()?;
let adapter_key = reader.read_array()?;
let validation_key = reader.read_array()?;
let data_size = reader.read_u64()?;
let data_hash = reader.read_u64()?;
assert_eq!(reader.total_read, size_of::<PipelineCacheHeader>());
Some((
PipelineCacheHeader {
magic,
header_version,
cache_abi,
backend,
adapter_key,
validation_key,
data_size,
hash_space: data_hash,
},
reader.data,
))
}
fn write(&self, into: &mut [u8]) -> Option<()> {
let mut writer = Writer { data: into };
writer.write_array(&self.magic)?;
writer.write_u32(self.header_version)?;
writer.write_u32(self.cache_abi)?;
writer.write_byte(self.backend)?;
writer.write_array(&self.adapter_key)?;
writer.write_array(&self.validation_key)?;
writer.write_u64(self.data_size)?;
writer.write_u64(self.hash_space)?;
assert_eq!(writer.data.len(), 0);
Some(())
}
}
fn adapter_key(adapter: &AdapterInfo) -> Result<[u8; 15], PipelineCacheValidationError> {
match adapter.backend {
wgt::Backend::Vulkan => {
// If these change size, the header format needs to change
// We set the type explicitly so this won't compile in that case
let v: [u8; 4] = adapter.vendor.to_be_bytes();
let d: [u8; 4] = adapter.device.to_be_bytes();
let adapter = [
255, 255, 255, v[0], v[1], v[2], v[3], d[0], d[1], d[2], d[3], 255, 255, 255, 255,
];
Ok(adapter)
}
_ => Err(PipelineCacheValidationError::Unsupported),
}
}
struct Reader<'a> {
data: &'a [u8],
total_read: usize,
}
impl<'a> Reader<'a> {
fn read_byte(&mut self) -> Option<u8> {
let res = *self.data.first()?;
self.total_read += 1;
self.data = &self.data[1..];
Some(res)
}
fn read_array<const N: usize>(&mut self) -> Option<[u8; N]> {
// Only greater than because we're indexing fenceposts, not items
if N > self.data.len() {
return None;
}
let (start, data) = self.data.split_at(N);
self.total_read += N;
self.data = data;
Some(start.try_into().expect("off-by-one-error in array size"))
}
// fn read_u16(&mut self) -> Option<u16> {
// self.read_array().map(u16::from_be_bytes)
// }
fn read_u32(&mut self) -> Option<u32> {
self.read_array().map(u32::from_be_bytes)
}
fn read_u64(&mut self) -> Option<u64> {
self.read_array().map(u64::from_be_bytes)
}
}
struct Writer<'a> {
data: &'a mut [u8],
}
impl<'a> Writer<'a> {
fn write_byte(&mut self, byte: u8) -> Option<()> {
self.write_array(&[byte])
}
fn write_array<const N: usize>(&mut self, array: &[u8; N]) -> Option<()> {
// Only greater than because we're indexing fenceposts, not items
if N > self.data.len() {
return None;
}
let data = std::mem::take(&mut self.data);
let (start, data) = data.split_at_mut(N);
self.data = data;
start.copy_from_slice(array);
Some(())
}
// fn write_u16(&mut self, value: u16) -> Option<()> {
// self.write_array(&value.to_be_bytes())
// }
fn write_u32(&mut self, value: u32) -> Option<()> {
self.write_array(&value.to_be_bytes())
}
fn write_u64(&mut self, value: u64) -> Option<()> {
self.write_array(&value.to_be_bytes())
}
}
#[cfg(test)]
mod tests {
use wgt::AdapterInfo;
use crate::pipeline_cache::{PipelineCacheValidationError as E, HEADER_LENGTH};
use super::ABI;
// Assert the correct size
const _: [(); HEADER_LENGTH] = [(); 64];
const ADAPTER: AdapterInfo = AdapterInfo {
name: String::new(),
vendor: 0x0002_FEED,
device: 0xFEFE_FEFE,
device_type: wgt::DeviceType::Other,
driver: String::new(),
driver_info: String::new(),
backend: wgt::Backend::Vulkan,
};
// IMPORTANT: If these tests fail, then you MUST increment HEADER_VERSION
const VALIDATION_KEY: [u8; 16] = u128::to_be_bytes(0xFFFFFFFF_FFFFFFFF_88888888_88888888);
#[test]
fn written_header() {
let mut result = [0; HEADER_LENGTH];
super::add_cache_header(&mut result, &[], &ADAPTER, VALIDATION_KEY);
let cache: [[u8; 8]; HEADER_LENGTH / 8] = [
*b"WGPUPLCH", // MAGIC
[0, 0, 0, 1, 0, 0, 0, ABI as u8], // Version and ABI
[1, 255, 255, 255, 0, 2, 0xFE, 0xED], // Backend and Adapter key
[0xFE, 0xFE, 0xFE, 0xFE, 255, 255, 255, 255], // Backend and Adapter key
0xFFFFFFFF_FFFFFFFFu64.to_be_bytes(), // Validation key
0x88888888_88888888u64.to_be_bytes(), // Validation key
0x0u64.to_be_bytes(), // Data size
0xFEDCBA9_876543210u64.to_be_bytes(), // Hash
];
let expected = cache.into_iter().flatten().collect::<Vec<u8>>();
assert_eq!(result.as_slice(), expected.as_slice());
}
#[test]
fn valid_data() {
let cache: [[u8; 8]; HEADER_LENGTH / 8] = [
*b"WGPUPLCH", // MAGIC
[0, 0, 0, 1, 0, 0, 0, ABI as u8], // Version and ABI
[1, 255, 255, 255, 0, 2, 0xFE, 0xED], // Backend and Adapter key
[0xFE, 0xFE, 0xFE, 0xFE, 255, 255, 255, 255], // Backend and Adapter key
0xFFFFFFFF_FFFFFFFFu64.to_be_bytes(), // Validation key
0x88888888_88888888u64.to_be_bytes(), // Validation key
0x0u64.to_be_bytes(), // Data size
0xFEDCBA9_876543210u64.to_be_bytes(), // Hash
];
let cache = cache.into_iter().flatten().collect::<Vec<u8>>();
let expected: &[u8] = &[];
let validation_result = super::validate_pipeline_cache(&cache, &ADAPTER, VALIDATION_KEY);
assert_eq!(validation_result, Ok(expected));
}
#[test]
fn invalid_magic() {
let cache: [[u8; 8]; HEADER_LENGTH / 8] = [
*b"NOT_WGPU", // (Wrong) MAGIC
[0, 0, 0, 1, 0, 0, 0, ABI as u8], // Version and ABI
[1, 255, 255, 255, 0, 2, 0xFE, 0xED], // Backend and Adapter key
[0xFE, 0xFE, 0xFE, 0xFE, 255, 255, 255, 255], // Backend and Adapter key
0xFFFFFFFF_FFFFFFFFu64.to_be_bytes(), // Validation key
0x88888888_88888888u64.to_be_bytes(), // Validation key
0x0u64.to_be_bytes(), // Data size
0xFEDCBA9_876543210u64.to_be_bytes(), // Hash
];
let cache = cache.into_iter().flatten().collect::<Vec<u8>>();
let validation_result = super::validate_pipeline_cache(&cache, &ADAPTER, VALIDATION_KEY);
assert_eq!(validation_result, Err(E::Corrupted));
}
#[test]
fn wrong_version() {
let cache: [[u8; 8]; HEADER_LENGTH / 8] = [
*b"WGPUPLCH", // MAGIC
[0, 0, 0, 2, 0, 0, 0, ABI as u8], // (wrong) Version and ABI
[1, 255, 255, 255, 0, 2, 0xFE, 0xED], // Backend and Adapter key
[0xFE, 0xFE, 0xFE, 0xFE, 255, 255, 255, 255], // Backend and Adapter key
0xFFFFFFFF_FFFFFFFFu64.to_be_bytes(), // Validation key
0x88888888_88888888u64.to_be_bytes(), // Validation key
0x0u64.to_be_bytes(), // Data size
0xFEDCBA9_876543210u64.to_be_bytes(), // Hash
];
let cache = cache.into_iter().flatten().collect::<Vec<u8>>();
let validation_result = super::validate_pipeline_cache(&cache, &ADAPTER, VALIDATION_KEY);
assert_eq!(validation_result, Err(E::Outdated));
}
#[test]
fn wrong_abi() {
let cache: [[u8; 8]; HEADER_LENGTH / 8] = [
*b"WGPUPLCH", // MAGIC
// a 14 bit ABI is improbable
[0, 0, 0, 1, 0, 0, 0, 14], // Version and (wrong) ABI
[1, 255, 255, 255, 0, 2, 0xFE, 0xED], // Backend and Adapter key
[0xFE, 0xFE, 0xFE, 0xFE, 255, 255, 255, 255], // Backend and Adapter key
0xFFFFFFFF_FFFFFFFFu64.to_be_bytes(), // Validation key
0x88888888_88888888u64.to_be_bytes(), // Validation key
0x0u64.to_be_bytes(), // Data size
0xFEDCBA9_876543210u64.to_be_bytes(), // Hash
];
let cache = cache.into_iter().flatten().collect::<Vec<u8>>();
let validation_result = super::validate_pipeline_cache(&cache, &ADAPTER, VALIDATION_KEY);
assert_eq!(validation_result, Err(E::Outdated));
}
#[test]
fn wrong_backend() {
let cache: [[u8; 8]; HEADER_LENGTH / 8] = [
*b"WGPUPLCH", // MAGIC
[0, 0, 0, 1, 0, 0, 0, ABI as u8], // Version and ABI
[2, 255, 255, 255, 0, 2, 0xFE, 0xED], // (wrong) Backend and Adapter key
[0xFE, 0xFE, 0xFE, 0xFE, 255, 255, 255, 255], // Backend and Adapter key
0xFFFFFFFF_FFFFFFFFu64.to_be_bytes(), // Validation key
0x88888888_88888888u64.to_be_bytes(), // Validation key
0x0u64.to_be_bytes(), // Data size
0xFEDCBA9_876543210u64.to_be_bytes(), // Hash
];
let cache = cache.into_iter().flatten().collect::<Vec<u8>>();
let validation_result = super::validate_pipeline_cache(&cache, &ADAPTER, VALIDATION_KEY);
assert_eq!(validation_result, Err(E::DeviceMismatch));
}
#[test]
fn wrong_adapter() {
let cache: [[u8; 8]; HEADER_LENGTH / 8] = [
*b"WGPUPLCH", // MAGIC
[0, 0, 0, 1, 0, 0, 0, ABI as u8], // Version and ABI
[1, 255, 255, 255, 0, 2, 0xFE, 0x00], // Backend and (wrong) Adapter key
[0xFE, 0xFE, 0xFE, 0xFE, 255, 255, 255, 255], // Backend and Adapter key
0xFFFFFFFF_FFFFFFFFu64.to_be_bytes(), // Validation key
0x88888888_88888888u64.to_be_bytes(), // Validation key
0x0u64.to_be_bytes(), // Data size
0xFEDCBA9_876543210u64.to_be_bytes(), // Hash
];
let cache = cache.into_iter().flatten().collect::<Vec<u8>>();
let validation_result = super::validate_pipeline_cache(&cache, &ADAPTER, VALIDATION_KEY);
assert_eq!(validation_result, Err(E::DeviceMismatch));
}
#[test]
fn wrong_validation() {
let cache: [[u8; 8]; HEADER_LENGTH / 8] = [
*b"WGPUPLCH", // MAGIC
[0, 0, 0, 1, 0, 0, 0, ABI as u8], // Version and ABI
[1, 255, 255, 255, 0, 2, 0xFE, 0xED], // Backend and Adapter key
[0xFE, 0xFE, 0xFE, 0xFE, 255, 255, 255, 255], // Backend and Adapter key
0xFFFFFFFF_FFFFFFFFu64.to_be_bytes(), // Validation key
0x88888888_00000000u64.to_be_bytes(), // (wrong) Validation key
0x0u64.to_be_bytes(), // Data size
0xFEDCBA9_876543210u64.to_be_bytes(), // Hash
];
let cache = cache.into_iter().flatten().collect::<Vec<u8>>();
let validation_result = super::validate_pipeline_cache(&cache, &ADAPTER, VALIDATION_KEY);
assert_eq!(validation_result, Err(E::Outdated));
}
#[test]
fn too_little_data() {
let cache: [[u8; 8]; HEADER_LENGTH / 8] = [
*b"WGPUPLCH", // MAGIC
[0, 0, 0, 1, 0, 0, 0, ABI as u8], // Version and ABI
[1, 255, 255, 255, 0, 2, 0xFE, 0xED], // Backend and Adapter key
[0xFE, 0xFE, 0xFE, 0xFE, 255, 255, 255, 255], // Backend and Adapter key
0xFFFFFFFF_FFFFFFFFu64.to_be_bytes(), // Validation key
0x88888888_88888888u64.to_be_bytes(), // Validation key
0x064u64.to_be_bytes(), // Data size
0xFEDCBA9_876543210u64.to_be_bytes(), // Hash
];
let cache = cache.into_iter().flatten().collect::<Vec<u8>>();
let validation_result = super::validate_pipeline_cache(&cache, &ADAPTER, VALIDATION_KEY);
assert_eq!(validation_result, Err(E::Truncated));
}
#[test]
fn not_no_data() {
let cache: [[u8; 8]; HEADER_LENGTH / 8] = [
*b"WGPUPLCH", // MAGIC
[0, 0, 0, 1, 0, 0, 0, ABI as u8], // Version and ABI
[1, 255, 255, 255, 0, 2, 0xFE, 0xED], // Backend and Adapter key
[0xFE, 0xFE, 0xFE, 0xFE, 255, 255, 255, 255], // Backend and Adapter key
0xFFFFFFFF_FFFFFFFFu64.to_be_bytes(), // Validation key
0x88888888_88888888u64.to_be_bytes(), // Validation key
100u64.to_be_bytes(), // Data size
0xFEDCBA9_876543210u64.to_be_bytes(), // Hash
];
let cache = cache
.into_iter()
.flatten()
.chain(std::iter::repeat(0u8).take(100))
.collect::<Vec<u8>>();
let validation_result = super::validate_pipeline_cache(&cache, &ADAPTER, VALIDATION_KEY);
let expected: &[u8] = &[0; 100];
assert_eq!(validation_result, Ok(expected));
}
#[test]
fn too_much_data() {
let cache: [[u8; 8]; HEADER_LENGTH / 8] = [
*b"WGPUPLCH", // MAGIC
[0, 0, 0, 1, 0, 0, 0, ABI as u8], // Version and ABI
[1, 255, 255, 255, 0, 2, 0xFE, 0xED], // Backend and Adapter key
[0xFE, 0xFE, 0xFE, 0xFE, 255, 255, 255, 255], // Backend and Adapter key
0xFFFFFFFF_FFFFFFFFu64.to_be_bytes(), // Validation key
0x88888888_88888888u64.to_be_bytes(), // Validation key
0x064u64.to_be_bytes(), // Data size
0xFEDCBA9_876543210u64.to_be_bytes(), // Hash
];
let cache = cache
.into_iter()
.flatten()
.chain(std::iter::repeat(0u8).take(200))
.collect::<Vec<u8>>();
let validation_result = super::validate_pipeline_cache(&cache, &ADAPTER, VALIDATION_KEY);
assert_eq!(validation_result, Err(E::Extended));
}
#[test]
fn wrong_hash() {
let cache: [[u8; 8]; HEADER_LENGTH / 8] = [
*b"WGPUPLCH", // MAGIC
[0, 0, 0, 1, 0, 0, 0, ABI as u8], // Version and ABI
[1, 255, 255, 255, 0, 2, 0xFE, 0xED], // Backend and Adapter key
[0xFE, 0xFE, 0xFE, 0xFE, 255, 255, 255, 255], // Backend and Adapter key
0xFFFFFFFF_FFFFFFFFu64.to_be_bytes(), // Validation key
0x88888888_88888888u64.to_be_bytes(), // Validation key
0x0u64.to_be_bytes(), // Data size
0x00000000_00000000u64.to_be_bytes(), // Hash
];
let cache = cache.into_iter().flatten().collect::<Vec<u8>>();
let validation_result = super::validate_pipeline_cache(&cache, &ADAPTER, VALIDATION_KEY);
assert_eq!(validation_result, Err(E::Corrupted));
}
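    // Round-trip sketch added for this document (not part of upstream wgpu): a
    // header produced by `add_cache_header` for this adapter should validate and
    // hand back the payload unchanged.
    #[test]
    fn roundtrip() {
        let payload = [7u8; 32];
        let mut cache = vec![0u8; HEADER_LENGTH + payload.len()];
        cache[HEADER_LENGTH..].copy_from_slice(&payload);
        let (header, data) = cache.split_at_mut(HEADER_LENGTH);
        super::add_cache_header(header, data, &ADAPTER, VALIDATION_KEY);
        let validation_result = super::validate_pipeline_cache(&cache, &ADAPTER, VALIDATION_KEY);
        assert_eq!(validation_result, Ok(&payload[..]));
    }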
}

310
vendor/wgpu-core/src/pool.rs vendored Normal file
View File

@@ -0,0 +1,310 @@
use std::{
collections::{hash_map::Entry, HashMap},
hash::Hash,
sync::{Arc, Weak},
};
use once_cell::sync::OnceCell;
use crate::lock::{rank, Mutex};
use crate::FastHashMap;
type SlotInner<V> = Weak<V>;
type ResourcePoolSlot<V> = Arc<OnceCell<SlotInner<V>>>;
pub struct ResourcePool<K, V> {
inner: Mutex<FastHashMap<K, ResourcePoolSlot<V>>>,
}
impl<K: Clone + Eq + Hash, V> ResourcePool<K, V> {
pub fn new() -> Self {
Self {
inner: Mutex::new(rank::RESOURCE_POOL_INNER, HashMap::default()),
}
}
/// Get a resource from the pool with the given key, or create a new
/// one if it doesn't exist, using the given constructor.
///
/// Behaves such that only one resource will be created for each unique
/// key at any one time.
pub fn get_or_init<F, E>(&self, key: K, constructor: F) -> Result<Arc<V>, E>
where
F: FnOnce(K) -> Result<Arc<V>, E>,
{
// We can't prove at compile time that these will only ever be consumed once,
// so we need to do the check at runtime.
let mut key = Some(key);
let mut constructor = Some(constructor);
'race: loop {
let mut map_guard = self.inner.lock();
let entry = match map_guard.entry(key.clone().unwrap()) {
// An entry exists for this resource.
//
// We know that either:
// - The resource is still alive, and Weak::upgrade will succeed.
// - The resource is in the process of being dropped, and Weak::upgrade will fail.
//
// The entry will never be empty while the BGL is still alive.
Entry::Occupied(entry) => Arc::clone(entry.get()),
// No entry exists for this resource.
//
// We know that the resource is not alive, so we can create a new entry.
Entry::Vacant(entry) => Arc::clone(entry.insert(Arc::new(OnceCell::new()))),
};
drop(map_guard);
// Some other thread may beat us to initializing the entry, but OnceCell guarantees that only one thread
// will actually initialize the entry.
//
// We pass the strong reference outside of the closure to keep it alive while we're the only one keeping a reference to it.
let mut strong = None;
let weak = entry.get_or_try_init(|| {
let strong_inner = constructor.take().unwrap()(key.take().unwrap())?;
let weak = Arc::downgrade(&strong_inner);
strong = Some(strong_inner);
Ok(weak)
})?;
// If strong is Some, that means we just initialized the entry, so we can just return it.
if let Some(strong) = strong {
return Ok(strong);
}
// The entry was already initialized by someone else, so we need to try to upgrade it.
if let Some(strong) = weak.upgrade() {
// We succeeded: the resource is still alive, so just return it.
return Ok(strong);
}
// The resource is in the process of being dropped, because upgrade failed.
// The entry still exists in the map, but it points to nothing.
//
// We're in a race with the drop implementation of the resource,
// so let's just go around again. When we go around again:
// - If the entry exists, we might need to go around a few more times.
// - If the entry doesn't exist, we'll create a new one.
continue 'race;
}
}
/// Remove the given key from the pool.
///
/// Must *only* be called in the Drop impl of [`BindGroupLayout`].
///
/// [`BindGroupLayout`]: crate::binding_model::BindGroupLayout
pub fn remove(&self, key: &K) {
let mut map_guard = self.inner.lock();
// Weak::upgrade will be failing long before this code is called. All threads trying to access the resource will be spinning,
// waiting for the entry to be removed. It is safe to remove the entry from the map.
map_guard.remove(key);
}
}
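// An illustrative usage sketch (hypothetical helper): looking up the same key
// twice yields the same `Arc` while it is alive, so the constructor runs at
// most once per unique key. The `u32` key/value types and the key `7` are
// arbitrary choices for the example.
#[allow(dead_code)]
fn example_pool_usage(pool: &ResourcePool<u32, u32>) -> Arc<u32> {
// The error type is `()` because this constructor cannot fail.
let first = pool
.get_or_init::<_, ()>(7, |key| Ok(Arc::new(key)))
.unwrap();
// A second lookup with the same key reuses the live entry instead of running
// the constructor again.
let second = pool
.get_or_init::<_, ()>(7, |key| Ok(Arc::new(key)))
.unwrap();
debug_assert!(Arc::ptr_eq(&first, &second));
second
}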
#[cfg(test)]
mod tests {
use std::sync::{
atomic::{AtomicU32, Ordering},
Barrier,
};
use super::*;
#[test]
fn deduplication() {
let pool = ResourcePool::<u32, u32>::new();
let mut counter = 0_u32;
let arc1 = pool
.get_or_init::<_, ()>(0, |key| {
counter += 1;
Ok(Arc::new(key))
})
.unwrap();
assert_eq!(*arc1, 0);
assert_eq!(counter, 1);
let arc2 = pool
.get_or_init::<_, ()>(0, |key| {
counter += 1;
Ok(Arc::new(key))
})
.unwrap();
assert!(Arc::ptr_eq(&arc1, &arc2));
assert_eq!(*arc2, 0);
assert_eq!(counter, 1);
drop(arc1);
drop(arc2);
pool.remove(&0);
let arc3 = pool
.get_or_init::<_, ()>(0, |key| {
counter += 1;
Ok(Arc::new(key))
})
.unwrap();
assert_eq!(*arc3, 0);
assert_eq!(counter, 2);
}
// Test name has "2_threads" in the name so nextest reserves two threads for it.
#[test]
fn concurrent_creation_2_threads() {
struct Resources {
pool: ResourcePool<u32, u32>,
counter: AtomicU32,
barrier: Barrier,
}
let resources = Arc::new(Resources {
pool: ResourcePool::<u32, u32>::new(),
counter: AtomicU32::new(0),
barrier: Barrier::new(2),
});
// Like all races, this is not inherently guaranteed to work, but in practice it should work fine.
//
// To validate the expected order of events, we've put print statements in the code, indicating when each thread is at a certain point.
// The output will look something like this if the test is working as expected:
//
// ```
// 0: prewait
// 1: prewait
// 1: postwait
// 0: postwait
// 1: init
// 1: postget
// 0: postget
// ```
fn thread_inner(idx: u8, resources: &Resources) -> Arc<u32> {
eprintln!("{idx}: prewait");
// Once this returns, both threads should hit get_or_init at about the same time,
// allowing us to actually test concurrent creation.
//
// Like all races, this is not inherently guaranteed to work, but in practice it should work fine.
resources.barrier.wait();
eprintln!("{idx}: postwait");
let ret = resources
.pool
.get_or_init::<_, ()>(0, |key| {
eprintln!("{idx}: init");
// Simulate long running constructor, ensuring that both threads will be in get_or_init.
std::thread::sleep(std::time::Duration::from_millis(250));
resources.counter.fetch_add(1, Ordering::SeqCst);
Ok(Arc::new(key))
})
.unwrap();
eprintln!("{idx}: postget");
ret
}
let thread1 = std::thread::spawn({
let resource_clone = Arc::clone(&resources);
move || thread_inner(1, &resource_clone)
});
let arc0 = thread_inner(0, &resources);
assert_eq!(resources.counter.load(Ordering::Acquire), 1);
let arc1 = thread1.join().unwrap();
assert!(Arc::ptr_eq(&arc0, &arc1));
}
// Test name has "2_threads" in the name so nextest reserves two threads for it.
#[test]
fn create_while_drop_2_threads() {
struct Resources {
pool: ResourcePool<u32, u32>,
barrier: Barrier,
}
let resources = Arc::new(Resources {
pool: ResourcePool::<u32, u32>::new(),
barrier: Barrier::new(2),
});
// Like all races, this is not inherently guaranteed to work, but in practice it should work fine.
//
// To validate the expected order of events, we've put print statements in the code, indicating when each thread is at a certain point.
// The output will look something like this if the test is working as expected:
//
// ```
// 0: prewait
// 1: prewait
// 1: postwait
// 0: postwait
// 1: postsleep
// 1: removal
// 0: postget
// ```
//
// The last two _may_ be flipped.
let existing_entry = resources
.pool
.get_or_init::<_, ()>(0, |key| Ok(Arc::new(key)))
.unwrap();
// Drop the entry, but do _not_ remove it from the pool.
// This simulates the situation where the resource arc has been dropped, but the Drop
// implementation (which calls `remove`) has not yet run.
drop(existing_entry);
fn thread0_inner(resources: &Resources) {
eprintln!("0: prewait");
resources.barrier.wait();
eprintln!("0: postwait");
// We try to create a new entry, but the entry already exists.
//
// As Arc::upgrade is failing, we will just keep spinning until remove is called.
resources
.pool
.get_or_init::<_, ()>(0, |key| Ok(Arc::new(key)))
.unwrap();
eprintln!("0: postget");
}
fn thread1_inner(resources: &Resources) {
eprintln!("1: prewait");
resources.barrier.wait();
eprintln!("1: postwait");
// We wait a little bit, making sure that thread0_inner has started spinning.
std::thread::sleep(std::time::Duration::from_millis(250));
eprintln!("1: postsleep");
// We remove the entry from the pool, allowing thread0_inner to re-create.
resources.pool.remove(&0);
eprintln!("1: removal");
}
let thread1 = std::thread::spawn({
let resource_clone = Arc::clone(&resources);
move || thread1_inner(&resource_clone)
});
thread0_inner(&resources);
thread1.join().unwrap();
}
}

368
vendor/wgpu-core/src/present.rs vendored Normal file
View File

@@ -0,0 +1,368 @@
/*! Presentation.
## Lifecycle
Whenever a submission detects the use of any surface texture, it adds it to the device
tracker for the duration of the submission (temporarily, while recording).
It's added with `UNINITIALIZED` state and transitioned into `empty()` state.
When this texture is presented, we remove it from the device tracker as well as
extract it from the hub.
!*/
use std::{mem::ManuallyDrop, sync::Arc};
#[cfg(feature = "trace")]
use crate::device::trace::Action;
use crate::{
conv,
device::{Device, DeviceError, MissingDownlevelFlags, WaitIdleError},
global::Global,
hal_label, id,
instance::Surface,
resource,
};
use thiserror::Error;
use wgt::SurfaceStatus as Status;
const FRAME_TIMEOUT_MS: u32 = 1000;
#[derive(Debug)]
pub(crate) struct Presentation {
pub(crate) device: Arc<Device>,
pub(crate) config: wgt::SurfaceConfiguration<Vec<wgt::TextureFormat>>,
pub(crate) acquired_texture: Option<Arc<resource::Texture>>,
}
#[derive(Clone, Debug, Error)]
#[non_exhaustive]
pub enum SurfaceError {
#[error("Surface is invalid")]
Invalid,
#[error("Surface is not configured for presentation")]
NotConfigured,
#[error(transparent)]
Device(#[from] DeviceError),
#[error("Surface image is already acquired")]
AlreadyAcquired,
#[error("Texture has been destroyed")]
TextureDestroyed,
}
#[derive(Clone, Debug, Error)]
#[non_exhaustive]
pub enum ConfigureSurfaceError {
#[error(transparent)]
Device(#[from] DeviceError),
#[error("Invalid surface")]
InvalidSurface,
#[error("The view format {0:?} is not compatible with texture format {1:?}, only changing srgb-ness is allowed.")]
InvalidViewFormat(wgt::TextureFormat, wgt::TextureFormat),
#[error(transparent)]
MissingDownlevelFlags(#[from] MissingDownlevelFlags),
#[error("`SurfaceOutput` must be dropped before a new `Surface` is made")]
PreviousOutputExists,
#[error("Both `Surface` width and height must be non-zero. Wait to recreate the `Surface` until the window has non-zero area.")]
ZeroArea,
#[error("`Surface` width and height must be within the maximum supported texture size. Requested was ({width}, {height}), maximum extent for either dimension is {max_texture_dimension_2d}.")]
TooLarge {
width: u32,
height: u32,
max_texture_dimension_2d: u32,
},
#[error("Surface does not support the adapter's queue family")]
UnsupportedQueueFamily,
#[error("Requested format {requested:?} is not in list of supported formats: {available:?}")]
UnsupportedFormat {
requested: wgt::TextureFormat,
available: Vec<wgt::TextureFormat>,
},
#[error("Requested present mode {requested:?} is not in the list of supported present modes: {available:?}")]
UnsupportedPresentMode {
requested: wgt::PresentMode,
available: Vec<wgt::PresentMode>,
},
#[error("Requested alpha mode {requested:?} is not in the list of supported alpha modes: {available:?}")]
UnsupportedAlphaMode {
requested: wgt::CompositeAlphaMode,
available: Vec<wgt::CompositeAlphaMode>,
},
#[error("Requested usage {requested:?} is not in the list of supported usages: {available:?}")]
UnsupportedUsage {
requested: hal::TextureUses,
available: hal::TextureUses,
},
}
impl From<WaitIdleError> for ConfigureSurfaceError {
fn from(e: WaitIdleError) -> Self {
match e {
WaitIdleError::Device(d) => ConfigureSurfaceError::Device(d),
WaitIdleError::WrongSubmissionIndex(..) => unreachable!(),
}
}
}
#[derive(Debug)]
pub struct ResolvedSurfaceOutput {
pub status: Status,
pub texture: Option<Arc<resource::Texture>>,
}
#[repr(C)]
#[derive(Debug)]
pub struct SurfaceOutput {
pub status: Status,
pub texture_id: Option<id::TextureId>,
}
impl Surface {
pub fn get_current_texture(&self) -> Result<ResolvedSurfaceOutput, SurfaceError> {
profiling::scope!("Surface::get_current_texture");
let (device, config) = if let Some(ref present) = *self.presentation.lock() {
present.device.check_is_valid()?;
(present.device.clone(), present.config.clone())
} else {
return Err(SurfaceError::NotConfigured);
};
let fence = device.fence.read();
let suf = self.raw(device.backend()).unwrap();
let (texture, status) = match unsafe {
suf.acquire_texture(
Some(std::time::Duration::from_millis(FRAME_TIMEOUT_MS as u64)),
fence.as_ref(),
)
} {
Ok(Some(ast)) => {
drop(fence);
let texture_desc = wgt::TextureDescriptor {
label: Some(std::borrow::Cow::Borrowed("<Surface Texture>")),
size: wgt::Extent3d {
width: config.width,
height: config.height,
depth_or_array_layers: 1,
},
sample_count: 1,
mip_level_count: 1,
format: config.format,
dimension: wgt::TextureDimension::D2,
usage: config.usage,
view_formats: config.view_formats,
};
let format_features = wgt::TextureFormatFeatures {
allowed_usages: wgt::TextureUsages::RENDER_ATTACHMENT,
flags: wgt::TextureFormatFeatureFlags::MULTISAMPLE_X4
| wgt::TextureFormatFeatureFlags::MULTISAMPLE_RESOLVE,
};
let hal_usage = conv::map_texture_usage(
config.usage,
config.format.into(),
format_features.flags,
);
let clear_view_desc = hal::TextureViewDescriptor {
label: hal_label(
Some("(wgpu internal) clear surface texture view"),
device.instance_flags,
),
format: config.format,
dimension: wgt::TextureViewDimension::D2,
usage: hal::TextureUses::COLOR_TARGET,
range: wgt::ImageSubresourceRange::default(),
};
let clear_view = unsafe {
device
.raw()
.create_texture_view(ast.texture.as_ref().borrow(), &clear_view_desc)
}
.map_err(|e| device.handle_hal_error(e))?;
let mut presentation = self.presentation.lock();
let present = presentation.as_mut().unwrap();
let texture = resource::Texture::new(
&device,
resource::TextureInner::Surface { raw: ast.texture },
hal_usage,
&texture_desc,
format_features,
resource::TextureClearMode::Surface {
clear_view: ManuallyDrop::new(clear_view),
},
true,
);
let texture = Arc::new(texture);
device
.trackers
.lock()
.textures
.insert_single(&texture, hal::TextureUses::UNINITIALIZED);
if present.acquired_texture.is_some() {
return Err(SurfaceError::AlreadyAcquired);
}
present.acquired_texture = Some(texture.clone());
let status = if ast.suboptimal {
Status::Suboptimal
} else {
Status::Good
};
(Some(texture), status)
}
Ok(None) => (None, Status::Timeout),
Err(err) => (
None,
match err {
hal::SurfaceError::Lost => Status::Lost,
hal::SurfaceError::Device(err) => {
return Err(device.handle_hal_error(err).into());
}
hal::SurfaceError::Outdated => Status::Outdated,
hal::SurfaceError::Other(msg) => {
log::error!("acquire error: {}", msg);
Status::Lost
}
},
),
};
Ok(ResolvedSurfaceOutput { status, texture })
}
pub fn present(&self) -> Result<Status, SurfaceError> {
profiling::scope!("Surface::present");
let mut presentation = self.presentation.lock();
let present = match presentation.as_mut() {
Some(present) => present,
None => return Err(SurfaceError::NotConfigured),
};
let device = &present.device;
device.check_is_valid()?;
let queue = device.get_queue().unwrap();
let texture = present
.acquired_texture
.take()
.ok_or(SurfaceError::AlreadyAcquired)?;
let result = match texture.inner.snatch(&mut device.snatchable_lock.write()) {
None => return Err(SurfaceError::TextureDestroyed),
Some(resource::TextureInner::Surface { raw }) => {
let raw_surface = self.raw(device.backend()).unwrap();
let raw_queue = queue.raw();
unsafe { raw_queue.present(raw_surface, raw) }
}
_ => unreachable!(),
};
match result {
Ok(()) => Ok(Status::Good),
Err(err) => match err {
hal::SurfaceError::Lost => Ok(Status::Lost),
hal::SurfaceError::Device(err) => {
Err(SurfaceError::from(device.handle_hal_error(err)))
}
hal::SurfaceError::Outdated => Ok(Status::Outdated),
hal::SurfaceError::Other(msg) => {
log::error!("acquire error: {}", msg);
Err(SurfaceError::Invalid)
}
},
}
}
pub fn discard(&self) -> Result<(), SurfaceError> {
profiling::scope!("Surface::discard");
let mut presentation = self.presentation.lock();
let present = match presentation.as_mut() {
Some(present) => present,
None => return Err(SurfaceError::NotConfigured),
};
let device = &present.device;
device.check_is_valid()?;
let texture = present
.acquired_texture
.take()
.ok_or(SurfaceError::AlreadyAcquired)?;
match texture.inner.snatch(&mut device.snatchable_lock.write()) {
None => return Err(SurfaceError::TextureDestroyed),
Some(resource::TextureInner::Surface { raw }) => {
let raw_surface = self.raw(device.backend()).unwrap();
unsafe { raw_surface.discard_texture(raw) };
}
_ => unreachable!(),
}
Ok(())
}
}
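// An illustrative sketch (hypothetical helper, not called by wgpu-core itself):
// the expected call order at this layer is acquire -> submit work -> present,
// with `discard` as the bail-out path when a frame is abandoned.
#[allow(dead_code)]
fn example_present_cycle(surface: &Surface) -> Result<Status, SurfaceError> {
let output = surface.get_current_texture()?;
if output.texture.is_none() {
// Timeout or similar: nothing was acquired, so there is nothing to present.
return Ok(output.status);
}
// ... command submission targeting the acquired texture would go here ...
surface.present()
}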
impl Global {
pub fn surface_get_current_texture(
&self,
surface_id: id::SurfaceId,
texture_id_in: Option<id::TextureId>,
) -> Result<SurfaceOutput, SurfaceError> {
let surface = self.surfaces.get(surface_id);
let fid = self.hub.textures.prepare(texture_id_in);
#[cfg(feature = "trace")]
if let Some(present) = surface.presentation.lock().as_ref() {
if let Some(ref mut trace) = *present.device.trace.lock() {
trace.add(Action::GetSurfaceTexture {
id: fid.id(),
parent_id: surface_id,
});
}
}
let output = surface.get_current_texture()?;
let status = output.status;
let texture_id = output
.texture
.map(|texture| fid.assign(resource::Fallible::Valid(texture)));
Ok(SurfaceOutput { status, texture_id })
}
pub fn surface_present(&self, surface_id: id::SurfaceId) -> Result<Status, SurfaceError> {
let surface = self.surfaces.get(surface_id);
#[cfg(feature = "trace")]
if let Some(present) = surface.presentation.lock().as_ref() {
if let Some(ref mut trace) = *present.device.trace.lock() {
trace.add(Action::Present(surface_id));
}
}
surface.present()
}
pub fn surface_texture_discard(&self, surface_id: id::SurfaceId) -> Result<(), SurfaceError> {
let surface = self.surfaces.get(surface_id);
#[cfg(feature = "trace")]
if let Some(present) = surface.presentation.lock().as_ref() {
if let Some(ref mut trace) = *present.device.trace.lock() {
trace.add(Action::DiscardSurfaceTexture(surface_id));
}
}
surface.discard()
}
}

256
vendor/wgpu-core/src/ray_tracing.rs vendored Normal file
View File

@@ -0,0 +1,256 @@
// Ray tracing
// Major missing optimizations (no api surface changes needed):
// - use custom tracker to track build state
// - no forced rebuild (build mode deduction)
// - lazy instance buffer allocation
// - maybe share scratch and instance staging buffer allocation
// - partial instance buffer uploads (api surface already designed with this in mind)
// - (non-performance: extract a helper function in build; Rust function extraction with guards is a pain)
use crate::{
command::CommandEncoderError,
device::{DeviceError, MissingFeatures},
id::{BlasId, BufferId, TlasId},
resource::{DestroyedResourceError, InvalidResourceError, MissingBufferUsageError},
};
use std::num::NonZeroU64;
use std::sync::Arc;
use crate::resource::{Blas, ResourceErrorIdent, Tlas};
use thiserror::Error;
use wgt::{AccelerationStructureGeometryFlags, BufferAddress, IndexFormat, VertexFormat};
#[derive(Clone, Debug, Error)]
pub enum CreateBlasError {
#[error(transparent)]
Device(#[from] DeviceError),
#[error(transparent)]
MissingFeatures(#[from] MissingFeatures),
#[error(
"Only one of 'index_count' and 'index_format' was provided (either provide both or none)"
)]
MissingIndexData,
#[error("Provided format was not within allowed formats. Provided format: {0:?}. Allowed formats: {1:?}")]
InvalidVertexFormat(VertexFormat, Vec<VertexFormat>),
}
#[derive(Clone, Debug, Error)]
pub enum CreateTlasError {
#[error(transparent)]
Device(#[from] DeviceError),
#[error(transparent)]
MissingFeatures(#[from] MissingFeatures),
}
/// Error encountered while attempting to do a copy on a command encoder.
#[derive(Clone, Debug, Error)]
pub enum BuildAccelerationStructureError {
#[error(transparent)]
Encoder(#[from] CommandEncoderError),
#[error(transparent)]
Device(#[from] DeviceError),
#[error(transparent)]
InvalidResource(#[from] InvalidResourceError),
#[error(transparent)]
DestroyedResource(#[from] DestroyedResourceError),
#[error(transparent)]
MissingBufferUsage(#[from] MissingBufferUsageError),
#[error(transparent)]
MissingFeatures(#[from] MissingFeatures),
#[error(
"Buffer {0:?} size is insufficient for provided size information (size: {1}, required: {2}"
)]
InsufficientBufferSize(ResourceErrorIdent, u64, u64),
#[error("Buffer {0:?} associated offset doesn't align with the index type")]
UnalignedIndexBufferOffset(ResourceErrorIdent),
#[error("Buffer {0:?} associated offset is unaligned")]
UnalignedTransformBufferOffset(ResourceErrorIdent),
#[error("Buffer {0:?} associated index count not divisible by 3 (count: {1}")]
InvalidIndexCount(ResourceErrorIdent, u32),
#[error("Buffer {0:?} associated data contains None")]
MissingAssociatedData(ResourceErrorIdent),
#[error(
"Blas {0:?} build sizes to may be greater than the descriptor at build time specified"
)]
IncompatibleBlasBuildSizes(ResourceErrorIdent),
#[error("Blas {0:?} flags are different, creation flags: {1:?}, provided: {2:?}")]
IncompatibleBlasFlags(
ResourceErrorIdent,
AccelerationStructureGeometryFlags,
AccelerationStructureGeometryFlags,
),
#[error("Blas {0:?} build vertex count is greater than creation count (needs to be less than or equal to), creation: {1:?}, build: {2:?}")]
IncompatibleBlasVertexCount(ResourceErrorIdent, u32, u32),
#[error("Blas {0:?} vertex formats are different, creation format: {1:?}, provided: {2:?}")]
DifferentBlasVertexFormats(ResourceErrorIdent, VertexFormat, VertexFormat),
#[error("Blas {0:?} index count was provided at creation or building, but not the other")]
BlasIndexCountProvidedMismatch(ResourceErrorIdent),
#[error("Blas {0:?} build index count is greater than creation count (needs to be less than or equal to), creation: {1:?}, build: {2:?}")]
IncompatibleBlasIndexCount(ResourceErrorIdent, u32, u32),
#[error("Blas {0:?} index formats are different, creation format: {1:?}, provided: {2:?}")]
DifferentBlasIndexFormats(ResourceErrorIdent, Option<IndexFormat>, Option<IndexFormat>),
#[error("Blas {0:?} build sizes require index buffer but none was provided")]
MissingIndexBuffer(ResourceErrorIdent),
#[error(
"Tlas {0:?} an associated instances contains an invalid custom index (more than 24bits)"
)]
TlasInvalidCustomIndex(ResourceErrorIdent),
#[error(
"Tlas {0:?} has {1} active instances but only {2} are allowed as specified by the descriptor at creation"
)]
TlasInstanceCountExceeded(ResourceErrorIdent, u32, u32),
}
#[derive(Clone, Debug, Error)]
pub enum ValidateBlasActionsError {
#[error("Blas {0:?} is used before it is built")]
UsedUnbuilt(ResourceErrorIdent),
}
#[derive(Clone, Debug, Error)]
pub enum ValidateTlasActionsError {
#[error(transparent)]
DestroyedResource(#[from] DestroyedResourceError),
#[error("Tlas {0:?} is used before it is built")]
UsedUnbuilt(ResourceErrorIdent),
#[error("Blas {0:?} is used before it is built (in Tlas {1:?})")]
UsedUnbuiltBlas(ResourceErrorIdent, ResourceErrorIdent),
#[error("Blas {0:?} is newer than the containing Tlas {1:?}")]
BlasNewerThenTlas(ResourceErrorIdent, ResourceErrorIdent),
}
#[derive(Debug)]
pub struct BlasTriangleGeometry<'a> {
pub size: &'a wgt::BlasTriangleGeometrySizeDescriptor,
pub vertex_buffer: BufferId,
pub index_buffer: Option<BufferId>,
pub transform_buffer: Option<BufferId>,
pub first_vertex: u32,
pub vertex_stride: BufferAddress,
pub first_index: Option<u32>,
pub transform_buffer_offset: Option<BufferAddress>,
}
pub enum BlasGeometries<'a> {
TriangleGeometries(Box<dyn Iterator<Item = BlasTriangleGeometry<'a>> + 'a>),
}
pub struct BlasBuildEntry<'a> {
pub blas_id: BlasId,
pub geometries: BlasGeometries<'a>,
}
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct TlasBuildEntry {
pub tlas_id: TlasId,
pub instance_buffer_id: BufferId,
pub instance_count: u32,
}
#[derive(Debug)]
pub struct TlasInstance<'a> {
pub blas_id: BlasId,
pub transform: &'a [f32; 12],
pub custom_index: u32,
pub mask: u8,
}
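// An illustrative sketch (hypothetical helper): building a single `TlasInstance`
// for a TLAS package. The caller-supplied `blas_id` and transform are placeholders;
// `custom_index` must fit in 24 bits or the build fails with `TlasInvalidCustomIndex`.
#[allow(dead_code)]
fn example_tlas_instance(blas_id: BlasId, transform: &[f32; 12]) -> TlasInstance<'_> {
TlasInstance {
blas_id,
transform,
custom_index: 0,
// Visible to every ray: all 8 mask bits set.
mask: 0xFF,
}
}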
pub struct TlasPackage<'a> {
pub tlas_id: TlasId,
pub instances: Box<dyn Iterator<Item = Option<TlasInstance<'a>>> + 'a>,
pub lowest_unmodified: u32,
}
#[derive(Debug, Copy, Clone)]
pub(crate) enum BlasActionKind {
Build(NonZeroU64),
Use,
}
#[derive(Debug, Clone)]
pub(crate) enum TlasActionKind {
Build {
build_index: NonZeroU64,
dependencies: Vec<Arc<Blas>>,
},
Use,
}
#[derive(Debug, Clone)]
pub(crate) struct BlasAction {
pub blas: Arc<Blas>,
pub kind: BlasActionKind,
}
#[derive(Debug, Clone)]
pub(crate) struct TlasAction {
pub tlas: Arc<Tlas>,
pub kind: TlasActionKind,
}
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct TraceBlasTriangleGeometry {
pub size: wgt::BlasTriangleGeometrySizeDescriptor,
pub vertex_buffer: BufferId,
pub index_buffer: Option<BufferId>,
pub transform_buffer: Option<BufferId>,
pub first_vertex: u32,
pub vertex_stride: BufferAddress,
pub first_index: Option<u32>,
pub transform_buffer_offset: Option<BufferAddress>,
}
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum TraceBlasGeometries {
TriangleGeometries(Vec<TraceBlasTriangleGeometry>),
}
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct TraceBlasBuildEntry {
pub blas_id: BlasId,
pub geometries: TraceBlasGeometries,
}
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct TraceTlasInstance {
pub blas_id: BlasId,
pub transform: [f32; 12],
pub custom_index: u32,
pub mask: u8,
}
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct TraceTlasPackage {
pub tlas_id: TlasId,
pub instances: Vec<Option<TraceTlasInstance>>,
pub lowest_unmodified: u32,
}

161
vendor/wgpu-core/src/registry.rs vendored Normal file
View File

@@ -0,0 +1,161 @@
use std::{mem::size_of, sync::Arc};
use crate::{
id::Id,
identity::IdentityManager,
lock::{rank, RwLock, RwLockReadGuard, RwLockWriteGuard},
storage::{Element, Storage, StorageItem},
};
#[derive(Copy, Clone, Debug, Default, PartialEq, Eq)]
pub struct RegistryReport {
pub num_allocated: usize,
pub num_kept_from_user: usize,
pub num_released_from_user: usize,
pub element_size: usize,
}
impl RegistryReport {
pub fn is_empty(&self) -> bool {
self.num_allocated + self.num_kept_from_user == 0
}
}
/// Registry is the primary holder of each resource type.
/// Every resource is wrapped in an `Arc`, so releasing the last `Arc` will
/// eventually free the memory and release the inner raw resource.
///
/// Registry acts as the main entry point for keeping a resource alive
/// between its creation and its release from user-land code.
///
/// A resource may still be alive after being released from user-land code
/// if it is used in an active submission or is otherwise kept alive by
/// any other dependent resource.
///
#[derive(Debug)]
pub(crate) struct Registry<T: StorageItem> {
// Must only contain an id which has either never been used or has been released from `storage`
identity: Arc<IdentityManager<T::Marker>>,
storage: RwLock<Storage<T>>,
}
impl<T: StorageItem> Registry<T> {
pub(crate) fn new() -> Self {
Self {
identity: Arc::new(IdentityManager::new()),
storage: RwLock::new(rank::REGISTRY_STORAGE, Storage::new()),
}
}
}
#[must_use]
pub(crate) struct FutureId<'a, T: StorageItem> {
id: Id<T::Marker>,
data: &'a RwLock<Storage<T>>,
}
impl<T: StorageItem> FutureId<'_, T> {
pub fn id(&self) -> Id<T::Marker> {
self.id
}
/// Assign a new resource to this ID.
///
/// Registers it with the registry.
pub fn assign(self, value: T) -> Id<T::Marker> {
let mut data = self.data.write();
data.insert(self.id, value);
self.id
}
}
impl<T: StorageItem> Registry<T> {
pub(crate) fn prepare(&self, id_in: Option<Id<T::Marker>>) -> FutureId<T> {
FutureId {
id: match id_in {
Some(id_in) => {
self.identity.mark_as_used(id_in);
id_in
}
None => self.identity.process(),
},
data: &self.storage,
}
}
#[track_caller]
pub(crate) fn read<'a>(&'a self) -> RwLockReadGuard<'a, Storage<T>> {
self.storage.read()
}
#[track_caller]
pub(crate) fn write<'a>(&'a self) -> RwLockWriteGuard<'a, Storage<T>> {
self.storage.write()
}
pub(crate) fn remove(&self, id: Id<T::Marker>) -> T {
let value = self.storage.write().remove(id);
// This needs to happen *after* removing it from the storage, to maintain the
// invariant that `self.identity` only contains ids which are actually available
// See https://github.com/gfx-rs/wgpu/issues/5372
self.identity.free(id);
// Note: removing an error ID is legal; the stored value is returned as-is.
value
}
pub(crate) fn generate_report(&self) -> RegistryReport {
let storage = self.storage.read();
let mut report = RegistryReport {
element_size: size_of::<T>(),
..Default::default()
};
report.num_allocated = self.identity.values.lock().count();
for element in storage.map.iter() {
match *element {
Element::Occupied(..) => report.num_kept_from_user += 1,
Element::Vacant => report.num_released_from_user += 1,
}
}
report
}
}
impl<T: StorageItem + Clone> Registry<T> {
pub(crate) fn get(&self, id: Id<T::Marker>) -> T {
self.read().get(id)
}
}
#[cfg(test)]
mod tests {
use std::sync::Arc;
use crate::{id::Marker, resource::ResourceType, storage::StorageItem};
use super::Registry;
struct TestData;
struct TestDataId;
impl Marker for TestDataId {}
impl ResourceType for TestData {
const TYPE: &'static str = "TestData";
}
impl StorageItem for TestData {
type Marker = TestDataId;
}
#[test]
fn simultaneous_registration() {
let registry = Registry::new();
std::thread::scope(|s| {
for _ in 0..5 {
s.spawn(|| {
for _ in 0..1000 {
let value = Arc::new(TestData);
let new_id = registry.prepare(None);
let id = new_id.assign(value);
registry.remove(id);
}
});
}
})
}
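// A minimal lifecycle sketch: prepare -> assign -> get -> remove. `Arc<TestData>`
// is used as the storage item because `get` requires `Clone`; the assertion
// only illustrates the expected reference count.
#[test]
fn example_lifecycle() {
let registry: Registry<Arc<TestData>> = Registry::new();
let fid = registry.prepare(None);
let id = fid.assign(Arc::new(TestData));
// `get` clones the stored Arc, so two strong references exist now:
// one inside the storage and one held here.
let value = registry.get(id);
assert_eq!(Arc::strong_count(&value), 2);
// `remove` hands the stored value back and frees the id for reuse.
let _removed = registry.remove(id);
}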
}

1988
vendor/wgpu-core/src/resource.rs vendored Normal file

File diff suppressed because it is too large Load Diff

43
vendor/wgpu-core/src/scratch.rs vendored Normal file
View File

@@ -0,0 +1,43 @@
use crate::device::{Device, DeviceError};
use crate::resource_log;
use hal::BufferUses;
use std::mem::ManuallyDrop;
use std::sync::Arc;
#[derive(Debug)]
pub struct ScratchBuffer {
raw: ManuallyDrop<Box<dyn hal::DynBuffer>>,
device: Arc<Device>,
}
impl ScratchBuffer {
pub(crate) fn new(device: &Arc<Device>, size: wgt::BufferSize) -> Result<Self, DeviceError> {
let raw = unsafe {
device
.raw()
.create_buffer(&hal::BufferDescriptor {
label: Some("(wgpu) scratch buffer"),
size: size.get(),
usage: BufferUses::ACCELERATION_STRUCTURE_SCRATCH,
memory_flags: hal::MemoryFlags::empty(),
})
.map_err(DeviceError::from_hal)?
};
Ok(Self {
raw: ManuallyDrop::new(raw),
device: device.clone(),
})
}
pub(crate) fn raw(&self) -> &dyn hal::DynBuffer {
self.raw.as_ref()
}
}
impl Drop for ScratchBuffer {
fn drop(&mut self) {
resource_log!("Destroy raw ScratchBuffer");
// SAFETY: We are in the Drop impl and we don't use self.raw anymore after this point.
let raw = unsafe { ManuallyDrop::take(&mut self.raw) };
unsafe { self.device.raw().destroy_buffer(raw) };
}
}

169
vendor/wgpu-core/src/snatch.rs vendored Normal file
View File

@@ -0,0 +1,169 @@
#![allow(unused)]
use crate::lock::{RwLock, RwLockReadGuard, RwLockWriteGuard};
use std::{
backtrace::Backtrace,
cell::{Cell, RefCell, UnsafeCell},
panic::{self, Location},
thread,
};
use crate::lock::rank;
/// A guard that provides read access to snatchable data.
pub struct SnatchGuard<'a>(RwLockReadGuard<'a, ()>);
/// A guard that allows snatching the snatchable data.
pub struct ExclusiveSnatchGuard<'a>(RwLockWriteGuard<'a, ()>);
/// A value that is mostly immutable but can be "snatched" if we need to destroy
/// it early.
///
/// In order to safely access the underlying data, the device's global snatchable
/// lock must be taken. To guarantee it, methods take a read or write guard of that
/// special lock.
pub struct Snatchable<T> {
value: UnsafeCell<Option<T>>,
}
impl<T> Snatchable<T> {
pub fn new(val: T) -> Self {
Snatchable {
value: UnsafeCell::new(Some(val)),
}
}
pub fn empty() -> Self {
Snatchable {
value: UnsafeCell::new(None),
}
}
/// Get read access to the value. Requires the snatchable lock's read guard.
pub fn get<'a>(&'a self, _guard: &'a SnatchGuard) -> Option<&'a T> {
unsafe { (*self.value.get()).as_ref() }
}
/// Take the value. Requires the snatchable lock's write guard.
pub fn snatch(&self, _guard: &mut ExclusiveSnatchGuard) -> Option<T> {
unsafe { (*self.value.get()).take() }
}
/// Take the value without a guard. This can only be used with exclusive access
/// to self, so it does not require locking.
///
/// Typically useful in a drop implementation.
pub fn take(&mut self) -> Option<T> {
self.value.get_mut().take()
}
}
// Can't safely print the contents of a snatchable object without holding
// the lock.
impl<T> std::fmt::Debug for Snatchable<T> {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(f, "<snatchable>")
}
}
unsafe impl<T> Sync for Snatchable<T> {}
struct LockTrace {
purpose: &'static str,
caller: &'static Location<'static>,
backtrace: Backtrace,
}
impl std::fmt::Display for LockTrace {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(
f,
"a {} lock at {}\n{}",
self.purpose, self.caller, self.backtrace
)
}
}
#[cfg(debug_assertions)]
impl LockTrace {
#[track_caller]
fn enter(purpose: &'static str) {
let new = LockTrace {
purpose,
caller: Location::caller(),
backtrace: Backtrace::capture(),
};
if let Some(prev) = SNATCH_LOCK_TRACE.take() {
let current = thread::current();
let name = current.name().unwrap_or("<unnamed>");
panic!(
"thread '{name}' attempted to acquire a snatch lock recursively.\n\
- Currently trying to acquire {new}\n\
- Previously acquired {prev}",
);
} else {
SNATCH_LOCK_TRACE.set(Some(new));
}
}
fn exit() {
SNATCH_LOCK_TRACE.take();
}
}
#[cfg(not(debug_assertions))]
impl LockTrace {
fn enter(purpose: &'static str) {}
fn exit() {}
}
thread_local! {
static SNATCH_LOCK_TRACE: Cell<Option<LockTrace>> = const { Cell::new(None) };
}
/// A Device-global lock for all snatchable data.
pub struct SnatchLock {
lock: RwLock<()>,
}
impl SnatchLock {
/// The safety of `Snatchable::get` and `Snatchable::snatch` relies on using the
/// right `SnatchLock` (the one associated with the same device). This method is
/// unsafe to force users to think twice about creating a `SnatchLock`. The only
/// place this method should be called is when creating the device.
pub unsafe fn new(rank: rank::LockRank) -> Self {
SnatchLock {
lock: RwLock::new(rank, ()),
}
}
/// Request read access to snatchable resources.
#[track_caller]
pub fn read(&self) -> SnatchGuard {
LockTrace::enter("read");
SnatchGuard(self.lock.read())
}
/// Request write access to snatchable resources.
///
/// This should only be called when a resource needs to be snatched. This has
/// a high risk of causing lock contention if called concurrently with other
/// wgpu work.
#[track_caller]
pub fn write(&self) -> ExclusiveSnatchGuard {
LockTrace::enter("write");
ExclusiveSnatchGuard(self.lock.write())
}
}
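// A minimal usage sketch (hypothetical helper; the real lock lives on the `Device`):
// shared reads go through `read`, and the value is taken out under the exclusive
// write guard. The `u32` payload is arbitrary.
fn example_snatch(lock: &SnatchLock, slot: &Snatchable<u32>) -> Option<u32> {
{
// Many readers may hold the read guard at the same time.
let guard = lock.read();
let _still_there: Option<&u32> = slot.get(&guard);
}
// The read guard is dropped before taking the write lock: the lock is not
// reentrant, and recursive acquisition panics in debug builds (see `LockTrace`).
let mut write_guard = lock.write();
slot.snatch(&mut write_guard)
}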
impl Drop for SnatchGuard<'_> {
fn drop(&mut self) {
LockTrace::exit();
}
}
impl Drop for ExclusiveSnatchGuard<'_> {
fn drop(&mut self) {
LockTrace::exit();
}
}

137
vendor/wgpu-core/src/storage.rs vendored Normal file
View File

@@ -0,0 +1,137 @@
use std::sync::Arc;
use crate::id::{Id, Marker};
use crate::resource::ResourceType;
use crate::{Epoch, Index};
/// An entry in a `Storage::map` table.
#[derive(Debug)]
pub(crate) enum Element<T>
where
T: StorageItem,
{
/// There are no live ids with this index.
Vacant,
/// There is one live id with this index, allocated at the given
/// epoch.
Occupied(T, Epoch),
}
pub(crate) trait StorageItem: ResourceType {
type Marker: Marker;
}
impl<T: ResourceType> ResourceType for Arc<T> {
const TYPE: &'static str = T::TYPE;
}
impl<T: StorageItem> StorageItem for Arc<T> {
type Marker = T::Marker;
}
#[macro_export]
macro_rules! impl_storage_item {
($ty:ident) => {
impl $crate::storage::StorageItem for $ty {
type Marker = $crate::id::markers::$ty;
}
};
}
/// A table of `T` values indexed by the id type `I`.
///
/// `Storage` implements [`std::ops::Index`], accepting `Id` values as
/// indices.
///
/// The table is represented as a vector indexed by the ids' index
/// values, so you should use an id allocator like `IdentityManager`
/// that keeps the index values dense and close to zero.
#[derive(Debug)]
pub(crate) struct Storage<T>
where
T: StorageItem,
{
pub(crate) map: Vec<Element<T>>,
kind: &'static str,
}
impl<T> Storage<T>
where
T: StorageItem,
{
pub(crate) fn new() -> Self {
Self {
map: Vec::new(),
kind: T::TYPE,
}
}
}
impl<T> Storage<T>
where
T: StorageItem,
{
pub(crate) fn insert(&mut self, id: Id<T::Marker>, value: T) {
let (index, epoch) = id.unzip();
let index = index as usize;
if index >= self.map.len() {
self.map.resize_with(index + 1, || Element::Vacant);
}
match std::mem::replace(&mut self.map[index], Element::Occupied(value, epoch)) {
Element::Vacant => {}
Element::Occupied(_, storage_epoch) => {
assert_ne!(
epoch,
storage_epoch,
"Index {index:?} of {} is already occupied",
T::TYPE
);
}
}
}
pub(crate) fn remove(&mut self, id: Id<T::Marker>) -> T {
let (index, epoch) = id.unzip();
match std::mem::replace(&mut self.map[index as usize], Element::Vacant) {
Element::Occupied(value, storage_epoch) => {
assert_eq!(epoch, storage_epoch);
value
}
Element::Vacant => panic!("Cannot remove a vacant resource"),
}
}
pub(crate) fn iter(&self) -> impl Iterator<Item = (Id<T::Marker>, &T)> {
self.map
.iter()
.enumerate()
.filter_map(move |(index, x)| match *x {
Element::Occupied(ref value, storage_epoch) => {
Some((Id::zip(index as Index, storage_epoch), value))
}
_ => None,
})
}
}
impl<T> Storage<T>
where
T: StorageItem + Clone,
{
/// Get an owned copy of an item by cloning it out of storage.
/// Panics if there is an epoch mismatch or the entry is vacant.
pub(crate) fn get(&self, id: Id<T::Marker>) -> T {
let (index, epoch) = id.unzip();
let (result, storage_epoch) = match self.map.get(index as usize) {
Some(&Element::Occupied(ref v, epoch)) => (v.clone(), epoch),
None | Some(&Element::Vacant) => panic!("{}[{:?}] does not exist", self.kind, id),
};
assert_eq!(
epoch, storage_epoch,
"{}[{:?}] is no longer alive",
self.kind, id
);
result
}
}

773
vendor/wgpu-core/src/track/buffer.rs vendored Normal file
View File

@@ -0,0 +1,773 @@
//! Buffer Trackers
//!
//! Buffers are represented by a single state for the whole resource,
//! a 16 bit bitflag of buffer usages. Because there is only ever
//! one subresource, they have no selector.
use std::sync::{Arc, Weak};
use super::{PendingTransition, TrackerIndex};
use crate::{
resource::{Buffer, Trackable},
snatch::SnatchGuard,
track::{
invalid_resource_state, skip_barrier, ResourceMetadata, ResourceMetadataProvider,
ResourceUsageCompatibilityError, ResourceUses,
},
};
use hal::{BufferBarrier, BufferUses};
use wgt::{strict_assert, strict_assert_eq};
impl ResourceUses for BufferUses {
const EXCLUSIVE: Self = Self::EXCLUSIVE;
type Selector = ();
fn bits(self) -> u16 {
Self::bits(&self)
}
fn all_ordered(self) -> bool {
Self::ORDERED.contains(self)
}
fn any_exclusive(self) -> bool {
self.intersects(Self::EXCLUSIVE)
}
}
/// Stores a bind group's buffers + their usages (within the bind group).
#[derive(Debug)]
pub(crate) struct BufferBindGroupState {
buffers: Vec<(Arc<Buffer>, BufferUses)>,
}
impl BufferBindGroupState {
pub fn new() -> Self {
Self {
buffers: Vec::new(),
}
}
/// Optimize the buffer bind group state by sorting it by ID.
///
/// When this list of states is merged into a tracker, the memory
/// accesses will be in a constant ascending order.
pub(crate) fn optimize(&mut self) {
self.buffers
.sort_unstable_by_key(|(b, _)| b.tracker_index());
}
/// Returns a list of all buffers tracked. May contain duplicates.
pub fn used_tracker_indices(&self) -> impl Iterator<Item = TrackerIndex> + '_ {
self.buffers
.iter()
.map(|(b, _)| b.tracker_index())
.collect::<Vec<_>>()
.into_iter()
}
/// Adds the given resource with the given state.
pub fn insert_single(&mut self, buffer: Arc<Buffer>, state: BufferUses) {
self.buffers.push((buffer, state));
}
}
/// Stores all buffer state within a single usage scope.
#[derive(Debug)]
pub(crate) struct BufferUsageScope {
state: Vec<BufferUses>,
metadata: ResourceMetadata<Arc<Buffer>>,
}
impl Default for BufferUsageScope {
fn default() -> Self {
Self {
state: Vec::new(),
metadata: ResourceMetadata::new(),
}
}
}
impl BufferUsageScope {
fn tracker_assert_in_bounds(&self, index: usize) {
strict_assert!(index < self.state.len());
self.metadata.tracker_assert_in_bounds(index);
}
pub fn clear(&mut self) {
self.state.clear();
self.metadata.clear();
}
/// Sets the size of all the vectors inside the tracker.
///
/// Must be called with the highest possible Buffer ID before
/// all unsafe functions are called.
pub fn set_size(&mut self, size: usize) {
self.state.resize(size, BufferUses::empty());
self.metadata.set_size(size);
}
/// Extend the vectors to let the given index be valid.
fn allow_index(&mut self, index: usize) {
if index >= self.state.len() {
self.set_size(index + 1);
}
}
/// Merge the list of buffer states in the given bind group into this usage scope.
///
/// If any of the resulting states is invalid, stops the merge and returns a usage
/// conflict with the details of the invalid state.
///
/// Because bind groups do not check if the union of all their states is valid,
/// this method is allowed to return Err on the first bind group bound.
///
/// # Safety
///
/// [`Self::set_size`] must be called with the maximum possible Buffer ID before this
/// method is called.
pub unsafe fn merge_bind_group(
&mut self,
bind_group: &BufferBindGroupState,
) -> Result<(), ResourceUsageCompatibilityError> {
for &(ref resource, state) in bind_group.buffers.iter() {
let index = resource.tracker_index().as_usize();
unsafe {
self.insert_or_merge(
index as _,
index,
BufferStateProvider::Direct { state },
ResourceMetadataProvider::Direct { resource },
)?
};
}
Ok(())
}
/// Merge the list of buffer states in the given usage scope into this UsageScope.
///
/// If any of the resulting states is invalid, stops the merge and returns a usage
/// conflict with the details of the invalid state.
///
/// If the given tracker uses IDs higher than the length of internal vectors,
/// the vectors will be extended. A call to set_size is not needed.
pub fn merge_usage_scope(
&mut self,
scope: &Self,
) -> Result<(), ResourceUsageCompatibilityError> {
let incoming_size = scope.state.len();
if incoming_size > self.state.len() {
self.set_size(incoming_size);
}
for index in scope.metadata.owned_indices() {
self.tracker_assert_in_bounds(index);
scope.tracker_assert_in_bounds(index);
unsafe {
self.insert_or_merge(
index as u32,
index,
BufferStateProvider::Indirect {
state: &scope.state,
},
ResourceMetadataProvider::Indirect {
metadata: &scope.metadata,
},
)?;
};
}
Ok(())
}
/// Merge a single state into the UsageScope.
///
/// If the resulting state is invalid, returns a usage
/// conflict with the details of the invalid state.
///
/// If the ID is higher than the length of internal vectors,
/// the vectors will be extended. A call to set_size is not needed.
pub fn merge_single(
&mut self,
buffer: &Arc<Buffer>,
new_state: BufferUses,
) -> Result<(), ResourceUsageCompatibilityError> {
let index = buffer.tracker_index().as_usize();
self.allow_index(index);
self.tracker_assert_in_bounds(index);
unsafe {
self.insert_or_merge(
index as _,
index,
BufferStateProvider::Direct { state: new_state },
ResourceMetadataProvider::Direct { resource: buffer },
)?;
}
Ok(())
}
/// Does an insertion operation if the index isn't tracked
/// in the current metadata, otherwise merges the given state
/// with the current state. If the merging would cause
/// a conflict, returns that usage conflict.
///
/// # Safety
///
/// Indexes must be valid indexes into all arrays passed in
/// to this function, either directly or via metadata or provider structs.
#[inline(always)]
unsafe fn insert_or_merge(
&mut self,
index32: u32,
index: usize,
state_provider: BufferStateProvider<'_>,
metadata_provider: ResourceMetadataProvider<'_, Arc<Buffer>>,
) -> Result<(), ResourceUsageCompatibilityError> {
let currently_owned = unsafe { self.metadata.contains_unchecked(index) };
if !currently_owned {
unsafe {
insert(
None,
&mut self.state,
&mut self.metadata,
index,
state_provider,
None,
metadata_provider,
)
};
return Ok(());
}
unsafe {
merge(
&mut self.state,
index32,
index,
state_provider,
metadata_provider,
)
}
}
}
/// Stores all buffer state within a command buffer.
pub(crate) struct BufferTracker {
start: Vec<BufferUses>,
end: Vec<BufferUses>,
metadata: ResourceMetadata<Arc<Buffer>>,
temp: Vec<PendingTransition<BufferUses>>,
}
impl BufferTracker {
pub fn new() -> Self {
Self {
start: Vec::new(),
end: Vec::new(),
metadata: ResourceMetadata::new(),
temp: Vec::new(),
}
}
fn tracker_assert_in_bounds(&self, index: usize) {
strict_assert!(index < self.start.len());
strict_assert!(index < self.end.len());
self.metadata.tracker_assert_in_bounds(index);
}
/// Sets the size of all the vectors inside the tracker.
///
/// Must be called with the highest possible Buffer ID before
/// all unsafe functions are called.
pub fn set_size(&mut self, size: usize) {
self.start.resize(size, BufferUses::empty());
self.end.resize(size, BufferUses::empty());
self.metadata.set_size(size);
}
/// Extend the vectors to let the given index be valid.
fn allow_index(&mut self, index: usize) {
if index >= self.start.len() {
self.set_size(index + 1);
}
}
/// Returns true if the given buffer is tracked.
pub fn contains(&self, buffer: &Buffer) -> bool {
self.metadata.contains(buffer.tracker_index().as_usize())
}
/// Returns a list of all buffers tracked.
pub fn used_resources(&self) -> impl Iterator<Item = &Arc<Buffer>> + '_ {
self.metadata.owned_resources()
}
/// Drains all currently pending transitions.
pub fn drain_transitions<'a, 'b: 'a>(
&'b mut self,
snatch_guard: &'a SnatchGuard<'a>,
) -> impl Iterator<Item = BufferBarrier<'a, dyn hal::DynBuffer>> {
let buffer_barriers = self.temp.drain(..).map(|pending| {
let buf = unsafe { self.metadata.get_resource_unchecked(pending.id as _) };
pending.into_hal(buf, snatch_guard)
});
buffer_barriers
}
/// Sets the state of a single buffer.
///
/// If a transition is needed to get the buffer into the given state, that transition
/// is returned. No more than one transition is needed.
///
/// If the ID is higher than the length of internal vectors,
/// the vectors will be extended. A call to set_size is not needed.
pub fn set_single(
&mut self,
buffer: &Arc<Buffer>,
state: BufferUses,
) -> Option<PendingTransition<BufferUses>> {
let index: usize = buffer.tracker_index().as_usize();
self.allow_index(index);
self.tracker_assert_in_bounds(index);
unsafe {
self.insert_or_barrier_update(
index,
BufferStateProvider::Direct { state },
None,
ResourceMetadataProvider::Direct { resource: buffer },
)
};
strict_assert!(self.temp.len() <= 1);
self.temp.pop()
}
/// Sets the given state for all buffers in the given tracker.
///
/// If a transition is needed to get the buffers into the needed state,
/// those transitions are stored within the tracker. A subsequent
/// call to [`Self::drain_transitions`] is needed to get those transitions.
///
/// If the ID is higher than the length of internal vectors,
/// the vectors will be extended. A call to set_size is not needed.
pub fn set_from_tracker(&mut self, tracker: &Self) {
let incoming_size = tracker.start.len();
if incoming_size > self.start.len() {
self.set_size(incoming_size);
}
for index in tracker.metadata.owned_indices() {
self.tracker_assert_in_bounds(index);
tracker.tracker_assert_in_bounds(index);
unsafe {
self.insert_or_barrier_update(
index,
BufferStateProvider::Indirect {
state: &tracker.start,
},
Some(BufferStateProvider::Indirect {
state: &tracker.end,
}),
ResourceMetadataProvider::Indirect {
metadata: &tracker.metadata,
},
)
}
}
}
/// Sets the given state for all buffers in the given UsageScope.
///
/// If a transition is needed to get the buffers into the needed state,
/// those transitions are stored within the tracker. A subsequent
/// call to [`Self::drain_transitions`] is needed to get those transitions.
///
/// If the ID is higher than the length of internal vectors,
/// the vectors will be extended. A call to set_size is not needed.
pub fn set_from_usage_scope(&mut self, scope: &BufferUsageScope) {
let incoming_size = scope.state.len();
if incoming_size > self.start.len() {
self.set_size(incoming_size);
}
for index in scope.metadata.owned_indices() {
self.tracker_assert_in_bounds(index);
scope.tracker_assert_in_bounds(index);
unsafe {
self.insert_or_barrier_update(
index,
BufferStateProvider::Indirect {
state: &scope.state,
},
None,
ResourceMetadataProvider::Indirect {
metadata: &scope.metadata,
},
)
}
}
}
/// Iterates through all buffers in the given bind group and adopts
/// the state given for those buffers in the UsageScope. It also
/// removes all touched buffers from the usage scope.
///
/// If a transition is needed to get the buffers into the needed state,
/// those transitions are stored within the tracker. A subsequent
/// call to [`Self::drain_transitions`] is needed to get those transitions.
///
/// This is a really funky method used by Compute Passes to generate
/// barriers after a call to dispatch without needing to iterate
/// over all elements in the usage scope. We use the given
/// iterator of ids as the source of which IDs to look at.
/// All the IDs must have first been added to the usage scope.
///
/// # Safety
///
/// [`Self::set_size`] must be called with the maximum possible Buffer ID before this
/// method is called.
pub unsafe fn set_and_remove_from_usage_scope_sparse(
&mut self,
scope: &mut BufferUsageScope,
index_source: impl IntoIterator<Item = TrackerIndex>,
) {
let incoming_size = scope.state.len();
if incoming_size > self.start.len() {
self.set_size(incoming_size);
}
for index in index_source {
let index = index.as_usize();
scope.tracker_assert_in_bounds(index);
if unsafe { !scope.metadata.contains_unchecked(index) } {
continue;
}
unsafe {
self.insert_or_barrier_update(
index,
BufferStateProvider::Indirect {
state: &scope.state,
},
None,
ResourceMetadataProvider::Indirect {
metadata: &scope.metadata,
},
)
};
unsafe { scope.metadata.remove(index) };
}
}
/// If the resource isn't tracked
/// - Inserts the given resource.
/// - Uses the `start_state_provider` to populate `start_states`
/// - Uses either `end_state_provider` or `start_state_provider`
/// to populate `current_states`.
///
/// If the resource is tracked
/// - Inserts barriers from the state in `current_states`
/// to the state provided by `start_state_provider`.
/// - Updates the `current_states` with either the state from
/// `end_state_provider` or `start_state_provider`.
///
/// Any barriers are added to the barrier vector.
///
/// # Safety
///
/// Indexes must be valid indexes into all arrays passed in
/// to this function, either directly or via metadata or provider structs.
#[inline(always)]
unsafe fn insert_or_barrier_update(
&mut self,
index: usize,
start_state_provider: BufferStateProvider<'_>,
end_state_provider: Option<BufferStateProvider<'_>>,
metadata_provider: ResourceMetadataProvider<'_, Arc<Buffer>>,
) {
let currently_owned = unsafe { self.metadata.contains_unchecked(index) };
if !currently_owned {
unsafe {
insert(
Some(&mut self.start),
&mut self.end,
&mut self.metadata,
index,
start_state_provider,
end_state_provider,
metadata_provider,
)
};
return;
}
let update_state_provider =
end_state_provider.unwrap_or_else(|| start_state_provider.clone());
unsafe { barrier(&mut self.end, index, start_state_provider, &mut self.temp) };
unsafe { update(&mut self.end, index, update_state_provider) };
}
}
/// Stores all buffer state within a device.
pub(crate) struct DeviceBufferTracker {
current_states: Vec<BufferUses>,
metadata: ResourceMetadata<Weak<Buffer>>,
temp: Vec<PendingTransition<BufferUses>>,
}
impl DeviceBufferTracker {
pub fn new() -> Self {
Self {
current_states: Vec::new(),
metadata: ResourceMetadata::new(),
temp: Vec::new(),
}
}
fn tracker_assert_in_bounds(&self, index: usize) {
strict_assert!(index < self.current_states.len());
self.metadata.tracker_assert_in_bounds(index);
}
/// Extend the vectors to let the given index be valid.
fn allow_index(&mut self, index: usize) {
if index >= self.current_states.len() {
self.current_states.resize(index + 1, BufferUses::empty());
self.metadata.set_size(index + 1);
}
}
/// Returns a list of all buffers tracked.
pub fn used_resources(&self) -> impl Iterator<Item = &Weak<Buffer>> + '_ {
self.metadata.owned_resources()
}
/// Inserts a single buffer and its state into the resource tracker.
///
/// If the resource already exists in the tracker, it will be overwritten.
pub fn insert_single(&mut self, buffer: &Arc<Buffer>, state: BufferUses) {
let index = buffer.tracker_index().as_usize();
self.allow_index(index);
self.tracker_assert_in_bounds(index);
unsafe {
insert(
None,
&mut self.current_states,
&mut self.metadata,
index,
BufferStateProvider::Direct { state },
None,
ResourceMetadataProvider::Direct {
resource: &Arc::downgrade(buffer),
},
)
}
}
/// Sets the state of a single buffer.
///
/// If a transition is needed to get the buffer into the given state, that transition
/// is returned. No more than one transition is needed.
pub fn set_single(
&mut self,
buffer: &Arc<Buffer>,
state: BufferUses,
) -> Option<PendingTransition<BufferUses>> {
let index: usize = buffer.tracker_index().as_usize();
self.tracker_assert_in_bounds(index);
let start_state_provider = BufferStateProvider::Direct { state };
unsafe {
barrier(
&mut self.current_states,
index,
start_state_provider.clone(),
&mut self.temp,
)
};
unsafe { update(&mut self.current_states, index, start_state_provider) };
strict_assert!(self.temp.len() <= 1);
self.temp.pop()
}
/// Sets the given state for all buffers in the given tracker.
///
/// If a transition is needed to get the buffers into the needed state,
/// those transitions are returned.
pub fn set_from_tracker_and_drain_transitions<'a, 'b: 'a>(
&'a mut self,
tracker: &'a BufferTracker,
snatch_guard: &'b SnatchGuard<'b>,
) -> impl Iterator<Item = BufferBarrier<'a, dyn hal::DynBuffer>> {
for index in tracker.metadata.owned_indices() {
self.tracker_assert_in_bounds(index);
let start_state_provider = BufferStateProvider::Indirect {
state: &tracker.start,
};
let end_state_provider = BufferStateProvider::Indirect {
state: &tracker.end,
};
unsafe {
barrier(
&mut self.current_states,
index,
start_state_provider,
&mut self.temp,
)
};
unsafe { update(&mut self.current_states, index, end_state_provider) };
}
self.temp.drain(..).map(|pending| {
let buf = unsafe { tracker.metadata.get_resource_unchecked(pending.id as _) };
pending.into_hal(buf, snatch_guard)
})
}
}
/// Source of Buffer State.
#[derive(Debug, Clone)]
enum BufferStateProvider<'a> {
/// Get a state that was provided directly.
Direct { state: BufferUses },
/// Get a state from an array of states.
Indirect { state: &'a [BufferUses] },
}
impl BufferStateProvider<'_> {
/// Gets the state from the provider, given a resource ID index.
///
/// # Safety
///
/// Index must be in bounds for the indirect source iff this is in the indirect state.
#[inline(always)]
unsafe fn get_state(&self, index: usize) -> BufferUses {
match *self {
BufferStateProvider::Direct { state } => state,
BufferStateProvider::Indirect { state } => {
strict_assert!(index < state.len());
*unsafe { state.get_unchecked(index) }
}
}
}
}
#[inline(always)]
unsafe fn insert<T: Clone>(
start_states: Option<&mut [BufferUses]>,
current_states: &mut [BufferUses],
resource_metadata: &mut ResourceMetadata<T>,
index: usize,
start_state_provider: BufferStateProvider<'_>,
end_state_provider: Option<BufferStateProvider<'_>>,
metadata_provider: ResourceMetadataProvider<'_, T>,
) {
let new_start_state = unsafe { start_state_provider.get_state(index) };
let new_end_state =
end_state_provider.map_or(new_start_state, |p| unsafe { p.get_state(index) });
// This should only ever happen with a wgpu bug, but let's just double
// check that resource states don't have any conflicts.
strict_assert_eq!(invalid_resource_state(new_start_state), false);
strict_assert_eq!(invalid_resource_state(new_end_state), false);
unsafe {
if let Some(&mut ref mut start_state) = start_states {
*start_state.get_unchecked_mut(index) = new_start_state;
}
*current_states.get_unchecked_mut(index) = new_end_state;
let resource = metadata_provider.get(index);
resource_metadata.insert(index, resource.clone());
}
}
#[inline(always)]
unsafe fn merge(
current_states: &mut [BufferUses],
_index32: u32,
index: usize,
state_provider: BufferStateProvider<'_>,
metadata_provider: ResourceMetadataProvider<'_, Arc<Buffer>>,
) -> Result<(), ResourceUsageCompatibilityError> {
let current_state = unsafe { current_states.get_unchecked_mut(index) };
let new_state = unsafe { state_provider.get_state(index) };
let merged_state = *current_state | new_state;
if invalid_resource_state(merged_state) {
return Err(ResourceUsageCompatibilityError::from_buffer(
unsafe { metadata_provider.get(index) },
*current_state,
new_state,
));
}
*current_state = merged_state;
Ok(())
}
#[inline(always)]
unsafe fn barrier(
current_states: &mut [BufferUses],
index: usize,
state_provider: BufferStateProvider<'_>,
barriers: &mut Vec<PendingTransition<BufferUses>>,
) {
let current_state = unsafe { *current_states.get_unchecked(index) };
let new_state = unsafe { state_provider.get_state(index) };
if skip_barrier(current_state, new_state) {
return;
}
barriers.push(PendingTransition {
id: index as _,
selector: (),
usage: hal::StateTransition {
from: current_state,
to: new_state,
},
});
}
#[inline(always)]
unsafe fn update(
current_states: &mut [BufferUses],
index: usize,
state_provider: BufferStateProvider<'_>,
) {
let current_state = unsafe { current_states.get_unchecked_mut(index) };
let new_state = unsafe { state_provider.get_state(index) };
*current_state = new_state;
}
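
The four free functions above (`insert`, `merge`, `barrier`, `update`) are the primitives the buffer trackers compose. A minimal standalone sketch of the barrier-then-update step, using an invented `u16` usage type and a plain transition record instead of wgpu-core's `BufferUses`/`PendingTransition` (all names here are illustrative):

```rust
// Standalone sketch, not wgpu-core's API: `Uses` stands in for `BufferUses`,
// and the "ordered" mask is invented for illustration.
type Uses = u16;
const ORDERED_MASK: Uses = 0b0011; // pretend these bits never need barriers among themselves

struct Pending {
    index: usize,
    from: Uses,
    to: Uses,
}

/// No barrier is needed if the state did not change and every set bit is "ordered".
fn skip_barrier(old: Uses, new: Uses) -> bool {
    old == new && (old & !ORDERED_MASK) == 0
}

/// barrier(): compare the tracked state against the new one and record a transition if needed.
fn barrier(current: &[Uses], index: usize, new: Uses, out: &mut Vec<Pending>) {
    let old = current[index];
    if !skip_barrier(old, new) {
        out.push(Pending { index, from: old, to: new });
    }
}

/// update(): overwrite the tracked state with the new one.
fn update(current: &mut [Uses], index: usize, new: Uses) {
    current[index] = new;
}

fn main() {
    let mut current = vec![0b0001u16; 4]; // every slot starts in an "ordered" use
    let mut pending = Vec::new();
    // Move slot 2 into a different use: barrier first, then update.
    barrier(&current, 2, 0b0100, &mut pending);
    update(&mut current, 2, 0b0100);
    assert_eq!(pending.len(), 1);
    assert_eq!(
        (pending[0].index, pending[0].from, pending[0].to),
        (2, 0b0001, 0b0100)
    );
    assert_eq!(current[2], 0b0100);
}
```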

207
vendor/wgpu-core/src/track/metadata.rs vendored Normal file
View File

@@ -0,0 +1,207 @@
//! The `ResourceMetadata` type.
use bit_vec::BitVec;
use wgt::strict_assert;
/// A set of resources, holding a `T` (typically an `Arc`) for each member.
///
/// Testing for membership is fast, and iterating over members is
/// reasonably fast in practice. Storage consumption is proportional
/// to the largest id index of any member, not to the number of
/// members, but a bit vector tracks occupancy, so iteration touches
/// only occupied elements.
#[derive(Debug)]
pub(super) struct ResourceMetadata<T: Clone> {
/// If the resource with index `i` is a member, `owned[i]` is `true`.
owned: BitVec<usize>,
/// A vector holding clones of members' `T`s.
resources: Vec<Option<T>>,
}
impl<T: Clone> ResourceMetadata<T> {
pub(super) fn new() -> Self {
Self {
owned: BitVec::default(),
resources: Vec::new(),
}
}
pub(super) fn set_size(&mut self, size: usize) {
self.resources.resize(size, None);
resize_bitvec(&mut self.owned, size);
}
pub(super) fn clear(&mut self) {
self.resources.clear();
self.owned.clear();
}
/// Ensures a given index is in bounds for all arrays and
/// sanity-checks that the resource entry is present.
///
/// In release mode this function is completely empty and is removed.
#[cfg_attr(not(feature = "strict_asserts"), allow(unused_variables))]
pub(super) fn tracker_assert_in_bounds(&self, index: usize) {
strict_assert!(index < self.owned.len());
strict_assert!(index < self.resources.len());
strict_assert!(if self.contains(index) {
self.resources[index].is_some()
} else {
true
});
}
/// Returns true if the tracker owns no resources.
///
/// This is an O(n) operation.
pub(super) fn is_empty(&self) -> bool {
!self.owned.any()
}
/// Returns true if the set contains the resource with the given index.
pub(super) fn contains(&self, index: usize) -> bool {
self.owned.get(index).unwrap_or(false)
}
/// Returns true if the set contains the resource with the given index.
///
/// # Safety
///
/// The given `index` must be in bounds for this `ResourceMetadata`'s
/// existing tables. See `tracker_assert_in_bounds`.
#[inline(always)]
pub(super) unsafe fn contains_unchecked(&self, index: usize) -> bool {
unsafe { self.owned.get(index).unwrap_unchecked() }
}
/// Insert a resource into the set.
///
/// Add the resource with the given index to the set.
///
/// Returns a reference to the newly inserted resource.
/// (This allows avoiding a clone/reference count increase in many cases.)
///
/// # Safety
///
/// The given `index` must be in bounds for this `ResourceMetadata`'s
/// existing tables. See `tracker_assert_in_bounds`.
#[inline(always)]
pub(super) unsafe fn insert(&mut self, index: usize, resource: T) -> &T {
self.owned.set(index, true);
let resource_dst = unsafe { self.resources.get_unchecked_mut(index) };
resource_dst.insert(resource)
}
/// Get the resource with the given index.
///
/// # Safety
///
/// The given `index` must be in bounds for this `ResourceMetadata`'s
/// existing tables. See `tracker_assert_in_bounds`.
#[inline(always)]
pub(super) unsafe fn get_resource_unchecked(&self, index: usize) -> &T {
unsafe {
self.resources
.get_unchecked(index)
.as_ref()
.unwrap_unchecked()
}
}
/// Returns an iterator over the resources owned by `self`.
pub(super) fn owned_resources(&self) -> impl Iterator<Item = &T> + '_ {
if !self.owned.is_empty() {
self.tracker_assert_in_bounds(self.owned.len() - 1)
};
iterate_bitvec_indices(&self.owned).map(move |index| {
let resource = unsafe { self.resources.get_unchecked(index) };
resource.as_ref().unwrap()
})
}
/// Returns an iterator over the indices of all resources owned by `self`.
pub(super) fn owned_indices(&self) -> impl Iterator<Item = usize> + '_ {
if !self.owned.is_empty() {
self.tracker_assert_in_bounds(self.owned.len() - 1)
};
iterate_bitvec_indices(&self.owned)
}
/// Remove the resource with the given index from the set.
pub(super) unsafe fn remove(&mut self, index: usize) {
unsafe {
*self.resources.get_unchecked_mut(index) = None;
}
self.owned.set(index, false);
}
}
/// A source of resource metadata.
///
/// This is used to abstract over the various places
/// trackers can get new resource metadata from.
pub(super) enum ResourceMetadataProvider<'a, T: Clone> {
/// Comes directly from explicit values.
Direct { resource: &'a T },
/// Comes from another metadata tracker.
Indirect { metadata: &'a ResourceMetadata<T> },
}
impl<T: Clone> ResourceMetadataProvider<'_, T> {
/// Get a reference to the resource from this.
///
/// # Safety
///
/// - The index must be in bounds of the metadata tracker if this uses an indirect source.
#[inline(always)]
pub(super) unsafe fn get(&self, index: usize) -> &T {
match self {
ResourceMetadataProvider::Direct { resource } => resource,
ResourceMetadataProvider::Indirect { metadata } => {
metadata.tracker_assert_in_bounds(index);
{
let resource = unsafe { metadata.resources.get_unchecked(index) }.as_ref();
unsafe { resource.unwrap_unchecked() }
}
}
}
}
}
/// Resizes the given bitvec to the given size. I'm not sure why this is hard to do but it is.
fn resize_bitvec<B: bit_vec::BitBlock>(vec: &mut BitVec<B>, size: usize) {
let owned_size_to_grow = size.checked_sub(vec.len());
if let Some(delta) = owned_size_to_grow {
if delta != 0 {
vec.grow(delta, false);
}
} else {
vec.truncate(size);
}
}
/// Produces an iterator that yields the indexes of all bits that are set in the bitvec.
///
/// Will skip entire usize's worth of bits if they are all false.
fn iterate_bitvec_indices(ownership: &BitVec<usize>) -> impl Iterator<Item = usize> + '_ {
const BITS_PER_BLOCK: usize = usize::BITS as usize;
let size = ownership.len();
ownership
.blocks()
.enumerate()
.filter(|&(_, word)| word != 0)
.flat_map(move |(word_index, mut word)| {
let bit_start = word_index * BITS_PER_BLOCK;
let bit_end = (bit_start + BITS_PER_BLOCK).min(size);
(bit_start..bit_end).filter(move |_| {
let active = word & 0b1 != 0;
word >>= 1;
active
})
})
}
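
The word-skipping trick in `iterate_bitvec_indices` is straightforward to reproduce outside `bit-vec`; a rough standalone equivalent over a hand-rolled `Vec<usize>` bitset (illustrative only, not the crate's API):

```rust
// Standalone sketch of the same word-skipping iteration over a plain Vec<usize> bitset.
fn set_bit(blocks: &mut Vec<usize>, index: usize) {
    const BITS: usize = usize::BITS as usize;
    let (word, bit) = (index / BITS, index % BITS);
    if word >= blocks.len() {
        blocks.resize(word + 1, 0);
    }
    blocks[word] |= 1 << bit;
}

/// Yields the indices of all set bits; an all-zero word costs a single comparison.
fn iter_set_bits(blocks: &[usize]) -> impl Iterator<Item = usize> + '_ {
    const BITS: usize = usize::BITS as usize;
    blocks
        .iter()
        .enumerate()
        .filter(|&(_, &word)| word != 0)
        .flat_map(|(word_index, &word)| {
            (0..BITS)
                .filter(move |&bit| word & (1 << bit) != 0)
                .map(move |bit| word_index * BITS + bit)
        })
}

fn main() {
    let mut blocks = Vec::new();
    for i in [3usize, 64, 130] {
        set_bit(&mut blocks, i);
    }
    assert_eq!(iter_set_bits(&blocks).collect::<Vec<_>>(), vec![3, 64, 130]);
}
```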

667
vendor/wgpu-core/src/track/mod.rs vendored Normal file
View File

@@ -0,0 +1,667 @@
/*! Resource State and Lifetime Trackers
These structures are responsible for keeping track of resource state,
generating barriers where needed, and making sure resources are kept
alive until the trackers die.
## General Architecture
Tracking is some of the hottest code in the entire codebase, so the trackers
are designed to be as cache efficient as possible. They store resource state
in flat vectors, storing metadata SOA style, one vector per type of metadata.
A lot of the tracker code is deeply unsafe, using unchecked accesses all over
to make performance as good as possible. However, for all unsafe accesses, there
is a corresponding debug assert that checks whether that access is valid. This helps
catch bugs quickly, while still letting users avoid paying for the bounds
checks.
In wgpu, each resource ID includes a bitfield holding an index.
Indices are allocated and re-used, so they will always be as low as
reasonably possible. This allows us to use IDs to index into an array
of tracking information.
## Statefulness
There are two main types of trackers, stateful and stateless.
Stateful trackers are for buffers and textures. They both have
resource state attached to them which needs to be used to generate
automatic synchronization. Because of the different requirements of
buffers and textures, they have two separate tracking structures.
Stateless trackers only store metadata and own the given resource.
## Use Case
Within each type of tracker, the trackers are further split into 3 different
use cases: Bind Group, Usage Scope, and a full Tracker.
Bind Group trackers are just a list of different resources, their refcount,
and how they are used. Textures are used via a selector and a usage type.
Buffers by just a usage type. Stateless resources don't have a usage type.
Usage Scope trackers are only for stateful resources. These trackers represent
a single [`UsageScope`] in the spec. When a use is added to a usage scope,
it is merged with all other uses of that resource in that scope. If there
is a usage conflict, merging will fail and an error will be reported.
Full trackers represent a before and after state of a resource. These
are used for tracking on the device and on command buffers. The before
state represents the state the resource is first used as in the command buffer,
the after state is the state the command buffer leaves the resource in.
These double ended buffers can then be used to generate the needed transitions
between command buffers.
## Dense Datastructure with Sparse Data
This tracking system is based on having completely dense data, but trackers do
not always contain every resource. Some resources (or even most resources) go
unused in any given command buffer. So to help speed up the process of iterating
through possibly thousands of resources, we use a bit vector to represent if
a resource is in the buffer or not. This allows us extremely efficient memory
utilization, as well as being able to bail out of whole blocks of 32-64 resources
with a single usize comparison with zero. In practice this means that merging
partially resident buffers is extremely quick.
The main advantage of this dense datastructure is that we can do merging
of trackers in an extremely efficient fashion that results in us doing linear
scans down a couple of buffers. CPUs and their caches absolutely eat this up.
## Stateful Resource Operations
All operations on stateful trackers boil down to one of four operations:
- `insert(tracker, new_state)` adds a resource with a given state to the tracker
for the first time.
- `merge(tracker, new_state)` merges this new state with the previous state, checking
for usage conflicts.
- `barrier(tracker, new_state)` compares the given state to the existing state and
generates the needed barriers.
- `update(tracker, new_state)` takes the given new state and overrides the old state.
This allows us to compose the operations to form the various kinds of tracker merges
that need to happen in the codebase. For each resource in the given merger, the following
operation applies:
```text
UsageScope <- Resource = insert(scope, usage) OR merge(scope, usage)
UsageScope <- UsageScope = insert(scope, scope) OR merge(scope, scope)
CommandBuffer <- UsageScope = insert(buffer.start, buffer.end, scope)
OR barrier(buffer.end, scope) + update(buffer.end, scope)
Device <- CommandBuffer = insert(device.start, device.end, buffer.start, buffer.end)
OR barrier(device.end, buffer.start) + update(device.end, buffer.end)
```
[`UsageScope`]: https://gpuweb.github.io/gpuweb/#programming-model-synchronization
*/
mod buffer;
mod metadata;
mod range;
mod stateless;
mod texture;
use crate::{
binding_model, command,
lock::{rank, Mutex},
pipeline,
resource::{self, Labeled, ResourceErrorIdent},
snatch::SnatchGuard,
};
use std::{fmt, ops, sync::Arc};
use thiserror::Error;
pub(crate) use buffer::{
BufferBindGroupState, BufferTracker, BufferUsageScope, DeviceBufferTracker,
};
use metadata::{ResourceMetadata, ResourceMetadataProvider};
pub(crate) use stateless::StatelessTracker;
pub(crate) use texture::{
DeviceTextureTracker, TextureSelector, TextureTracker, TextureTrackerSetSingle,
TextureUsageScope, TextureViewBindGroupState,
};
use wgt::strict_assert_ne;
#[repr(transparent)]
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub(crate) struct TrackerIndex(u32);
impl TrackerIndex {
pub fn as_usize(self) -> usize {
self.0 as usize
}
}
/// wgpu-core internally uses some array-like storage for tracking resources.
/// To that end, there needs to be a uniquely assigned index for each live resource
/// of a certain type. This index is separate from the resource ID for various reasons:
/// - There can be multiple resource IDs pointing to the same resource.
/// - IDs of dead handles can be recycled while resources are internally held alive (and tracked).
/// - The plan is to remove IDs in the long run
/// ([#5121](https://github.com/gfx-rs/wgpu/issues/5121)).
///
/// In order to produce these tracker indices, there is a shared TrackerIndexAllocator
/// per resource type. Indices have the same lifetime as the internal resource they
/// are associated with (alloc happens when creating the resource and free is called when
/// the resource is dropped).
struct TrackerIndexAllocator {
unused: Vec<TrackerIndex>,
next_index: TrackerIndex,
}
impl TrackerIndexAllocator {
pub fn new() -> Self {
TrackerIndexAllocator {
unused: Vec::new(),
next_index: TrackerIndex(0),
}
}
pub fn alloc(&mut self) -> TrackerIndex {
if let Some(index) = self.unused.pop() {
return index;
}
let index = self.next_index;
self.next_index.0 += 1;
index
}
pub fn free(&mut self, index: TrackerIndex) {
self.unused.push(index);
}
// This is used to pre-allocate the tracker storage.
pub fn size(&self) -> usize {
self.next_index.0 as usize
}
}
impl fmt::Debug for TrackerIndexAllocator {
fn fmt(&self, _: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
Ok(())
}
}
/// See TrackerIndexAllocator.
#[derive(Debug)]
pub(crate) struct SharedTrackerIndexAllocator {
inner: Mutex<TrackerIndexAllocator>,
}
impl SharedTrackerIndexAllocator {
pub fn new() -> Self {
SharedTrackerIndexAllocator {
inner: Mutex::new(
rank::SHARED_TRACKER_INDEX_ALLOCATOR_INNER,
TrackerIndexAllocator::new(),
),
}
}
pub fn alloc(&self) -> TrackerIndex {
self.inner.lock().alloc()
}
pub fn free(&self, index: TrackerIndex) {
self.inner.lock().free(index);
}
pub fn size(&self) -> usize {
self.inner.lock().size()
}
}
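
A small usage sketch of the alloc/free contract described above, using a simplified stand-in allocator (same free-list-then-counter idea, but without wgpu-core's lock or types):

```rust
// Simplified stand-in for TrackerIndexAllocator: indices come from the free
// list first, and only then from a monotonically increasing counter.
struct SimpleIndexAllocator {
    unused: Vec<u32>,
    next: u32,
}

impl SimpleIndexAllocator {
    fn new() -> Self {
        Self { unused: Vec::new(), next: 0 }
    }

    fn alloc(&mut self) -> u32 {
        if let Some(index) = self.unused.pop() {
            return index;
        }
        let index = self.next;
        self.next += 1;
        index
    }

    fn free(&mut self, index: u32) {
        self.unused.push(index);
    }

    /// Upper bound on live indices, used to pre-size dense tracker storage.
    fn size(&self) -> usize {
        self.next as usize
    }
}

fn main() {
    let mut allocator = SimpleIndexAllocator::new();
    let a = allocator.alloc(); // 0
    let b = allocator.alloc(); // 1
    allocator.free(a);
    let c = allocator.alloc(); // reuses 0, keeping indices dense
    assert_eq!((a, b, c), (0, 1, 0));
    assert_eq!(allocator.size(), 2); // trackers never need more than 2 slots here
}
```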
pub(crate) struct TrackerIndexAllocators {
pub buffers: Arc<SharedTrackerIndexAllocator>,
pub textures: Arc<SharedTrackerIndexAllocator>,
pub texture_views: Arc<SharedTrackerIndexAllocator>,
pub samplers: Arc<SharedTrackerIndexAllocator>,
pub bind_groups: Arc<SharedTrackerIndexAllocator>,
pub compute_pipelines: Arc<SharedTrackerIndexAllocator>,
pub render_pipelines: Arc<SharedTrackerIndexAllocator>,
pub bundles: Arc<SharedTrackerIndexAllocator>,
pub query_sets: Arc<SharedTrackerIndexAllocator>,
pub blas_s: Arc<SharedTrackerIndexAllocator>,
pub tlas_s: Arc<SharedTrackerIndexAllocator>,
}
impl TrackerIndexAllocators {
pub fn new() -> Self {
TrackerIndexAllocators {
buffers: Arc::new(SharedTrackerIndexAllocator::new()),
textures: Arc::new(SharedTrackerIndexAllocator::new()),
texture_views: Arc::new(SharedTrackerIndexAllocator::new()),
samplers: Arc::new(SharedTrackerIndexAllocator::new()),
bind_groups: Arc::new(SharedTrackerIndexAllocator::new()),
compute_pipelines: Arc::new(SharedTrackerIndexAllocator::new()),
render_pipelines: Arc::new(SharedTrackerIndexAllocator::new()),
bundles: Arc::new(SharedTrackerIndexAllocator::new()),
query_sets: Arc::new(SharedTrackerIndexAllocator::new()),
blas_s: Arc::new(SharedTrackerIndexAllocator::new()),
tlas_s: Arc::new(SharedTrackerIndexAllocator::new()),
}
}
}
/// A structure containing all the information about a particular resource
/// transition. User code should be able to generate a pipeline barrier
/// based on the contents.
#[derive(Debug, PartialEq)]
pub(crate) struct PendingTransition<S: ResourceUses> {
pub id: u32,
pub selector: S::Selector,
pub usage: hal::StateTransition<S>,
}
pub(crate) type PendingTransitionList = Vec<PendingTransition<hal::TextureUses>>;
impl PendingTransition<hal::BufferUses> {
/// Produce the hal barrier corresponding to the transition.
pub fn into_hal<'a>(
self,
buf: &'a resource::Buffer,
snatch_guard: &'a SnatchGuard<'a>,
) -> hal::BufferBarrier<'a, dyn hal::DynBuffer> {
let buffer = buf.raw(snatch_guard).expect("Buffer is destroyed");
hal::BufferBarrier {
buffer,
usage: self.usage,
}
}
}
impl PendingTransition<hal::TextureUses> {
/// Produce the hal barrier corresponding to the transition.
pub fn into_hal(
self,
texture: &dyn hal::DynTexture,
) -> hal::TextureBarrier<'_, dyn hal::DynTexture> {
// Either of these showing up in a barrier is always a bug
strict_assert_ne!(self.usage.from, hal::TextureUses::UNKNOWN);
strict_assert_ne!(self.usage.to, hal::TextureUses::UNKNOWN);
let mip_count = self.selector.mips.end - self.selector.mips.start;
strict_assert_ne!(mip_count, 0);
let layer_count = self.selector.layers.end - self.selector.layers.start;
strict_assert_ne!(layer_count, 0);
hal::TextureBarrier {
texture,
range: wgt::ImageSubresourceRange {
aspect: wgt::TextureAspect::All,
base_mip_level: self.selector.mips.start,
mip_level_count: Some(mip_count),
base_array_layer: self.selector.layers.start,
array_layer_count: Some(layer_count),
},
usage: self.usage,
}
}
}
/// The uses that a resource or subresource can be in.
pub(crate) trait ResourceUses:
fmt::Debug + ops::BitAnd<Output = Self> + ops::BitOr<Output = Self> + PartialEq + Sized + Copy
{
/// All flags that are exclusive.
const EXCLUSIVE: Self;
/// The selector used by this resource.
type Selector: fmt::Debug;
/// Turn the resource into a pile of bits.
fn bits(self) -> u16;
/// Returns true if all the uses are ordered.
fn all_ordered(self) -> bool;
/// Returns true if any of the uses are exclusive.
fn any_exclusive(self) -> bool;
}
/// Returns true if the given state violates the usage scope rule
/// of any (inclusive) XOR one (exclusive).
fn invalid_resource_state<T: ResourceUses>(state: T) -> bool {
// Is power of two also means "is one bit set". We check for this as if
// we're in any exclusive state, we must only be in a single state.
state.any_exclusive() && !state.bits().is_power_of_two()
}
/// Returns true if the transition from one state to another does not require
/// a barrier.
fn skip_barrier<T: ResourceUses>(old_state: T, new_state: T) -> bool {
// If the state didn't change and all the usages are ordered, the hardware
// will guarantee the order of accesses, so we do not need to issue a barrier at all
old_state == new_state && old_state.all_ordered()
}
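
The usage-scope rule encoded by `invalid_resource_state` is that any combination of non-exclusive uses may coexist, but an exclusive use must stand alone. A toy check with invented bit values (the real `BufferUses`/`TextureUses` flags differ):

```rust
// Toy flags for illustration only; the actual wgpu-hal bit values are different.
const COPY_SRC: u16 = 1 << 0; // read-only
const UNIFORM: u16 = 1 << 1; // read-only
const STORAGE_RW: u16 = 1 << 2; // exclusive
const EXCLUSIVE: u16 = STORAGE_RW;

fn invalid_resource_state(state: u16) -> bool {
    // If any exclusive bit is set, the whole state must be exactly one bit.
    (state & EXCLUSIVE) != 0 && !state.is_power_of_two()
}

fn main() {
    assert!(!invalid_resource_state(COPY_SRC | UNIFORM)); // multiple reads: fine
    assert!(!invalid_resource_state(STORAGE_RW)); // exclusive use on its own: fine
    assert!(invalid_resource_state(STORAGE_RW | COPY_SRC)); // exclusive + read: conflict
}
```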
#[derive(Clone, Debug, Error)]
pub enum ResourceUsageCompatibilityError {
#[error("Attempted to use {res} with {invalid_use}.")]
Buffer {
res: ResourceErrorIdent,
invalid_use: InvalidUse<hal::BufferUses>,
},
#[error(
"Attempted to use {res} (mips {mip_levels:?} layers {array_layers:?}) with {invalid_use}."
)]
Texture {
res: ResourceErrorIdent,
mip_levels: ops::Range<u32>,
array_layers: ops::Range<u32>,
invalid_use: InvalidUse<hal::TextureUses>,
},
}
impl ResourceUsageCompatibilityError {
fn from_buffer(
buffer: &resource::Buffer,
current_state: hal::BufferUses,
new_state: hal::BufferUses,
) -> Self {
Self::Buffer {
res: buffer.error_ident(),
invalid_use: InvalidUse {
current_state,
new_state,
},
}
}
fn from_texture(
texture: &resource::Texture,
selector: TextureSelector,
current_state: hal::TextureUses,
new_state: hal::TextureUses,
) -> Self {
Self::Texture {
res: texture.error_ident(),
mip_levels: selector.mips,
array_layers: selector.layers,
invalid_use: InvalidUse {
current_state,
new_state,
},
}
}
}
/// Pretty print helper that shows helpful descriptions of a conflicting usage.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct InvalidUse<T> {
current_state: T,
new_state: T,
}
impl<T: ResourceUses> fmt::Display for InvalidUse<T> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let current = self.current_state;
let new = self.new_state;
let current_exclusive = current & T::EXCLUSIVE;
let new_exclusive = new & T::EXCLUSIVE;
let exclusive = current_exclusive | new_exclusive;
// The text starts with "tried to use X resource with {self}"
write!(
f,
"conflicting usages. Current usage {current:?} and new usage {new:?}. \
{exclusive:?} is an exclusive usage and cannot be used with any other \
usages within the usage scope (renderpass or compute dispatch)"
)
}
}
/// All the usages that a bind group contains. The uses are not deduplicated in any way
/// and may include conflicting uses. This is fully compliant with the WebGPU spec.
///
/// All bind group states are sorted by their ID so that when adding to a tracker,
/// they are added in the most efficient order possible (ascending order).
#[derive(Debug)]
pub(crate) struct BindGroupStates {
pub buffers: BufferBindGroupState,
pub views: TextureViewBindGroupState,
pub samplers: StatelessTracker<resource::Sampler>,
pub acceleration_structures: StatelessTracker<resource::Tlas>,
}
impl BindGroupStates {
pub fn new() -> Self {
Self {
buffers: BufferBindGroupState::new(),
views: TextureViewBindGroupState::new(),
samplers: StatelessTracker::new(),
acceleration_structures: StatelessTracker::new(),
}
}
/// Optimize the bind group states by sorting them by ID.
///
/// When this list of states is merged into a tracker, the memory
/// accesses will be in a constant ascending order.
pub fn optimize(&mut self) {
self.buffers.optimize();
// Views are stateless, however, `TextureViewBindGroupState`
// is special as it will be merged with other texture trackers.
self.views.optimize();
// Samplers and Tlas's are stateless and don't need to be optimized
// since the tracker is never merged with any other tracker.
}
}
/// This is a render bundle specific usage scope. It includes stateless resources
/// that are not normally included in a usage scope, but are used by render bundles
/// and need to be owned by the render bundles.
#[derive(Debug)]
pub(crate) struct RenderBundleScope {
pub buffers: BufferUsageScope,
pub textures: TextureUsageScope,
// Don't need to track views and samplers, they are never used directly, only by bind groups.
pub bind_groups: StatelessTracker<binding_model::BindGroup>,
pub render_pipelines: StatelessTracker<pipeline::RenderPipeline>,
}
impl RenderBundleScope {
/// Create the render bundle scope and pull the maximum IDs from the hubs.
pub fn new() -> Self {
Self {
buffers: BufferUsageScope::default(),
textures: TextureUsageScope::default(),
bind_groups: StatelessTracker::new(),
render_pipelines: StatelessTracker::new(),
}
}
/// Merge the inner contents of a bind group into the render bundle tracker.
///
/// Only stateful things are merged in here; all other resources are owned
/// indirectly by the bind group.
///
/// # Safety
///
/// The maximum ID given by each bind group resource must be less than the
/// length of the storage given at the call to `new`.
pub unsafe fn merge_bind_group(
&mut self,
bind_group: &BindGroupStates,
) -> Result<(), ResourceUsageCompatibilityError> {
unsafe { self.buffers.merge_bind_group(&bind_group.buffers)? };
unsafe { self.textures.merge_bind_group(&bind_group.views)? };
Ok(())
}
}
/// A pool for storing the memory used by [`UsageScope`]s. We take and store this memory when the
/// scope is dropped to avoid reallocating. The memory required only grows and allocation cost is
/// significant when a large number of resources have been used.
pub(crate) type UsageScopePool = Mutex<Vec<(BufferUsageScope, TextureUsageScope)>>;
/// A usage scope tracker. Only needs to store stateful resources as stateless
/// resources cannot possibly have a usage conflict.
#[derive(Debug)]
pub(crate) struct UsageScope<'a> {
pub pool: &'a UsageScopePool,
pub buffers: BufferUsageScope,
pub textures: TextureUsageScope,
}
impl<'a> Drop for UsageScope<'a> {
fn drop(&mut self) {
// clear vecs and push into pool
self.buffers.clear();
self.textures.clear();
self.pool.lock().push((
std::mem::take(&mut self.buffers),
std::mem::take(&mut self.textures),
));
}
}
impl UsageScope<'static> {
pub fn new_pooled<'d>(
pool: &'d UsageScopePool,
tracker_indices: &TrackerIndexAllocators,
) -> UsageScope<'d> {
let pooled = pool.lock().pop().unwrap_or_default();
let mut scope = UsageScope::<'d> {
pool,
buffers: pooled.0,
textures: pooled.1,
};
scope.buffers.set_size(tracker_indices.buffers.size());
scope.textures.set_size(tracker_indices.textures.size());
scope
}
}
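
`new_pooled` and the `Drop` impl above form a simple take-on-create / return-on-drop object pool. The same shape in a standalone sketch, using a plain `std::sync::Mutex` and a generic scratch buffer rather than the tracker types:

```rust
use std::sync::Mutex;

// Standalone sketch of the pooling pattern: take a previously used allocation on
// creation, clear it (keeping its capacity) and hand it back on drop.
type Pool = Mutex<Vec<Vec<u32>>>;

struct PooledScratch<'a> {
    pool: &'a Pool,
    buf: Vec<u32>,
}

impl<'a> PooledScratch<'a> {
    fn new(pool: &'a Pool) -> Self {
        let buf = pool.lock().unwrap().pop().unwrap_or_default();
        Self { pool, buf }
    }
}

impl Drop for PooledScratch<'_> {
    fn drop(&mut self) {
        // Clearing keeps the capacity, so the next user avoids reallocating.
        self.buf.clear();
        self.pool.lock().unwrap().push(std::mem::take(&mut self.buf));
    }
}

fn main() {
    let pool: Pool = Mutex::new(Vec::new());
    {
        let mut scratch = PooledScratch::new(&pool);
        scratch.buf.extend(0..1024u32); // grows the allocation once
    }
    // The second scope reuses the 1024-element capacity instead of reallocating.
    let scratch = PooledScratch::new(&pool);
    assert!(scratch.buf.capacity() >= 1024);
}
```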
impl<'a> UsageScope<'a> {
/// Merge the inner contents of a bind group into the usage scope.
///
/// Only stateful things are merged in here; all other resources are owned
/// indirectly by the bind group.
///
/// # Safety
///
/// The maximum ID given by each bind group resource must be less than the
/// length of the storage given at the call to `new`.
pub unsafe fn merge_bind_group(
&mut self,
bind_group: &BindGroupStates,
) -> Result<(), ResourceUsageCompatibilityError> {
unsafe {
self.buffers.merge_bind_group(&bind_group.buffers)?;
self.textures.merge_bind_group(&bind_group.views)?;
}
Ok(())
}
/// Merge the inner contents of a bind group into the usage scope.
///
/// Only stateful things are merged in here; all other resources are owned
/// indirectly by a bind group or are merged directly into the command buffer tracker.
///
/// # Safety
///
/// The maximum ID given by each bind group resource must be less than the
/// length of the storage given at the call to `new`.
pub unsafe fn merge_render_bundle(
&mut self,
render_bundle: &RenderBundleScope,
) -> Result<(), ResourceUsageCompatibilityError> {
self.buffers.merge_usage_scope(&render_bundle.buffers)?;
self.textures.merge_usage_scope(&render_bundle.textures)?;
Ok(())
}
}
/// A tracker used by Device.
pub(crate) struct DeviceTracker {
pub buffers: DeviceBufferTracker,
pub textures: DeviceTextureTracker,
}
impl DeviceTracker {
pub fn new() -> Self {
Self {
buffers: DeviceBufferTracker::new(),
textures: DeviceTextureTracker::new(),
}
}
}
/// A full double sided tracker used by CommandBuffers.
pub(crate) struct Tracker {
pub buffers: BufferTracker,
pub textures: TextureTracker,
pub blas_s: StatelessTracker<resource::Blas>,
pub tlas_s: StatelessTracker<resource::Tlas>,
pub views: StatelessTracker<resource::TextureView>,
pub bind_groups: StatelessTracker<binding_model::BindGroup>,
pub compute_pipelines: StatelessTracker<pipeline::ComputePipeline>,
pub render_pipelines: StatelessTracker<pipeline::RenderPipeline>,
pub bundles: StatelessTracker<command::RenderBundle>,
pub query_sets: StatelessTracker<resource::QuerySet>,
}
impl Tracker {
pub fn new() -> Self {
Self {
buffers: BufferTracker::new(),
textures: TextureTracker::new(),
blas_s: StatelessTracker::new(),
tlas_s: StatelessTracker::new(),
views: StatelessTracker::new(),
bind_groups: StatelessTracker::new(),
compute_pipelines: StatelessTracker::new(),
render_pipelines: StatelessTracker::new(),
bundles: StatelessTracker::new(),
query_sets: StatelessTracker::new(),
}
}
/// Iterates through all resources in the given bind group and adopts
/// the state given for those resources in the UsageScope. It also
/// removes all touched resources from the usage scope.
///
/// If a transition is needed to get the resources into the needed
/// state, those transitions are stored within the tracker. A
/// subsequent call to [`BufferTracker::drain_transitions`] or
/// [`TextureTracker::drain_transitions`] is needed to get those transitions.
///
/// This is a really funky method used by Compute Passes to generate
/// barriers after a call to dispatch without needing to iterate
/// over all elements in the usage scope. We use the bind group
/// as a source of which IDs to look at. The bind groups
/// must have first been added to the usage scope.
///
/// Only stateful things are merged in here; all other resources are owned
/// indirectly by the bind group.
///
/// # Safety
///
/// The maximum ID given by each bind group resource must be less than the
/// value given to `set_size`
pub unsafe fn set_and_remove_from_usage_scope_sparse(
&mut self,
scope: &mut UsageScope,
bind_group: &BindGroupStates,
) {
unsafe {
self.buffers.set_and_remove_from_usage_scope_sparse(
&mut scope.buffers,
bind_group.buffers.used_tracker_indices(),
)
};
unsafe {
self.textures
.set_and_remove_from_usage_scope_sparse(&mut scope.textures, &bind_group.views)
};
}
}

206
vendor/wgpu-core/src/track/range.rs vendored Normal file
View File

@@ -0,0 +1,206 @@
//Note: this could be the only place where we need `SmallVec`.
//TODO: consider getting rid of it.
use smallvec::SmallVec;
use std::{fmt::Debug, iter, ops::Range};
/// Structure that keeps track of an I -> T mapping,
/// optimized for a case where keys of the same values
/// are often grouped together linearly.
#[derive(Clone, Debug, PartialEq)]
pub(crate) struct RangedStates<I, T> {
/// List of ranges, each associated with a single value.
/// Ranges of keys have to be non-intersecting and ordered.
ranges: SmallVec<[(Range<I>, T); 1]>,
}
impl<I: Copy + Ord, T: Copy + PartialEq> RangedStates<I, T> {
pub fn from_range(range: Range<I>, value: T) -> Self {
Self {
ranges: iter::once((range, value)).collect(),
}
}
/// Construct a new instance from a slice of ranges.
#[cfg(test)]
pub fn from_slice(values: &[(Range<I>, T)]) -> Self {
Self {
ranges: values.iter().cloned().collect(),
}
}
pub fn iter(&self) -> impl Iterator<Item = &(Range<I>, T)> + Clone {
self.ranges.iter()
}
pub fn iter_mut(&mut self) -> impl Iterator<Item = &mut (Range<I>, T)> {
self.ranges.iter_mut()
}
/// Check that all the ranges are non-intersecting and ordered.
/// Panics otherwise.
#[cfg(test)]
fn check_sanity(&self) {
for a in self.ranges.iter() {
assert!(a.0.start < a.0.end);
}
for (a, b) in self.ranges.iter().zip(self.ranges[1..].iter()) {
assert!(a.0.end <= b.0.start);
}
}
/// Merge the neighboring ranges together, where possible.
pub fn coalesce(&mut self) {
let mut num_removed = 0;
let mut iter = self.ranges.iter_mut();
let mut cur = match iter.next() {
Some(elem) => elem,
None => return,
};
for next in iter {
if cur.0.end == next.0.start && cur.1 == next.1 {
num_removed += 1;
cur.0.end = next.0.end;
next.0.end = next.0.start;
} else {
cur = next;
}
}
if num_removed != 0 {
self.ranges.retain(|pair| pair.0.start != pair.0.end);
}
}
pub fn iter_filter<'a>(
&'a self,
range: &'a Range<I>,
) -> impl Iterator<Item = (Range<I>, &'a T)> + 'a {
self.ranges
.iter()
.filter(move |&(inner, ..)| inner.end > range.start && inner.start < range.end)
.map(move |(inner, v)| {
let new_range = inner.start.max(range.start)..inner.end.min(range.end);
(new_range, v)
})
}
/// Split the storage ranges in such a way that there is a linear subset of
/// them occupying exactly `index` range, which is returned mutably.
///
/// Gaps in the ranges are filled with `default` value.
pub fn isolate(&mut self, index: &Range<I>, default: T) -> &mut [(Range<I>, T)] {
//TODO: implement this in 2 passes:
// 1. scan the ranges to figure out how many extra ones need to be inserted
// 2. go through the ranges by moving them to the right and inserting the missing ones
let mut start_pos = match self.ranges.iter().position(|pair| pair.0.end > index.start) {
Some(pos) => pos,
None => {
let pos = self.ranges.len();
self.ranges.push((index.clone(), default));
return &mut self.ranges[pos..];
}
};
{
let (range, value) = self.ranges[start_pos].clone();
if range.start < index.start {
self.ranges[start_pos].0.start = index.start;
self.ranges
.insert(start_pos, (range.start..index.start, value));
start_pos += 1;
}
}
let mut pos = start_pos;
let mut range_pos = index.start;
loop {
let (range, value) = self.ranges[pos].clone();
if range.start >= index.end {
self.ranges.insert(pos, (range_pos..index.end, default));
pos += 1;
break;
}
if range.start > range_pos {
self.ranges.insert(pos, (range_pos..range.start, default));
pos += 1;
range_pos = range.start;
}
if range.end >= index.end {
if range.end != index.end {
self.ranges[pos].0.start = index.end;
self.ranges.insert(pos, (range_pos..index.end, value));
}
pos += 1;
break;
}
pos += 1;
range_pos = range.end;
if pos == self.ranges.len() {
self.ranges.push((range_pos..index.end, default));
pos += 1;
break;
}
}
&mut self.ranges[start_pos..pos]
}
/// Helper method for isolation that checks the sanity of the results.
#[cfg(test)]
pub fn sanely_isolated(&self, index: Range<I>, default: T) -> Vec<(Range<I>, T)> {
let mut clone = self.clone();
let result = clone.isolate(&index, default).to_vec();
clone.check_sanity();
result
}
}
#[cfg(test)]
mod test {
//TODO: randomized/fuzzy testing
use super::RangedStates;
#[test]
fn sane_good() {
let rs = RangedStates::from_slice(&[(1..4, 9u8), (4..5, 9)]);
rs.check_sanity();
}
#[test]
#[should_panic]
fn sane_empty() {
let rs = RangedStates::from_slice(&[(1..4, 9u8), (5..5, 9)]);
rs.check_sanity();
}
#[test]
#[should_panic]
fn sane_intersect() {
let rs = RangedStates::from_slice(&[(1..4, 9u8), (3..5, 9)]);
rs.check_sanity();
}
#[test]
fn coalesce() {
let mut rs = RangedStates::from_slice(&[(1..4, 9u8), (4..5, 9), (5..7, 1), (8..9, 1)]);
rs.coalesce();
rs.check_sanity();
assert_eq!(rs.ranges.as_slice(), &[(1..5, 9), (5..7, 1), (8..9, 1),]);
}
#[test]
fn isolate() {
let rs = RangedStates::from_slice(&[(1..4, 9u8), (4..5, 9), (5..7, 1), (8..9, 1)]);
assert_eq!(&rs.sanely_isolated(4..5, 0), &[(4..5, 9u8),]);
assert_eq!(
&rs.sanely_isolated(0..6, 0),
&[(0..1, 0), (1..4, 9u8), (4..5, 9), (5..6, 1),]
);
assert_eq!(&rs.sanely_isolated(8..10, 1), &[(8..9, 1), (9..10, 1),]);
assert_eq!(
&rs.sanely_isolated(6..9, 0),
&[(6..7, 1), (7..8, 0), (8..9, 1),]
);
}
}

36
vendor/wgpu-core/src/track/stateless.rs vendored Normal file
View File

@@ -0,0 +1,36 @@
use std::slice::Iter;
use std::sync::Arc;
/// A tracker that holds strong references to resources.
///
/// This is only used to keep resources alive.
#[derive(Debug)]
pub(crate) struct StatelessTracker<T> {
resources: Vec<Arc<T>>,
}
impl<T> StatelessTracker<T> {
pub fn new() -> Self {
Self {
resources: Vec::new(),
}
}
/// Inserts a single resource into the resource tracker.
///
/// Returns a reference to the newly inserted resource.
/// (This allows avoiding a clone/reference count increase in many cases.)
pub fn insert_single(&mut self, resource: Arc<T>) -> &Arc<T> {
self.resources.push(resource);
unsafe { self.resources.last().unwrap_unchecked() }
}
}
impl<'a, T> IntoIterator for &'a StatelessTracker<T> {
type Item = &'a Arc<T>;
type IntoIter = Iter<'a, Arc<T>>;
fn into_iter(self) -> Self::IntoIter {
self.resources.as_slice().iter()
}
}

1536
vendor/wgpu-core/src/track/texture.rs vendored Normal file

File diff suppressed because it is too large Load Diff

1330
vendor/wgpu-core/src/validation.rs vendored Normal file

File diff suppressed because it is too large Load Diff

66
vendor/wgpu-core/src/weak_vec.rs vendored Normal file
View File

@@ -0,0 +1,66 @@
//! Module containing the [`WeakVec`] API.
use std::sync::Weak;
/// An optimized container for `Weak` references of `T` that minimizes reallocations by
/// dropping older elements that no longer have strong references to them.
#[derive(Debug)]
pub(crate) struct WeakVec<T> {
inner: Vec<Weak<T>>,
}
impl<T> Default for WeakVec<T> {
fn default() -> Self {
Self {
inner: Default::default(),
}
}
}
impl<T> WeakVec<T> {
pub(crate) fn new() -> Self {
Self { inner: Vec::new() }
}
/// Pushes a new element to this collection.
///
/// If the inner Vec needs to be reallocated, we will first drop older elements that
/// no longer have strong references to them.
pub(crate) fn push(&mut self, value: Weak<T>) {
if self.inner.len() == self.inner.capacity() {
// Iterating backwards has the advantage that we don't do more work than we have to.
for i in (0..self.inner.len()).rev() {
if self.inner[i].strong_count() == 0 {
self.inner.swap_remove(i);
}
}
// Make sure our capacity is twice the number of live elements.
// Leaving some spare capacity ensures that we won't re-scan immediately.
self.inner.reserve_exact(self.inner.len());
}
self.inner.push(value);
}
}
pub(crate) struct WeakVecIter<T> {
inner: std::vec::IntoIter<Weak<T>>,
}
impl<T> Iterator for WeakVecIter<T> {
type Item = Weak<T>;
fn next(&mut self) -> Option<Self::Item> {
self.inner.next()
}
}
impl<T> IntoIterator for WeakVec<T> {
type Item = Weak<T>;
type IntoIter = WeakVecIter<T>;
fn into_iter(self) -> Self::IntoIter {
WeakVecIter {
inner: self.inner.into_iter(),
}
}
}
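
A quick usage sketch of the reallocation-avoiding behaviour described above; since the real `WeakVec` is `pub(crate)`, this uses a standalone copy of the same push logic:

```rust
use std::sync::{Arc, Weak};

// Standalone copy of the push-with-cleanup strategy: only when the Vec is about
// to reallocate do we sweep out Weak entries whose Arc has already been dropped.
struct SimpleWeakVec<T> {
    inner: Vec<Weak<T>>,
}

impl<T> SimpleWeakVec<T> {
    fn new() -> Self {
        Self { inner: Vec::new() }
    }

    fn push(&mut self, value: Weak<T>) {
        if self.inner.len() == self.inner.capacity() {
            // Sweep backwards so swap_remove never revisits an element.
            for i in (0..self.inner.len()).rev() {
                if self.inner[i].strong_count() == 0 {
                    self.inner.swap_remove(i);
                }
            }
            // Leave spare room so the next pushes don't immediately re-trigger the sweep.
            self.inner.reserve_exact(self.inner.len());
        }
        self.inner.push(value);
    }

    fn live(&self) -> usize {
        self.inner.iter().filter(|w| w.strong_count() > 0).count()
    }
}

fn main() {
    let mut weaks = SimpleWeakVec::new();
    let keep: Vec<Arc<u32>> = (0..4).map(Arc::new).collect();
    for arc in &keep {
        weaks.push(Arc::downgrade(arc));
    }
    // These Arcs die at the end of each statement, leaving dead Weak entries
    // that get culled lazily whenever a push hits the capacity limit.
    for i in 0..100u32 {
        weaks.push(Arc::downgrade(&Arc::new(i)));
    }
    assert_eq!(weaks.live(), 4);
}
```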