Vendor dependencies for 0.3.0 release

2025-09-27 10:29:08 -05:00
parent 0c8d39d483
commit 82ab7f317b
26803 changed files with 16134934 additions and 0 deletions

611
vendor/gpu-alloc/src/allocator.rs vendored Normal file

@@ -0,0 +1,611 @@
use {
crate::{
align_down,
block::{MemoryBlock, MemoryBlockFlavor},
buddy::{BuddyAllocator, BuddyBlock},
config::Config,
error::AllocationError,
freelist::{FreeListAllocator, FreeListBlock},
heap::Heap,
usage::{MemoryForUsage, UsageFlags},
MemoryBounds, Request,
},
alloc::boxed::Box,
core::convert::TryFrom as _,
gpu_alloc_types::{
AllocationFlags, DeviceProperties, MemoryDevice, MemoryPropertyFlags, MemoryType,
OutOfMemory,
},
};
/// Memory allocator for Vulkan-like APIs.
#[derive(Debug)]
pub struct GpuAllocator<M> {
dedicated_threshold: u64,
preferred_dedicated_threshold: u64,
transient_dedicated_threshold: u64,
max_memory_allocation_size: u64,
memory_for_usage: MemoryForUsage,
memory_types: Box<[MemoryType]>,
memory_heaps: Box<[Heap]>,
allocations_remains: u32,
non_coherent_atom_mask: u64,
starting_free_list_chunk: u64,
final_free_list_chunk: u64,
minimal_buddy_size: u64,
initial_buddy_dedicated_size: u64,
buffer_device_address: bool,
buddy_allocators: Box<[Option<BuddyAllocator<M>>]>,
freelist_allocators: Box<[Option<FreeListAllocator<M>>]>,
}
/// Hints for allocator to decide on allocation strategy.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
#[non_exhaustive]
pub enum Dedicated {
/// Allocation directly from device.\
/// Very slow.\
/// Count of allocations is limited.\
/// Use with caution.\
/// Must be used if the resource has to be bound to a dedicated memory object.
Required,
/// Hint for the allocator that a dedicated memory object is preferred.\
/// Should be used if it is known that placing the resource in a dedicated memory object
/// would allow for better performance.\
/// The implementation is still allowed to return a block from a shared memory object.
Preferred,
}
impl<M> GpuAllocator<M>
where
M: MemoryBounds + 'static,
{
/// Creates a new instance of `GpuAllocator`.
/// The provided `DeviceProperties` should match the properties of the `MemoryDevice` that will be used
/// with the created `GpuAllocator` instance.
#[cfg_attr(feature = "tracing", tracing::instrument)]
pub fn new(config: Config, props: DeviceProperties<'_>) -> Self {
assert!(
props.non_coherent_atom_size.is_power_of_two(),
"`non_coherent_atom_size` must be power of two"
);
assert!(
isize::try_from(props.non_coherent_atom_size).is_ok(),
"`non_coherent_atom_size` must fit host address space"
);
GpuAllocator {
dedicated_threshold: config.dedicated_threshold,
preferred_dedicated_threshold: config
.preferred_dedicated_threshold
.min(config.dedicated_threshold),
transient_dedicated_threshold: config
.transient_dedicated_threshold
.max(config.dedicated_threshold),
max_memory_allocation_size: props.max_memory_allocation_size,
memory_for_usage: MemoryForUsage::new(props.memory_types.as_ref()),
memory_types: props.memory_types.as_ref().iter().copied().collect(),
memory_heaps: props
.memory_heaps
.as_ref()
.iter()
.map(|heap| Heap::new(heap.size))
.collect(),
buffer_device_address: props.buffer_device_address,
allocations_remains: props.max_memory_allocation_count,
non_coherent_atom_mask: props.non_coherent_atom_size - 1,
starting_free_list_chunk: config.starting_free_list_chunk,
final_free_list_chunk: config.final_free_list_chunk,
minimal_buddy_size: config.minimal_buddy_size,
initial_buddy_dedicated_size: config.initial_buddy_dedicated_size,
buddy_allocators: props.memory_types.as_ref().iter().map(|_| None).collect(),
freelist_allocators: props.memory_types.as_ref().iter().map(|_| None).collect(),
}
}
/// Allocates memory block from specified `device` according to the `request`.
///
/// # Safety
///
/// * `device` must be one with `DeviceProperties` that were provided to create this `GpuAllocator` instance.
/// * Same `device` instance must be used for all interactions with one `GpuAllocator` instance
/// and memory blocks allocated from it.
#[cfg_attr(feature = "tracing", tracing::instrument(skip(self, device)))]
pub unsafe fn alloc(
&mut self,
device: &impl MemoryDevice<M>,
request: Request,
) -> Result<MemoryBlock<M>, AllocationError> {
self.alloc_internal(device, request, None)
}
/// Allocates memory block from specified `device` according to the `request`.
/// This function allows the user to force a specific allocation strategy.
/// Improper use can lead to suboptimal performance or excessive overhead.
/// Prefer `GpuAllocator::alloc` if in doubt.
///
/// # Safety
///
/// * `device` must be one with `DeviceProperties` that were provided to create this `GpuAllocator` instance.
/// * Same `device` instance must be used for all interactions with one `GpuAllocator` instance
/// and memory blocks allocated from it.
#[cfg_attr(feature = "tracing", tracing::instrument(skip(self, device)))]
pub unsafe fn alloc_with_dedicated(
&mut self,
device: &impl MemoryDevice<M>,
request: Request,
dedicated: Dedicated,
) -> Result<MemoryBlock<M>, AllocationError> {
self.alloc_internal(device, request, Some(dedicated))
}
unsafe fn alloc_internal(
&mut self,
device: &impl MemoryDevice<M>,
mut request: Request,
dedicated: Option<Dedicated>,
) -> Result<MemoryBlock<M>, AllocationError> {
enum Strategy {
Buddy,
Dedicated,
FreeList,
}
request.usage = with_implicit_usage_flags(request.usage);
if request.usage.contains(UsageFlags::DEVICE_ADDRESS) {
assert!(self.buffer_device_address, "`DEVICE_ADDRESS` cannot be requested when `DeviceProperties::buffer_device_address` is false");
}
if request.size > self.max_memory_allocation_size {
return Err(AllocationError::OutOfDeviceMemory);
}
if let Some(Dedicated::Required) = dedicated {
if self.allocations_remains == 0 {
return Err(AllocationError::TooManyObjects);
}
}
if 0 == self.memory_for_usage.mask(request.usage) & request.memory_types {
#[cfg(feature = "tracing")]
tracing::error!(
"Cannot serve request {:?}, no memory among bitset `{}` support usage {:?}",
request,
request.memory_types,
request.usage
);
return Err(AllocationError::NoCompatibleMemoryTypes);
}
let transient = request.usage.contains(UsageFlags::TRANSIENT);
for &index in self.memory_for_usage.types(request.usage) {
if 0 == request.memory_types & (1 << index) {
// Skip memory type incompatible with the request.
continue;
}
let memory_type = &self.memory_types[index as usize];
let heap = memory_type.heap;
let heap = &mut self.memory_heaps[heap as usize];
if request.size > heap.size() {
// Impossible to use memory type from this heap.
continue;
}
let atom_mask = if host_visible_non_coherent(memory_type.props) {
self.non_coherent_atom_mask
} else {
0
};
let flags = if self.buffer_device_address {
AllocationFlags::DEVICE_ADDRESS
} else {
AllocationFlags::empty()
};
let strategy = match (dedicated, transient) {
(Some(Dedicated::Required), _) => Strategy::Dedicated,
(Some(Dedicated::Preferred), _)
if request.size >= self.preferred_dedicated_threshold =>
{
Strategy::Dedicated
}
(_, true) => {
let threshold = self.transient_dedicated_threshold.min(heap.size() / 32);
if request.size < threshold {
Strategy::FreeList
} else {
Strategy::Dedicated
}
}
(_, false) => {
let threshold = self.dedicated_threshold.min(heap.size() / 32);
if request.size < threshold {
Strategy::Buddy
} else {
Strategy::Dedicated
}
}
};
match strategy {
Strategy::Dedicated => {
#[cfg(feature = "tracing")]
tracing::debug!(
"Allocating memory object `{}@{:?}`",
request.size,
memory_type
);
match device.allocate_memory(request.size, index, flags) {
Ok(memory) => {
self.allocations_remains -= 1;
heap.alloc(request.size);
return Ok(MemoryBlock::new(
index,
memory_type.props,
0,
request.size,
atom_mask,
MemoryBlockFlavor::Dedicated { memory },
));
}
Err(OutOfMemory::OutOfDeviceMemory) => continue,
Err(OutOfMemory::OutOfHostMemory) => {
return Err(AllocationError::OutOfHostMemory)
}
}
}
Strategy::FreeList => {
let allocator = match &mut self.freelist_allocators[index as usize] {
Some(allocator) => allocator,
slot => {
let starting_free_list_chunk = match align_down(
self.starting_free_list_chunk.min(heap.size() / 32),
atom_mask,
) {
0 => atom_mask,
other => other,
};
let final_free_list_chunk = match align_down(
self.final_free_list_chunk
.max(self.starting_free_list_chunk)
.max(self.transient_dedicated_threshold)
.min(heap.size() / 32),
atom_mask,
) {
0 => atom_mask,
other => other,
};
slot.get_or_insert(FreeListAllocator::new(
starting_free_list_chunk,
final_free_list_chunk,
index,
memory_type.props,
if host_visible_non_coherent(memory_type.props) {
self.non_coherent_atom_mask
} else {
0
},
))
}
};
let result = allocator.alloc(
device,
request.size,
request.align_mask,
flags,
heap,
&mut self.allocations_remains,
);
match result {
Ok(block) => {
return Ok(MemoryBlock::new(
index,
memory_type.props,
block.offset,
block.size,
atom_mask,
MemoryBlockFlavor::FreeList {
chunk: block.chunk,
ptr: block.ptr,
memory: block.memory,
},
))
}
Err(AllocationError::OutOfDeviceMemory) => continue,
Err(err) => return Err(err),
}
}
Strategy::Buddy => {
let allocator = match &mut self.buddy_allocators[index as usize] {
Some(allocator) => allocator,
slot => {
let minimal_buddy_size = self
.minimal_buddy_size
.min(heap.size() / 1024)
.next_power_of_two();
let initial_buddy_dedicated_size = self
.initial_buddy_dedicated_size
.min(heap.size() / 32)
.next_power_of_two();
slot.get_or_insert(BuddyAllocator::new(
minimal_buddy_size,
initial_buddy_dedicated_size,
index,
memory_type.props,
if host_visible_non_coherent(memory_type.props) {
self.non_coherent_atom_mask
} else {
0
},
))
}
};
let result = allocator.alloc(
device,
request.size,
request.align_mask,
flags,
heap,
&mut self.allocations_remains,
);
match result {
Ok(block) => {
return Ok(MemoryBlock::new(
index,
memory_type.props,
block.offset,
block.size,
atom_mask,
MemoryBlockFlavor::Buddy {
chunk: block.chunk,
ptr: block.ptr,
index: block.index,
memory: block.memory,
},
))
}
Err(AllocationError::OutOfDeviceMemory) => continue,
Err(err) => return Err(err),
}
}
}
}
Err(AllocationError::OutOfDeviceMemory)
}
/// Creates a memory block from an existing memory allocation, transferring ownership to the allocator.
///
/// This function allows the [`GpuAllocator`] to manage memory allocated outside of the typical
/// [`GpuAllocator::alloc`] family of functions.
///
/// # Usage
///
/// If you need to import external memory, such as a Win32 `HANDLE` or a Linux `dmabuf`, import the device
/// memory using the graphics api and platform dependent functions. Once that is done, call this function
/// to make the [`GpuAllocator`] take ownership of the imported memory.
///
/// When calling this function, you **must** ensure there are [enough remaining allocations](GpuAllocator::remaining_allocations).
///
/// # Safety
///
/// - The `memory` must be allocated with the same device that was provided to create this [`GpuAllocator`]
/// instance.
/// - The `memory` must be valid.
/// - The `props`, `offset` and `size` must match the properties, offset and size of the memory allocation.
/// - The memory must have been allocated with the specified `memory_type`.
/// - There must be enough remaining allocations.
/// - The memory allocation must not come from an existing memory block created by this allocator.
/// - The underlying memory object must be deallocated using the returned [`MemoryBlock`] with
/// [`GpuAllocator::dealloc`].
pub unsafe fn import_memory(
&mut self,
memory: M,
memory_type: u32,
props: MemoryPropertyFlags,
offset: u64,
size: u64,
) -> MemoryBlock<M> {
// Get the heap which the imported memory is from.
let heap = self
.memory_types
.get(memory_type as usize)
.expect("Invalid memory type specified when importing memory")
.heap;
let heap = &mut self.memory_heaps[heap as usize];
#[cfg(feature = "tracing")]
tracing::debug!(
"Importing memory object {:?} `{}@{:?}`",
memory,
size,
memory_type
);
assert_ne!(
self.allocations_remains, 0,
"Out of allocations when importing a memory block. Ensure you check GpuAllocator::remaining_allocations before import."
);
self.allocations_remains -= 1;
let atom_mask = if host_visible_non_coherent(props) {
self.non_coherent_atom_mask
} else {
0
};
heap.alloc(size);
MemoryBlock::new(
memory_type,
props,
offset,
size,
atom_mask,
MemoryBlockFlavor::Dedicated { memory },
)
}
/// Deallocates memory block previously allocated from this `GpuAllocator` instance.
///
/// # Safety
///
/// * Memory block must have been allocated by this `GpuAllocator` instance
/// * `device` must be one with `DeviceProperties` that were provided to create this `GpuAllocator` instance
/// * Same `device` instance must be used for all interactions with one `GpuAllocator` instance
/// and memory blocks allocated from it
#[cfg_attr(feature = "tracing", tracing::instrument(skip(self, device)))]
pub unsafe fn dealloc(&mut self, device: &impl MemoryDevice<M>, block: MemoryBlock<M>) {
let memory_type = block.memory_type();
let offset = block.offset();
let size = block.size();
let flavor = block.deallocate();
match flavor {
MemoryBlockFlavor::Dedicated { memory } => {
let heap = self.memory_types[memory_type as usize].heap;
device.deallocate_memory(memory);
self.allocations_remains += 1;
self.memory_heaps[heap as usize].dealloc(size);
}
MemoryBlockFlavor::Buddy {
chunk,
ptr,
index,
memory,
} => {
let heap = self.memory_types[memory_type as usize].heap;
let heap = &mut self.memory_heaps[heap as usize];
let allocator = self.buddy_allocators[memory_type as usize]
.as_mut()
.expect("Allocator should exist");
allocator.dealloc(
device,
BuddyBlock {
memory,
ptr,
offset,
size,
chunk,
index,
},
heap,
&mut self.allocations_remains,
);
}
MemoryBlockFlavor::FreeList { chunk, ptr, memory } => {
let heap = self.memory_types[memory_type as usize].heap;
let heap = &mut self.memory_heaps[heap as usize];
let allocator = self.freelist_allocators[memory_type as usize]
.as_mut()
.expect("Allocator should exist");
allocator.dealloc(
device,
FreeListBlock {
memory,
ptr,
chunk,
offset,
size,
},
heap,
&mut self.allocations_remains,
);
}
}
}
/// Returns the maximum allocation size supported.
pub fn max_allocation_size(&self) -> u64 {
self.max_memory_allocation_size
}
/// Returns the number of remaining available allocations.
///
/// This may be useful if you need to know whether the allocator can serve a number of allocations
/// ahead of time. This function is also useful for ensuring you do not allocate too much memory
/// outside the allocator (such as external memory).
pub fn remaining_allocations(&self) -> u32 {
self.allocations_remains
}
/// Sets the number of remaining available allocations.
///
/// # Safety
///
/// The caller is responsible for ensuring the number of remaining allocations does not exceed how many
/// remaining allocations there actually are on the memory device.
pub unsafe fn set_remaining_allocations(&mut self, remaining: u32) {
self.allocations_remains = remaining;
}
/// Deallocates leftover memory objects.
/// Should be used before dropping.
///
/// # Safety
///
/// * `device` must be one with `DeviceProperties` that were provided to create this `GpuAllocator` instance
/// * Same `device` instance must be used for all interactions with one `GpuAllocator` instance
/// and memory blocks allocated from it
#[cfg_attr(feature = "tracing", tracing::instrument(skip(self, device)))]
pub unsafe fn cleanup(&mut self, device: &impl MemoryDevice<M>) {
for (index, allocator) in self
.freelist_allocators
.iter_mut()
.enumerate()
.filter_map(|(index, allocator)| Some((index, allocator.as_mut()?)))
{
let memory_type = &self.memory_types[index];
let heap = memory_type.heap;
let heap = &mut self.memory_heaps[heap as usize];
allocator.cleanup(device, heap, &mut self.allocations_remains);
}
}
}
fn host_visible_non_coherent(props: MemoryPropertyFlags) -> bool {
(props & (MemoryPropertyFlags::HOST_COHERENT | MemoryPropertyFlags::HOST_VISIBLE))
== MemoryPropertyFlags::HOST_VISIBLE
}
fn with_implicit_usage_flags(usage: UsageFlags) -> UsageFlags {
if usage.is_empty() {
UsageFlags::FAST_DEVICE_ACCESS
} else if usage.intersects(UsageFlags::DOWNLOAD | UsageFlags::UPLOAD) {
usage | UsageFlags::HOST_ACCESS
} else {
usage
}
}
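
The file above exposes the whole public allocation workflow: construct a `GpuAllocator` from a `Config` and `DeviceProperties`, call `alloc` with a `Request`, and return blocks through `dealloc` and `cleanup`. Below is a minimal sketch of that round trip, kept generic over the backend; the bound on `M` mirrors the crate's internal `MemoryBounds` alias, `device`/`props` are assumed to come from a matching `gpu-alloc-<backend>` crate, and the request values are illustrative only.

use gpu_alloc::{
    AllocationError, Config, DeviceProperties, GpuAllocator, MemoryDevice, Request, UsageFlags,
};

// Sketch only: nothing here is part of the vendored sources.
unsafe fn round_trip<M, D>(
    device: &D,
    props: DeviceProperties<'_>,
) -> Result<(), AllocationError>
where
    D: MemoryDevice<M>,
    M: core::fmt::Debug + 'static, // mirrors the crate's `MemoryBounds` bound
{
    // Prototyping defaults; real applications should tune `Config` for their hardware.
    let mut allocator = GpuAllocator::<M>::new(Config::i_am_prototyping(), props);

    let mut block = allocator.alloc(
        device,
        Request {
            size: 64 * 1024,
            align_mask: 255,           // 256-byte alignment expressed as a mask
            usage: UsageFlags::UPLOAD, // implies HOST_ACCESS (see `usage.rs`)
            memory_types: !0,          // allow any compatible memory type
        },
    )?;

    // Transiently map and fill the block; `write_bytes` flushes when non-coherent.
    block
        .write_bytes(device, 0, &[0u8; 16])
        .expect("UPLOAD memory is host-visible");

    allocator.dealloc(device, block);
    allocator.cleanup(device); // release leftover chunks before dropping the allocator
    Ok(())
}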

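The `import_memory` documentation above describes handing externally allocated memory to the allocator. A hedged sketch of that flow follows; `allocator`, `device_memory`, `memory_type_index`, and `size` are assumed to already exist and to satisfy the listed safety requirements, and `DEVICE_LOCAL` is only an example property set.

// Check the documented precondition before importing.
assert!(
    allocator.remaining_allocations() > 0,
    "no allocations left; importing would exceed the device limit"
);
let block = unsafe {
    allocator.import_memory(
        device_memory,     // memory object imported via platform-specific means
        memory_type_index, // memory type it was actually allocated from
        gpu_alloc::MemoryPropertyFlags::DEVICE_LOCAL,
        0,                 // offset of the imported range
        size,              // size of the imported allocation
    )
};
// The returned block must later go back through `GpuAllocator::dealloc`,
// like any other dedicated block.
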
327
vendor/gpu-alloc/src/block.rs vendored Normal file

@@ -0,0 +1,327 @@
use {
crate::{align_down, align_up, error::MapError},
alloc::sync::Arc,
core::{
convert::TryFrom as _,
ptr::{copy_nonoverlapping, NonNull},
// sync::atomic::{AtomicU8, Ordering::*},
},
gpu_alloc_types::{MappedMemoryRange, MemoryDevice, MemoryPropertyFlags},
};
#[derive(Debug)]
struct Relevant;
impl Drop for Relevant {
fn drop(&mut self) {
report_error_on_drop!("Memory block wasn't deallocated");
}
}
/// Memory block allocated by `GpuAllocator`.
#[derive(Debug)]
pub struct MemoryBlock<M> {
memory_type: u32,
props: MemoryPropertyFlags,
offset: u64,
size: u64,
atom_mask: u64,
mapped: bool,
flavor: MemoryBlockFlavor<M>,
relevant: Relevant,
}
impl<M> MemoryBlock<M> {
pub(crate) fn new(
memory_type: u32,
props: MemoryPropertyFlags,
offset: u64,
size: u64,
atom_mask: u64,
flavor: MemoryBlockFlavor<M>,
) -> Self {
isize::try_from(atom_mask).expect("`atom_mask` is too large");
MemoryBlock {
memory_type,
props,
offset,
size,
atom_mask,
flavor,
mapped: false,
relevant: Relevant,
}
}
pub(crate) fn deallocate(self) -> MemoryBlockFlavor<M> {
core::mem::forget(self.relevant);
self.flavor
}
}
unsafe impl<M> Sync for MemoryBlock<M> where M: Sync {}
unsafe impl<M> Send for MemoryBlock<M> where M: Send {}
#[derive(Debug)]
pub(crate) enum MemoryBlockFlavor<M> {
Dedicated {
memory: M,
},
Buddy {
chunk: usize,
index: usize,
ptr: Option<NonNull<u8>>,
memory: Arc<M>,
},
FreeList {
chunk: u64,
ptr: Option<NonNull<u8>>,
memory: Arc<M>,
},
}
impl<M> MemoryBlock<M> {
/// Returns reference to parent memory object.
#[inline(always)]
pub fn memory(&self) -> &M {
match &self.flavor {
MemoryBlockFlavor::Dedicated { memory } => memory,
MemoryBlockFlavor::Buddy { memory, .. } => memory,
MemoryBlockFlavor::FreeList { memory, .. } => memory,
}
}
/// Returns offset in bytes from start of memory object to start of this block.
#[inline(always)]
pub fn offset(&self) -> u64 {
self.offset
}
/// Returns size of this memory block.
#[inline(always)]
pub fn size(&self) -> u64 {
self.size
}
/// Returns memory property flags for parent memory object.
#[inline(always)]
pub fn props(&self) -> MemoryPropertyFlags {
self.props
}
/// Returns index of type of parent memory object.
#[inline(always)]
pub fn memory_type(&self) -> u32 {
self.memory_type
}
/// Returns pointer to mapped memory range of this block.
/// This block becomes mapped.
///
/// The user of returned pointer must guarantee that any previously submitted command that writes to this range has completed
/// before the host reads from or writes to that range,
/// and that any previously submitted command that reads from that range has completed
/// before the host writes to that region.
/// If the device memory was allocated without the `HOST_COHERENT` property flag set,
/// these guarantees must be made for an extended range:
/// the user must round down the start of the range to the nearest multiple of `non_coherent_atom_size`,
/// and round the end of the range up to the nearest multiple of `non_coherent_atom_size`.
///
/// # Panics
///
/// This function panics if block is currently mapped.
///
/// # Safety
///
/// `block` must have been allocated from specified `device`.
#[inline(always)]
pub unsafe fn map(
&mut self,
device: &impl MemoryDevice<M>,
offset: u64,
size: usize,
) -> Result<NonNull<u8>, MapError> {
let size_u64 = u64::try_from(size).expect("`size` doesn't fit device address space");
assert!(offset < self.size, "`offset` is out of memory block bounds");
assert!(
size_u64 <= self.size - offset,
"`offset + size` is out of memory block bounds"
);
let ptr = match &mut self.flavor {
MemoryBlockFlavor::Dedicated { memory } => {
let end = align_up(offset + size_u64, self.atom_mask)
.expect("mapping end doesn't fit device address space");
let aligned_offset = align_down(offset, self.atom_mask);
if !acquire_mapping(&mut self.mapped) {
return Err(MapError::AlreadyMapped);
}
let result =
device.map_memory(memory, self.offset + aligned_offset, end - aligned_offset);
match result {
// the overflow is checked in `Self::new()`
Ok(ptr) => {
let ptr_offset = (offset - aligned_offset) as isize;
ptr.as_ptr().offset(ptr_offset)
}
Err(err) => {
release_mapping(&mut self.mapped);
return Err(err.into());
}
}
}
MemoryBlockFlavor::FreeList { ptr: Some(ptr), .. }
| MemoryBlockFlavor::Buddy { ptr: Some(ptr), .. } => {
if !acquire_mapping(&mut self.mapped) {
return Err(MapError::AlreadyMapped);
}
let offset_isize = isize::try_from(offset)
.expect("Buddy and linear block should fit host address space");
ptr.as_ptr().offset(offset_isize)
}
_ => return Err(MapError::NonHostVisible),
};
Ok(NonNull::new_unchecked(ptr))
}
/// Unmaps memory range of this block that was previously mapped with `Block::map`.
/// This block becomes unmapped.
///
/// # Panics
///
/// This function panics if this block is not currently mapped.
///
/// # Safety
///
/// `block` must have been allocated from specified `device`.
#[inline(always)]
pub unsafe fn unmap(&mut self, device: &impl MemoryDevice<M>) -> bool {
if !release_mapping(&mut self.mapped) {
return false;
}
match &mut self.flavor {
MemoryBlockFlavor::Dedicated { memory } => {
device.unmap_memory(memory);
}
MemoryBlockFlavor::Buddy { .. } => {}
MemoryBlockFlavor::FreeList { .. } => {}
}
true
}
/// Transiently maps block memory range and copies specified data
/// to the mapped memory range.
///
/// # Panics
///
/// This function panics if block is currently mapped.
///
/// # Safety
///
/// `block` must have been allocated from specified `device`.
/// The caller must guarantee that any previously submitted command that reads or writes to this range has completed.
#[inline(always)]
pub unsafe fn write_bytes(
&mut self,
device: &impl MemoryDevice<M>,
offset: u64,
data: &[u8],
) -> Result<(), MapError> {
let size = data.len();
let ptr = self.map(device, offset, size)?;
copy_nonoverlapping(data.as_ptr(), ptr.as_ptr(), size);
let result = if !self.coherent() {
let aligned_offset = align_down(offset, self.atom_mask);
let end = align_up(offset + data.len() as u64, self.atom_mask).unwrap();
device.flush_memory_ranges(&[MappedMemoryRange {
memory: self.memory(),
offset: self.offset + aligned_offset,
size: end - aligned_offset,
}])
} else {
Ok(())
};
self.unmap(device);
result.map_err(Into::into)
}
/// Transiently maps block memory range and copies specified data
/// from the mapped memory range.
///
/// # Panics
///
/// This function panics if block is currently mapped.
///
/// # Safety
///
/// `block` must have been allocated from specified `device`.
/// The caller must guarantee that any previously submitted command that writes to this range has completed.
#[inline(always)]
pub unsafe fn read_bytes(
&mut self,
device: &impl MemoryDevice<M>,
offset: u64,
data: &mut [u8],
) -> Result<(), MapError> {
#[cfg(feature = "tracing")]
{
if !self.cached() {
tracing::warn!("Reading from non-cached memory may be slow. Consider allocating HOST_CACHED memory block for host reads.")
}
}
let size = data.len();
let ptr = self.map(device, offset, size)?;
let result = if !self.coherent() {
let aligned_offset = align_down(offset, self.atom_mask);
let end = align_up(offset + data.len() as u64, self.atom_mask).unwrap();
device.invalidate_memory_ranges(&[MappedMemoryRange {
memory: self.memory(),
offset: self.offset + aligned_offset,
size: end - aligned_offset,
}])
} else {
Ok(())
};
if result.is_ok() {
copy_nonoverlapping(ptr.as_ptr(), data.as_mut_ptr(), size);
}
self.unmap(device);
result.map_err(Into::into)
}
fn coherent(&self) -> bool {
self.props.contains(MemoryPropertyFlags::HOST_COHERENT)
}
#[cfg(feature = "tracing")]
fn cached(&self) -> bool {
self.props.contains(MemoryPropertyFlags::HOST_CACHED)
}
}
fn acquire_mapping(mapped: &mut bool) -> bool {
if *mapped {
false
} else {
*mapped = true;
true
}
}
fn release_mapping(mapped: &mut bool) -> bool {
if *mapped {
*mapped = false;
true
} else {
false
}
}
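
The `write_bytes` and `read_bytes` helpers above wrap the full map, flush/invalidate, copy, unmap sequence. A small usage sketch, assuming `device` implements `MemoryDevice<M>` and `block` is a host-visible `MemoryBlock<M>` requested with `UsageFlags::HOST_ACCESS`:

unsafe {
    // Map, copy in, flush if the memory is non-coherent, then unmap.
    block
        .write_bytes(device, 0, &[1u8, 2, 3, 4])
        .expect("block is host-visible");

    // Map, invalidate if non-coherent, copy out, then unmap.
    let mut readback = [0u8; 4];
    block
        .read_bytes(device, 0, &mut readback)
        .expect("block is host-visible");
    assert_eq!(readback, [1, 2, 3, 4]);
}

As the warning inside `read_bytes` suggests, blocks intended for readback should be requested with `UsageFlags::DOWNLOAD` so a host-cached memory type is preferred.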

460
vendor/gpu-alloc/src/buddy.rs vendored Normal file

@@ -0,0 +1,460 @@
use {
crate::{
align_up, error::AllocationError, heap::Heap, slab::Slab, unreachable_unchecked,
util::try_arc_unwrap, MemoryBounds,
},
alloc::{sync::Arc, vec::Vec},
core::{convert::TryFrom as _, mem::replace, ptr::NonNull},
gpu_alloc_types::{AllocationFlags, DeviceMapError, MemoryDevice, MemoryPropertyFlags},
};
#[derive(Debug)]
pub(crate) struct BuddyBlock<M> {
pub memory: Arc<M>,
pub ptr: Option<NonNull<u8>>,
pub offset: u64,
pub size: u64,
pub chunk: usize,
pub index: usize,
}
unsafe impl<M> Sync for BuddyBlock<M> where M: Sync {}
unsafe impl<M> Send for BuddyBlock<M> where M: Send {}
#[derive(Clone, Copy, Debug)]
enum PairState {
Exhausted,
Ready {
ready: Side,
next: usize,
prev: usize,
},
}
impl PairState {
unsafe fn replace_next(&mut self, value: usize) -> usize {
match self {
PairState::Exhausted => unreachable_unchecked(),
PairState::Ready { next, .. } => replace(next, value),
}
}
unsafe fn replace_prev(&mut self, value: usize) -> usize {
match self {
PairState::Exhausted => unreachable_unchecked(),
PairState::Ready { prev, .. } => replace(prev, value),
}
}
}
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum Side {
Left,
Right,
}
use Side::*;
#[derive(Debug)]
struct PairEntry {
state: PairState,
chunk: usize,
offset: u64,
parent: Option<usize>,
}
struct SizeBlockEntry {
chunk: usize,
offset: u64,
index: usize,
}
#[derive(Debug)]
struct Size {
next_ready: usize,
pairs: Slab<PairEntry>,
}
#[derive(Debug)]
enum Release {
None,
Parent(usize),
Chunk(usize),
}
impl Size {
fn new() -> Self {
Size {
pairs: Slab::new(),
next_ready: 0,
}
}
unsafe fn add_pair_and_acquire_left(
&mut self,
chunk: usize,
offset: u64,
parent: Option<usize>,
) -> SizeBlockEntry {
if self.next_ready < self.pairs.len() {
unreachable_unchecked()
}
let index = self.pairs.insert(PairEntry {
state: PairState::Exhausted,
chunk,
offset,
parent,
});
let entry = self.pairs.get_unchecked_mut(index);
entry.state = PairState::Ready {
next: index,
prev: index,
ready: Right, // Left is allocated.
};
self.next_ready = index;
SizeBlockEntry {
chunk,
offset,
index: index << 1,
}
}
fn acquire(&mut self, size: u64) -> Option<SizeBlockEntry> {
if self.next_ready >= self.pairs.len() {
return None;
}
let ready = self.next_ready;
let entry = unsafe { self.pairs.get_unchecked_mut(ready) };
let chunk = entry.chunk;
let offset = entry.offset;
let bit = match entry.state {
PairState::Exhausted => unsafe { unreachable_unchecked() },
PairState::Ready { ready, next, prev } => {
entry.state = PairState::Exhausted;
if prev == self.next_ready {
// The only ready entry.
debug_assert_eq!(next, self.next_ready);
self.next_ready = self.pairs.len();
} else {
let prev_entry = unsafe { self.pairs.get_unchecked_mut(prev) };
let prev_next = unsafe { prev_entry.state.replace_next(next) };
debug_assert_eq!(prev_next, self.next_ready);
let next_entry = unsafe { self.pairs.get_unchecked_mut(next) };
let next_prev = unsafe { next_entry.state.replace_prev(prev) };
debug_assert_eq!(next_prev, self.next_ready);
self.next_ready = next;
}
match ready {
Left => 0,
Right => 1,
}
}
};
Some(SizeBlockEntry {
chunk,
offset: offset + bit as u64 * size,
index: (ready << 1) | bit as usize,
})
}
fn release(&mut self, index: usize) -> Release {
let side = match index & 1 {
0 => Side::Left,
1 => Side::Right,
_ => unsafe { unreachable_unchecked() },
};
let entry_index = index >> 1;
let len = self.pairs.len();
let entry = self.pairs.get_mut(entry_index);
let chunk = entry.chunk;
let offset = entry.offset;
let parent = entry.parent;
match entry.state {
PairState::Exhausted => {
if self.next_ready == len {
entry.state = PairState::Ready {
ready: side,
next: entry_index,
prev: entry_index,
};
self.next_ready = entry_index;
} else {
debug_assert!(self.next_ready < len);
let next = self.next_ready;
let next_entry = unsafe { self.pairs.get_unchecked_mut(next) };
let prev = unsafe { next_entry.state.replace_prev(entry_index) };
let prev_entry = unsafe { self.pairs.get_unchecked_mut(prev) };
let prev_next = unsafe { prev_entry.state.replace_next(entry_index) };
debug_assert_eq!(prev_next, next);
let entry = unsafe { self.pairs.get_unchecked_mut(entry_index) };
entry.state = PairState::Ready {
ready: side,
next,
prev,
};
}
Release::None
}
PairState::Ready { ready, .. } if ready == side => {
panic!("Attempt to dealloate already free block")
}
PairState::Ready { next, prev, .. } => {
unsafe {
self.pairs.remove_unchecked(entry_index);
}
if prev == entry_index {
debug_assert_eq!(next, entry_index);
self.next_ready = self.pairs.len();
} else {
let prev_entry = unsafe { self.pairs.get_unchecked_mut(prev) };
let prev_next = unsafe { prev_entry.state.replace_next(next) };
debug_assert_eq!(prev_next, entry_index);
let next_entry = unsafe { self.pairs.get_unchecked_mut(next) };
let next_prev = unsafe { next_entry.state.replace_prev(prev) };
debug_assert_eq!(next_prev, entry_index);
self.next_ready = next;
}
match parent {
Some(parent) => Release::Parent(parent),
None => {
debug_assert_eq!(offset, 0);
Release::Chunk(chunk)
}
}
}
}
}
}
#[derive(Debug)]
struct Chunk<M> {
memory: Arc<M>,
ptr: Option<NonNull<u8>>,
size: u64,
}
#[derive(Debug)]
pub(crate) struct BuddyAllocator<M> {
minimal_size: u64,
chunks: Slab<Chunk<M>>,
sizes: Vec<Size>,
memory_type: u32,
props: MemoryPropertyFlags,
atom_mask: u64,
}
unsafe impl<M> Sync for BuddyAllocator<M> where M: Sync {}
unsafe impl<M> Send for BuddyAllocator<M> where M: Send {}
impl<M> BuddyAllocator<M>
where
M: MemoryBounds + 'static,
{
pub fn new(
minimal_size: u64,
initial_dedicated_size: u64,
memory_type: u32,
props: MemoryPropertyFlags,
atom_mask: u64,
) -> Self {
assert!(
minimal_size.is_power_of_two(),
"Minimal allocation size of buddy allocator must be power of two"
);
assert!(
initial_dedicated_size.is_power_of_two(),
"Dedicated allocation size of buddy allocator must be power of two"
);
let initial_sizes = (initial_dedicated_size
.trailing_zeros()
.saturating_sub(minimal_size.trailing_zeros())) as usize;
BuddyAllocator {
minimal_size,
chunks: Slab::new(),
sizes: (0..initial_sizes).map(|_| Size::new()).collect(),
memory_type,
props,
atom_mask: atom_mask | (minimal_size - 1),
}
}
#[cfg_attr(feature = "tracing", tracing::instrument(skip(self, device)))]
pub unsafe fn alloc(
&mut self,
device: &impl MemoryDevice<M>,
size: u64,
align_mask: u64,
flags: AllocationFlags,
heap: &mut Heap,
allocations_remains: &mut u32,
) -> Result<BuddyBlock<M>, AllocationError> {
let align_mask = align_mask | self.atom_mask;
let size = align_up(size, align_mask)
.and_then(|size| size.checked_next_power_of_two())
.ok_or(AllocationError::OutOfDeviceMemory)?;
let size = size.max(self.minimal_size);
let size_index = size.trailing_zeros() - self.minimal_size.trailing_zeros();
let size_index =
usize::try_from(size_index).map_err(|_| AllocationError::OutOfDeviceMemory)?;
while self.sizes.len() <= size_index {
self.sizes.push(Size::new());
}
let host_visible = self.host_visible();
let mut candidate_size_index = size_index;
let (mut entry, entry_size_index) = loop {
let sizes_len = self.sizes.len();
let candidate_size_entry = &mut self.sizes[candidate_size_index];
let candidate_size = self.minimal_size << candidate_size_index;
if let Some(entry) = candidate_size_entry.acquire(candidate_size) {
break (entry, candidate_size_index);
}
if sizes_len == candidate_size_index + 1 {
// That's size of device allocation.
if *allocations_remains == 0 {
return Err(AllocationError::TooManyObjects);
}
let chunk_size = self.minimal_size << (candidate_size_index + 1);
let mut memory = device.allocate_memory(chunk_size, self.memory_type, flags)?;
*allocations_remains -= 1;
heap.alloc(chunk_size);
let ptr = if host_visible {
match device.map_memory(&mut memory, 0, chunk_size) {
Ok(ptr) => Some(ptr),
Err(DeviceMapError::OutOfDeviceMemory) => {
return Err(AllocationError::OutOfDeviceMemory)
}
Err(DeviceMapError::MapFailed) | Err(DeviceMapError::OutOfHostMemory) => {
return Err(AllocationError::OutOfHostMemory)
}
}
} else {
None
};
let chunk = self.chunks.insert(Chunk {
memory: Arc::new(memory),
ptr,
size: chunk_size,
});
let entry = candidate_size_entry.add_pair_and_acquire_left(chunk, 0, None);
break (entry, candidate_size_index);
}
candidate_size_index += 1;
};
for size_index in (size_index..entry_size_index).rev() {
let size_entry = &mut self.sizes[size_index];
entry =
size_entry.add_pair_and_acquire_left(entry.chunk, entry.offset, Some(entry.index));
}
let chunk_entry = self.chunks.get_unchecked(entry.chunk);
debug_assert!(
entry
.offset
.checked_add(size)
.map_or(false, |end| end <= chunk_entry.size),
"Offset + size is not in chunk bounds"
);
Ok(BuddyBlock {
memory: chunk_entry.memory.clone(),
ptr: chunk_entry
.ptr
.map(|ptr| NonNull::new_unchecked(ptr.as_ptr().add(entry.offset as usize))),
offset: entry.offset,
size,
chunk: entry.chunk,
index: entry.index,
})
}
#[cfg_attr(feature = "tracing", tracing::instrument(skip(self, device)))]
pub unsafe fn dealloc(
&mut self,
device: &impl MemoryDevice<M>,
block: BuddyBlock<M>,
heap: &mut Heap,
allocations_remains: &mut u32,
) {
debug_assert!(block.size.is_power_of_two());
let size_index =
(block.size.trailing_zeros() - self.minimal_size.trailing_zeros()) as usize;
let mut release_index = block.index;
let mut release_size_index = size_index;
loop {
match self.sizes[release_size_index].release(release_index) {
Release::Parent(parent) => {
release_size_index += 1;
release_index = parent;
}
Release::Chunk(chunk) => {
debug_assert_eq!(chunk, block.chunk);
debug_assert_eq!(
self.chunks.get(chunk).size,
self.minimal_size << (release_size_index + 1)
);
let chunk = self.chunks.remove(chunk);
drop(block);
let memory = try_arc_unwrap(chunk.memory)
.expect("Memory shared after last block deallocated");
device.deallocate_memory(memory);
*allocations_remains += 1;
heap.dealloc(chunk.size);
return;
}
Release::None => return,
}
}
}
fn host_visible(&self) -> bool {
self.props.contains(MemoryPropertyFlags::HOST_VISIBLE)
}
}
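
The buddy allocator above rounds every request up to a power-of-two size class derived from `minimal_size`. A standalone illustration of that computation (values are arbitrary examples, not taken from the crate):

// Mirrors the rounding in `BuddyAllocator::alloc`: align up, round to a power of two,
// clamp to the minimal class, then derive the size-class index.
let minimal_size: u64 = 256;
let align_mask: u64 = 63;      // 64-byte alignment expressed as a mask
let request_size: u64 = 1000;

let size = ((request_size + align_mask) & !align_mask) // align_up
    .next_power_of_two()
    .max(minimal_size);
let size_index = size.trailing_zeros() - minimal_size.trailing_zeros();

assert_eq!(size, 1024);
assert_eq!(size_index, 2); // classes: 256 (0), 512 (1), 1024 (2), ...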

77
vendor/gpu-alloc/src/config.rs vendored Normal file

@@ -0,0 +1,77 @@
/// Configuration for [`GpuAllocator`]
///
/// [`GpuAllocator`]: type.GpuAllocator
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Config {
/// Size in bytes of request that will be served by dedicated memory object.
/// This value should be large enough to not exhaust memory object limit
/// and not use slow memory object allocation when it is not necessary.
pub dedicated_threshold: u64,
/// Size in bytes of request that will be served by dedicated memory object if preferred.
/// This value should be large enough to not exhaust memory object limit
/// and not use slow memory object allocation when it is not necessary.
///
/// This won't make much sense if this value is larger than `dedicated_threshold`.
pub preferred_dedicated_threshold: u64,
/// Size in bytes of transient memory request that will be served by dedicated memory object.
/// This value should be large enough to not exhaust memory object limit
/// and not use slow memory object allocation when it is not necessary.
///
/// This won't make much sense if this value is less than `dedicated_threshold`.
pub transient_dedicated_threshold: u64,
/// Size in bytes of first chunk in free-list allocator.
pub starting_free_list_chunk: u64,
/// Upper limit for size in bytes of chunks in free-list allocator.
pub final_free_list_chunk: u64,
/// Minimal size for buddy allocator.
pub minimal_buddy_size: u64,
/// Initial memory object size for buddy allocator.
/// If less than `minimal_buddy_size` then `minimal_buddy_size` is used instead.
pub initial_buddy_dedicated_size: u64,
}
impl Config {
/// Returns default configuration.
///
/// This is not `Default` implementation to discourage usage outside of
/// prototyping.
///
/// Proper configuration should depend on hardware and intended usage.\
/// But those values can be used as starting point.\
/// Note that they can simply not work for some platforms with lesser
/// memory capacity than today's "modern" GPU (year 2020).
pub fn i_am_prototyping() -> Self {
// Assume that today's modern GPU is made of 1024 potatoes.
let potato = Config::i_am_potato();
Config {
dedicated_threshold: potato.dedicated_threshold * 1024,
preferred_dedicated_threshold: potato.preferred_dedicated_threshold * 1024,
transient_dedicated_threshold: potato.transient_dedicated_threshold * 1024,
starting_free_list_chunk: potato.starting_free_list_chunk * 1024,
final_free_list_chunk: potato.final_free_list_chunk * 1024,
minimal_buddy_size: potato.minimal_buddy_size * 1024,
initial_buddy_dedicated_size: potato.initial_buddy_dedicated_size * 1024,
}
}
/// Returns default configuration for average sized potato.
pub fn i_am_potato() -> Self {
Config {
dedicated_threshold: 32 * 1024,
preferred_dedicated_threshold: 1024,
transient_dedicated_threshold: 128 * 1024,
starting_free_list_chunk: 8 * 1024,
final_free_list_chunk: 128 * 1024,
minimal_buddy_size: 1,
initial_buddy_dedicated_size: 8 * 1024,
}
}
}
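
Beyond the two presets above, a `Config` can be built by hand. The values below are purely illustrative; as the documentation notes, proper thresholds depend on the target hardware and workload.

let config = gpu_alloc::Config {
    dedicated_threshold: 32 * 1024 * 1024,
    preferred_dedicated_threshold: 8 * 1024 * 1024,
    transient_dedicated_threshold: 128 * 1024 * 1024,
    starting_free_list_chunk: 8 * 1024 * 1024,
    final_free_list_chunk: 128 * 1024 * 1024,
    minimal_buddy_size: 256,
    initial_buddy_dedicated_size: 8 * 1024 * 1024,
};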

116
vendor/gpu-alloc/src/error.rs vendored Normal file

@@ -0,0 +1,116 @@
use {
core::fmt::{self, Display},
gpu_alloc_types::{DeviceMapError, OutOfMemory},
};
/// Enumeration of possible errors that may occur during memory allocation.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum AllocationError {
/// Backend reported that device memory has been exhausted.\
/// Deallocating device memory from the same heap may increase chance
/// that another allocation would succeed.
OutOfDeviceMemory,
/// Backend reported that host memory has been exhausted.\
/// Deallocating host memory may increase chance that another allocation would succeed.
OutOfHostMemory,
/// Allocation request cannot be fulfilled because no memory type allowed
/// by the `Request.memory_types` mask is compatible with `request.usage`.
NoCompatibleMemoryTypes,
/// Reached limit on allocated memory objects count.\
/// Deallocating device memory, especially dedicated memory blocks, may increase the chance
/// that another allocation would succeed.
///
/// If this error is returned when memory heaps are far from exhausted
/// `Config` should be tweaked to allocate larger memory objects.
TooManyObjects,
}
impl From<OutOfMemory> for AllocationError {
fn from(err: OutOfMemory) -> Self {
match err {
OutOfMemory::OutOfDeviceMemory => AllocationError::OutOfDeviceMemory,
OutOfMemory::OutOfHostMemory => AllocationError::OutOfHostMemory,
}
}
}
impl Display for AllocationError {
fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
AllocationError::OutOfDeviceMemory => fmt.write_str("Device memory exhausted"),
AllocationError::OutOfHostMemory => fmt.write_str("Host memory exhausted"),
AllocationError::NoCompatibleMemoryTypes => fmt.write_str(
"No compatible memory types from requested types support requested usage",
),
AllocationError::TooManyObjects => {
fmt.write_str("Reached limit on allocated memory objects count")
}
}
}
}
#[cfg(feature = "std")]
impl std::error::Error for AllocationError {}
/// Enumeration of possible errors that may occur during memory mapping.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum MapError {
/// Backend reported that device memory has been exhausted.\
/// Deallocating device memory from the same heap may increase chance
/// that another mapping would succeed.
OutOfDeviceMemory,
/// Backend reported that host memory has been exhausted.\
/// Deallocating host memory may increase chance that another mapping would succeed.
OutOfHostMemory,
/// Attempt to map a memory block with a non-host-visible memory type.\
/// Ensure `UsageFlags::HOST_ACCESS` is included in the allocation request
/// when memory mapping is intended.
NonHostVisible,
/// Map failed for implementation specific reason.\
/// For Vulkan backend this includes failed attempt
/// to allocate large enough virtual address space.
MapFailed,
/// Mapping failed due to block being already mapped.
AlreadyMapped,
}
impl From<DeviceMapError> for MapError {
fn from(err: DeviceMapError) -> Self {
match err {
DeviceMapError::OutOfDeviceMemory => MapError::OutOfDeviceMemory,
DeviceMapError::OutOfHostMemory => MapError::OutOfHostMemory,
DeviceMapError::MapFailed => MapError::MapFailed,
}
}
}
impl From<OutOfMemory> for MapError {
fn from(err: OutOfMemory) -> Self {
match err {
OutOfMemory::OutOfDeviceMemory => MapError::OutOfDeviceMemory,
OutOfMemory::OutOfHostMemory => MapError::OutOfHostMemory,
}
}
}
impl Display for MapError {
fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
MapError::OutOfDeviceMemory => fmt.write_str("Device memory exhausted"),
MapError::OutOfHostMemory => fmt.write_str("Host memory exhausted"),
MapError::MapFailed => fmt.write_str("Failed to map memory object"),
MapError::NonHostVisible => fmt.write_str("Impossible to map non-host-visible memory"),
MapError::AlreadyMapped => fmt.write_str("Block is already mapped"),
}
}
}
#[cfg(feature = "std")]
impl std::error::Error for MapError {}
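
A short sketch of reacting to the `AllocationError` variants above; `try_alloc()` is a hypothetical stand-in for any call that returns `Result<_, AllocationError>`:

match try_alloc() {
    Ok(_block) => { /* bind and use the block */ }
    Err(gpu_alloc::AllocationError::OutOfDeviceMemory) => {
        // Free or spill other resources from the same heap, then retry.
    }
    Err(gpu_alloc::AllocationError::TooManyObjects) => {
        // Raise the `Config` thresholds so fewer dedicated memory objects are created.
    }
    Err(err) => panic!("allocation failed: {}", err),
}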

528
vendor/gpu-alloc/src/freelist.rs vendored Normal file

@@ -0,0 +1,528 @@
use {
crate::{
align_down, align_up,
error::AllocationError,
heap::Heap,
util::{arc_unwrap, is_arc_unique},
MemoryBounds,
},
alloc::{sync::Arc, vec::Vec},
core::{cmp::Ordering, ptr::NonNull},
gpu_alloc_types::{AllocationFlags, DeviceMapError, MemoryDevice, MemoryPropertyFlags},
};
unsafe fn opt_ptr_add(ptr: Option<NonNull<u8>>, size: u64) -> Option<NonNull<u8>> {
ptr.map(|ptr| {
// Size is within memory region started at `ptr`.
// size is within `chunk_size` that fits `isize`.
NonNull::new_unchecked(ptr.as_ptr().offset(size as isize))
})
}
#[derive(Debug)]
pub(super) struct FreeList<M> {
array: Vec<FreeListRegion<M>>,
counter: u64,
}
impl<M> FreeList<M> {
pub fn new() -> Self {
FreeList {
array: Vec::new(),
counter: 0,
}
}
pub fn get_block_from_new_memory(
&mut self,
memory: Arc<M>,
memory_size: u64,
ptr: Option<NonNull<u8>>,
align_mask: u64,
size: u64,
) -> FreeListBlock<M> {
debug_assert!(size <= memory_size);
self.counter += 1;
self.array.push(FreeListRegion {
memory,
ptr,
chunk: self.counter,
start: 0,
end: memory_size,
});
self.get_block_at(self.array.len() - 1, align_mask, size)
}
pub fn get_block(&mut self, align_mask: u64, size: u64) -> Option<FreeListBlock<M>> {
let (index, _) = self.array.iter().enumerate().rev().find(|(_, region)| {
match region.end.checked_sub(size) {
Some(start) => {
let aligned_start = align_down(start, align_mask);
aligned_start >= region.start
}
None => false,
}
})?;
Some(self.get_block_at(index, align_mask, size))
}
fn get_block_at(&mut self, index: usize, align_mask: u64, size: u64) -> FreeListBlock<M> {
let region = &mut self.array[index];
let start = region.end - size;
let aligned_start = align_down(start, align_mask);
if aligned_start > region.start {
let block = FreeListBlock {
offset: aligned_start,
size: region.end - aligned_start,
chunk: region.chunk,
ptr: unsafe { opt_ptr_add(region.ptr, aligned_start - region.start) },
memory: region.memory.clone(),
};
region.end = aligned_start;
block
} else {
debug_assert_eq!(aligned_start, region.start);
let region = self.array.remove(index);
region.into_block()
}
}
pub fn insert_block(&mut self, block: FreeListBlock<M>) {
match self.array.binary_search_by(|b| b.cmp(&block)) {
Ok(_) => {
panic!("Overlapping block found in free list");
}
Err(index) if self.array.len() > index => match &mut self.array[..=index] {
[] => unreachable!(),
[next] => {
debug_assert!(!next.is_suffix_block(&block));
if next.is_prefix_block(&block) {
next.merge_prefix_block(block);
} else {
self.array.insert(0, FreeListRegion::from_block(block));
}
}
[.., prev, next] => {
debug_assert!(!prev.is_prefix_block(&block));
debug_assert!(!next.is_suffix_block(&block));
if next.is_prefix_block(&block) {
next.merge_prefix_block(block);
if prev.consecutive(&*next) {
let next = self.array.remove(index);
let prev = &mut self.array[index - 1];
prev.merge(next);
}
} else if prev.is_suffix_block(&block) {
prev.merge_suffix_block(block);
} else {
self.array.insert(index, FreeListRegion::from_block(block));
}
}
},
Err(_) => match &mut self.array[..] {
[] => self.array.push(FreeListRegion::from_block(block)),
[.., prev] => {
debug_assert!(!prev.is_prefix_block(&block));
if prev.is_suffix_block(&block) {
prev.merge_suffix_block(block);
} else {
self.array.push(FreeListRegion::from_block(block));
}
}
},
}
}
pub fn drain(&mut self, keep_last: bool) -> Option<impl Iterator<Item = (M, u64)> + '_> {
// Time to deallocate
let len = self.array.len();
let mut del = 0;
{
let regions = &mut self.array[..];
for i in 0..len {
if (i < len - 1 || !keep_last) && is_arc_unique(&mut regions[i].memory) {
del += 1;
} else if del > 0 {
regions.swap(i - del, i);
}
}
}
if del > 0 {
Some(self.array.drain(len - del..).map(move |region| {
debug_assert_eq!(region.start, 0);
(unsafe { arc_unwrap(region.memory) }, region.end)
}))
} else {
None
}
}
}
#[derive(Debug)]
struct FreeListRegion<M> {
memory: Arc<M>,
ptr: Option<NonNull<u8>>,
chunk: u64,
start: u64,
end: u64,
}
unsafe impl<M> Sync for FreeListRegion<M> where M: Sync {}
unsafe impl<M> Send for FreeListRegion<M> where M: Send {}
impl<M> FreeListRegion<M> {
pub fn cmp(&self, block: &FreeListBlock<M>) -> Ordering {
debug_assert_eq!(
Arc::ptr_eq(&self.memory, &block.memory),
self.chunk == block.chunk
);
if self.chunk == block.chunk {
debug_assert_eq!(
Ord::cmp(&self.start, &block.offset),
Ord::cmp(&self.end, &(block.offset + block.size)),
"Free region {{ start: {}, end: {} }} overlaps with block {{ offset: {}, size: {} }}",
self.start,
self.end,
block.offset,
block.size,
);
}
Ord::cmp(&self.chunk, &block.chunk).then(Ord::cmp(&self.start, &block.offset))
}
fn from_block(block: FreeListBlock<M>) -> Self {
FreeListRegion {
memory: block.memory,
chunk: block.chunk,
ptr: block.ptr,
start: block.offset,
end: block.offset + block.size,
}
}
fn into_block(self) -> FreeListBlock<M> {
FreeListBlock {
memory: self.memory,
chunk: self.chunk,
ptr: self.ptr,
offset: self.start,
size: self.end - self.start,
}
}
fn consecutive(&self, other: &Self) -> bool {
if self.chunk != other.chunk {
return false;
}
debug_assert!(Arc::ptr_eq(&self.memory, &other.memory));
debug_assert_eq!(
Ord::cmp(&self.start, &other.start),
Ord::cmp(&self.end, &other.end)
);
self.end == other.start
}
fn merge(&mut self, next: FreeListRegion<M>) {
debug_assert!(self.consecutive(&next));
self.end = next.end;
}
fn is_prefix_block(&self, block: &FreeListBlock<M>) -> bool {
if self.chunk != block.chunk {
return false;
}
debug_assert!(Arc::ptr_eq(&self.memory, &block.memory));
debug_assert_eq!(
Ord::cmp(&self.start, &block.offset),
Ord::cmp(&self.end, &(block.offset + block.size))
);
self.start == (block.offset + block.size)
}
fn merge_prefix_block(&mut self, block: FreeListBlock<M>) {
debug_assert!(self.is_prefix_block(&block));
self.start = block.offset;
self.ptr = block.ptr;
}
fn is_suffix_block(&self, block: &FreeListBlock<M>) -> bool {
if self.chunk != block.chunk {
return false;
}
debug_assert!(Arc::ptr_eq(&self.memory, &block.memory));
debug_assert_eq!(
Ord::cmp(&self.start, &block.offset),
Ord::cmp(&self.end, &(block.offset + block.size))
);
self.end == block.offset
}
fn merge_suffix_block(&mut self, block: FreeListBlock<M>) {
debug_assert!(self.is_suffix_block(&block));
self.end += block.size;
}
}
#[derive(Debug)]
pub struct FreeListBlock<M> {
pub memory: Arc<M>,
pub ptr: Option<NonNull<u8>>,
pub chunk: u64,
pub offset: u64,
pub size: u64,
}
unsafe impl<M> Sync for FreeListBlock<M> where M: Sync {}
unsafe impl<M> Send for FreeListBlock<M> where M: Send {}
#[derive(Debug)]
pub(crate) struct FreeListAllocator<M> {
freelist: FreeList<M>,
chunk_size: u64,
final_chunk_size: u64,
memory_type: u32,
props: MemoryPropertyFlags,
atom_mask: u64,
total_allocations: u64,
total_deallocations: u64,
}
impl<M> Drop for FreeListAllocator<M> {
fn drop(&mut self) {
match Ord::cmp(&self.total_allocations, &self.total_deallocations) {
Ordering::Equal => {}
Ordering::Greater => {
report_error_on_drop!("Not all blocks were deallocated")
}
Ordering::Less => {
report_error_on_drop!("More blocks deallocated than allocated")
}
}
if !self.freelist.array.is_empty() {
report_error_on_drop!(
"FreeListAllocator has free blocks on drop. Allocator should be cleaned"
);
}
}
}
impl<M> FreeListAllocator<M>
where
M: MemoryBounds + 'static,
{
pub fn new(
starting_chunk_size: u64,
final_chunk_size: u64,
memory_type: u32,
props: MemoryPropertyFlags,
atom_mask: u64,
) -> Self {
debug_assert_eq!(
align_down(starting_chunk_size, atom_mask),
starting_chunk_size
);
let starting_chunk_size = min(starting_chunk_size, isize::max_value());
debug_assert_eq!(align_down(final_chunk_size, atom_mask), final_chunk_size);
let final_chunk_size = min(final_chunk_size, isize::max_value());
FreeListAllocator {
freelist: FreeList::new(),
chunk_size: starting_chunk_size,
final_chunk_size,
memory_type,
props,
atom_mask,
total_allocations: 0,
total_deallocations: 0,
}
}
#[cfg_attr(feature = "tracing", tracing::instrument(skip(self, device)))]
pub unsafe fn alloc(
&mut self,
device: &impl MemoryDevice<M>,
size: u64,
align_mask: u64,
flags: AllocationFlags,
heap: &mut Heap,
allocations_remains: &mut u32,
) -> Result<FreeListBlock<M>, AllocationError> {
debug_assert!(
self.final_chunk_size >= size,
"GpuAllocator must not request allocations equal or greater to chunks size"
);
let size = align_up(size, self.atom_mask).expect(
"Any value not greater than final chunk size (which is aligned) has to fit for alignment",
);
let align_mask = align_mask | self.atom_mask;
let host_visible = self.host_visible();
if size <= self.chunk_size {
// Otherwise there can't be any sufficiently large free blocks
if let Some(block) = self.freelist.get_block(align_mask, size) {
self.total_allocations += 1;
return Ok(block);
}
}
// New allocation is required.
if *allocations_remains == 0 {
return Err(AllocationError::TooManyObjects);
}
if size > self.chunk_size {
let multiple = (size - 1) / self.chunk_size + 1;
let multiple = multiple.next_power_of_two();
self.chunk_size = (self.chunk_size * multiple).min(self.final_chunk_size);
}
let mut memory = device.allocate_memory(self.chunk_size, self.memory_type, flags)?;
*allocations_remains -= 1;
heap.alloc(self.chunk_size);
// Map host visible allocations
let ptr = if host_visible {
match device.map_memory(&mut memory, 0, self.chunk_size) {
Ok(ptr) => Some(ptr),
Err(DeviceMapError::MapFailed) => {
#[cfg(feature = "tracing")]
tracing::error!("Failed to map host-visible memory in linear allocator");
device.deallocate_memory(memory);
*allocations_remains += 1;
heap.dealloc(self.chunk_size);
return Err(AllocationError::OutOfHostMemory);
}
Err(DeviceMapError::OutOfDeviceMemory) => {
return Err(AllocationError::OutOfDeviceMemory);
}
Err(DeviceMapError::OutOfHostMemory) => {
return Err(AllocationError::OutOfHostMemory);
}
}
} else {
None
};
let memory = Arc::new(memory);
let block =
self.freelist
.get_block_from_new_memory(memory, self.chunk_size, ptr, align_mask, size);
if self.chunk_size < self.final_chunk_size {
// Double next chunk size
// Limit to final value.
self.chunk_size = (self.chunk_size * 2).min(self.final_chunk_size);
}
self.total_allocations += 1;
Ok(block)
}
#[cfg_attr(feature = "tracing", tracing::instrument(skip(self, device)))]
pub unsafe fn dealloc(
&mut self,
device: &impl MemoryDevice<M>,
block: FreeListBlock<M>,
heap: &mut Heap,
allocations_remains: &mut u32,
) {
debug_assert!(block.size < self.chunk_size);
debug_assert_ne!(block.size, 0);
self.freelist.insert_block(block);
self.total_deallocations += 1;
if let Some(memory) = self.freelist.drain(true) {
memory.for_each(|(memory, size)| {
device.deallocate_memory(memory);
*allocations_remains += 1;
heap.dealloc(size);
});
}
}
/// Deallocates leftover memory objects.
/// Should be used before dropping.
///
/// # Safety
///
/// * `device` must be one with `DeviceProperties` that were provided to create this `GpuAllocator` instance
/// * Same `device` instance must be used for all interactions with one `GpuAllocator` instance
/// and memory blocks allocated from it
#[cfg_attr(feature = "tracing", tracing::instrument(skip(self, device)))]
pub unsafe fn cleanup(
&mut self,
device: &impl MemoryDevice<M>,
heap: &mut Heap,
allocations_remains: &mut u32,
) {
if let Some(memory) = self.freelist.drain(false) {
memory.for_each(|(memory, size)| {
device.deallocate_memory(memory);
*allocations_remains += 1;
heap.dealloc(size);
});
}
#[cfg(feature = "tracing")]
{
if self.total_allocations == self.total_deallocations && !self.freelist.array.is_empty()
{
tracing::error!(
"Some regions were not deallocated on cleanup, although all blocks are free.
This is a bug in `FreeListAllocator`.
See array of free blocks left:
{:#?}",
self.freelist.array,
);
}
}
}
fn host_visible(&self) -> bool {
self.props.contains(MemoryPropertyFlags::HOST_VISIBLE)
}
}
fn min<L, R>(l: L, r: R) -> L
where
R: core::convert::TryInto<L>,
L: Ord,
{
match r.try_into() {
Ok(r) => core::cmp::min(l, r),
Err(_) => l,
}
}
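
The free-list allocator above grows its chunk size geometrically: each new device allocation doubles `chunk_size` until `final_chunk_size` is reached. A standalone illustration using the `i_am_potato` defaults (8 KiB starting chunk, 128 KiB cap):

fn main() {
    let final_chunk_size: u64 = 128 * 1024; // final_free_list_chunk
    let mut chunk_size: u64 = 8 * 1024;     // starting_free_list_chunk
    let mut growth = vec![chunk_size];
    while chunk_size < final_chunk_size {
        chunk_size = (chunk_size * 2).min(final_chunk_size); // same doubling as `alloc`
        growth.push(chunk_size);
    }
    assert_eq!(growth, [8192, 16384, 32768, 65536, 131072]);
}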

32
vendor/gpu-alloc/src/heap.rs vendored Normal file

@@ -0,0 +1,32 @@
#[derive(Debug)]
pub(crate) struct Heap {
size: u64,
used: u64,
allocated: u128,
deallocated: u128,
}
impl Heap {
pub(crate) fn new(size: u64) -> Self {
Heap {
size,
used: 0,
allocated: 0,
deallocated: 0,
}
}
pub(crate) fn size(&mut self) -> u64 {
self.size
}
pub(crate) fn alloc(&mut self, size: u64) {
self.used += size;
self.allocated += u128::from(size);
}
pub(crate) fn dealloc(&mut self, size: u64) {
self.used -= size;
self.deallocated += u128::from(size);
}
}

124
vendor/gpu-alloc/src/lib.rs vendored Normal file

@@ -0,0 +1,124 @@
//!
//! Implementation agnostic memory allocator for Vulkan like APIs.
//!
//! This crate is intended to be used as part of safe API implementations.\
//! Use with caution. There are unsafe functions all over the place.
//!
//! # Usage
//!
//! Start with fetching `DeviceProperties` from `gpu-alloc-<backend>` crate for the backend of choice.\
//! Then create `GpuAllocator` instance and use it for all device memory allocations.\
//! `GpuAllocator` will take care of all necessary bookkeeping like memory object count limit,
//! heap budget and memory mapping.
//!
//! ### Backends implementations
//!
//! Backend supporting crates should not depend on this crate.\
//! Instead they should depend on `gpu-alloc-types`, which is much more stable,
//! allowing `gpu-alloc` to be upgraded without upgrading `gpu-alloc-<backend>`.
//!
#![cfg_attr(not(feature = "std"), no_std)]
extern crate alloc;
#[cfg(feature = "tracing")]
macro_rules! report_error_on_drop {
($($tokens:tt)*) => {{
#[cfg(feature = "std")]
{
if std::thread::panicking() {
return;
}
}
tracing::error!($($tokens)*)
}};
}
#[cfg(all(not(feature = "tracing"), feature = "std"))]
macro_rules! report_error_on_drop {
($($tokens:tt)*) => {{
if std::thread::panicking() {
return;
}
eprintln!($($tokens)*)
}};
}
#[cfg(all(not(feature = "tracing"), not(feature = "std")))]
macro_rules! report_error_on_drop {
($($tokens:tt)*) => {{
panic!($($tokens)*)
}};
}
mod allocator;
mod block;
mod buddy;
mod config;
mod error;
mod freelist;
mod heap;
mod slab;
mod usage;
mod util;
pub use {
self::{allocator::*, block::MemoryBlock, config::*, error::*, usage::*},
gpu_alloc_types::*,
};
/// Memory request for allocator.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct Request {
/// Minimal size of memory block required.
/// Returned block may have larger size,
/// use `MemoryBlock::size` to learn actual size of returned block.
pub size: u64,
/// Minimal alignment mask required.
/// Returned block may have larger alignment,
/// use `MemoryBlock::align` to learn actual alignment of returned block.
pub align_mask: u64,
/// Intended memory usage.
/// Returned block may support additional usages,
/// use `MemoryBlock::props` to learn memory properties of returned block.
pub usage: UsageFlags,
/// Bitset for memory types.
/// Returned block will be from memory type corresponding to one of set bits,
/// use `MemoryBlock::memory_type` to learn memory type index of returned block.
pub memory_types: u32,
}
/// Aligns `value` up to `align_mask`.
/// Returns the smallest integer not less than `value` that is aligned by `align_mask`.
/// Returns `None` on overflow.
pub(crate) fn align_up(value: u64, align_mask: u64) -> Option<u64> {
Some(value.checked_add(align_mask)? & !align_mask)
}
/// Aligns `value` down to `align_mask`.
/// Returns the largest integer not greater than `value` that is aligned by `align_mask`.
pub(crate) fn align_down(value: u64, align_mask: u64) -> u64 {
value & !align_mask
}
#[cfg(debug_assertions)]
#[allow(unused_unsafe)]
unsafe fn unreachable_unchecked() -> ! {
unreachable!()
}
#[cfg(not(debug_assertions))]
unsafe fn unreachable_unchecked() -> ! {
core::hint::unreachable_unchecked()
}
// #[cfg(feature = "tracing")]
use core::fmt::Debug as MemoryBounds;
// #[cfg(not(feature = "tracing"))]
// use core::any::Any as MemoryBounds;

97
vendor/gpu-alloc/src/slab.rs vendored Normal file
View File

@@ -0,0 +1,97 @@
use {crate::unreachable_unchecked, alloc::vec::Vec, core::mem::replace};
#[derive(Debug)]
enum Entry<T> {
Vacant(usize),
Occupied(T),
}
#[derive(Debug)]
pub(crate) struct Slab<T> {
next_vacant: usize,
entries: Vec<Entry<T>>,
}
impl<T> Slab<T> {
pub fn new() -> Self {
Slab {
next_vacant: !0,
entries: Vec::new(),
}
}
/// Inserts a value into this slab and returns the index
/// at which the value can be accessed in constant time.
pub fn insert(&mut self, value: T) -> usize {
if self.next_vacant >= self.entries.len() {
self.entries.push(Entry::Occupied(value));
self.entries.len() - 1
} else {
match *unsafe { self.entries.get_unchecked(self.next_vacant) } {
Entry::Vacant(next_vacant) => {
unsafe {
*self.entries.get_unchecked_mut(self.next_vacant) = Entry::Occupied(value);
}
replace(&mut self.next_vacant, next_vacant)
}
_ => unsafe { unreachable_unchecked() },
}
}
}
pub fn len(&self) -> usize {
self.entries.len()
}
pub unsafe fn get_unchecked(&self, index: usize) -> &T {
debug_assert!(index < self.len());
match self.entries.get_unchecked(index) {
Entry::Occupied(value) => value,
_ => unreachable_unchecked(),
}
}
pub unsafe fn get_unchecked_mut(&mut self, index: usize) -> &mut T {
debug_assert!(index < self.len());
match self.entries.get_unchecked_mut(index) {
Entry::Occupied(value) => value,
_ => unreachable_unchecked(),
}
}
pub fn get(&self, index: usize) -> &T {
match self.entries.get(index) {
Some(Entry::Occupied(value)) => value,
_ => panic!("Invalid index"),
}
}
pub fn get_mut(&mut self, index: usize) -> &mut T {
match self.entries.get_mut(index) {
Some(Entry::Occupied(value)) => value,
_ => panic!("Invalid index"),
}
}
pub unsafe fn remove_unchecked(&mut self, index: usize) -> T {
let entry = replace(
self.entries.get_unchecked_mut(index),
Entry::Vacant(self.next_vacant),
);
self.next_vacant = index;
match entry {
Entry::Occupied(value) => value,
_ => unreachable_unchecked(),
}
}
pub fn remove(&mut self, index: usize) -> T {
match self.entries.get_mut(index) {
Some(Entry::Occupied(_)) => unsafe { self.remove_unchecked(index) },
_ => panic!("Invalid index"),
}
}
}
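// Illustrative test sketch (not part of the vendored upstream sources):
// removing an entry pushes its slot onto the vacant list, so the next
// insertion reuses that index instead of growing the vector.
#[cfg(test)]
mod tests {
    use super::Slab;

    #[test]
    fn insert_reuses_vacant_slots() {
        let mut slab = Slab::new();
        let a = slab.insert("a");
        let b = slab.insert("b");
        assert_eq!(*slab.get(a), "a");

        // Free slot `a`, then insert again: the vacated index comes back.
        assert_eq!(slab.remove(a), "a");
        assert_eq!(slab.insert("c"), a);
        assert_eq!(*slab.get(b), "b");
    }
}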

176
vendor/gpu-alloc/src/usage.rs vendored Normal file
View File

@@ -0,0 +1,176 @@
use {
core::fmt::{self, Debug},
gpu_alloc_types::{MemoryPropertyFlags, MemoryType},
};
bitflags::bitflags! {
/// Memory usage type.
/// Bits set define intended usage for requested memory.
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct UsageFlags: u8 {
/// Hints the allocator to find memory with faster device access.
/// If no flags are specified then `FAST_DEVICE_ACCESS` is implied.
const FAST_DEVICE_ACCESS = 0x01;
/// Memory will be accessed from the host.
/// This flag guarantees that host memory operations will be available.
/// Otherwise the implementation is encouraged to use non-host-accessible memory.
const HOST_ACCESS = 0x02;
/// Hints the allocator that memory will be used for data downloading.
/// The allocator will strongly prefer host-cached memory.
/// Implies `HOST_ACCESS` flag.
const DOWNLOAD = 0x04;
/// Hints the allocator that memory will be used for data uploading.
/// If the `DOWNLOAD` flag is not set then the allocator will assume that
/// the host will access memory in a write-only manner and may
/// pick non-host-cached memory.
/// Implies `HOST_ACCESS` flag.
const UPLOAD = 0x08;
/// Hints the allocator that memory will be used for a short duration,
/// allowing faster algorithms with less memory overhead to be used.
/// If the user holds the returned memory block for too long then the
/// effective memory overhead increases instead.
/// The best use case is a staging buffer for a single batch of operations.
const TRANSIENT = 0x10;
/// Requests memory that can be addressed with `u64`.
/// Allows fetching device address for resources bound to that memory.
const DEVICE_ADDRESS = 0x20;
}
}
#[derive(Clone, Copy, Debug)]
struct MemoryForOneUsage {
mask: u32,
types: [u32; 32],
types_count: u32,
}
pub(crate) struct MemoryForUsage {
usages: [MemoryForOneUsage; 64],
}
impl Debug for MemoryForUsage {
fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt.debug_struct("MemoryForUsage")
.field("usages", &&self.usages[..])
.finish()
}
}
impl MemoryForUsage {
pub fn new(memory_types: &[MemoryType]) -> Self {
assert!(
memory_types.len() <= 32,
"Only up to 32 memory types supported"
);
let mut mfu = MemoryForUsage {
usages: [MemoryForOneUsage {
mask: 0,
types: [0; 32],
types_count: 0,
}; 64],
};
for usage in 0..64 {
mfu.usages[usage as usize] =
one_usage(UsageFlags::from_bits_truncate(usage), memory_types);
}
mfu
}
/// Returns mask with bits set for memory type indices that support the
/// usage.
pub fn mask(&self, usage: UsageFlags) -> u32 {
self.usages[usage.bits() as usize].mask
}
/// Returns a slice of memory type indices that support the usage.
/// Earlier memory types have priority over later ones.
pub fn types(&self, usage: UsageFlags) -> &[u32] {
let usage = &self.usages[usage.bits() as usize];
&usage.types[..usage.types_count as usize]
}
}
fn one_usage(usage: UsageFlags, memory_types: &[MemoryType]) -> MemoryForOneUsage {
let mut types = [0; 32];
let mut types_count = 0;
for (index, mt) in memory_types.iter().enumerate() {
if compatible(usage, mt.props) {
types[types_count as usize] = index as u32;
types_count += 1;
}
}
types[..types_count as usize]
.sort_unstable_by_key(|&index| reverse_priority(usage, memory_types[index as usize].props));
let mask = types[..types_count as usize]
.iter()
.fold(0u32, |mask, index| mask | 1u32 << index);
MemoryForOneUsage {
mask,
types,
types_count,
}
}
fn compatible(usage: UsageFlags, flags: MemoryPropertyFlags) -> bool {
type Flags = MemoryPropertyFlags;
if flags.contains(Flags::LAZILY_ALLOCATED) || flags.contains(Flags::PROTECTED) {
// Unsupported
false
} else if usage.intersects(UsageFlags::HOST_ACCESS | UsageFlags::UPLOAD | UsageFlags::DOWNLOAD)
{
// Requires HOST_VISIBLE
flags.contains(Flags::HOST_VISIBLE)
} else {
true
}
}
/// Returns the reversed priority of memory with the specified flags for the specified usage.
/// A lower return value means a higher priority.
fn reverse_priority(usage: UsageFlags, flags: MemoryPropertyFlags) -> u32 {
type Flags = MemoryPropertyFlags;
// Highly prefer device local memory when `FAST_DEVICE_ACCESS` usage is specified
// or usage is empty.
let device_local: bool = flags.contains(Flags::DEVICE_LOCAL)
^ (usage.is_empty() || usage.contains(UsageFlags::FAST_DEVICE_ACCESS));
assert!(
flags.contains(Flags::HOST_VISIBLE)
|| !usage
.intersects(UsageFlags::HOST_ACCESS | UsageFlags::UPLOAD | UsageFlags::DOWNLOAD)
);
// Prefer non-host-visible memory when host access is not required.
let host_visible: bool = flags.contains(Flags::HOST_VISIBLE)
^ usage.intersects(UsageFlags::HOST_ACCESS | UsageFlags::UPLOAD | UsageFlags::DOWNLOAD);
// Prefer cached memory for downloads.
// Or non-cached if downloads are not expected.
let host_cached: bool =
flags.contains(Flags::HOST_CACHED) ^ usage.contains(UsageFlags::DOWNLOAD);
// Prefer coherent for both uploads and downloads.
// Prefer non-coherent if neither flag is set.
let host_coherent: bool = flags.contains(Flags::HOST_COHERENT)
^ (usage.intersects(UsageFlags::UPLOAD | UsageFlags::DOWNLOAD));
// Each boolean is `false` when the flags match the preference, so a lower sum means a better fit.
device_local as u32 * 8
+ host_visible as u32 * 4
+ host_cached as u32 * 2
+ host_coherent as u32
}
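// Illustrative test sketch (not part of the vendored upstream sources): for a
// pure `UPLOAD` usage, plain host-visible memory scores lower (i.e. is preferred)
// over device-local host-visible memory, since `FAST_DEVICE_ACCESS` was not requested.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn upload_prefers_plain_host_visible_memory() {
        let upload = UsageFlags::UPLOAD;
        let staging = MemoryPropertyFlags::HOST_VISIBLE | MemoryPropertyFlags::HOST_COHERENT;
        let device_local = MemoryPropertyFlags::DEVICE_LOCAL
            | MemoryPropertyFlags::HOST_VISIBLE
            | MemoryPropertyFlags::HOST_COHERENT;

        // Both memory types are compatible with host-access usages...
        assert!(compatible(upload, staging));
        assert!(compatible(upload, device_local));
        // ...but the non-device-local one gets the lower (better) score.
        assert!(reverse_priority(upload, staging) < reverse_priority(upload, device_local));
    }
}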

44
vendor/gpu-alloc/src/util.rs vendored Normal file
View File

@@ -0,0 +1,44 @@
use alloc::sync::Arc;
/// Guarantees uniqueness only if `Weak` pointers are never created
/// from this `Arc` or its clones.
pub(crate) fn is_arc_unique<M>(arc: &mut Arc<M>) -> bool {
let strong_count = Arc::strong_count(&*arc);
debug_assert_ne!(strong_count, 0, "This Arc should exist");
debug_assert!(
strong_count > 1 || Arc::get_mut(arc).is_some(),
"`Weak` pointer exists"
);
strong_count == 1
}
/// Can be used instead of `Arc::try_unwrap(arc).unwrap()`
/// when it is guaranteed to succeed.
pub(crate) unsafe fn arc_unwrap<M>(mut arc: Arc<M>) -> M {
use core::{mem::ManuallyDrop, ptr::read};
debug_assert!(is_arc_unique(&mut arc));
// Get raw pointer to inner value.
let raw = Arc::into_raw(arc);
// As `Arc` is unique and no Weak pointers exist
// it won't be dereferenced elsewhere.
let inner = read(raw);
// Cast to `ManuallyDrop`, which is guaranteed to have the same layout
// and therefore skips dropping the inner value.
drop(Arc::from_raw(raw as *const ManuallyDrop<M>));
inner
}
/// Can be used instead of `Arc::try_unwrap`
/// only if `Weak` pointers are never created from this `Arc` or its clones.
pub(crate) unsafe fn try_arc_unwrap<M>(mut arc: Arc<M>) -> Option<M> {
if is_arc_unique(&mut arc) {
Some(arc_unwrap(arc))
} else {
None
}
}
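// Illustrative test sketch (not part of the vendored upstream sources):
// unwrapping only succeeds while the `Arc` holds the sole strong reference.
#[cfg(test)]
mod tests {
    use super::*;
    use alloc::sync::Arc;

    #[test]
    fn unwrap_requires_a_unique_arc() {
        let unique = Arc::new(42u32);
        assert_eq!(unsafe { try_arc_unwrap(unique) }, Some(42));

        let shared = Arc::new(7u32);
        let other = Arc::clone(&shared);
        // A second strong reference keeps the value inside the `Arc`.
        assert_eq!(unsafe { try_arc_unwrap(shared) }, None);
        assert_eq!(*other, 7);
    }
}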