Skip to content

Commit

Permalink
Improve device scoring.
Browse files Browse the repository at this point in the history
Improve searching for the right buffer.
Keep trying all heaps when attempting to allocate memory.
Fixed incorrect type in shader.
  • Loading branch information
zlogic committed Jan 28, 2024
1 parent 9bed704 commit 7bf4208
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 75 deletions.
2 changes: 1 addition & 1 deletion src/correlation/shaders/init_out_data.comp.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ layout(std430, set = 0, binding = 3) buffer Internals_Int
{
// Layout:
// Contains [min, max, neighbor_count] for the corridor range
int internals_int[];
ivec3 internals_int[];
};
layout(std430, set = 0, binding = 4) buffer Result_Matches
{
Expand Down
Binary file modified src/correlation/shaders/init_out_data.spv
Binary file not shown.
134 changes: 60 additions & 74 deletions src/correlation/vk.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use std::{collections::HashMap, error, ffi::CStr, fmt, slice, time::SystemTime};
use std::{cmp::Ordering, collections::HashMap, error, ffi::CStr, fmt, slice, time::SystemTime};

use ash::{prelude::VkResult, vk};
use nalgebra::Matrix3;
Expand Down Expand Up @@ -906,7 +906,7 @@ impl Device {
unsafe fn find_device(
instance: &ash::Instance,
max_buffer_size: usize,
) -> Result<(vk::PhysicalDevice, &'static str, u32), Box<dyn error::Error>> {
) -> Result<(vk::PhysicalDevice, String, u32), Box<dyn error::Error>> {
let devices = instance.enumerate_physical_devices()?;
let device = devices
.iter()
Expand All @@ -923,16 +923,7 @@ impl Device {
let queue_index = Device::find_compute_queue(instance, device)?;

let device_name = CStr::from_ptr(props.device_name.as_ptr());
let device_name = device_name.to_str().unwrap();
println!(
"Device {} type {} {}-{}-{}-{}",
device_name,
props.device_type.as_raw(),
props.limits.max_push_constants_size,
props.limits.max_bound_descriptor_sets,
props.limits.max_storage_buffer_range,
max_buffer_size
);
let device_name = String::from_utf8_lossy(device_name.to_bytes()).to_string();
// TODO: allow to specify a device name filter/regex?
let score = match props.device_type {
vk::PhysicalDeviceType::DISCRETE_GPU => 3,
Expand All @@ -941,24 +932,25 @@ impl Device {
_ => 0,
};
// Prefer real devices instead of dzn emulation.
let dzn_multiplier = if device_name
let is_dzn = device_name
.to_lowercase()
.starts_with("microsoft direct3d12")
{
1
} else {
10
};
Some((device, device_name, queue_index, score * dzn_multiplier))
.starts_with("microsoft direct3d12");
let score = (score, is_dzn);
Some((device, device_name, queue_index, score))
})
.max_by_key(|(_device, _name, _queue_index, score)| *score);
let (device, name, queue_index) = if let Some((device, name, queue_index, _score)) = device
{
.max_by(|(_, _, _, a), (_, _, _, b)| {
if a.1 && !b.1 {
return Ordering::Less;
} else if !a.1 && b.1 {
return Ordering::Greater;
}
return a.0.cmp(&b.0);
});
let (device, name, queue_index) = if let Some((device, name, queue_index, score)) = device {
(device, name, queue_index)
} else {
return Err(GpuError::new("Device not found").into());
};
println!("selected device {}", name);
Ok((device, name, queue_index))
}

Expand Down Expand Up @@ -1009,6 +1001,7 @@ impl Device {
let max_pixels = img1_pixels.max(img2_pixels);
let mut buffers: Vec<Buffer> = vec![];
let cleanup_err = |buffers: &[Buffer], err| {
println!("buffers count is {}", buffers.len());
buffers.iter().for_each(|buffer| {
device.free_memory(buffer.buffer_memory, None);
device.destroy_buffer(buffer.buffer, None)
Expand Down Expand Up @@ -1095,13 +1088,13 @@ impl Device {
buffer_type: BufferType,
) -> Result<Buffer, Box<dyn error::Error>> {
let size = size as u64;
let gpu_local = match buffer_type {
BufferType::GpuOnly | BufferType::GpuDestination | BufferType::GpuSource => true,
BufferType::HostSource | BufferType::HostDestination => false,
};
let host_visible = match buffer_type {
BufferType::HostSource | BufferType::HostDestination => true,
BufferType::GpuOnly | BufferType::GpuDestination | BufferType::GpuSource => false,
let required_memory_properties = match buffer_type {
BufferType::GpuOnly | BufferType::GpuDestination | BufferType::GpuSource => {
vk::MemoryPropertyFlags::DEVICE_LOCAL
}
BufferType::HostSource | BufferType::HostDestination => {
vk::MemoryPropertyFlags::HOST_VISIBLE
}
};
let extra_usage_flags = match buffer_type {
BufferType::HostSource => vk::BufferUsageFlags::TRANSFER_SRC,
Expand All @@ -1122,65 +1115,58 @@ impl Device {
};
let buffer = device.create_buffer(&buffer_create_info, None)?;
let memory_requirements = device.get_buffer_memory_requirements(buffer);
let memory_type_index = memory_properties.memory_types
[..memory_properties.memory_type_count as usize]
.iter()
.enumerate()
.find(|(memory_type_index, memory_type)| {
let buffer_memory = (0..memory_properties.memory_type_count as usize)
.flat_map(|i| {
let memory_type = memory_properties.memory_types[i];
if memory_properties.memory_heaps[memory_type.heap_index as usize].size
< memory_requirements.size
{
return false;
};
if (1 << memory_type_index) & memory_requirements.memory_type_bits == 0 {
return false;
return None;
}

if gpu_local
&& memory_type
.property_flags
.contains(vk::MemoryPropertyFlags::DEVICE_LOCAL)
{
return true;
if ((1 << i) & memory_requirements.memory_type_bits) == 0 {
return None;
}
if host_visible
&& memory_type
.property_flags
.contains(vk::MemoryPropertyFlags::HOST_VISIBLE)
{
return true;
let property_flags = memory_type.property_flags;
if !property_flags.contains(required_memory_properties) {
return None;
}
false
});
let memory_type_index = if let Some((index, _)) = memory_type_index {
index as u32
let host_visible = property_flags.contains(vk::MemoryPropertyFlags::HOST_VISIBLE);
let host_coherent = property_flags.contains(vk::MemoryPropertyFlags::HOST_COHERENT);
let allocate_info = vk::MemoryAllocateInfo {
allocation_size: memory_requirements.size,
memory_type_index: i as u32,
..Default::default()
};
println!(
"Before allocating memory {:?}, {}",
allocate_info.allocation_size, allocate_info.memory_type_index,
);
// Some buffers may fill up, in this case allocating memory can fail.
let mem = device.allocate_memory(&allocate_info, None);
if mem.is_err() {
println!("err allocating memory {}", mem.err().unwrap());
}

Some((mem.ok()?, host_visible, host_coherent))
})
.next();

let (buffer_memory, host_visible, host_coherent) = if let Some(mem) = buffer_memory {
println!("Some memory found");
mem
} else {
println!("failed to allocate");
device.destroy_buffer(buffer, None);
return Err(GpuError::new("Cannot find suitable memory").into());
};
let property_flags =
memory_properties.memory_types[memory_type_index as usize].property_flags;
let host_visible = property_flags.contains(vk::MemoryPropertyFlags::HOST_VISIBLE);
let host_coherent = property_flags.contains(vk::MemoryPropertyFlags::HOST_COHERENT);
let allocate_info = vk::MemoryAllocateInfo {
allocation_size: memory_requirements.size,
memory_type_index,
..Default::default()
};
let buffer_memory = device.allocate_memory(&allocate_info, None);
let buffer_memory = match buffer_memory {
Ok(mem) => mem,
Err(err) => {
device.destroy_buffer(buffer, None);
return Err(err.into());
}
};
let result = Buffer {
buffer,
buffer_memory,
host_visible,
host_coherent,
size,
};
println!("before binding memory");
device.bind_buffer_memory(buffer, buffer_memory, 0)?;
Ok(result)
}
Expand Down

0 comments on commit 7bf4208

Please sign in to comment.