Skip to content

Commit

Permalink
Improve device scoring.
Browse files Browse the repository at this point in the history
Improve searching for the right buffer.
Keep trying all heaps when attempting to allocate memory.
Fixed incorrect type in shader.
Added a few more tests for sending the matrix and vectors.
  • Loading branch information
zlogic committed Jan 28, 2024
1 parent 9bed704 commit 8897931
Show file tree
Hide file tree
Showing 4 changed files with 74 additions and 76 deletions.
1 change: 1 addition & 0 deletions src/correlation/shaders/cross_check_filter.comp.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,5 @@ void main() {

// TODO: remove this debug code
img1[0] = ivec2(img1_width, img1_height);
img1[1] = ivec2(img2_width, img2_height);
}
3 changes: 2 additions & 1 deletion src/correlation/shaders/init_out_data.comp.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ layout(std430, set = 0, binding = 3) buffer Internals_Int
{
// Layout:
// Contains [min, max, neighbor_count] for the corridor range
int internals_int[];
ivec3 internals_int[];
};
layout(std430, set = 0, binding = 4) buffer Result_Matches
{
Expand All @@ -69,4 +69,5 @@ void main() {

// TODO: remove this debug code
result_corr[0] = threshold;
result_corr[1] = fundamental_matrix[2][0];
}
Binary file modified src/correlation/shaders/init_out_data.spv
Binary file not shown.
146 changes: 71 additions & 75 deletions src/correlation/vk.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use std::{collections::HashMap, error, ffi::CStr, fmt, slice, time::SystemTime};
use std::{cmp::Ordering, collections::HashMap, error, ffi::CStr, fmt, slice, time::SystemTime};

use ash::{prelude::VkResult, vk};
use nalgebra::Matrix3;
Expand Down Expand Up @@ -169,6 +169,9 @@ impl GpuContext {
}
let correlation_values = Grid::new(img1_dimensions.0, img1_dimensions.1, None);

// TODO: remove this debug code
let mut fundamental_matrix = fundamental_matrix.clone();
fundamental_matrix[(2, 0)] = 9.8765;
let params = CorrelationParameters::for_projection(&projection_mode);
let result = GpuContext {
min_stdev: params.min_stdev,
Expand Down Expand Up @@ -477,11 +480,13 @@ impl Device {
instance.destroy_instance(None);
err
};
println!("Created all buffers");
// Init pipelines and shaders.
let descriptor_sets = unsafe {
Device::create_descriptor_sets(&device, &buffers, CorrelationDirection::Forward)
.map_err(cleanup_err)?
};
println!("Created descriptor sets");
let cleanup_err = |err| unsafe {
descriptor_sets.destroy(&device);
buffers.destroy(&device);
Expand All @@ -491,6 +496,7 @@ impl Device {
};
let pipelines =
unsafe { Device::create_pipelines(&device, &descriptor_sets).map_err(cleanup_err)? };
println!("Created pipelines");
let cleanup_err = |err| unsafe {
destroy_pipelines(&device, &pipelines);
descriptor_sets.destroy(&device);
Expand All @@ -502,6 +508,7 @@ impl Device {
// Init control struct - queues, fences, command buffer.
let control =
unsafe { Device::create_control(&device, compute_queue_index).map_err(cleanup_err)? };
println!("Created control");
let result = Device {
_entry: entry,
instance,
Expand Down Expand Up @@ -813,7 +820,11 @@ impl Device {
});
}
// TODO: remove this debug code
println!("Corr check = {:?}", out_image.val(0, 0));
println!(
"Corr check = {:?} {:?}",
out_image.val(0, 0),
out_image.val(1, 0)
);

if !buffer.host_coherent {
let flush_memory_ranges = vk::MappedMemoryRange::builder()
Expand Down Expand Up @@ -906,7 +917,7 @@ impl Device {
unsafe fn find_device(
instance: &ash::Instance,
max_buffer_size: usize,
) -> Result<(vk::PhysicalDevice, &'static str, u32), Box<dyn error::Error>> {
) -> Result<(vk::PhysicalDevice, String, u32), Box<dyn error::Error>> {
let devices = instance.enumerate_physical_devices()?;
let device = devices
.iter()
Expand All @@ -923,16 +934,7 @@ impl Device {
let queue_index = Device::find_compute_queue(instance, device)?;

let device_name = CStr::from_ptr(props.device_name.as_ptr());
let device_name = device_name.to_str().unwrap();
println!(
"Device {} type {} {}-{}-{}-{}",
device_name,
props.device_type.as_raw(),
props.limits.max_push_constants_size,
props.limits.max_bound_descriptor_sets,
props.limits.max_storage_buffer_range,
max_buffer_size
);
let device_name = String::from_utf8_lossy(device_name.to_bytes()).to_string();
// TODO: allow specifying a device name filter/regex?
let score = match props.device_type {
vk::PhysicalDeviceType::DISCRETE_GPU => 3,
Expand All @@ -941,24 +943,25 @@ impl Device {
_ => 0,
};
// Prefer real devices instead of dzn emulation.
let dzn_multiplier = if device_name
let is_dzn = device_name
.to_lowercase()
.starts_with("microsoft direct3d12")
{
1
} else {
10
};
Some((device, device_name, queue_index, score * dzn_multiplier))
.starts_with("microsoft direct3d12");
let score = (score, is_dzn);
Some((device, device_name, queue_index, score))
})
.max_by_key(|(_device, _name, _queue_index, score)| *score);
let (device, name, queue_index) = if let Some((device, name, queue_index, _score)) = device
{
.max_by(|(_, _, _, a), (_, _, _, b)| {
if a.1 && !b.1 {
return Ordering::Less;
} else if !a.1 && b.1 {
return Ordering::Greater;
}
return a.0.cmp(&b.0);
});
let (device, name, queue_index) = if let Some((device, name, queue_index, score)) = device {
(device, name, queue_index)
} else {
return Err(GpuError::new("Device not found").into());
};
println!("selected device {}", name);
Ok((device, name, queue_index))
}

Expand Down Expand Up @@ -1009,6 +1012,7 @@ impl Device {
let max_pixels = img1_pixels.max(img2_pixels);
let mut buffers: Vec<Buffer> = vec![];
let cleanup_err = |buffers: &[Buffer], err| {
println!("buffers count is {}", buffers.len());
buffers.iter().for_each(|buffer| {
device.free_memory(buffer.buffer_memory, None);
device.destroy_buffer(buffer.buffer, None)
Expand Down Expand Up @@ -1095,13 +1099,13 @@ impl Device {
buffer_type: BufferType,
) -> Result<Buffer, Box<dyn error::Error>> {
let size = size as u64;
let gpu_local = match buffer_type {
BufferType::GpuOnly | BufferType::GpuDestination | BufferType::GpuSource => true,
BufferType::HostSource | BufferType::HostDestination => false,
};
let host_visible = match buffer_type {
BufferType::HostSource | BufferType::HostDestination => true,
BufferType::GpuOnly | BufferType::GpuDestination | BufferType::GpuSource => false,
let required_memory_properties = match buffer_type {
BufferType::GpuOnly | BufferType::GpuDestination | BufferType::GpuSource => {
vk::MemoryPropertyFlags::DEVICE_LOCAL
}
BufferType::HostSource | BufferType::HostDestination => {
vk::MemoryPropertyFlags::HOST_VISIBLE
}
};
let extra_usage_flags = match buffer_type {
BufferType::HostSource => vk::BufferUsageFlags::TRANSFER_SRC,
Expand All @@ -1122,58 +1126,41 @@ impl Device {
};
let buffer = device.create_buffer(&buffer_create_info, None)?;
let memory_requirements = device.get_buffer_memory_requirements(buffer);
let memory_type_index = memory_properties.memory_types
[..memory_properties.memory_type_count as usize]
.iter()
.enumerate()
.find(|(memory_type_index, memory_type)| {
let buffer_memory = (0..memory_properties.memory_type_count as usize)
.flat_map(|i| {
let memory_type = memory_properties.memory_types[i];
if memory_properties.memory_heaps[memory_type.heap_index as usize].size
< memory_requirements.size
{
return false;
};
if (1 << memory_type_index) & memory_requirements.memory_type_bits == 0 {
return false;
return None;
}

if gpu_local
&& memory_type
.property_flags
.contains(vk::MemoryPropertyFlags::DEVICE_LOCAL)
{
return true;
if ((1 << i) & memory_requirements.memory_type_bits) == 0 {
return None;
}
if host_visible
&& memory_type
.property_flags
.contains(vk::MemoryPropertyFlags::HOST_VISIBLE)
{
return true;
let property_flags = memory_type.property_flags;
if !property_flags.contains(required_memory_properties) {
return None;
}
false
});
let memory_type_index = if let Some((index, _)) = memory_type_index {
index as u32
let host_visible = property_flags.contains(vk::MemoryPropertyFlags::HOST_VISIBLE);
let host_coherent = property_flags.contains(vk::MemoryPropertyFlags::HOST_COHERENT);
let allocate_info = vk::MemoryAllocateInfo {
allocation_size: memory_requirements.size,
memory_type_index: i as u32,
..Default::default()
};
// Some heaps may fill up, in which case allocating memory can fail; skip to the next memory type.
let mem = device.allocate_memory(&allocate_info, None).ok()?;

Some((mem, host_visible, host_coherent))
})
.next();

let (buffer_memory, host_visible, host_coherent) = if let Some(mem) = buffer_memory {
mem
} else {
device.destroy_buffer(buffer, None);
return Err(GpuError::new("Cannot find suitable memory").into());
};
let property_flags =
memory_properties.memory_types[memory_type_index as usize].property_flags;
let host_visible = property_flags.contains(vk::MemoryPropertyFlags::HOST_VISIBLE);
let host_coherent = property_flags.contains(vk::MemoryPropertyFlags::HOST_COHERENT);
let allocate_info = vk::MemoryAllocateInfo {
allocation_size: memory_requirements.size,
memory_type_index,
..Default::default()
};
let buffer_memory = device.allocate_memory(&allocate_info, None);
let buffer_memory = match buffer_memory {
Ok(mem) => mem,
Err(err) => {
device.destroy_buffer(buffer, None);
return Err(err.into());
}
};
let result = Buffer {
buffer,
buffer_memory,
Expand Down Expand Up @@ -1205,32 +1192,38 @@ impl Device {
vk::DescriptorSetLayoutCreateInfo::builder().bindings(bindings.as_slice());
device.create_descriptor_set_layout(&layout_info, None)
};
println!("creating descriptor pools 1");
let descriptor_pool_size = [vk::DescriptorPoolSize::builder()
.ty(vk::DescriptorType::STORAGE_BUFFER)
.descriptor_count(2)
.build()];
let descriptor_pool_info = vk::DescriptorPoolCreateInfo::builder()
.max_sets(1)
.pool_sizes(&descriptor_pool_size);
println!("creating descriptor pools 2");
let descriptor_pool = device.create_descriptor_pool(&descriptor_pool_info, None)?;
let cleanup_err = |err| {
device.destroy_descriptor_pool(descriptor_pool, None);
err
};
let regular_layout = create_layout_bindings(6).map_err(cleanup_err)?;
println!("creating descriptor pools 3");
let cleanup_err = |err| {
device.destroy_descriptor_set_layout(regular_layout, None);
device.destroy_descriptor_pool(descriptor_pool, None);
err
};
let cross_check_layout = create_layout_bindings(2).map_err(cleanup_err)?;
println!("creating descriptor pools 4");
let cleanup_err = |err| {
println!("failed to allocate descriptor set");
device.destroy_descriptor_set_layout(cross_check_layout, None);
device.destroy_descriptor_set_layout(regular_layout, None);
device.destroy_descriptor_pool(descriptor_pool, None);
err
};
let layouts = [regular_layout, cross_check_layout];
println!("creating descriptor pools 5");
let push_constant_ranges = vk::PushConstantRange::builder()
.offset(0)
.size(std::mem::size_of::<ShaderParams>() as u32)
Expand All @@ -1251,12 +1244,14 @@ impl Device {
device.destroy_descriptor_pool(descriptor_pool, None);
err
};
println!("creating descriptor pools 6");
let descriptor_set_allocate_info = vk::DescriptorSetAllocateInfo::builder()
.descriptor_pool(descriptor_pool)
.set_layouts(&layouts);
let descriptor_sets = device
.allocate_descriptor_sets(&descriptor_set_allocate_info)
.map_err(cleanup_err)?;
println!("creating descriptor pools 7");

// TODO: extract this to allow switching direction on the fly.
let create_buffer_infos = |buffers: &[Buffer]| {
Expand Down Expand Up @@ -1398,6 +1393,7 @@ impl Device {
.create_fence(&fence_create_info, None)
.map_err(cleanup_err)?;
let cleanup_err = |err| {
println!("Failed to alloc command buffer");
device.destroy_command_pool(command_pool, None);
device.destroy_fence(fence, None);
err
Expand Down

0 comments on commit 8897931

Please sign in to comment.