diff --git a/Cargo.lock b/Cargo.lock index 5c4ca197c..8fe5771de 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1482,7 +1482,7 @@ dependencies = [ [[package]] name = "rust-cuda" version = "0.1.0" -source = "git+https://github.com/juntyr/rust-cuda?rev=dd9507d#dd9507d96ed34bf03a7537d62a693266ea4a8cb5" +source = "git+https://github.com/juntyr/rust-cuda?rev=5e1534c#5e1534cf3c4bd98df88aefbfe647dcd9a519dd65" dependencies = [ "const-type-layout", "final", @@ -1499,7 +1499,7 @@ dependencies = [ [[package]] name = "rust-cuda-derive" version = "0.1.0" -source = "git+https://github.com/juntyr/rust-cuda?rev=dd9507d#dd9507d96ed34bf03a7537d62a693266ea4a8cb5" +source = "git+https://github.com/juntyr/rust-cuda?rev=5e1534c#5e1534cf3c4bd98df88aefbfe647dcd9a519dd65" dependencies = [ "proc-macro-error", "proc-macro2", @@ -1510,7 +1510,7 @@ dependencies = [ [[package]] name = "rust-cuda-kernel" version = "0.1.0" -source = "git+https://github.com/juntyr/rust-cuda?rev=dd9507d#dd9507d96ed34bf03a7537d62a693266ea4a8cb5" +source = "git+https://github.com/juntyr/rust-cuda?rev=5e1534c#5e1534cf3c4bd98df88aefbfe647dcd9a519dd65" dependencies = [ "cargo_metadata", "colored", @@ -1601,7 +1601,6 @@ dependencies = [ "necsim-partitioning-mpi", "necsim-plugins-core", "ron", - "rust-cuda", "rustcoalescence-algorithms", "rustcoalescence-algorithms-cuda", "rustcoalescence-algorithms-gillespie", diff --git a/necsim/core/Cargo.toml b/necsim/core/Cargo.toml index ef8b0dccc..f353fabd8 100644 --- a/necsim/core/Cargo.toml +++ b/necsim/core/Cargo.toml @@ -20,7 +20,7 @@ contracts = "0.6.3" serde = { version = "1.0", default-features = false, features = ["derive"] } [target.'cfg(target_os = "cuda")'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "dd9507d", features = ["derive"], optional = true } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "5e1534c", features = ["derive"], optional = true } [target.'cfg(not(target_os = "cuda"))'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "dd9507d", features = ["derive", "host"], optional = true } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "5e1534c", features = ["derive", "host"], optional = true } diff --git a/necsim/core/src/landscape/extent.rs b/necsim/core/src/landscape/extent.rs index 12d8a0219..40c1012e5 100644 --- a/necsim/core/src/landscape/extent.rs +++ b/necsim/core/src/landscape/extent.rs @@ -3,10 +3,12 @@ use necsim_core_bond::OffByOneU32; use super::Location; #[allow(clippy::module_name_repetitions, clippy::unsafe_derive_deserialize)] -#[derive(PartialEq, Eq, Copy, Clone, Debug, serde::Deserialize, serde::Serialize, TypeLayout)] +#[derive(PartialEq, Eq, Clone, Debug, serde::Deserialize, serde::Serialize, TypeLayout)] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] +#[repr(C)] +#[cfg_attr(feature = "cuda", cuda(ignore))] #[serde(rename = "Extent")] #[serde(deny_unknown_fields)] -#[repr(C)] pub struct LandscapeExtent { x: u32, y: u32, @@ -58,7 +60,7 @@ impl LandscapeExtent { LocationIterator { x: self.x, y: self.y, - extent: *self, + extent: self.clone(), first_y: true, } } @@ -186,7 +188,7 @@ mod tests { LocationIterator { x: 0, y: 0, - extent, + extent: extent.clone(), first_y: true, } ); @@ -200,7 +202,7 @@ mod tests { LocationIterator { x: 0, y: 0, - extent, + extent: extent.clone(), first_y: false, } ); @@ -230,7 +232,7 @@ mod tests { LocationIterator { x: 1386, y: 6812, - extent, + extent: extent.clone(), first_y: true, } ); @@ -242,7 +244,7 @@ mod tests { LocationIterator { x: 0, y: 6812, - extent, + extent: extent.clone(), first_y: true, } ); @@ -255,7 +257,7 @@ mod tests { LocationIterator { x: 1386, y: 6813, - extent, + extent: extent.clone(), first_y: false, } ); @@ -269,7 +271,7 @@ mod tests { LocationIterator { x: 1386, y: 0, - extent, + extent: extent.clone(), first_y: false, } ); @@ -283,7 +285,7 @@ mod tests { LocationIterator { x: 1386, y: 6812, - extent, + extent: extent.clone(), first_y: false, } ); diff --git a/necsim/impls/cuda/Cargo.toml b/necsim/impls/cuda/Cargo.toml index 984ba4a50..1140bfe9f 100644 --- a/necsim/impls/cuda/Cargo.toml +++ b/necsim/impls/cuda/Cargo.toml @@ -15,7 +15,7 @@ contracts = "0.6.3" serde = { version = "1.0", default-features = false, features = ["derive"] } [target.'cfg(target_os = "cuda")'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "dd9507d", features = ["derive"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "5e1534c", features = ["derive"] } [target.'cfg(not(target_os = "cuda"))'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "dd9507d", features = ["derive", "host"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "5e1534c", features = ["derive", "host"] } diff --git a/necsim/impls/cuda/src/cogs/rng.rs b/necsim/impls/cuda/src/cogs/rng.rs index 671ebb79f..0bb7feb84 100644 --- a/necsim/impls/cuda/src/cogs/rng.rs +++ b/necsim/impls/cuda/src/cogs/rng.rs @@ -3,7 +3,10 @@ use core::marker::PhantomData; use necsim_core::cogs::{MathsCore, PrimeableRng, RngCore}; use const_type_layout::TypeGraphLayout; -use rust_cuda::safety::{PortableBitSemantics, StackOnly}; +use rust_cuda::{ + safety::{PortableBitSemantics, StackOnly}, + utils::adapter::RustToCudaWithPortableBitCloneSemantics, +}; use serde::{Deserialize, Deserializer, Serialize, Serializer}; @@ -12,38 +15,45 @@ use serde::{Deserialize, Deserializer, Serialize, Serializer}; #[cuda(free = "M", free = "R")] pub struct CudaRng where - R: RngCore + Copy + StackOnly + PortableBitSemantics + TypeGraphLayout, + R: RngCore + StackOnly + PortableBitSemantics + TypeGraphLayout, { - inner: R, + #[cuda(embed)] + inner: RustToCudaWithPortableBitCloneSemantics, marker: PhantomData, } -impl + Copy + StackOnly + PortableBitSemantics + TypeGraphLayout> Clone +impl + StackOnly + PortableBitSemantics + TypeGraphLayout + Copy> Copy + for CudaRng +{ +} + +#[allow(clippy::expl_impl_clone_on_copy)] +impl + StackOnly + PortableBitSemantics + TypeGraphLayout> Clone for CudaRng { fn clone(&self) -> Self { Self { - inner: self.inner, + inner: self.inner.clone(), marker: PhantomData::, } } } -impl + Copy + StackOnly + PortableBitSemantics + TypeGraphLayout> - From for CudaRng +impl + StackOnly + PortableBitSemantics + TypeGraphLayout> From + for CudaRng { #[must_use] #[inline] fn from(rng: R) -> Self { Self { - inner: rng, + inner: rng.into(), marker: PhantomData::, } } } -impl + Copy + StackOnly + PortableBitSemantics + TypeGraphLayout> - RngCore for CudaRng +impl + StackOnly + PortableBitSemantics + TypeGraphLayout> RngCore + for CudaRng { type Seed = >::Seed; @@ -51,7 +61,7 @@ impl + Copy + StackOnly + PortableBitSemantics + Typ #[inline] fn from_seed(seed: Self::Seed) -> Self { Self { - inner: R::from_seed(seed), + inner: R::from_seed(seed).into(), marker: PhantomData::, } } @@ -63,10 +73,8 @@ impl + Copy + StackOnly + PortableBitSemantics + Typ } } -impl< - M: MathsCore, - R: PrimeableRng + Copy + StackOnly + PortableBitSemantics + TypeGraphLayout, - > PrimeableRng for CudaRng +impl + StackOnly + PortableBitSemantics + TypeGraphLayout> + PrimeableRng for CudaRng { #[inline] fn prime_with(&mut self, location_index: u64, time_index: u64) { @@ -74,22 +82,19 @@ impl< } } -impl + Copy + StackOnly + PortableBitSemantics + TypeGraphLayout> - Serialize for CudaRng +impl + StackOnly + PortableBitSemantics + TypeGraphLayout> Serialize + for CudaRng { fn serialize(&self, serializer: S) -> Result { self.inner.serialize(serializer) } } -impl< - 'de, - M: MathsCore, - R: RngCore + Copy + StackOnly + PortableBitSemantics + TypeGraphLayout, - > Deserialize<'de> for CudaRng +impl<'de, M: MathsCore, R: RngCore + StackOnly + PortableBitSemantics + TypeGraphLayout> + Deserialize<'de> for CudaRng { fn deserialize>(deserializer: D) -> Result { - let inner = R::deserialize(deserializer)?; + let inner = R::deserialize(deserializer)?.into(); Ok(Self { inner, diff --git a/necsim/impls/no-std/Cargo.toml b/necsim/impls/no-std/Cargo.toml index 07f88df49..4465c45ee 100644 --- a/necsim/impls/no-std/Cargo.toml +++ b/necsim/impls/no-std/Cargo.toml @@ -30,7 +30,7 @@ fnv = { version = "1.0", default-features = false, features = [] } rand_core = "0.6" [target.'cfg(target_os = "cuda")'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "dd9507d", features = ["derive", "final"], optional = true } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "5e1534c", features = ["derive", "final"], optional = true } [target.'cfg(not(target_os = "cuda"))'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "dd9507d", features = ["derive", "final", "host"], optional = true } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "5e1534c", features = ["derive", "final", "host"], optional = true } diff --git a/necsim/impls/no-std/src/cogs/event_sampler/independent.rs b/necsim/impls/no-std/src/cogs/event_sampler/independent.rs index e3d51b674..17ac313d0 100644 --- a/necsim/impls/no-std/src/cogs/event_sampler/independent.rs +++ b/necsim/impls/no-std/src/cogs/event_sampler/independent.rs @@ -43,12 +43,7 @@ pub struct IndependentEventSampler< T: TurnoverRate, N: SpeciationProbability, > { - #[cfg_attr( - feature = "cuda", - cuda( - embed = "Option>" - ) - )] + #[cfg_attr(feature = "cuda", cuda(embed))] min_spec_sample: Option, marker: PhantomData<(M, H, G, X, D, T, N)>, } @@ -84,7 +79,7 @@ impl< { unsafe fn backup_unchecked(&self) -> Self { Self { - min_spec_sample: self.min_spec_sample, + min_spec_sample: self.min_spec_sample.clone(), marker: PhantomData::<(M, H, G, X, D, T, N)>, } } @@ -254,7 +249,7 @@ impl< ) -> Option { // `core::mem::replace()` would be semantically better // - but `clone()` does not spill to local memory - let old_value = self.min_spec_sample; + let old_value = self.min_spec_sample.clone(); self.min_spec_sample = new; diff --git a/necsim/impls/no-std/src/cogs/event_sampler/tracking.rs b/necsim/impls/no-std/src/cogs/event_sampler/tracking.rs index d541d21a4..5525256ad 100644 --- a/necsim/impls/no-std/src/cogs/event_sampler/tracking.rs +++ b/necsim/impls/no-std/src/cogs/event_sampler/tracking.rs @@ -26,7 +26,8 @@ pub trait MinSpeciationTrackingEventSampler< -> Option; } -#[derive(Clone, Copy, Debug, TypeLayout)] +#[derive(Clone, Debug, TypeLayout)] +#[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[repr(C)] pub struct SpeciationSample { speciation_sample: ClosedOpenUnitF64, diff --git a/necsim/impls/no-std/src/cogs/habitat/in_memory.rs b/necsim/impls/no-std/src/cogs/habitat/in_memory.rs index a1520550b..fb47d7a6d 100644 --- a/necsim/impls/no-std/src/cogs/habitat/in_memory.rs +++ b/necsim/impls/no-std/src/cogs/habitat/in_memory.rs @@ -21,6 +21,7 @@ pub struct InMemoryHabitat { habitat: Final>, #[cfg_attr(feature = "cuda", cuda(embed))] u64_injection: Final>, + #[cfg_attr(feature = "cuda", cuda(embed))] extent: LandscapeExtent, marker: PhantomData, } @@ -31,7 +32,7 @@ impl Backup for InMemoryHabitat { Self { habitat: Final::new(self.habitat.clone()), u64_injection: Final::new(self.u64_injection.clone()), - extent: self.extent, + extent: self.extent.clone(), marker: PhantomData::, } } diff --git a/necsim/impls/no-std/src/cogs/habitat/non_spatial.rs b/necsim/impls/no-std/src/cogs/habitat/non_spatial.rs index edbdf23f1..6d83fe75b 100644 --- a/necsim/impls/no-std/src/cogs/habitat/non_spatial.rs +++ b/necsim/impls/no-std/src/cogs/habitat/non_spatial.rs @@ -14,6 +14,7 @@ use necsim_core_bond::{OffByOneU32, OffByOneU64}; #[cfg_attr(feature = "cuda", derive(rust_cuda::lend::LendRustToCuda))] #[cfg_attr(feature = "cuda", cuda(free = "M"))] pub struct NonSpatialHabitat { + #[cfg_attr(feature = "cuda", cuda(embed))] extent: LandscapeExtent, deme: NonZeroU32, marker: PhantomData, @@ -58,7 +59,7 @@ impl NonSpatialHabitat { impl Backup for NonSpatialHabitat { unsafe fn backup_unchecked(&self) -> Self { Self { - extent: self.extent, + extent: self.extent.clone(), deme: self.deme, marker: PhantomData::, } diff --git a/necsim/impls/no-std/src/cogs/rng/seahash.rs b/necsim/impls/no-std/src/cogs/rng/seahash.rs index 6487a3531..1da7d2e9a 100644 --- a/necsim/impls/no-std/src/cogs/rng/seahash.rs +++ b/necsim/impls/no-std/src/cogs/rng/seahash.rs @@ -17,8 +17,6 @@ pub struct SeaHash { marker: PhantomData, } -impl Copy for SeaHash {} - impl RngCore for SeaHash { type Seed = [u8; 8]; diff --git a/necsim/impls/no-std/src/cogs/rng/wyhash.rs b/necsim/impls/no-std/src/cogs/rng/wyhash.rs index df86272f6..eae21264c 100644 --- a/necsim/impls/no-std/src/cogs/rng/wyhash.rs +++ b/necsim/impls/no-std/src/cogs/rng/wyhash.rs @@ -23,8 +23,6 @@ pub struct WyHash { marker: PhantomData, } -impl Copy for WyHash {} - impl RngCore for WyHash { type Seed = [u8; 8]; diff --git a/necsim/impls/no-std/src/decomposition/equal/area.rs b/necsim/impls/no-std/src/decomposition/equal/area.rs index 8d61e05f2..1e418c019 100644 --- a/necsim/impls/no-std/src/decomposition/equal/area.rs +++ b/necsim/impls/no-std/src/decomposition/equal/area.rs @@ -12,7 +12,7 @@ impl> EqualDecomposition { /// Returns `Ok(Self)` iff the `habitat` can be partitioned into /// `subdomain.size()` by area, otherwise returns `Err(Self)`. pub fn area(habitat: &H, subdomain: Partition) -> Result { - let extent = *habitat.get_extent(); + let extent = habitat.get_extent().clone(); let mut indices = Vec::with_capacity(subdomain.size().get() as usize); diff --git a/necsim/impls/no-std/src/decomposition/equal/mod.rs b/necsim/impls/no-std/src/decomposition/equal/mod.rs index d1ed79319..885c88103 100644 --- a/necsim/impls/no-std/src/decomposition/equal/mod.rs +++ b/necsim/impls/no-std/src/decomposition/equal/mod.rs @@ -34,7 +34,7 @@ impl> Backup for EqualDecomposition { unsafe fn backup_unchecked(&self) -> Self { Self { subdomain: self.subdomain, - extent: self.extent, + extent: self.extent.clone(), morton: self.morton, indices: self.indices.clone(), _marker: PhantomData::<(M, H)>, diff --git a/necsim/impls/no-std/src/decomposition/equal/weight.rs b/necsim/impls/no-std/src/decomposition/equal/weight.rs index a28dbffe1..cc5ec1e86 100644 --- a/necsim/impls/no-std/src/decomposition/equal/weight.rs +++ b/necsim/impls/no-std/src/decomposition/equal/weight.rs @@ -12,7 +12,7 @@ impl> EqualDecomposition { /// Returns `Ok(Self)` iff the `habitat` can be partitioned into /// `subdomain.size()` by weight, otherwise returns `Err(Self)`. pub fn weight(habitat: &H, subdomain: Partition) -> Result { - let extent = *habitat.get_extent(); + let extent = habitat.get_extent().clone(); let mut total_habitat = 0; let mut indices = Vec::with_capacity(subdomain.size().get() as usize); diff --git a/rustcoalescence/Cargo.toml b/rustcoalescence/Cargo.toml index 71c16c9b5..2a367eb33 100644 --- a/rustcoalescence/Cargo.toml +++ b/rustcoalescence/Cargo.toml @@ -14,7 +14,7 @@ necsim-partitioning-mpi = ["dep:necsim-partitioning-mpi"] rustcoalescence-algorithms-gillespie = ["dep:rustcoalescence-algorithms-gillespie"] rustcoalescence-algorithms-independent = ["dep:rustcoalescence-algorithms-independent"] -rustcoalescence-algorithms-cuda = ["dep:rustcoalescence-algorithms-cuda", "dep:rust-cuda"] +rustcoalescence-algorithms-cuda = ["dep:rustcoalescence-algorithms-cuda"] [dependencies] necsim-core = { path = "../necsim/core" } @@ -34,8 +34,6 @@ rustcoalescence-algorithms-gillespie = { path = "algorithms/gillespie", optional rustcoalescence-algorithms-independent = { path = "algorithms/independent", optional = true } rustcoalescence-algorithms-cuda = { path = "algorithms/cuda", optional = true } -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "dd9507d", features = [], optional = true } - clap = { version = "4.0", features = ["derive"] } anyhow = "1.0" serde = { version = "1.0", features = ["derive"] } diff --git a/rustcoalescence/algorithms/cuda/Cargo.toml b/rustcoalescence/algorithms/cuda/Cargo.toml index b51090971..e174cb6df 100644 --- a/rustcoalescence/algorithms/cuda/Cargo.toml +++ b/rustcoalescence/algorithms/cuda/Cargo.toml @@ -23,4 +23,4 @@ thiserror = "1.0" serde = { version = "1.0", features = ["derive"] } serde_state = "0.4" serde_derive_state = "0.4" -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "dd9507d", features = ["host"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "5e1534c", features = ["host"] } diff --git a/rustcoalescence/algorithms/cuda/cpu-kernel/Cargo.toml b/rustcoalescence/algorithms/cuda/cpu-kernel/Cargo.toml index ae5937ec8..88fe2ff9b 100644 --- a/rustcoalescence/algorithms/cuda/cpu-kernel/Cargo.toml +++ b/rustcoalescence/algorithms/cuda/cpu-kernel/Cargo.toml @@ -14,4 +14,4 @@ necsim-impls-no-std = { path = "../../../../necsim/impls/no-std", features = ["c necsim-impls-cuda = { path = "../../../../necsim/impls/cuda" } rustcoalescence-algorithms-cuda-gpu-kernel = { path = "../gpu-kernel" } -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "dd9507d", features = ["host"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "5e1534c", features = ["host"] } diff --git a/rustcoalescence/algorithms/cuda/gpu-kernel/Cargo.toml b/rustcoalescence/algorithms/cuda/gpu-kernel/Cargo.toml index f0a8873c0..544fe2511 100644 --- a/rustcoalescence/algorithms/cuda/gpu-kernel/Cargo.toml +++ b/rustcoalescence/algorithms/cuda/gpu-kernel/Cargo.toml @@ -17,7 +17,7 @@ necsim-impls-no-std = { path = "../../../../necsim/impls/no-std", features = ["c necsim-impls-cuda = { path = "../../../../necsim/impls/cuda" } [target.'cfg(target_os = "cuda")'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "dd9507d", features = ["derive", "device", "kernel"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "5e1534c", features = ["derive", "device", "kernel"] } [target.'cfg(not(target_os = "cuda"))'.dependencies] -rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "dd9507d", features = ["derive", "kernel"] } +rust-cuda = { git = "https://github.com/juntyr/rust-cuda", rev = "5e1534c", features = ["derive", "kernel"] } diff --git a/rustcoalescence/algorithms/cuda/src/parallelisation/monolithic.rs b/rustcoalescence/algorithms/cuda/src/parallelisation/monolithic.rs index eccfb8aba..e28cd0ef0 100644 --- a/rustcoalescence/algorithms/cuda/src/parallelisation/monolithic.rs +++ b/rustcoalescence/algorithms/cuda/src/parallelisation/monolithic.rs @@ -172,7 +172,6 @@ pub fn simulate< HostAndDeviceMutRef::with_new(&mut total_time_max, |total_time_max| -> Result<()> { HostAndDeviceMutRef::with_new(&mut total_steps_sum, |total_steps_sum| -> Result<()> { - // TODO: Pipeline async launches and callbacks of simulation/event analysis simulation.lend_to_cuda(|simulation_cuda_repr| -> Result<()> { while !slow_lineages.is_empty() && pause_before.map_or(true, |pause_before| level_time < pause_before) @@ -304,6 +303,7 @@ pub fn simulate< } event_buffer = event_buffer_host_async.synchronize()?; + // TODO: explore partial sorting on the GPU event_buffer.report_events_unordered(&mut proxy); proxy.local_partition().get_reporter().report_progress( diff --git a/rustcoalescence/src/cli/simulate/dispatch/valid/algorithm_scenario.rs b/rustcoalescence/src/cli/simulate/dispatch/valid/algorithm_scenario.rs index 6591e1d91..54715cf40 100644 --- a/rustcoalescence/src/cli/simulate/dispatch/valid/algorithm_scenario.rs +++ b/rustcoalescence/src/cli/simulate/dispatch/valid/algorithm_scenario.rs @@ -105,7 +105,6 @@ macro_rules! match_scenario_algorithm { } #[allow(clippy::too_many_arguments)] -#[allow(clippy::too_many_lines)] // FIXME pub(super) fn dispatch<'p, R: Reporter, P: LocalPartition<'p, R>>( local_partition: P,