From b506127649c52b1264af4f777ac03d0eb36acced Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Thu, 26 Sep 2024 09:15:50 +0000 Subject: [PATCH 01/10] Try switching from rustacuda to cust --- .github/workflows/rustdoc.yml | 5 ++- Cargo.toml | 26 ++++++------ examples/print/src/main.rs | 20 +++++----- rust-cuda-derive/src/rust_to_cuda/impl.rs | 8 ++-- src/deps.rs | 4 +- src/host/mod.rs | 33 +++++++++------- src/kernel/mod.rs | 33 +++++++++------- src/kernel/param.rs | 48 +++++++++++------------ src/lend/impls/arc.rs | 18 ++++----- src/lend/impls/arced_slice.rs | 26 +++++++----- src/lend/impls/box.rs | 26 ++++++------ src/lend/impls/boxed_slice.rs | 16 ++++---- src/lend/impls/final.rs | 8 ++-- src/lend/impls/option.rs | 2 +- src/lend/impls/ref.rs | 18 ++++----- src/lend/impls/ref_mut.rs | 8 ++-- src/lend/impls/slice_ref.rs | 10 ++--- src/lend/impls/slice_ref_mut.rs | 8 ++-- src/lend/mod.rs | 22 +++++------ src/utils/adapter.rs | 18 ++++----- src/utils/aliasing/const.rs | 8 ++-- src/utils/aliasing/dynamic.rs | 8 ++-- src/utils/async.rs | 10 ++--- src/utils/exchange/buffer/host.rs | 26 ++++++------ src/utils/exchange/buffer/mod.rs | 16 ++++---- src/utils/exchange/wrapper.rs | 20 +++++----- 26 files changed, 229 insertions(+), 216 deletions(-) diff --git a/.github/workflows/rustdoc.yml b/.github/workflows/rustdoc.yml index 046a89cee..f0d7b683b 100644 --- a/.github/workflows/rustdoc.yml +++ b/.github/workflows/rustdoc.yml @@ -37,8 +37,9 @@ jobs: --enable-index-page \ --extern-html-root-url const_type_layout=https://docs.rs/const-type-layout/0.3.2/ \ --extern-html-root-url final=https://docs.rs/final/0.1.1/ \ - --extern-html-root-url rustacuda=https://docs.rs/rustacuda/0.1.3/ \ - --extern-html-root-url rustacuda_core=https://docs.rs/rustacuda_core/0.1.2/ \ + --extern-html-root-url cust=https://docs.rs/cust/0.3.2/ \ + --extern-html-root-url cust_core=https://docs.rs/cust_core/0.1/ \ + --extern-html-root-url cust_derive=https://docs.rs/cust_derive/0.2/ \ -Zunstable-options \ " cargo doc \ --all-features \ diff --git a/Cargo.toml b/Cargo.toml index f4d43727a..450c0a989 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -89,22 +89,22 @@ rust-version = { workspace = true } [features] default = [] -derive = ["dep:rust-cuda-derive"] +derive = ["dep:cust_derive", "dep:rust-cuda-derive"] device = [] final = ["dep:final"] -host = ["dep:rustacuda", "dep:regex", "dep:oneshot", "dep:safer_owning_ref"] +host = ["dep:cust", "dep:regex", "dep:oneshot", "dep:safer_owning_ref"] kernel = ["dep:rust-cuda-kernel"] [dependencies] -const-type-layout = { workspace = true, features = ["derive"] } -final = { workspace = true, optional = true } -oneshot = { workspace = true, features = ["std", "async"], optional = true } -regex = { workspace = true, optional = true } -rustacuda = { workspace = true, optional = true } -rustacuda_core = { workspace = true } -rust-cuda-derive = { workspace = true, optional = true } -rust-cuda-kernel = { workspace = true, optional = true } -safer_owning_ref = { workspace = true, optional = true } +const-type-layout = { version = "0.3.2", default-features = false, features = ["derive"] } +# FIXME: cust fails to compile without the `bytemuck` feature +cust = { version = "0.3.2", default-features = false, features = ["bytemuck"], optional = true } +cust_core = { version = "0.1", default-features = false } +cust_derive = { version = "0.2", default-features = false, optional = true } +final = { version = "0.1.1", default-features = false, optional = true } +oneshot = { version = "0.1", default-features = false, features = ["std", "async"], optional = true } +regex = { version = "1.10", default-features = false, optional = true } +safer_owning_ref = { version = "0.5", default-features = false, optional = true } -[lints] -workspace = true +rust-cuda-derive = { path = "rust-cuda-derive", default-features = false, optional = true } +rust-cuda-kernel = { path = "rust-cuda-kernel", default-features = false, optional = true } diff --git a/examples/print/src/main.rs b/examples/print/src/main.rs index 008b39f5b..1998a7057 100644 --- a/examples/print/src/main.rs +++ b/examples/print/src/main.rs @@ -2,38 +2,38 @@ use print::{kernel, link, Action}; -fn main() -> rust_cuda::deps::rustacuda::error::CudaResult<()> { +fn main() -> rust_cuda::deps::cust::error::CudaResult<()> { // Link the non-generic CUDA kernel struct KernelPtx; link! { impl kernel for KernelPtx } // Initialize the CUDA API - rust_cuda::deps::rustacuda::init(rust_cuda::deps::rustacuda::CudaFlags::empty())?; + rust_cuda::deps::cust::init(rust_cuda::deps::cust::CudaFlags::empty())?; // Get the first CUDA GPU device - let device = rust_cuda::deps::rustacuda::device::Device::get_device(0)?; + let device = rust_cuda::deps::cust::device::Device::get_device(0)?; // Create a CUDA context associated to this device let _context = rust_cuda::host::CudaDropWrapper::from( - rust_cuda::deps::rustacuda::context::Context::create_and_push( - rust_cuda::deps::rustacuda::context::ContextFlags::MAP_HOST - | rust_cuda::deps::rustacuda::context::ContextFlags::SCHED_AUTO, + rust_cuda::deps::cust::context::Context::create_and_push( + rust_cuda::deps::cust::context::ContextFlags::MAP_HOST + | rust_cuda::deps::cust::context::ContextFlags::SCHED_AUTO, device, )?, ); // Create a new CUDA stream to submit kernels to let mut stream = - rust_cuda::host::CudaDropWrapper::from(rust_cuda::deps::rustacuda::stream::Stream::new( - rust_cuda::deps::rustacuda::stream::StreamFlags::NON_BLOCKING, + rust_cuda::host::CudaDropWrapper::from(rust_cuda::deps::cust::stream::Stream::new( + rust_cuda::deps::cust::stream::StreamFlags::NON_BLOCKING, None, )?); // Create a new instance of the CUDA kernel and prepare the launch config let mut kernel = rust_cuda::kernel::TypedPtxKernel::::new::(None); let config = rust_cuda::kernel::LaunchConfig { - grid: rust_cuda::deps::rustacuda::function::GridSize::x(1), - block: rust_cuda::deps::rustacuda::function::BlockSize::x(4), + grid: rust_cuda::deps::cust::function::GridSize::x(1), + block: rust_cuda::deps::cust::function::BlockSize::x(4), ptx_jit: false, }; diff --git a/rust-cuda-derive/src/rust_to_cuda/impl.rs b/rust-cuda-derive/src/rust_to_cuda/impl.rs index 55b0948d7..56dc3dcca 100644 --- a/rust-cuda-derive/src/rust_to_cuda/impl.rs +++ b/rust-cuda-derive/src/rust_to_cuda/impl.rs @@ -84,7 +84,7 @@ pub fn rust_to_cuda_trait( unsafe fn borrow( &self, alloc: CudaAllocType, - ) -> #crate_path::deps::rustacuda::error::CudaResult<( + ) -> #crate_path::deps::cust::error::CudaResult<( #crate_path::utils::ffi::DeviceAccessible, #crate_path::alloc::CombinedCudaAlloc )> { @@ -107,7 +107,7 @@ pub fn rust_to_cuda_trait( alloc: #crate_path::alloc::CombinedCudaAlloc< Self::CudaAllocation, CudaAllocType >, - ) -> #crate_path::deps::rustacuda::error::CudaResult { + ) -> #crate_path::deps::cust::error::CudaResult { let (alloc_front, alloc_tail) = alloc.split(); #(#r2c_field_destructors)* @@ -192,7 +192,7 @@ pub fn rust_to_cuda_async_trait( &self, alloc: CudaAllocType, stream: #crate_path::host::Stream<'stream>, - ) -> #crate_path::deps::rustacuda::error::CudaResult<( + ) -> #crate_path::deps::cust::error::CudaResult<( #crate_path::utils::r#async::Async< '_, 'stream, #crate_path::utils::ffi::DeviceAccessible, @@ -220,7 +220,7 @@ pub fn rust_to_cuda_async_trait( Self::CudaAllocationAsync, CudaAllocType >, stream: #crate_path::host::Stream<'stream>, - ) -> #crate_path::deps::rustacuda::error::CudaResult<( + ) -> #crate_path::deps::cust::error::CudaResult<( #crate_path::utils::r#async::Async< 'a, 'stream, #crate_path::deps::owning_ref::BoxRefMut<'a, CudaRestoreOwner, Self>, diff --git a/src/deps.rs b/src/deps.rs index 50fd38f3f..8521ed267 100644 --- a/src/deps.rs +++ b/src/deps.rs @@ -7,6 +7,6 @@ pub extern crate const_type_layout; pub extern crate owning_ref; #[cfg(feature = "host")] -pub extern crate rustacuda; +pub extern crate cust; -pub extern crate rustacuda_core; +pub extern crate cust_core; diff --git a/src/host/mod.rs b/src/host/mod.rs index c2d0558c4..782b589f8 100644 --- a/src/host/mod.rs +++ b/src/host/mod.rs @@ -5,13 +5,14 @@ use std::{ }; use const_type_layout::TypeGraphLayout; -use rustacuda::{ +use cust::{ context::Context, error::CudaError, event::Event, memory::{CopyDestination, DeviceBox, DeviceBuffer, LockedBox, LockedBuffer}, module::Module, }; +use cust_core::DeviceCopy; use crate::{ safety::PortableBitSemantics, @@ -30,12 +31,12 @@ type InvariantLifetime<'brand> = PhantomData &'brand ()>; #[derive(Copy, Clone)] #[repr(transparent)] pub struct Stream<'stream> { - stream: &'stream rustacuda::stream::Stream, + stream: &'stream cust::stream::Stream, _brand: InvariantLifetime<'stream>, } impl<'stream> Deref for Stream<'stream> { - type Target = rustacuda::stream::Stream; + type Target = cust::stream::Stream; fn deref(&self) -> &Self::Target { self.stream @@ -65,7 +66,7 @@ impl<'stream> Stream<'stream> { /// } /// ``` pub fn with( - stream: &mut rustacuda::stream::Stream, + stream: &mut cust::stream::Stream, inner: impl for<'new_stream> FnOnce(Stream<'new_stream>) -> O, ) -> O { inner(Stream { @@ -77,7 +78,7 @@ impl<'stream> Stream<'stream> { pub trait CudaDroppable: Sized { #[expect(clippy::missing_errors_doc)] - fn drop(val: Self) -> Result<(), (rustacuda::error::CudaError, Self)>; + fn drop(val: Self) -> Result<(), (cust::error::CudaError, Self)>; } #[repr(transparent)] @@ -112,25 +113,27 @@ impl DerefMut for CudaDropWrapper { } } -impl CudaDroppable for DeviceBox { +impl CudaDroppable for DeviceBox { fn drop(val: Self) -> Result<(), (CudaError, Self)> { Self::drop(val) } } -impl CudaDroppable for DeviceBuffer { +impl CudaDroppable for DeviceBuffer { fn drop(val: Self) -> Result<(), (CudaError, Self)> { Self::drop(val) } } -impl CudaDroppable for LockedBox { +impl CudaDroppable for LockedBox { fn drop(val: Self) -> Result<(), (CudaError, Self)> { - Self::drop(val) + // FIXME: cust's LockedBox no longer has a fallible drop + std::mem::drop(val); + Ok(()) } } -impl CudaDroppable for LockedBuffer { +impl CudaDroppable for LockedBuffer { fn drop(val: Self) -> Result<(), (CudaError, Self)> { Self::drop(val) } @@ -147,7 +150,7 @@ macro_rules! impl_sealed_drop_value { } impl_sealed_drop_value!(Module); -impl_sealed_drop_value!(rustacuda::stream::Stream); +impl_sealed_drop_value!(cust::stream::Stream); impl_sealed_drop_value!(Context); impl_sealed_drop_value!(Event); @@ -207,7 +210,7 @@ impl<'a, T: PortableBitSemantics + TypeGraphLayout> HostAndDeviceMutRef<'a, T> { 'a: 'b, { DeviceMutRef { - pointer: DeviceMutPointer(self.device_box.as_device_ptr().as_raw_mut().cast()), + pointer: DeviceMutPointer(self.device_box.as_device_ptr().as_mut_ptr().cast()), reference: PhantomData, } } @@ -322,10 +325,10 @@ impl<'a, T: PortableBitSemantics + TypeGraphLayout> HostAndDeviceConstRef<'a, T> where 'a: 'b, { - let mut hack = ManuallyDrop::new(unsafe { std::ptr::read(self.device_box) }); + let hack = ManuallyDrop::new(unsafe { std::ptr::read(self.device_box) }); DeviceConstRef { - pointer: DeviceConstPointer(hack.as_device_ptr().as_raw().cast()), + pointer: DeviceConstPointer(hack.as_device_ptr().as_ptr().cast()), reference: PhantomData, } } @@ -390,7 +393,7 @@ impl<'a, T: PortableBitSemantics + TypeGraphLayout> HostAndDeviceOwned<'a, T> { #[must_use] pub(crate) fn for_device(self) -> DeviceOwnedRef<'a, T> { DeviceOwnedRef { - pointer: DeviceOwnedPointer(self.device_box.as_device_ptr().as_raw_mut().cast()), + pointer: DeviceOwnedPointer(self.device_box.as_device_ptr().as_mut_ptr().cast()), marker: PhantomData::, reference: PhantomData::<&'a mut ()>, } diff --git a/src/kernel/mod.rs b/src/kernel/mod.rs index 44b4c6216..95d21457a 100644 --- a/src/kernel/mod.rs +++ b/src/kernel/mod.rs @@ -1,3 +1,4 @@ +use core::str; #[cfg(feature = "host")] use std::{ ffi::{CStr, CString}, @@ -6,8 +7,9 @@ use std::{ ptr::NonNull, }; +use cust::module::{ModuleJitOption, OptLevel}; #[cfg(feature = "host")] -use rustacuda::{ +use cust::{ error::{CudaError, CudaResult}, function::Function, module::Module, @@ -42,12 +44,7 @@ mod sealed { #[cfg(all(feature = "host", not(doc)))] #[doc(hidden)] -pub trait WithNewAsync< - 'stream, - P: ?Sized + CudaKernelParameter, - O, - E: From, -> +pub trait WithNewAsync<'stream, P: ?Sized + CudaKernelParameter, O, E: From> { fn with<'b>(self, param: P::AsyncHostType<'stream, 'b>) -> Result where @@ -59,7 +56,7 @@ impl< 'stream, P: ?Sized + CudaKernelParameter, O, - E: From, + E: From, F: for<'b> FnOnce(P::AsyncHostType<'stream, 'b>) -> Result, > WithNewAsync<'stream, P, O, E> for F { @@ -109,7 +106,7 @@ pub trait CudaKernelParameter: sealed::Sealed { #[cfg(feature = "host")] #[expect(clippy::missing_errors_doc)] // FIXME - fn with_new_async<'stream, 'b, O, E: From>( + fn with_new_async<'stream, 'b, O, E: From>( param: Self::SyncHostType, stream: crate::host::Stream<'stream>, #[cfg(not(doc))] inner: impl WithNewAsync<'stream, Self, O, E>, @@ -139,7 +136,7 @@ pub trait CudaKernelParameter: sealed::Sealed { #[doc(hidden)] #[cfg(feature = "host")] - fn async_to_ffi<'stream, 'b, E: From>( + fn async_to_ffi<'stream, 'b, E: From>( param: Self::AsyncHostType<'stream, 'b>, token: sealed::Token, ) -> Result, E> @@ -286,8 +283,8 @@ impl<'stream, 'kernel, Kernel> Launcher<'stream, 'kernel, Kernel> { #[cfg(feature = "host")] #[derive(Clone, Debug, PartialEq, Eq)] pub struct LaunchConfig { - pub grid: rustacuda::function::GridSize, - pub block: rustacuda::function::BlockSize, + pub grid: cust::function::GridSize, + pub block: cust::function::BlockSize, pub ptx_jit: bool, } @@ -305,9 +302,15 @@ impl RawPtxKernel { /// Returns a [`CudaError`] if `ptx` is not a valid PTX source, or it does /// not contain an entry point named `entry_point`. pub fn new(ptx: &CStr, entry_point: &CStr) -> CudaResult { - let module: Box = Box::new(Module::load_from_string(ptx)?); - - let function = unsafe { &*std::ptr::from_ref(module.as_ref()) }.get_function(entry_point); + let module: Box = Box::new(Module::from_ptx_cstr( + ptx, + &[ModuleJitOption::OptLevel(OptLevel::O4)], + )?); + + // FIXME: cust's Module::get_function takes a str and turns it back into + // a CString immediately + let function = unsafe { &*std::ptr::from_ref(module.as_ref()) } + .get_function(unsafe { str::from_utf8_unchecked(entry_point.to_bytes()) }); let function = match function { Ok(function) => function, diff --git a/src/kernel/param.rs b/src/kernel/param.rs index 2ad1b0bf8..6d95224dc 100644 --- a/src/kernel/param.rs +++ b/src/kernel/param.rs @@ -88,7 +88,7 @@ impl< type SyncHostType = T; #[cfg(feature = "host")] - fn with_new_async<'stream, 'b, O, E: From>( + fn with_new_async<'stream, 'b, O, E: From>( param: Self::SyncHostType, _stream: crate::host::Stream<'stream>, #[cfg(not(doc))] inner: impl super::WithNewAsync<'stream, Self, O, E>, @@ -124,7 +124,7 @@ impl< } #[cfg(feature = "host")] - fn async_to_ffi<'stream, 'b, E: From>( + fn async_to_ffi<'stream, 'b, E: From>( param: Self::AsyncHostType<'stream, 'b>, _token: sealed::Token, ) -> Result, E> @@ -180,7 +180,7 @@ impl< type SyncHostType = &'a T; #[cfg(feature = "host")] - fn with_new_async<'stream, 'b, O, E: From>( + fn with_new_async<'stream, 'b, O, E: From>( param: Self::SyncHostType, stream: crate::host::Stream<'stream>, #[cfg(not(doc))] inner: impl super::WithNewAsync<'stream, Self, O, E>, @@ -219,7 +219,7 @@ impl< } #[cfg(feature = "host")] - fn async_to_ffi<'stream, 'b, E: From>( + fn async_to_ffi<'stream, 'b, E: From>( param: Self::AsyncHostType<'stream, 'b>, _token: sealed::Token, ) -> Result, E> @@ -273,7 +273,7 @@ impl< type SyncHostType = <&'a PerThreadShallowCopy as CudaKernelParameter>::SyncHostType; #[cfg(feature = "host")] - fn with_new_async<'stream, 'b, O, E: From>( + fn with_new_async<'stream, 'b, O, E: From>( param: Self::SyncHostType, stream: crate::host::Stream<'stream>, #[cfg(not(doc))] inner: impl super::WithNewAsync<'stream, Self, O, E>, @@ -315,7 +315,7 @@ impl< } #[cfg(feature = "host")] - fn async_to_ffi<'stream, 'b, E: From>( + fn async_to_ffi<'stream, 'b, E: From>( param: Self::AsyncHostType<'stream, 'b>, token: sealed::Token, ) -> Result, E> @@ -403,7 +403,7 @@ impl< type SyncHostType = &'a mut T; #[cfg(feature = "host")] - fn with_new_async<'stream, 'b, O, E: From>( + fn with_new_async<'stream, 'b, O, E: From>( param: Self::SyncHostType, stream: crate::host::Stream<'stream>, #[cfg(not(doc))] inner: impl super::WithNewAsync<'stream, Self, O, E>, @@ -442,7 +442,7 @@ impl< } #[cfg(feature = "host")] - fn async_to_ffi<'stream, 'b, E: From>( + fn async_to_ffi<'stream, 'b, E: From>( param: Self::AsyncHostType<'stream, 'b>, _token: sealed::Token, ) -> Result, E> @@ -549,7 +549,7 @@ impl< type SyncHostType = T; #[cfg(feature = "host")] - fn with_new_async<'stream, 'b, O, E: From>( + fn with_new_async<'stream, 'b, O, E: From>( param: Self::SyncHostType, stream: crate::host::Stream<'stream>, #[cfg(not(doc))] inner: impl super::WithNewAsync<'stream, Self, O, E>, @@ -585,7 +585,7 @@ impl< } #[cfg(feature = "host")] - fn async_to_ffi<'stream, 'b, E: From>( + fn async_to_ffi<'stream, 'b, E: From>( param: Self::AsyncHostType<'stream, 'b>, _token: sealed::Token, ) -> Result, E> @@ -644,7 +644,7 @@ impl<'a, T: Sync + RustToCuda> CudaKernelParameter for &'a DeepPerThreadBorrow>( + fn with_new_async<'stream, 'b, O, E: From>( param: Self::SyncHostType, stream: crate::host::Stream<'stream>, #[cfg(not(doc))] inner: impl super::WithNewAsync<'stream, Self, O, E>, @@ -683,7 +683,7 @@ impl<'a, T: Sync + RustToCuda> CudaKernelParameter for &'a DeepPerThreadBorrow>( + fn async_to_ffi<'stream, 'b, E: From>( param: Self::AsyncHostType<'stream, 'b>, _token: sealed::Token, ) -> Result, E> @@ -737,7 +737,7 @@ impl<'a, T: Sync + RustToCuda + SafeMutableAliasing> CudaKernelParameter type SyncHostType = &'a mut T; #[cfg(feature = "host")] - fn with_new_async<'stream, 'b, O, E: From>( + fn with_new_async<'stream, 'b, O, E: From>( param: Self::SyncHostType, stream: crate::host::Stream<'stream>, #[cfg(not(doc))] inner: impl super::WithNewAsync<'stream, Self, O, E>, @@ -781,7 +781,7 @@ impl<'a, T: Sync + RustToCuda + SafeMutableAliasing> CudaKernelParameter } #[cfg(feature = "host")] - fn async_to_ffi<'stream, 'b, E: From>( + fn async_to_ffi<'stream, 'b, E: From>( mut param: Self::AsyncHostType<'stream, 'b>, _token: sealed::Token, ) -> Result, E> @@ -835,7 +835,7 @@ impl< type SyncHostType = as CudaKernelParameter>::SyncHostType; #[cfg(feature = "host")] - fn with_new_async<'stream, 'b, O, E: From>( + fn with_new_async<'stream, 'b, O, E: From>( param: Self::SyncHostType, stream: crate::host::Stream<'stream>, #[cfg(not(doc))] inner: impl super::WithNewAsync<'stream, Self, O, E>, @@ -862,7 +862,7 @@ impl< } #[cfg(feature = "host")] - fn async_to_ffi<'stream, 'b, E: From>( + fn async_to_ffi<'stream, 'b, E: From>( param: Self::AsyncHostType<'stream, 'b>, token: sealed::Token, ) -> Result, E> @@ -926,7 +926,7 @@ impl<'a, T: Sync + RustToCuda> CudaKernelParameter for &'a PtxJit as CudaKernelParameter>::SyncHostType; #[cfg(feature = "host")] - fn with_new_async<'stream, 'b, O, E: From>( + fn with_new_async<'stream, 'b, O, E: From>( param: Self::SyncHostType, stream: crate::host::Stream<'stream>, #[cfg(not(doc))] inner: impl super::WithNewAsync<'stream, Self, O, E>, @@ -968,7 +968,7 @@ impl<'a, T: Sync + RustToCuda> CudaKernelParameter for &'a PtxJit>( + fn async_to_ffi<'stream, 'b, E: From>( param: Self::AsyncHostType<'stream, 'b>, token: sealed::Token, ) -> Result, E> @@ -1017,7 +1017,7 @@ impl<'a, T: Sync + RustToCuda + SafeMutableAliasing> CudaKernelParameter type SyncHostType = <&'a mut DeepPerThreadBorrow as CudaKernelParameter>::SyncHostType; #[cfg(feature = "host")] - fn with_new_async<'stream, 'b, O, E: From>( + fn with_new_async<'stream, 'b, O, E: From>( param: Self::SyncHostType, stream: crate::host::Stream<'stream>, #[cfg(not(doc))] inner: impl super::WithNewAsync<'stream, Self, O, E>, @@ -1064,7 +1064,7 @@ impl<'a, T: Sync + RustToCuda + SafeMutableAliasing> CudaKernelParameter } #[cfg(feature = "host")] - fn async_to_ffi<'stream, 'b, E: From>( + fn async_to_ffi<'stream, 'b, E: From>( param: Self::AsyncHostType<'stream, 'b>, token: sealed::Token, ) -> Result, E> @@ -1154,7 +1154,7 @@ impl<'a, T: 'static> CudaKernelParameter for &'a mut crate::utils::shared::Threa type SyncHostType = Self; #[cfg(feature = "host")] - fn with_new_async<'stream, 'b, O, E: From>( + fn with_new_async<'stream, 'b, O, E: From>( param: Self::SyncHostType, _stream: crate::host::Stream<'stream>, #[cfg(not(doc))] inner: impl super::WithNewAsync<'stream, Self, O, E>, @@ -1190,7 +1190,7 @@ impl<'a, T: 'static> CudaKernelParameter for &'a mut crate::utils::shared::Threa } #[cfg(feature = "host")] - fn async_to_ffi<'stream, 'b, E: From>( + fn async_to_ffi<'stream, 'b, E: From>( _param: Self::AsyncHostType<'stream, 'b>, _token: sealed::Token, ) -> Result, E> @@ -1241,7 +1241,7 @@ impl<'a, T: 'static + PortableBitSemantics + TypeGraphLayout> CudaKernelParamete type SyncHostType = Self; #[cfg(feature = "host")] - fn with_new_async<'stream, 'b, O, E: From>( + fn with_new_async<'stream, 'b, O, E: From>( param: Self::SyncHostType, _stream: crate::host::Stream<'stream>, #[cfg(not(doc))] inner: impl super::WithNewAsync<'stream, Self, O, E>, @@ -1277,7 +1277,7 @@ impl<'a, T: 'static + PortableBitSemantics + TypeGraphLayout> CudaKernelParamete } #[cfg(feature = "host")] - fn async_to_ffi<'stream, 'b, E: From>( + fn async_to_ffi<'stream, 'b, E: From>( param: Self::AsyncHostType<'stream, 'b>, _token: sealed::Token, ) -> Result, E> diff --git a/src/lend/impls/arc.rs b/src/lend/impls/arc.rs index 4d59837ff..9bb3e1cb0 100644 --- a/src/lend/impls/arc.rs +++ b/src/lend/impls/arc.rs @@ -5,7 +5,7 @@ use std::mem::ManuallyDrop; use const_type_layout::{TypeGraphLayout, TypeLayout}; #[cfg(feature = "host")] -use rustacuda::{error::CudaResult, memory::DeviceBox, memory::LockedBox}; +use cust::{error::CudaResult, memory::DeviceBox, memory::LockedBox}; use crate::{ deps::alloc::sync::Arc, @@ -65,13 +65,13 @@ unsafe impl RustToCuda for Arc { let offset = std::mem::offset_of!(_ArcInner, data); let arc_ptr: *const _ArcInner = data_ptr.byte_sub(offset).cast(); - let mut device_box = CudaDropWrapper::from(DeviceBox::new( + let device_box = CudaDropWrapper::from(DeviceBox::new( DeviceCopyWithPortableBitSemantics::from_ref(&*arc_ptr), )?); Ok(( DeviceAccessible::from(ArcCudaRepresentation(DeviceOwnedPointer( - device_box.as_device_ptr().as_raw_mut().cast(), + device_box.as_device_ptr().as_mut_ptr().cast(), ))), CombinedCudaAlloc::new(device_box, alloc), )) @@ -101,11 +101,11 @@ unsafe impl RustToCudaAsync for Arc, - ) -> rustacuda::error::CudaResult<( + ) -> cust::error::CudaResult<( Async<'_, 'stream, DeviceAccessible>, CombinedCudaAlloc, )> { - use rustacuda::memory::AsyncCopyDestination; + use cust::memory::AsyncCopyDestination; let locked_box = unsafe { let inner = ManuallyDrop::new(_ArcInner { @@ -114,12 +114,12 @@ unsafe impl RustToCudaAsync for Arc>>, >::uninitialized()?); std::ptr::copy_nonoverlapping( std::ptr::from_ref(DeviceCopyWithPortableBitSemantics::from_ref(&inner)), - uninit.as_mut_ptr(), + uninit.as_raw(), 1, ); @@ -129,12 +129,12 @@ unsafe impl RustToCudaAsync for Arc>>, >::uninitialized()?); - device_box.async_copy_from(&*locked_box, &stream)?; + device_box.async_copy_from(&**locked_box, &stream)?; Ok(( Async::pending( DeviceAccessible::from(ArcCudaRepresentation(DeviceOwnedPointer( - device_box.as_device_ptr().as_raw_mut().cast(), + device_box.as_device_ptr().as_mut_ptr().cast(), ))), stream, NoCompletion, diff --git a/src/lend/impls/arced_slice.rs b/src/lend/impls/arced_slice.rs index cce12b3cd..1fc334589 100644 --- a/src/lend/impls/arced_slice.rs +++ b/src/lend/impls/arced_slice.rs @@ -5,12 +5,12 @@ use std::mem::{ManuallyDrop, MaybeUninit}; use const_type_layout::{TypeGraphLayout, TypeLayout}; #[cfg(feature = "host")] -use rustacuda::{ +use cust::{ error::CudaResult, memory::LockedBuffer, memory::{DeviceBox, DeviceBuffer}, }; -use rustacuda_core::DeviceCopy; +use cust_core::DeviceCopy; use crate::{ deps::alloc::sync::Arc, @@ -51,10 +51,17 @@ pub struct _ArcInner { data: T, } +#[derive(Copy, Clone)] #[repr(C)] struct _ArcInnerHeader { - strong: AtomicUsize, - weak: AtomicUsize, + strong: _AtomicUsize, + weak: _AtomicUsize, +} + +#[derive(Copy, Clone)] +#[repr(C, align(8))] +struct _AtomicUsize { + v: usize, } unsafe impl DeviceCopy for _ArcInnerHeader {} @@ -74,8 +81,7 @@ unsafe impl RustToCuda for Arc<[T]> { DeviceAccessible, CombinedCudaAlloc, )> { - use rustacuda::memory::{CopyDestination, DeviceSlice}; - use rustacuda_core::DevicePointer; + use cust::memory::{CopyDestination, DevicePointer, DeviceSlice}; let data_ptr: *const T = std::ptr::from_ref(&**self).as_ptr(); let offset = std::mem::offset_of!(_ArcInner<[T; 42]>, data); @@ -105,7 +111,7 @@ unsafe impl RustToCuda for Arc<[T]> { Ok(( DeviceAccessible::from(ArcedSliceCudaRepresentation { - data: DeviceOwnedPointer(header.as_device_ptr().as_raw_mut().cast()), + data: DeviceOwnedPointer(header.as_device_ptr().as_mut_ptr().cast()), len: self.len(), }), CombinedCudaAlloc::new(device_buffer, alloc), @@ -136,11 +142,11 @@ unsafe impl RustToCudaAsync for Arc<[ &self, alloc: A, stream: crate::host::Stream<'stream>, - ) -> rustacuda::error::CudaResult<( + ) -> cust::error::CudaResult<( Async<'_, 'stream, DeviceAccessible>, CombinedCudaAlloc, )> { - use rustacuda::memory::AsyncCopyDestination; + use cust::memory::AsyncCopyDestination; let data_ptr: *const T = std::ptr::from_ref(&**self).as_ptr(); let offset = std::mem::offset_of!(_ArcInner<[T; 42]>, data); @@ -187,7 +193,7 @@ unsafe impl RustToCudaAsync for Arc<[ data: DeviceOwnedPointer( device_buffer .as_device_ptr() - .as_raw_mut() + .as_mut_ptr() .byte_add(header_len * std::mem::size_of::() - offset) .cast(), ), diff --git a/src/lend/impls/box.rs b/src/lend/impls/box.rs index 305072a34..9c16f07a4 100644 --- a/src/lend/impls/box.rs +++ b/src/lend/impls/box.rs @@ -4,7 +4,7 @@ use std::mem::ManuallyDrop; use const_type_layout::{TypeGraphLayout, TypeLayout}; #[cfg(feature = "host")] -use rustacuda::{error::CudaResult, memory::DeviceBox, memory::LockedBox}; +use cust::{error::CudaResult, memory::DeviceBox, memory::LockedBox}; use crate::{ deps::alloc::boxed::Box, @@ -47,13 +47,13 @@ unsafe impl RustToCuda for Box { DeviceAccessible, CombinedCudaAlloc, )> { - let mut device_box = CudaDropWrapper::from(DeviceBox::new( + let device_box = CudaDropWrapper::from(DeviceBox::new( DeviceCopyWithPortableBitSemantics::from_ref(&**self), )?); Ok(( DeviceAccessible::from(BoxCudaRepresentation(DeviceOwnedPointer( - device_box.as_device_ptr().as_raw_mut().cast(), + device_box.as_device_ptr().as_mut_ptr().cast(), ))), CombinedCudaAlloc::new(device_box, alloc), )) @@ -64,7 +64,7 @@ unsafe impl RustToCuda for Box { &mut self, alloc: CombinedCudaAlloc, ) -> CudaResult { - use rustacuda::memory::CopyDestination; + use cust::memory::CopyDestination; let (alloc_front, alloc_tail) = alloc.split(); @@ -90,20 +90,20 @@ unsafe impl RustToCudaAsync for Box, - ) -> rustacuda::error::CudaResult<( + ) -> cust::error::CudaResult<( Async<'_, 'stream, DeviceAccessible>, CombinedCudaAlloc, )> { - use rustacuda::memory::AsyncCopyDestination; + use cust::memory::AsyncCopyDestination; let locked_box = unsafe { - let mut uninit = CudaDropWrapper::from(LockedBox::< + let uninit = CudaDropWrapper::from(LockedBox::< DeviceCopyWithPortableBitSemantics>, >::uninitialized()?); std::ptr::copy_nonoverlapping( std::ptr::from_ref::(&**self) .cast::>>(), - uninit.as_mut_ptr(), + uninit.as_raw(), 1, ); uninit @@ -112,12 +112,12 @@ unsafe impl RustToCudaAsync for Box>, >::uninitialized()?); - device_box.async_copy_from(&*locked_box, &stream)?; + device_box.async_copy_from(&**locked_box, &stream)?; Ok(( Async::pending( DeviceAccessible::from(BoxCudaRepresentation(DeviceOwnedPointer( - device_box.as_device_ptr().as_raw_mut().cast(), + device_box.as_device_ptr().as_mut_ptr().cast(), ))), stream, NoCompletion, @@ -135,12 +135,12 @@ unsafe impl RustToCudaAsync for Box, CompletionFnMut<'a, Self>>, A, )> { - use rustacuda::memory::AsyncCopyDestination; + use cust::memory::AsyncCopyDestination; let (alloc_front, alloc_tail) = alloc.split(); let (mut locked_box, device_box) = alloc_front.split(); - device_box.async_copy_to(&mut *locked_box, &stream)?; + device_box.async_copy_to(&mut **locked_box, &stream)?; let r#async = crate::utils::r#async::Async::<_, CompletionFnMut<'a, Self>>::pending( this, @@ -151,7 +151,7 @@ unsafe impl RustToCudaAsync for Box> doesn't drop T unsafe { - std::ptr::copy_nonoverlapping(locked_box.as_ptr().cast::(), data, 1); + std::ptr::copy_nonoverlapping(locked_box.as_raw().cast::(), data, 1); } std::mem::drop(locked_box); Ok(()) diff --git a/src/lend/impls/boxed_slice.rs b/src/lend/impls/boxed_slice.rs index b2c22765c..8b0937b06 100644 --- a/src/lend/impls/boxed_slice.rs +++ b/src/lend/impls/boxed_slice.rs @@ -7,7 +7,7 @@ use crate::{deps::alloc::boxed::Box, lend::RustToCudaAsync, utils::ffi::DeviceOw use const_type_layout::{TypeGraphLayout, TypeLayout}; #[cfg(feature = "host")] -use rustacuda::{error::CudaResult, memory::DeviceBuffer, memory::LockedBuffer}; +use cust::{error::CudaResult, memory::DeviceBuffer, memory::LockedBuffer}; use crate::{ lend::{CudaAsRust, RustToCuda}, @@ -51,13 +51,13 @@ unsafe impl RustToCuda for Box<[T]> { DeviceAccessible, CombinedCudaAlloc, )> { - let mut device_buffer = CudaDropWrapper::from(DeviceBuffer::from_slice( + let device_buffer = CudaDropWrapper::from(DeviceBuffer::from_slice( DeviceCopyWithPortableBitSemantics::from_slice(self), )?); Ok(( DeviceAccessible::from(BoxedSliceCudaRepresentation { - data: DeviceOwnedPointer(device_buffer.as_mut_ptr().cast()), + data: DeviceOwnedPointer(device_buffer.as_device_ptr().as_mut_ptr().cast()), len: device_buffer.len(), _marker: PhantomData::, }), @@ -70,7 +70,7 @@ unsafe impl RustToCuda for Box<[T]> { &mut self, alloc: CombinedCudaAlloc, ) -> CudaResult { - use rustacuda::memory::CopyDestination; + use cust::memory::CopyDestination; let (alloc_front, alloc_tail) = alloc.split(); @@ -96,11 +96,11 @@ unsafe impl RustToCudaAsync for Box<[ &self, alloc: A, stream: crate::host::Stream<'stream>, - ) -> rustacuda::error::CudaResult<( + ) -> cust::error::CudaResult<( Async<'_, 'stream, DeviceAccessible>, CombinedCudaAlloc, )> { - use rustacuda::memory::AsyncCopyDestination; + use cust::memory::AsyncCopyDestination; let locked_buffer = unsafe { let mut uninit = CudaDropWrapper::from(LockedBuffer::< @@ -124,7 +124,7 @@ unsafe impl RustToCudaAsync for Box<[ Ok(( Async::pending( DeviceAccessible::from(BoxedSliceCudaRepresentation { - data: DeviceOwnedPointer(device_buffer.as_mut_ptr().cast()), + data: DeviceOwnedPointer(device_buffer.as_device_ptr().as_mut_ptr().cast()), len: device_buffer.len(), _marker: PhantomData::, }), @@ -144,7 +144,7 @@ unsafe impl RustToCudaAsync for Box<[ Async<'a, 'stream, owning_ref::BoxRefMut<'a, O, Self>, CompletionFnMut<'a, Self>>, A, )> { - use rustacuda::memory::AsyncCopyDestination; + use cust::memory::AsyncCopyDestination; let (alloc_front, alloc_tail) = alloc.split(); let (mut locked_buffer, device_buffer) = alloc_front.split(); diff --git a/src/lend/impls/final.rs b/src/lend/impls/final.rs index fa83de5a2..51b228c24 100644 --- a/src/lend/impls/final.rs +++ b/src/lend/impls/final.rs @@ -19,7 +19,7 @@ unsafe impl RustToCuda for Final { unsafe fn borrow( &self, alloc: A, - ) -> rustacuda::error::CudaResult<( + ) -> cust::error::CudaResult<( DeviceAccessible, crate::alloc::CombinedCudaAlloc, )> { @@ -35,7 +35,7 @@ unsafe impl RustToCuda for Final { unsafe fn restore( &mut self, alloc: crate::alloc::CombinedCudaAlloc, - ) -> rustacuda::error::CudaResult { + ) -> cust::error::CudaResult { let (_alloc_front, alloc_tail) = alloc.split(); Ok(alloc_tail) } @@ -49,7 +49,7 @@ unsafe impl RustToCudaAsync for Final { &self, alloc: A, stream: crate::host::Stream<'stream>, - ) -> rustacuda::error::CudaResult<( + ) -> cust::error::CudaResult<( crate::utils::r#async::Async<'_, 'stream, DeviceAccessible>, crate::alloc::CombinedCudaAlloc, )> { @@ -76,7 +76,7 @@ unsafe impl RustToCudaAsync for Final { this: owning_ref::BoxRefMut<'a, O, Self>, alloc: crate::alloc::CombinedCudaAlloc, stream: crate::host::Stream<'stream>, - ) -> rustacuda::error::CudaResult<( + ) -> cust::error::CudaResult<( crate::utils::r#async::Async< 'a, 'stream, diff --git a/src/lend/impls/option.rs b/src/lend/impls/option.rs index 1997822a2..931c7e952 100644 --- a/src/lend/impls/option.rs +++ b/src/lend/impls/option.rs @@ -3,7 +3,7 @@ use core::mem::MaybeUninit; use const_type_layout::{TypeGraphLayout, TypeLayout}; #[cfg(feature = "host")] -use rustacuda::error::CudaResult; +use cust::error::CudaResult; use crate::{ lend::{CudaAsRust, RustToCuda, RustToCudaAsync, RustToCudaProxy}, diff --git a/src/lend/impls/ref.rs b/src/lend/impls/ref.rs index 43358c546..99318e055 100644 --- a/src/lend/impls/ref.rs +++ b/src/lend/impls/ref.rs @@ -5,7 +5,7 @@ use std::mem::ManuallyDrop; use const_type_layout::{TypeGraphLayout, TypeLayout}; #[cfg(feature = "host")] -use rustacuda::{error::CudaResult, memory::DeviceBox, memory::LockedBox}; +use cust::{error::CudaResult, memory::DeviceBox, memory::LockedBox}; use crate::{ lend::{CudaAsRust, RustToCuda, RustToCudaAsync}, @@ -48,13 +48,13 @@ unsafe impl<'a, T: PortableBitSemantics + TypeGraphLayout> RustToCuda for &'a T DeviceAccessible, CombinedCudaAlloc, )> { - let mut device_box = CudaDropWrapper::from(DeviceBox::new( + let device_box = CudaDropWrapper::from(DeviceBox::new( DeviceCopyWithPortableBitSemantics::from_ref(&**self), )?); Ok(( DeviceAccessible::from(RefCudaRepresentation { - data: DeviceConstPointer(device_box.as_device_ptr().as_raw().cast()), + data: DeviceConstPointer(device_box.as_device_ptr().as_ptr().cast()), _marker: PhantomData::<&'a T>, }), CombinedCudaAlloc::new(device_box, alloc), @@ -85,20 +85,20 @@ unsafe impl<'a, T: PortableBitSemantics + TypeGraphLayout> RustToCudaAsync for & &self, alloc: A, stream: crate::host::Stream<'stream>, - ) -> rustacuda::error::CudaResult<( + ) -> cust::error::CudaResult<( Async<'_, 'stream, DeviceAccessible>, CombinedCudaAlloc, )> { - use rustacuda::memory::AsyncCopyDestination; + use cust::memory::AsyncCopyDestination; let locked_box = unsafe { - let mut uninit = CudaDropWrapper::from(LockedBox::< + let uninit = CudaDropWrapper::from(LockedBox::< DeviceCopyWithPortableBitSemantics>, >::uninitialized()?); std::ptr::copy_nonoverlapping( std::ptr::from_ref::(&**self) .cast::>>(), - uninit.as_mut_ptr(), + uninit.as_raw(), 1, ); uninit @@ -107,12 +107,12 @@ unsafe impl<'a, T: PortableBitSemantics + TypeGraphLayout> RustToCudaAsync for & let mut device_box = CudaDropWrapper::from(DeviceBox::< DeviceCopyWithPortableBitSemantics>, >::uninitialized()?); - device_box.async_copy_from(&*locked_box, &stream)?; + device_box.async_copy_from(&**locked_box, &stream)?; Ok(( Async::pending( DeviceAccessible::from(RefCudaRepresentation { - data: DeviceConstPointer(device_box.as_device_ptr().as_raw().cast()), + data: DeviceConstPointer(device_box.as_device_ptr().as_ptr().cast()), _marker: PhantomData::<&T>, }), stream, diff --git a/src/lend/impls/ref_mut.rs b/src/lend/impls/ref_mut.rs index ca9830c75..6945c9bc2 100644 --- a/src/lend/impls/ref_mut.rs +++ b/src/lend/impls/ref_mut.rs @@ -3,7 +3,7 @@ use core::marker::PhantomData; use const_type_layout::{TypeGraphLayout, TypeLayout}; #[cfg(feature = "host")] -use rustacuda::{error::CudaResult, memory::DeviceBox}; +use cust::{error::CudaResult, memory::DeviceBox}; use crate::{ lend::{CudaAsRust, RustToCuda}, @@ -45,13 +45,13 @@ unsafe impl<'a, T: PortableBitSemantics + TypeGraphLayout> RustToCuda for &'a mu DeviceAccessible, CombinedCudaAlloc, )> { - let mut device_box = CudaDropWrapper::from(DeviceBox::new( + let device_box = CudaDropWrapper::from(DeviceBox::new( DeviceCopyWithPortableBitSemantics::from_ref(&**self), )?); Ok(( DeviceAccessible::from(RefMutCudaRepresentation { - data: DeviceMutPointer(device_box.as_device_ptr().as_raw_mut().cast()), + data: DeviceMutPointer(device_box.as_device_ptr().as_mut_ptr().cast()), _marker: PhantomData::<&'a mut T>, }), CombinedCudaAlloc::new(device_box, alloc), @@ -63,7 +63,7 @@ unsafe impl<'a, T: PortableBitSemantics + TypeGraphLayout> RustToCuda for &'a mu &mut self, alloc: CombinedCudaAlloc, ) -> CudaResult { - use rustacuda::memory::CopyDestination; + use cust::memory::CopyDestination; let (alloc_front, alloc_tail) = alloc.split(); diff --git a/src/lend/impls/slice_ref.rs b/src/lend/impls/slice_ref.rs index 0a97b673f..53558a17c 100644 --- a/src/lend/impls/slice_ref.rs +++ b/src/lend/impls/slice_ref.rs @@ -5,7 +5,7 @@ use std::mem::ManuallyDrop; use const_type_layout::{TypeGraphLayout, TypeLayout}; #[cfg(feature = "host")] -use rustacuda::{error::CudaResult, memory::DeviceBuffer, memory::LockedBuffer}; +use cust::{error::CudaResult, memory::DeviceBuffer, memory::LockedBuffer}; use crate::{ lend::{CudaAsRust, RustToCuda, RustToCudaAsync}, @@ -56,7 +56,7 @@ unsafe impl<'a, T: PortableBitSemantics + TypeGraphLayout> RustToCuda for &'a [T Ok(( DeviceAccessible::from(SliceRefCudaRepresentation { - data: DeviceConstPointer(device_buffer.as_ptr().cast()), + data: DeviceConstPointer(device_buffer.as_device_ptr().as_ptr().cast()), len: device_buffer.len(), _marker: PhantomData::<&'a [T]>, }), @@ -88,11 +88,11 @@ unsafe impl<'a, T: PortableBitSemantics + TypeGraphLayout> RustToCudaAsync for & &self, alloc: A, stream: crate::host::Stream<'stream>, - ) -> rustacuda::error::CudaResult<( + ) -> cust::error::CudaResult<( Async<'_, 'stream, DeviceAccessible>, CombinedCudaAlloc, )> { - use rustacuda::memory::AsyncCopyDestination; + use cust::memory::AsyncCopyDestination; let locked_buffer = unsafe { let mut uninit = CudaDropWrapper::from(LockedBuffer::< @@ -116,7 +116,7 @@ unsafe impl<'a, T: PortableBitSemantics + TypeGraphLayout> RustToCudaAsync for & Ok(( Async::pending( DeviceAccessible::from(SliceRefCudaRepresentation { - data: DeviceConstPointer(device_buffer.as_ptr().cast()), + data: DeviceConstPointer(device_buffer.as_device_ptr().as_ptr().cast()), len: device_buffer.len(), _marker: PhantomData::<&'a [T]>, }), diff --git a/src/lend/impls/slice_ref_mut.rs b/src/lend/impls/slice_ref_mut.rs index 0300735cd..59d9eeff5 100644 --- a/src/lend/impls/slice_ref_mut.rs +++ b/src/lend/impls/slice_ref_mut.rs @@ -3,7 +3,7 @@ use core::marker::PhantomData; use const_type_layout::{TypeGraphLayout, TypeLayout}; #[cfg(feature = "host")] -use rustacuda::{error::CudaResult, memory::DeviceBuffer}; +use cust::{error::CudaResult, memory::DeviceBuffer}; use crate::{ lend::{CudaAsRust, RustToCuda}, @@ -47,13 +47,13 @@ unsafe impl<'a, T: PortableBitSemantics + TypeGraphLayout> RustToCuda for &'a mu DeviceAccessible, CombinedCudaAlloc, )> { - let mut device_buffer = CudaDropWrapper::from(DeviceBuffer::from_slice( + let device_buffer = CudaDropWrapper::from(DeviceBuffer::from_slice( DeviceCopyWithPortableBitSemantics::from_slice(self), )?); Ok(( DeviceAccessible::from(SliceRefMutCudaRepresentation { - data: DeviceMutPointer(device_buffer.as_mut_ptr().cast()), + data: DeviceMutPointer(device_buffer.as_device_ptr().as_mut_ptr().cast()), len: device_buffer.len(), _marker: PhantomData::<&'a mut [T]>, }), @@ -66,7 +66,7 @@ unsafe impl<'a, T: PortableBitSemantics + TypeGraphLayout> RustToCuda for &'a mu &mut self, alloc: CombinedCudaAlloc, ) -> CudaResult { - use rustacuda::memory::CopyDestination; + use cust::memory::CopyDestination; let (alloc_front, alloc_tail) = alloc.split(); diff --git a/src/lend/mod.rs b/src/lend/mod.rs index 3bca11f75..7296473e9 100644 --- a/src/lend/mod.rs +++ b/src/lend/mod.rs @@ -1,6 +1,6 @@ use const_type_layout::TypeGraphLayout; #[cfg(feature = "host")] -use rustacuda::error::CudaError; +use cust::error::CudaError; #[cfg(feature = "derive")] #[expect(clippy::module_name_repetitions)] @@ -34,7 +34,7 @@ pub unsafe trait RustToCuda { #[cfg(feature = "host")] /// # Errors /// - /// Returns a [`rustacuda::error::CudaError`] iff an error occurs inside + /// Returns a [`cust::error::CudaError`] iff an error occurs inside /// CUDA /// /// # Safety @@ -46,7 +46,7 @@ pub unsafe trait RustToCuda { unsafe fn borrow( &self, alloc: A, - ) -> rustacuda::error::CudaResult<( + ) -> cust::error::CudaResult<( DeviceAccessible, CombinedCudaAlloc, )>; @@ -55,7 +55,7 @@ pub unsafe trait RustToCuda { #[cfg(feature = "host")] /// # Errors /// - /// Returns a [`rustacuda::error::CudaError`] iff an error occurs inside + /// Returns a [`cust::error::CudaError`] iff an error occurs inside /// CUDA /// /// # Safety @@ -64,7 +64,7 @@ pub unsafe trait RustToCuda { unsafe fn restore( &mut self, alloc: CombinedCudaAlloc, - ) -> rustacuda::error::CudaResult; + ) -> cust::error::CudaResult; } /// # Safety @@ -78,7 +78,7 @@ pub unsafe trait RustToCudaAsync: RustToCuda { #[cfg(feature = "host")] /// # Errors /// - /// Returns a [`rustacuda::error::CudaError`] iff an error occurs inside + /// Returns a [`cust::error::CudaError`] iff an error occurs inside /// CUDA /// /// # Safety @@ -101,7 +101,7 @@ pub unsafe trait RustToCudaAsync: RustToCuda { &self, alloc: A, stream: crate::host::Stream<'stream>, - ) -> rustacuda::error::CudaResult<( + ) -> cust::error::CudaResult<( Async<'_, 'stream, DeviceAccessible>, CombinedCudaAlloc, )>; @@ -110,7 +110,7 @@ pub unsafe trait RustToCudaAsync: RustToCuda { #[cfg(feature = "host")] /// # Errors /// - /// Returns a [`rustacuda::error::CudaError`] iff an error occurs inside + /// Returns a [`cust::error::CudaError`] iff an error occurs inside /// CUDA /// /// # Safety @@ -127,7 +127,7 @@ pub unsafe trait RustToCudaAsync: RustToCuda { this: owning_ref::BoxRefMut<'a, O, Self>, alloc: CombinedCudaAlloc, stream: crate::host::Stream<'stream>, - ) -> rustacuda::error::CudaResult<( + ) -> cust::error::CudaResult<( Async<'a, 'stream, owning_ref::BoxRefMut<'a, O, Self>, CompletionFnMut<'a, Self>>, A, )>; @@ -187,7 +187,7 @@ pub trait LendToCuda: RustToCuda { /// /// # Errors /// - /// Returns a `rustacuda::errors::CudaError` iff an error occurs inside CUDA + /// Returns a `cust::errors::CudaError` iff an error occurs inside CUDA fn lend_to_cuda_mut< O, E: From, @@ -339,7 +339,7 @@ pub trait LendToCudaAsync: RustToCudaAsync { /// /// # Errors /// - /// Returns a `rustacuda::errors::CudaError` iff an error occurs inside CUDA + /// Returns a `cust::errors::CudaError` iff an error occurs inside CUDA fn lend_to_cuda_mut_async< 'a, 'stream, diff --git a/src/utils/adapter.rs b/src/utils/adapter.rs index bc8bd161b..c8a533d80 100644 --- a/src/utils/adapter.rs +++ b/src/utils/adapter.rs @@ -124,7 +124,7 @@ unsafe impl RustToCuda unsafe fn borrow( &self, alloc: A, - ) -> rustacuda::error::CudaResult<( + ) -> cust::error::CudaResult<( DeviceAccessible, CombinedCudaAlloc, )> { @@ -136,7 +136,7 @@ unsafe impl RustToCuda unsafe fn restore( &mut self, alloc: CombinedCudaAlloc, - ) -> rustacuda::error::CudaResult { + ) -> cust::error::CudaResult { let (_alloc_front, alloc_tail): (NoCudaAlloc, A) = alloc.split(); Ok(alloc_tail) @@ -153,7 +153,7 @@ unsafe impl RustToCudaAsync &self, alloc: A, stream: crate::host::Stream<'stream>, - ) -> rustacuda::error::CudaResult<( + ) -> cust::error::CudaResult<( crate::utils::r#async::Async<'_, 'stream, DeviceAccessible>, CombinedCudaAlloc, )> { @@ -169,7 +169,7 @@ unsafe impl RustToCudaAsync this: owning_ref::BoxRefMut<'a, O, Self>, alloc: CombinedCudaAlloc, stream: crate::host::Stream<'stream>, - ) -> rustacuda::error::CudaResult<( + ) -> cust::error::CudaResult<( crate::utils::r#async::Async< 'a, 'stream, @@ -312,7 +312,7 @@ unsafe impl RustToCuda unsafe fn borrow( &self, alloc: A, - ) -> rustacuda::error::CudaResult<( + ) -> cust::error::CudaResult<( DeviceAccessible, CombinedCudaAlloc, )> { @@ -324,7 +324,7 @@ unsafe impl RustToCuda unsafe fn restore( &mut self, alloc: CombinedCudaAlloc, - ) -> rustacuda::error::CudaResult { + ) -> cust::error::CudaResult { let (_alloc_front, alloc_tail): (NoCudaAlloc, A) = alloc.split(); Ok(alloc_tail) @@ -341,7 +341,7 @@ unsafe impl RustToCudaAsync &self, alloc: A, stream: crate::host::Stream<'stream>, - ) -> rustacuda::error::CudaResult<( + ) -> cust::error::CudaResult<( crate::utils::r#async::Async<'_, 'stream, DeviceAccessible>, CombinedCudaAlloc, )> { @@ -357,7 +357,7 @@ unsafe impl RustToCudaAsync this: owning_ref::BoxRefMut<'a, O, Self>, alloc: CombinedCudaAlloc, stream: crate::host::Stream<'stream>, - ) -> rustacuda::error::CudaResult<( + ) -> cust::error::CudaResult<( crate::utils::r#async::Async< 'a, 'stream, @@ -394,7 +394,7 @@ unsafe impl CudaAsRust #[repr(transparent)] pub struct DeviceCopyWithPortableBitSemantics(T); -unsafe impl rustacuda_core::DeviceCopy +unsafe impl cust_core::DeviceCopy for DeviceCopyWithPortableBitSemantics { } diff --git a/src/utils/aliasing/const.rs b/src/utils/aliasing/const.rs index 4cd6eb228..624aa1ea5 100644 --- a/src/utils/aliasing/const.rs +++ b/src/utils/aliasing/const.rs @@ -193,7 +193,7 @@ unsafe impl RustToCuda unsafe fn borrow( &self, alloc: A, - ) -> rustacuda::error::CudaResult<( + ) -> cust::error::CudaResult<( DeviceAccessible, crate::alloc::CombinedCudaAlloc, )> { @@ -209,7 +209,7 @@ unsafe impl RustToCuda unsafe fn restore( &mut self, alloc: crate::alloc::CombinedCudaAlloc, - ) -> rustacuda::error::CudaResult { + ) -> cust::error::CudaResult { self.0.restore(alloc) } } @@ -224,7 +224,7 @@ unsafe impl RustToCudaAsync &self, alloc: A, stream: crate::host::Stream<'stream>, - ) -> rustacuda::error::CudaResult<( + ) -> cust::error::CudaResult<( crate::utils::r#async::Async<'_, 'stream, DeviceAccessible>, crate::alloc::CombinedCudaAlloc, )> { @@ -252,7 +252,7 @@ unsafe impl RustToCudaAsync this: owning_ref::BoxRefMut<'a, O, Self>, alloc: crate::alloc::CombinedCudaAlloc, stream: crate::host::Stream<'stream>, - ) -> rustacuda::error::CudaResult<( + ) -> cust::error::CudaResult<( crate::utils::r#async::Async< 'a, 'stream, diff --git a/src/utils/aliasing/dynamic.rs b/src/utils/aliasing/dynamic.rs index 2e16bf42e..2fd8c3646 100644 --- a/src/utils/aliasing/dynamic.rs +++ b/src/utils/aliasing/dynamic.rs @@ -170,7 +170,7 @@ unsafe impl RustToCuda for SplitSliceOverCudaThreadsDynamicStride unsafe fn borrow( &self, alloc: A, - ) -> rustacuda::error::CudaResult<( + ) -> cust::error::CudaResult<( DeviceAccessible, crate::alloc::CombinedCudaAlloc, )> { @@ -189,7 +189,7 @@ unsafe impl RustToCuda for SplitSliceOverCudaThreadsDynamicStride unsafe fn restore( &mut self, alloc: crate::alloc::CombinedCudaAlloc, - ) -> rustacuda::error::CudaResult { + ) -> cust::error::CudaResult { self.inner.restore(alloc) } } @@ -202,7 +202,7 @@ unsafe impl RustToCudaAsync for SplitSliceOverCudaThreadsDyn &self, alloc: A, stream: crate::host::Stream<'stream>, - ) -> rustacuda::error::CudaResult<( + ) -> cust::error::CudaResult<( crate::utils::r#async::Async<'_, 'stream, DeviceAccessible>, crate::alloc::CombinedCudaAlloc, )> { @@ -232,7 +232,7 @@ unsafe impl RustToCudaAsync for SplitSliceOverCudaThreadsDyn this: owning_ref::BoxRefMut<'a, O, Self>, alloc: crate::alloc::CombinedCudaAlloc, stream: crate::host::Stream<'stream>, - ) -> rustacuda::error::CudaResult<( + ) -> cust::error::CudaResult<( crate::utils::r#async::Async< 'a, 'stream, diff --git a/src/utils/async.rs b/src/utils/async.rs index 6221447a8..899e94cdb 100644 --- a/src/utils/async.rs +++ b/src/utils/async.rs @@ -2,7 +2,7 @@ use std::{borrow::BorrowMut, future::Future, future::IntoFuture, marker::PhantomData, task::Poll}; #[cfg(feature = "host")] -use rustacuda::{ +use cust::{ error::CudaError, error::CudaResult, event::Event, event::EventFlags, stream::StreamWaitEventFlags, }; @@ -136,7 +136,7 @@ impl<'a, 'stream, T: BorrowMut, C: Completion> Async<'a, 'strea /// such that its computation can be synchronised on. /// /// # Errors - /// Returns a [`rustacuda::error::CudaError`] iff an error occurs inside + /// Returns a [`cust::error::CudaError`] iff an error occurs inside /// CUDA. pub fn pending(value: T, stream: Stream<'stream>, completion: C) -> CudaResult { let (sender, receiver) = oneshot::channel(); @@ -160,11 +160,11 @@ impl<'a, 'stream, T: BorrowMut, C: Completion> Async<'a, 'strea /// operations. /// /// Calling `synchronize` after the computation has completed, e.g. after - /// calling [`rustacuda::stream::Stream::synchronize`], should be very + /// calling [`cust::stream::Stream::synchronize`], should be very /// cheap. /// /// # Errors - /// Returns a [`rustacuda::error::CudaError`] iff an error occurs inside + /// Returns a [`cust::error::CudaError`] iff an error occurs inside /// CUDA. pub fn synchronize(self) -> CudaResult { let (_stream, mut value, status) = self.destructure_into_parts(); @@ -198,7 +198,7 @@ impl<'a, 'stream, T: BorrowMut, C: Completion> Async<'a, 'strea /// used on the new one. /// /// # Errors - /// Returns a [`rustacuda::error::CudaError`] iff an error occurs inside + /// Returns a [`cust::error::CudaError`] iff an error occurs inside /// CUDA. pub fn move_to_stream<'stream_new>( self, diff --git a/src/utils/exchange/buffer/host.rs b/src/utils/exchange/buffer/host.rs index f5a3e5308..5c766fcae 100644 --- a/src/utils/exchange/buffer/host.rs +++ b/src/utils/exchange/buffer/host.rs @@ -4,7 +4,7 @@ use std::{ }; use const_type_layout::TypeGraphLayout; -use rustacuda::{ +use cust::{ error::CudaResult, memory::{DeviceBuffer, LockedBuffer}, }; @@ -45,7 +45,7 @@ impl< > CudaExchangeBufferHost { /// # Errors - /// Returns a [`rustacuda::error::CudaError`] iff an error occurs inside + /// Returns a [`cust::error::CudaError`] iff an error occurs inside /// CUDA pub fn new(elem: &T, capacity: usize) -> CudaResult { // Safety: CudaExchangeItem is a `repr(transparent)` wrapper around T @@ -70,7 +70,7 @@ impl { /// # Errors - /// Returns a [`rustacuda::error::CudaError`] iff an error occurs inside + /// Returns a [`cust::error::CudaError`] iff an error occurs inside /// CUDA pub fn from_vec(vec: Vec) -> CudaResult { let host_buffer = unsafe { @@ -127,7 +127,7 @@ impl( &self, alloc: A, - ) -> rustacuda::error::CudaResult<( + ) -> cust::error::CudaResult<( DeviceAccessible>, CombinedCudaAlloc, )> { @@ -138,7 +138,7 @@ impl( &mut self, alloc: CombinedCudaAlloc, - ) -> rustacuda::error::CudaResult { + ) -> cust::error::CudaResult { let (_alloc_front, alloc_tail) = alloc.split(); if M2H { // Only move the buffer contents back to the host if needed - rustacuda::memory::CopyDestination::copy_to( + cust::memory::CopyDestination::copy_to( &***self.device_buffer.get_mut(), self.host_buffer.as_mut_slice(), )?; @@ -180,7 +180,7 @@ impl, - ) -> rustacuda::error::CudaResult<( + ) -> cust::error::CudaResult<( Async<'_, 'stream, DeviceAccessible>>, CombinedCudaAlloc, )> { @@ -191,7 +191,7 @@ impl, alloc: CombinedCudaAlloc, stream: crate::host::Stream<'stream>, - ) -> rustacuda::error::CudaResult<( + ) -> cust::error::CudaResult<( Async<'a, 'stream, owning_ref::BoxRefMut<'a, O, Self>, CompletionFnMut<'a, Self>>, A, )> { @@ -228,7 +228,7 @@ impl CudaExchangeBuffer { /// # Errors - /// Returns a [`rustacuda::error::CudaError`] iff an error occurs inside + /// Returns a [`cust::error::CudaError`] iff an error occurs inside /// CUDA - pub fn new(elem: &T, capacity: usize) -> rustacuda::error::CudaResult { + pub fn new(elem: &T, capacity: usize) -> cust::error::CudaResult { Ok(Self { inner: host::CudaExchangeBufferHost::new(elem, capacity)?, }) @@ -77,9 +77,9 @@ impl { /// # Errors - /// Returns a [`rustacuda::error::CudaError`] iff an error occurs inside + /// Returns a [`cust::error::CudaError`] iff an error occurs inside /// CUDA - pub fn from_vec(vec: Vec) -> rustacuda::error::CudaResult { + pub fn from_vec(vec: Vec) -> cust::error::CudaResult { Ok(Self { inner: host::CudaExchangeBufferHost::from_vec(vec)?, }) @@ -117,7 +117,7 @@ unsafe impl( &self, alloc: A, - ) -> rustacuda::error::CudaResult<( + ) -> cust::error::CudaResult<( DeviceAccessible, CombinedCudaAlloc, )> { @@ -128,7 +128,7 @@ unsafe impl( &mut self, alloc: CombinedCudaAlloc, - ) -> rustacuda::error::CudaResult { + ) -> cust::error::CudaResult { self.inner.restore(alloc) } } @@ -144,7 +144,7 @@ unsafe impl, - ) -> rustacuda::error::CudaResult<( + ) -> cust::error::CudaResult<( Async<'_, 'stream, DeviceAccessible>, CombinedCudaAlloc, )> { @@ -156,7 +156,7 @@ unsafe impl, alloc: CombinedCudaAlloc, stream: crate::host::Stream<'stream>, - ) -> rustacuda::error::CudaResult<( + ) -> cust::error::CudaResult<( Async<'a, 'stream, owning_ref::BoxRefMut<'a, O, Self>, CompletionFnMut<'a, Self>>, A, )> { diff --git a/src/utils/exchange/wrapper.rs b/src/utils/exchange/wrapper.rs index ed15c63de..36bd68614 100644 --- a/src/utils/exchange/wrapper.rs +++ b/src/utils/exchange/wrapper.rs @@ -1,6 +1,6 @@ use std::ops::{Deref, DerefMut}; -use rustacuda::{ +use cust::{ error::CudaResult, memory::{AsyncCopyDestination, CopyDestination, DeviceBox, LockedBox}, }; @@ -55,7 +55,7 @@ pub struct ExchangeWrapperOnDevice impl> ExchangeWrapperOnHost { /// # Errors - /// Returns a [`rustacuda::error::CudaError`] iff an error occurs inside + /// Returns a [`cust::error::CudaError`] iff an error occurs inside /// CUDA pub fn new(value: T) -> CudaResult { // Safety: The uninitialised memory is never exposed @@ -65,13 +65,13 @@ impl> ExchangeWrapperOnHost { let (cuda_repr, _null_alloc) = unsafe { value.borrow(NoCudaAlloc) }?; let locked_cuda_repr = unsafe { - let mut uninit = CudaDropWrapper::from(LockedBox::< + let uninit = CudaDropWrapper::from(LockedBox::< DeviceCopyWithPortableBitSemantics< DeviceAccessible<::CudaRepresentation>, >, >::uninitialized()?); uninit - .as_mut_ptr() + .as_raw() .write(DeviceCopyWithPortableBitSemantics::from(cuda_repr)); uninit }; @@ -88,7 +88,7 @@ impl> ExchangeWrapperOnHost { /// via [`ExchangeWrapperOnDevice::as_mut_async`](Async::as_mut_async). /// /// # Errors - /// Returns a [`rustacuda::error::CudaError`] iff an error occurs inside + /// Returns a [`cust::error::CudaError`] iff an error occurs inside /// CUDA pub fn move_to_device(mut self) -> CudaResult> { let (cuda_repr, null_alloc) = unsafe { self.value.borrow(NoCudaAlloc) }?; @@ -113,7 +113,7 @@ impl( mut self, @@ -130,7 +130,7 @@ impl> ExchangeWrapperOnDevice { /// Moves the data synchronously back to the host CPU device. /// /// # Errors - /// Returns a [`rustacuda::error::CudaError`] iff an error occurs inside + /// Returns a [`cust::error::CudaError`] iff an error occurs inside /// CUDA pub fn move_to_host(mut self) -> CudaResult> { let null_alloc = NoCudaAlloc.into(); @@ -201,7 +201,7 @@ impl( self, @@ -259,7 +259,7 @@ impl< /// Moves the data asynchronously back to the host CPU device. /// /// # Errors - /// Returns a [`rustacuda::error::CudaError`] iff an error occurs inside + /// Returns a [`cust::error::CudaError`] iff an error occurs inside /// CUDA pub fn move_to_host_async( self, From bfc332a917fb8d3c7ddbfbbc938d564de6f04498 Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Sat, 8 Feb 2025 21:41:43 +0000 Subject: [PATCH 02/10] Update to experimental cust fork --- Cargo.toml | 6 ++-- examples/print/src/main.rs | 6 ++-- rust-cuda-kernel/src/kernel/link/mod.rs | 33 ++++++++-------------- rust-cuda-kernel/src/kernel/lints.rs | 2 +- rust-toolchain | 6 ++-- src/host/mod.rs | 9 +++--- src/kernel/mod.rs | 5 ++-- src/kernel/param.rs | 37 +++++++++++++------------ src/lend/impls/mod.rs | 2 +- src/lend/impls/ref.rs | 2 +- src/lend/impls/slice_ref.rs | 1 + src/lib.rs | 2 +- src/safety/aliasing.rs | 6 ++-- src/utils/async.rs | 16 +++++------ src/utils/exchange/wrapper.rs | 3 +- src/utils/ffi.rs | 14 +++++----- 16 files changed, 69 insertions(+), 81 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 450c0a989..c0e307352 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -98,9 +98,9 @@ kernel = ["dep:rust-cuda-kernel"] [dependencies] const-type-layout = { version = "0.3.2", default-features = false, features = ["derive"] } # FIXME: cust fails to compile without the `bytemuck` feature -cust = { version = "0.3.2", default-features = false, features = ["bytemuck"], optional = true } -cust_core = { version = "0.1", default-features = false } -cust_derive = { version = "0.2", default-features = false, optional = true } +cust = { git = "https://github.com/juntyr/Rust-GPU-CUDA.git", rev = "5365c14", version = "0.3.2", default-features = false, features = ["bytemuck"], optional = true } +cust_core = { git = "https://github.com/juntyr/Rust-GPU-CUDA.git", rev = "5365c14", version = "0.1", default-features = false } +cust_derive = { git = "https://github.com/juntyr/Rust-GPU-CUDA.git", rev = "5365c14", version = "0.2", default-features = false, optional = true } final = { version = "0.1.1", default-features = false, optional = true } oneshot = { version = "0.1", default-features = false, features = ["std", "async"], optional = true } regex = { version = "1.10", default-features = false, optional = true } diff --git a/examples/print/src/main.rs b/examples/print/src/main.rs index 1998a7057..e4ae250d0 100644 --- a/examples/print/src/main.rs +++ b/examples/print/src/main.rs @@ -15,9 +15,9 @@ fn main() -> rust_cuda::deps::cust::error::CudaResult<()> { // Create a CUDA context associated to this device let _context = rust_cuda::host::CudaDropWrapper::from( - rust_cuda::deps::cust::context::Context::create_and_push( - rust_cuda::deps::cust::context::ContextFlags::MAP_HOST - | rust_cuda::deps::cust::context::ContextFlags::SCHED_AUTO, + rust_cuda::deps::cust::context::legacy::Context::create_and_push( + rust_cuda::deps::cust::context::legacy::ContextFlags::MAP_HOST + | rust_cuda::deps::cust::context::legacy::ContextFlags::SCHED_AUTO, device, )?, ); diff --git a/rust-cuda-kernel/src/kernel/link/mod.rs b/rust-cuda-kernel/src/kernel/link/mod.rs index 49db5c264..00f33edd3 100644 --- a/rust-cuda-kernel/src/kernel/link/mod.rs +++ b/rust-cuda-kernel/src/kernel/link/mod.rs @@ -189,6 +189,7 @@ fn extract_ptx_kernel_layout(kernel_ptx: &mut String) -> proc_macro2::TokenStrea ); } + #[allow(clippy::literal_string_with_formatting_args)] // false positive if type_layout_metas .insert(String::from(param), bytes) .is_some() @@ -320,8 +321,7 @@ fn check_kernel_ptx_and_report( Ok(None) => (), Ok(Some(binary)) => { if ptx_lint_levels - .get(&PtxLint::DumpAssembly) - .map_or(false, |level| *level > LintLevel::Allow) + .get(&PtxLint::DumpAssembly).is_some_and(|level| *level > LintLevel::Allow) { const HEX: [char; 16] = [ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f', @@ -335,8 +335,7 @@ fn check_kernel_ptx_and_report( } if ptx_lint_levels - .get(&PtxLint::DumpAssembly) - .map_or(false, |level| *level > LintLevel::Warn) + .get(&PtxLint::DumpAssembly).is_some_and(|level| *level > LintLevel::Warn) { emit_call_site_error!( "{} compiled binary:\n{}\n\n{}", @@ -458,27 +457,22 @@ fn check_kernel_ptx( let mut options = options.clone(); if ptx_lint_levels - .get(&PtxLint::Verbose) - .map_or(false, |level| *level > LintLevel::Warn) + .get(&PtxLint::Verbose).is_some_and(|level| *level > LintLevel::Warn) { options.push(c"--verbose"); } if ptx_lint_levels - .get(&PtxLint::DoublePrecisionUse) - .map_or(false, |level| *level > LintLevel::Warn) + .get(&PtxLint::DoublePrecisionUse).is_some_and(|level| *level > LintLevel::Warn) { options.push(c"--warn-on-double-precision-use"); } if ptx_lint_levels - .get(&PtxLint::LocalMemoryUse) - .map_or(false, |level| *level > LintLevel::Warn) + .get(&PtxLint::LocalMemoryUse).is_some_and(|level| *level > LintLevel::Warn) { options.push(c"--warn-on-local-memory-usage"); } if ptx_lint_levels - .get(&PtxLint::RegisterSpills) - .map_or(false, |level| *level > LintLevel::Warn) - { + .get(&PtxLint::RegisterSpills).is_some_and(|level| *level > LintLevel::Warn) { options.push(c"--warn-on-spills"); } if ptx_lint_levels @@ -504,26 +498,21 @@ fn check_kernel_ptx( }; if ptx_lint_levels - .get(&PtxLint::Verbose) - .map_or(false, |level| *level > LintLevel::Allow) + .get(&PtxLint::Verbose).is_some_and(|level| *level > LintLevel::Allow) { options.push(c"--verbose"); } if ptx_lint_levels - .get(&PtxLint::DoublePrecisionUse) - .map_or(false, |level| *level > LintLevel::Allow) - { + .get(&PtxLint::DoublePrecisionUse).is_some_and(|level| *level > LintLevel::Allow) { options.push(c"--warn-on-double-precision-use"); } if ptx_lint_levels - .get(&PtxLint::LocalMemoryUse) - .map_or(false, |level| *level > LintLevel::Allow) + .get(&PtxLint::LocalMemoryUse).is_some_and(|level| *level > LintLevel::Allow) { options.push(c"--warn-on-local-memory-usage"); } if ptx_lint_levels - .get(&PtxLint::RegisterSpills) - .map_or(false, |level| *level > LintLevel::Allow) + .get(&PtxLint::RegisterSpills).is_some_and(|level| *level > LintLevel::Allow) { options.push(c"--warn-on-spills"); } diff --git a/rust-cuda-kernel/src/kernel/lints.rs b/rust-cuda-kernel/src/kernel/lints.rs index dd85a289f..c5d05704d 100644 --- a/rust-cuda-kernel/src/kernel/lints.rs +++ b/rust-cuda-kernel/src/kernel/lints.rs @@ -180,7 +180,7 @@ pub trait NestedMetaParser { ) -> syn::Result<()>; } -impl<'a> NestedMetaParser for syn::meta::ParseNestedMeta<'a> { +impl NestedMetaParser for syn::meta::ParseNestedMeta<'_> { fn path(&self) -> &syn::Path { &self.path } diff --git a/rust-toolchain b/rust-toolchain index 071c4ebfe..2404f256b 100644 --- a/rust-toolchain +++ b/rust-toolchain @@ -1,5 +1,5 @@ [toolchain] -# Pin to final 1.81.0 nightly -channel = "nightly-2024-07-21" +# Pin to final 1.85.0 nightly +channel = "nightly-2025-01-03" components = [ "cargo", "rustfmt", "clippy", "llvm-bitcode-linker", "llvm-tools" ] -targets = [ "x86_64-unknown-linux-gnu", "nvptx64-nvidia-cuda" ] +targets = [ "nvptx64-nvidia-cuda" ] diff --git a/src/host/mod.rs b/src/host/mod.rs index 782b589f8..c97452438 100644 --- a/src/host/mod.rs +++ b/src/host/mod.rs @@ -35,7 +35,7 @@ pub struct Stream<'stream> { _brand: InvariantLifetime<'stream>, } -impl<'stream> Deref for Stream<'stream> { +impl Deref for Stream<'_> { type Target = cust::stream::Stream; fn deref(&self) -> &Self::Target { @@ -43,7 +43,7 @@ impl<'stream> Deref for Stream<'stream> { } } -impl<'stream> Stream<'stream> { +impl Stream<'_> { /// Create a new uniquely branded [`Stream`], which can bind async /// operations to the [`Stream`] that they are computed on. /// @@ -152,6 +152,7 @@ macro_rules! impl_sealed_drop_value { impl_sealed_drop_value!(Module); impl_sealed_drop_value!(cust::stream::Stream); impl_sealed_drop_value!(Context); +impl_sealed_drop_value!(cust::context::legacy::Context); impl_sealed_drop_value!(Event); #[expect(clippy::module_name_repetitions)] @@ -271,13 +272,13 @@ pub struct HostAndDeviceConstRef<'a, T: PortableBitSemantics + TypeGraphLayout> host_ref: &'a T, } -impl<'a, T: PortableBitSemantics + TypeGraphLayout> Clone for HostAndDeviceConstRef<'a, T> { +impl Clone for HostAndDeviceConstRef<'_, T> { fn clone(&self) -> Self { *self } } -impl<'a, T: PortableBitSemantics + TypeGraphLayout> Copy for HostAndDeviceConstRef<'a, T> {} +impl Copy for HostAndDeviceConstRef<'_, T> {} impl<'a, T: PortableBitSemantics + TypeGraphLayout> HostAndDeviceConstRef<'a, T> { /// # Errors diff --git a/src/kernel/mod.rs b/src/kernel/mod.rs index 95d21457a..43f8fb563 100644 --- a/src/kernel/mod.rs +++ b/src/kernel/mod.rs @@ -7,12 +7,11 @@ use std::{ ptr::NonNull, }; -use cust::module::{ModuleJitOption, OptLevel}; #[cfg(feature = "host")] use cust::{ error::{CudaError, CudaResult}, function::Function, - module::Module, + module::{Module, ModuleJitOption, OptLevel}, }; #[cfg(feature = "kernel")] @@ -226,7 +225,7 @@ macro_rules! impl_launcher_launch { } #[cfg(feature = "host")] -impl<'stream, 'kernel, Kernel> Launcher<'stream, 'kernel, Kernel> { +impl<'stream, Kernel> Launcher<'stream, '_, Kernel> { impl_launcher_launch! { launch0() => with0_async => launch0_async } impl_launcher_launch! { launch1( diff --git a/src/kernel/param.rs b/src/kernel/param.rs index 6d95224dc..a54044e2f 100644 --- a/src/kernel/param.rs +++ b/src/kernel/param.rs @@ -157,6 +157,7 @@ impl< { } +#[cfg_attr(feature = "device", expect(clippy::needless_lifetimes))] impl< 'a, T: Sync + crate::safety::StackOnly + crate::safety::PortableBitSemantics + TypeGraphLayout, @@ -244,9 +245,8 @@ impl< } } impl< - 'a, T: Sync + crate::safety::StackOnly + crate::safety::PortableBitSemantics + TypeGraphLayout, - > sealed::Sealed for &'a PerThreadShallowCopy + > sealed::Sealed for &PerThreadShallowCopy { } @@ -342,9 +342,8 @@ impl< } } impl< - 'a, T: Sync + crate::safety::StackOnly + crate::safety::PortableBitSemantics + TypeGraphLayout, - > sealed::Sealed for &'a PtxJit> + > sealed::Sealed for &PtxJit> { } @@ -374,6 +373,7 @@ impl< } } +#[cfg_attr(feature = "device", expect(clippy::needless_lifetimes))] impl< 'a, T: Sync @@ -467,13 +467,12 @@ impl< } } impl< - 'a, T: crate::safety::StackOnly + Sync + crate::safety::PortableBitSemantics + TypeGraphLayout + InteriorMutableSync, - > sealed::Sealed for &'a ShallowInteriorMutable + > sealed::Sealed for &ShallowInteriorMutable { } @@ -618,6 +617,7 @@ impl< { } +#[cfg_attr(feature = "device", expect(clippy::needless_lifetimes))] impl<'a, T: Sync + RustToCuda> CudaKernelParameter for &'a DeepPerThreadBorrow { #[cfg(feature = "host")] type AsyncHostType<'stream, 'b> @@ -707,8 +707,9 @@ impl<'a, T: Sync + RustToCuda> CudaKernelParameter for &'a DeepPerThreadBorrow sealed::Sealed for &'a DeepPerThreadBorrow {} +impl sealed::Sealed for &DeepPerThreadBorrow {} +#[cfg_attr(feature = "device", expect(clippy::needless_lifetimes))] impl<'a, T: Sync + RustToCuda + SafeMutableAliasing> CudaKernelParameter for &'a mut DeepPerThreadBorrow { @@ -806,8 +807,8 @@ impl<'a, T: Sync + RustToCuda + SafeMutableAliasing> CudaKernelParameter } } } -impl<'a, T: Sync + RustToCuda + SafeMutableAliasing> sealed::Sealed - for &'a mut DeepPerThreadBorrow +impl sealed::Sealed + for &mut DeepPerThreadBorrow { } @@ -994,7 +995,7 @@ impl<'a, T: Sync + RustToCuda> CudaKernelParameter for &'a PtxJit sealed::Sealed for &'a PtxJit> {} +impl sealed::Sealed for &PtxJit> {} impl<'a, T: Sync + RustToCuda + SafeMutableAliasing> CudaKernelParameter for &'a mut PtxJit> @@ -1090,8 +1091,8 @@ impl<'a, T: Sync + RustToCuda + SafeMutableAliasing> CudaKernelParameter } } } -impl<'a, T: Sync + RustToCuda + SafeMutableAliasing> sealed::Sealed - for &'a mut PtxJit> +impl sealed::Sealed + for &mut PtxJit> { } @@ -1135,7 +1136,7 @@ mod private_shared { } } -impl<'a, T: 'static> CudaKernelParameter for &'a mut crate::utils::shared::ThreadBlockShared { +impl CudaKernelParameter for &mut crate::utils::shared::ThreadBlockShared { #[cfg(feature = "host")] type AsyncHostType<'stream, 'b> = &'b mut crate::utils::shared::ThreadBlockShared @@ -1218,10 +1219,10 @@ impl<'a, T: 'static> CudaKernelParameter for &'a mut crate::utils::shared::Threa inner.with(&mut param) } } -impl<'a, T: 'static> sealed::Sealed for &'a mut crate::utils::shared::ThreadBlockShared {} +impl sealed::Sealed for &mut crate::utils::shared::ThreadBlockShared {} -impl<'a, T: 'static + PortableBitSemantics + TypeGraphLayout> CudaKernelParameter - for &'a mut crate::utils::shared::ThreadBlockSharedSlice +impl CudaKernelParameter + for &mut crate::utils::shared::ThreadBlockSharedSlice { #[cfg(feature = "host")] type AsyncHostType<'stream, 'b> @@ -1307,7 +1308,7 @@ impl<'a, T: 'static + PortableBitSemantics + TypeGraphLayout> CudaKernelParamete } } } -impl<'a, T: 'static + PortableBitSemantics + TypeGraphLayout> sealed::Sealed - for &'a mut crate::utils::shared::ThreadBlockSharedSlice +impl sealed::Sealed + for &mut crate::utils::shared::ThreadBlockSharedSlice { } diff --git a/src/lend/impls/mod.rs b/src/lend/impls/mod.rs index 7f7af6ad2..13ee7d6a0 100644 --- a/src/lend/impls/mod.rs +++ b/src/lend/impls/mod.rs @@ -1,5 +1,5 @@ mod arc; -mod arced_slice; +// mod arced_slice; mod r#box; mod boxed_slice; #[cfg(feature = "final")] diff --git a/src/lend/impls/ref.rs b/src/lend/impls/ref.rs index 99318e055..d49f31cc6 100644 --- a/src/lend/impls/ref.rs +++ b/src/lend/impls/ref.rs @@ -71,7 +71,7 @@ unsafe impl<'a, T: PortableBitSemantics + TypeGraphLayout> RustToCuda for &'a T } } -unsafe impl<'a, T: PortableBitSemantics + TypeGraphLayout> RustToCudaAsync for &'a T { +unsafe impl RustToCudaAsync for &T { #[cfg(all(feature = "host", not(doc)))] type CudaAllocationAsync = CombinedCudaAlloc< CudaDropWrapper>>>, diff --git a/src/lend/impls/slice_ref.rs b/src/lend/impls/slice_ref.rs index 53558a17c..08f1c8418 100644 --- a/src/lend/impls/slice_ref.rs +++ b/src/lend/impls/slice_ref.rs @@ -74,6 +74,7 @@ unsafe impl<'a, T: PortableBitSemantics + TypeGraphLayout> RustToCuda for &'a [T } } +#[cfg_attr(feature = "device", expect(clippy::needless_lifetimes))] unsafe impl<'a, T: PortableBitSemantics + TypeGraphLayout> RustToCudaAsync for &'a [T] { #[cfg(all(feature = "host", not(doc)))] type CudaAllocationAsync = CombinedCudaAlloc< diff --git a/src/lib.rs b/src/lib.rs index 5605ad612..c065da2e0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -48,7 +48,7 @@ #![feature(generic_const_exprs)] #![expect(internal_features)] #![feature(core_intrinsics)] -#![feature(const_intrinsic_compare_bytes)] +// #![feature(const_intrinsic_compare_bytes)] #![doc(html_root_url = "https://juntyr.github.io/rust-cuda/")] #[cfg(all(feature = "host", feature = "device", not(doc)))] diff --git a/src/safety/aliasing.rs b/src/safety/aliasing.rs index 3a9cb8442..fefe7be0c 100644 --- a/src/safety/aliasing.rs +++ b/src/safety/aliasing.rs @@ -38,23 +38,21 @@ pub unsafe trait SafeMutableAliasing {} unsafe impl< - 'a, T: crate::safety::StackOnly + crate::safety::PortableBitSemantics + const_type_layout::TypeGraphLayout, const STRIDE: usize, > SafeMutableAliasing - for crate::utils::aliasing::SplitSliceOverCudaThreadsConstStride<&'a mut [T], STRIDE> + for crate::utils::aliasing::SplitSliceOverCudaThreadsConstStride<&mut [T], STRIDE> { } unsafe impl< - 'a, T: crate::safety::StackOnly + crate::safety::PortableBitSemantics + const_type_layout::TypeGraphLayout, > SafeMutableAliasing - for crate::utils::aliasing::SplitSliceOverCudaThreadsDynamicStride<&'a mut [T]> + for crate::utils::aliasing::SplitSliceOverCudaThreadsDynamicStride<&mut [T]> { } diff --git a/src/utils/async.rs b/src/utils/async.rs index 899e94cdb..1dfb79745 100644 --- a/src/utils/async.rs +++ b/src/utils/async.rs @@ -55,7 +55,7 @@ impl Completion for NoCompletion { impl sealed::Sealed for NoCompletion {} #[cfg(feature = "host")] -impl<'a, T: ?Sized + BorrowMut, B: ?Sized> Completion for CompletionFnMut<'a, B> { +impl, B: ?Sized> Completion for CompletionFnMut<'_, B> { type Completed = B; #[inline] @@ -74,7 +74,7 @@ impl<'a, T: ?Sized + BorrowMut, B: ?Sized> Completion for CompletionFnMut< } } #[cfg(feature = "host")] -impl<'a, T: ?Sized> sealed::Sealed for CompletionFnMut<'a, T> {} +impl sealed::Sealed for CompletionFnMut<'_, T> {} #[cfg(feature = "host")] impl, C: Completion> Completion for Option { @@ -87,7 +87,7 @@ impl, C: Completion> Completion for Op #[inline] fn synchronize_on_drop(&self) -> bool { - self.as_ref().map_or(false, Completion::synchronize_on_drop) + self.as_ref().is_some_and(Completion::synchronize_on_drop) } #[inline] @@ -407,7 +407,7 @@ where } #[cfg(feature = "host")] -impl<'a, 'stream, T: BorrowMut, C: Completion> Drop for Async<'a, 'stream, T, C> { +impl, C: Completion> Drop for Async<'_, '_, T, C> { fn drop(&mut self) { let AsyncStatus::Processing { receiver, @@ -434,8 +434,8 @@ struct AsyncFuture<'a, 'stream, T: BorrowMut, C: Completion> { } #[cfg(feature = "host")] -impl<'a, 'stream, T: BorrowMut, C: Completion> Future - for AsyncFuture<'a, 'stream, T, C> +impl, C: Completion> Future + for AsyncFuture<'_, '_, T, C> { type Output = CudaResult; @@ -517,8 +517,8 @@ impl<'a, 'stream, T: BorrowMut, C: Completion> IntoFuture } #[cfg(feature = "host")] -impl<'a, 'stream, T: BorrowMut, C: Completion> Drop - for AsyncFuture<'a, 'stream, T, C> +impl, C: Completion> Drop + for AsyncFuture<'_, '_, T, C> { fn drop(&mut self) { let Some(mut value) = self.value.take() else { diff --git a/src/utils/exchange/wrapper.rs b/src/utils/exchange/wrapper.rs index 36bd68614..3c56ebfc1 100644 --- a/src/utils/exchange/wrapper.rs +++ b/src/utils/exchange/wrapper.rs @@ -251,10 +251,9 @@ impl, - > Async<'a, 'stream, ExchangeWrapperOnDevice, NoCompletion> + > Async<'_, 'stream, ExchangeWrapperOnDevice, NoCompletion> { /// Moves the data asynchronously back to the host CPU device. /// diff --git a/src/utils/ffi.rs b/src/utils/ffi.rs index 9566a0c40..f94af17d8 100644 --- a/src/utils/ffi.rs +++ b/src/utils/ffi.rs @@ -66,16 +66,16 @@ pub struct DeviceConstRef<'r, T: PortableBitSemantics + 'r> { pub(crate) reference: PhantomData<&'r T>, } -impl<'r, T: PortableBitSemantics> Copy for DeviceConstRef<'r, T> {} +impl Copy for DeviceConstRef<'_, T> {} -impl<'r, T: PortableBitSemantics> Clone for DeviceConstRef<'r, T> { +impl Clone for DeviceConstRef<'_, T> { fn clone(&self) -> Self { *self } } #[cfg(feature = "device")] -impl<'r, T: PortableBitSemantics> AsRef for DeviceConstRef<'r, T> { +impl AsRef for DeviceConstRef<'_, T> { fn as_ref(&self) -> &T { unsafe { &*self.pointer.0 } } @@ -90,14 +90,14 @@ pub struct DeviceMutRef<'r, T: PortableBitSemantics + 'r> { } #[cfg(feature = "device")] -impl<'r, T: PortableBitSemantics> AsRef for DeviceMutRef<'r, T> { +impl AsRef for DeviceMutRef<'_, T> { fn as_ref(&self) -> &T { unsafe { &*self.pointer.0 } } } #[cfg(feature = "device")] -impl<'r, T: PortableBitSemantics> AsMut for DeviceMutRef<'r, T> { +impl AsMut for DeviceMutRef<'_, T> { fn as_mut(&mut self) -> &mut T { unsafe { &mut *self.pointer.0 } } @@ -113,14 +113,14 @@ pub struct DeviceOwnedRef<'r, T: PortableBitSemantics> { } #[cfg(feature = "device")] -impl<'r, T: PortableBitSemantics> AsRef for DeviceOwnedRef<'r, T> { +impl AsRef for DeviceOwnedRef<'_, T> { fn as_ref(&self) -> &T { unsafe { &*self.pointer.0 } } } #[cfg(feature = "device")] -impl<'r, T: PortableBitSemantics> AsMut for DeviceOwnedRef<'r, T> { +impl AsMut for DeviceOwnedRef<'_, T> { fn as_mut(&mut self) -> &mut T { unsafe { &mut *self.pointer.0 } } From 9a53d25e320d219bfa649bf17e20c6b051cd4714 Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Sun, 9 Feb 2025 05:25:27 +0000 Subject: [PATCH 03/10] Fix clippy lints --- examples/lifetime/src/main.rs | 20 ++++++------ rust-cuda-derive/src/rust_to_cuda/field_ty.rs | 1 - rust-cuda-derive/src/rust_to_cuda/mod.rs | 2 +- rust-cuda-kernel/src/kernel/link/mod.rs | 32 +++++++++++++------ src/kernel/mod.rs | 3 +- src/kernel/param.rs | 5 +-- src/kernel/ptx_jit/regex.rs | 4 --- src/lend/impls/arc.rs | 1 - src/lend/impls/box.rs | 1 - src/lend/impls/boxed_slice.rs | 1 - src/lend/impls/final.rs | 1 - src/lend/impls/option.rs | 1 - src/lend/impls/ref.rs | 1 - src/lend/impls/ref_mut.rs | 1 - src/lend/impls/slice_ref.rs | 1 - src/lend/impls/slice_ref_mut.rs | 1 - src/lib.rs | 2 -- src/safety/aliasing.rs | 1 - src/safety/portable.rs | 1 - src/utils/async.rs | 8 ++--- src/utils/exchange/buffer/device.rs | 1 - src/utils/exchange/buffer/host.rs | 1 - src/utils/shared/slice.rs | 1 - 23 files changed, 37 insertions(+), 54 deletions(-) diff --git a/examples/lifetime/src/main.rs b/examples/lifetime/src/main.rs index 78cbe943d..212d5de45 100644 --- a/examples/lifetime/src/main.rs +++ b/examples/lifetime/src/main.rs @@ -2,30 +2,30 @@ use lifetime::{kernel, link}; -fn main() -> rust_cuda::deps::rustacuda::error::CudaResult<()> { +fn main() -> rust_cuda::deps::cust::error::CudaResult<()> { // Link the lifetime-only-generic CUDA kernel struct KernelPtx<'a, 'b>(core::marker::PhantomData<(&'a (), &'b ())>); link! { impl kernel<'a, 'b> for KernelPtx } // Initialize the CUDA API - rust_cuda::deps::rustacuda::init(rust_cuda::deps::rustacuda::CudaFlags::empty())?; + rust_cuda::deps::cust::init(rust_cuda::deps::cust::CudaFlags::empty())?; // Get the first CUDA GPU device - let device = rust_cuda::deps::rustacuda::device::Device::get_device(0)?; + let device = rust_cuda::deps::cust::device::Device::get_device(0)?; // Create a CUDA context associated to this device let _context = rust_cuda::host::CudaDropWrapper::from( - rust_cuda::deps::rustacuda::context::Context::create_and_push( - rust_cuda::deps::rustacuda::context::ContextFlags::MAP_HOST - | rust_cuda::deps::rustacuda::context::ContextFlags::SCHED_AUTO, + rust_cuda::deps::cust::context::legacy::Context::create_and_push( + rust_cuda::deps::cust::context::legacy::ContextFlags::MAP_HOST + | rust_cuda::deps::cust::context::legacy::ContextFlags::SCHED_AUTO, device, )?, ); // Create a new CUDA stream to submit kernels to let mut stream = - rust_cuda::host::CudaDropWrapper::from(rust_cuda::deps::rustacuda::stream::Stream::new( - rust_cuda::deps::rustacuda::stream::StreamFlags::NON_BLOCKING, + rust_cuda::host::CudaDropWrapper::from(rust_cuda::deps::cust::stream::Stream::new( + rust_cuda::deps::cust::stream::StreamFlags::NON_BLOCKING, None, )?); @@ -34,8 +34,8 @@ fn main() -> rust_cuda::deps::rustacuda::error::CudaResult<()> { // Create a new instance of the CUDA kernel and prepare the launch config let mut kernel = rust_cuda::kernel::TypedPtxKernel::::new::(None); let config = rust_cuda::kernel::LaunchConfig { - grid: rust_cuda::deps::rustacuda::function::GridSize::x(1), - block: rust_cuda::deps::rustacuda::function::BlockSize::x(4), + grid: rust_cuda::deps::cust::function::GridSize::x(1), + block: rust_cuda::deps::cust::function::BlockSize::x(4), ptx_jit: false, }; diff --git a/rust-cuda-derive/src/rust_to_cuda/field_ty.rs b/rust-cuda-derive/src/rust_to_cuda/field_ty.rs index 4278d308c..8e167a626 100644 --- a/rust-cuda-derive/src/rust_to_cuda/field_ty.rs +++ b/rust-cuda-derive/src/rust_to_cuda/field_ty.rs @@ -1,6 +1,5 @@ use syn::{parse_quote, spanned::Spanned}; -#[expect(clippy::module_name_repetitions)] pub enum CudaReprFieldTy { SafeDeviceCopy, RustToCuda { diff --git a/rust-cuda-derive/src/rust_to_cuda/mod.rs b/rust-cuda-derive/src/rust_to_cuda/mod.rs index 800c58fa7..099f97a1d 100644 --- a/rust-cuda-derive/src/rust_to_cuda/mod.rs +++ b/rust-cuda-derive/src/rust_to_cuda/mod.rs @@ -10,7 +10,7 @@ fn get_cuda_repr_ident(rust_repr_ident: &proc_macro2::Ident) -> proc_macro2::Ide format_ident!("{}CudaRepresentation", rust_repr_ident) } -#[expect(clippy::module_name_repetitions, clippy::too_many_lines)] +#[expect(clippy::too_many_lines)] pub fn impl_rust_to_cuda(ast: &syn::DeriveInput) -> proc_macro::TokenStream { let (mut struct_fields_cuda, struct_semi_cuda) = if let syn::Data::Struct(s) = &ast.data { (s.fields.clone(), s.semi_token) diff --git a/rust-cuda-kernel/src/kernel/link/mod.rs b/rust-cuda-kernel/src/kernel/link/mod.rs index 00f33edd3..8b4a549bb 100644 --- a/rust-cuda-kernel/src/kernel/link/mod.rs +++ b/rust-cuda-kernel/src/kernel/link/mod.rs @@ -321,7 +321,8 @@ fn check_kernel_ptx_and_report( Ok(None) => (), Ok(Some(binary)) => { if ptx_lint_levels - .get(&PtxLint::DumpAssembly).is_some_and(|level| *level > LintLevel::Allow) + .get(&PtxLint::DumpAssembly) + .is_some_and(|level| *level > LintLevel::Allow) { const HEX: [char; 16] = [ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f', @@ -335,7 +336,8 @@ fn check_kernel_ptx_and_report( } if ptx_lint_levels - .get(&PtxLint::DumpAssembly).is_some_and(|level| *level > LintLevel::Warn) + .get(&PtxLint::DumpAssembly) + .is_some_and(|level| *level > LintLevel::Warn) { emit_call_site_error!( "{} compiled binary:\n{}\n\n{}", @@ -457,22 +459,27 @@ fn check_kernel_ptx( let mut options = options.clone(); if ptx_lint_levels - .get(&PtxLint::Verbose).is_some_and(|level| *level > LintLevel::Warn) + .get(&PtxLint::Verbose) + .is_some_and(|level| *level > LintLevel::Warn) { options.push(c"--verbose"); } if ptx_lint_levels - .get(&PtxLint::DoublePrecisionUse).is_some_and(|level| *level > LintLevel::Warn) + .get(&PtxLint::DoublePrecisionUse) + .is_some_and(|level| *level > LintLevel::Warn) { options.push(c"--warn-on-double-precision-use"); } if ptx_lint_levels - .get(&PtxLint::LocalMemoryUse).is_some_and(|level| *level > LintLevel::Warn) + .get(&PtxLint::LocalMemoryUse) + .is_some_and(|level| *level > LintLevel::Warn) { options.push(c"--warn-on-local-memory-usage"); } if ptx_lint_levels - .get(&PtxLint::RegisterSpills).is_some_and(|level| *level > LintLevel::Warn) { + .get(&PtxLint::RegisterSpills) + .is_some_and(|level| *level > LintLevel::Warn) + { options.push(c"--warn-on-spills"); } if ptx_lint_levels @@ -498,21 +505,26 @@ fn check_kernel_ptx( }; if ptx_lint_levels - .get(&PtxLint::Verbose).is_some_and(|level| *level > LintLevel::Allow) + .get(&PtxLint::Verbose) + .is_some_and(|level| *level > LintLevel::Allow) { options.push(c"--verbose"); } if ptx_lint_levels - .get(&PtxLint::DoublePrecisionUse).is_some_and(|level| *level > LintLevel::Allow) { + .get(&PtxLint::DoublePrecisionUse) + .is_some_and(|level| *level > LintLevel::Allow) + { options.push(c"--warn-on-double-precision-use"); } if ptx_lint_levels - .get(&PtxLint::LocalMemoryUse).is_some_and(|level| *level > LintLevel::Allow) + .get(&PtxLint::LocalMemoryUse) + .is_some_and(|level| *level > LintLevel::Allow) { options.push(c"--warn-on-local-memory-usage"); } if ptx_lint_levels - .get(&PtxLint::RegisterSpills).is_some_and(|level| *level > LintLevel::Allow) + .get(&PtxLint::RegisterSpills) + .is_some_and(|level| *level > LintLevel::Allow) { options.push(c"--warn-on-spills"); } diff --git a/src/kernel/mod.rs b/src/kernel/mod.rs index 43f8fb563..a6134af30 100644 --- a/src/kernel/mod.rs +++ b/src/kernel/mod.rs @@ -1,4 +1,3 @@ -use core::str; #[cfg(feature = "host")] use std::{ ffi::{CStr, CString}, @@ -309,7 +308,7 @@ impl RawPtxKernel { // FIXME: cust's Module::get_function takes a str and turns it back into // a CString immediately let function = unsafe { &*std::ptr::from_ref(module.as_ref()) } - .get_function(unsafe { str::from_utf8_unchecked(entry_point.to_bytes()) }); + .get_function(unsafe { core::str::from_utf8_unchecked(entry_point.to_bytes()) }); let function = match function { Ok(function) => function, diff --git a/src/kernel/param.rs b/src/kernel/param.rs index a54044e2f..34cd03f6b 100644 --- a/src/kernel/param.rs +++ b/src/kernel/param.rs @@ -807,10 +807,7 @@ impl<'a, T: Sync + RustToCuda + SafeMutableAliasing> CudaKernelParameter } } } -impl sealed::Sealed - for &mut DeepPerThreadBorrow -{ -} +impl sealed::Sealed for &mut DeepPerThreadBorrow {} impl< T: Send diff --git a/src/kernel/ptx_jit/regex.rs b/src/kernel/ptx_jit/regex.rs index f07f64fa5..d5237475e 100644 --- a/src/kernel/ptx_jit/regex.rs +++ b/src/kernel/ptx_jit/regex.rs @@ -2,7 +2,6 @@ use std::sync::OnceLock; use regex::bytes::Regex; -#[expect(clippy::module_name_repetitions)] pub fn const_marker_regex() -> &'static Regex { static CONST_MARKER_REGEX: OnceLock = OnceLock::new(); #[allow(clippy::unwrap_used)] @@ -12,7 +11,6 @@ pub fn const_marker_regex() -> &'static Regex { }) } -#[expect(clippy::module_name_repetitions)] pub fn const_base_register_regex() -> &'static Regex { static CONST_BASE_REGISTER_REGEX: OnceLock = OnceLock::new(); #[allow(clippy::unwrap_used)] @@ -22,7 +20,6 @@ pub fn const_base_register_regex() -> &'static Regex { }) } -#[expect(clippy::module_name_repetitions)] pub fn const_load_instruction_regex() -> &'static Regex { static CONST_LOAD_INSTRUCTION_REGEX: OnceLock = OnceLock::new(); #[allow(clippy::unwrap_used)] @@ -54,7 +51,6 @@ pub fn const_load_instruction_regex() -> &'static Regex { }) } -#[expect(clippy::module_name_repetitions)] pub fn register_regex() -> &'static Regex { static REGISTER_REGEX: OnceLock = OnceLock::new(); #[allow(clippy::unwrap_used)] diff --git a/src/lend/impls/arc.rs b/src/lend/impls/arc.rs index 9bb3e1cb0..ec5527330 100644 --- a/src/lend/impls/arc.rs +++ b/src/lend/impls/arc.rs @@ -30,7 +30,6 @@ use crate::{ #[doc(hidden)] #[repr(transparent)] #[derive(TypeLayout)] -#[expect(clippy::module_name_repetitions)] pub struct ArcCudaRepresentation( DeviceOwnedPointer<_ArcInner>, ); diff --git a/src/lend/impls/box.rs b/src/lend/impls/box.rs index 9c16f07a4..2bd7ec78c 100644 --- a/src/lend/impls/box.rs +++ b/src/lend/impls/box.rs @@ -29,7 +29,6 @@ use crate::{ #[doc(hidden)] #[repr(transparent)] #[derive(TypeLayout)] -#[expect(clippy::module_name_repetitions)] pub struct BoxCudaRepresentation(DeviceOwnedPointer); unsafe impl RustToCuda for Box { diff --git a/src/lend/impls/boxed_slice.rs b/src/lend/impls/boxed_slice.rs index 8b0937b06..8d00e49e8 100644 --- a/src/lend/impls/boxed_slice.rs +++ b/src/lend/impls/boxed_slice.rs @@ -26,7 +26,6 @@ use crate::{ }; #[doc(hidden)] -#[expect(clippy::module_name_repetitions)] #[derive(TypeLayout)] #[repr(C)] pub struct BoxedSliceCudaRepresentation { diff --git a/src/lend/impls/final.rs b/src/lend/impls/final.rs index 51b228c24..68569d7a4 100644 --- a/src/lend/impls/final.rs +++ b/src/lend/impls/final.rs @@ -6,7 +6,6 @@ use crate::{ }; #[doc(hidden)] -#[expect(clippy::module_name_repetitions)] #[derive(const_type_layout::TypeLayout)] #[repr(transparent)] pub struct FinalCudaRepresentation(DeviceAccessible); diff --git a/src/lend/impls/option.rs b/src/lend/impls/option.rs index 931c7e952..bca51faf3 100644 --- a/src/lend/impls/option.rs +++ b/src/lend/impls/option.rs @@ -18,7 +18,6 @@ use crate::{ }; #[doc(hidden)] -#[expect(clippy::module_name_repetitions)] #[derive(TypeLayout)] #[repr(C)] pub struct OptionCudaRepresentation { diff --git a/src/lend/impls/ref.rs b/src/lend/impls/ref.rs index d49f31cc6..4224f51a5 100644 --- a/src/lend/impls/ref.rs +++ b/src/lend/impls/ref.rs @@ -27,7 +27,6 @@ use crate::{ #[doc(hidden)] #[repr(transparent)] #[derive(TypeLayout)] -#[expect(clippy::module_name_repetitions)] pub struct RefCudaRepresentation<'a, T: 'a + PortableBitSemantics + TypeGraphLayout> { data: DeviceConstPointer, _marker: PhantomData<&'a T>, diff --git a/src/lend/impls/ref_mut.rs b/src/lend/impls/ref_mut.rs index 6945c9bc2..3ade45276 100644 --- a/src/lend/impls/ref_mut.rs +++ b/src/lend/impls/ref_mut.rs @@ -24,7 +24,6 @@ use crate::{ #[doc(hidden)] #[repr(transparent)] #[derive(TypeLayout)] -#[expect(clippy::module_name_repetitions)] pub struct RefMutCudaRepresentation<'a, T: 'a + PortableBitSemantics + TypeGraphLayout> { data: DeviceMutPointer, _marker: PhantomData<&'a mut T>, diff --git a/src/lend/impls/slice_ref.rs b/src/lend/impls/slice_ref.rs index 08f1c8418..062058668 100644 --- a/src/lend/impls/slice_ref.rs +++ b/src/lend/impls/slice_ref.rs @@ -25,7 +25,6 @@ use crate::{ }; #[doc(hidden)] -#[expect(clippy::module_name_repetitions)] #[derive(TypeLayout)] #[repr(C)] pub struct SliceRefCudaRepresentation<'a, T: 'a + PortableBitSemantics + TypeGraphLayout> { diff --git a/src/lend/impls/slice_ref_mut.rs b/src/lend/impls/slice_ref_mut.rs index 59d9eeff5..c98ae3111 100644 --- a/src/lend/impls/slice_ref_mut.rs +++ b/src/lend/impls/slice_ref_mut.rs @@ -22,7 +22,6 @@ use crate::{ }; #[doc(hidden)] -#[expect(clippy::module_name_repetitions)] #[derive(TypeLayout)] #[repr(C)] pub struct SliceRefMutCudaRepresentation<'a, T: 'a + PortableBitSemantics + TypeGraphLayout> { diff --git a/src/lib.rs b/src/lib.rs index c065da2e0..0511e0191 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -29,7 +29,6 @@ #![feature(negative_impls)] #![cfg_attr(all(feature = "device", not(doc)), feature(stdarch_nvptx))] #![cfg_attr(feature = "device", feature(asm_experimental_arch))] -#![cfg_attr(feature = "device", feature(asm_const))] #![feature(doc_auto_cfg)] #![feature(doc_cfg)] #![feature(marker_trait_attr)] @@ -48,7 +47,6 @@ #![feature(generic_const_exprs)] #![expect(internal_features)] #![feature(core_intrinsics)] -// #![feature(const_intrinsic_compare_bytes)] #![doc(html_root_url = "https://juntyr.github.io/rust-cuda/")] #[cfg(all(feature = "host", feature = "device", not(doc)))] diff --git a/src/safety/aliasing.rs b/src/safety/aliasing.rs index fefe7be0c..100dbbbd8 100644 --- a/src/safety/aliasing.rs +++ b/src/safety/aliasing.rs @@ -1,4 +1,3 @@ -#[expect(clippy::module_name_repetitions)] /// Types for which mutable references can be safely shared with each CUDA /// thread without breaking Rust's no-mutable-aliasing memory safety /// guarantees. diff --git a/src/safety/portable.rs b/src/safety/portable.rs index 6013b7d74..9e81d2cfc 100644 --- a/src/safety/portable.rs +++ b/src/safety/portable.rs @@ -36,7 +36,6 @@ macro_rules! portable_bit_semantics_docs { #[cfg(not(doc))] portable_bit_semantics_docs! { - #[expect(clippy::module_name_repetitions)] pub trait PortableBitSemantics: sealed::PortableBitSemantics {} } #[cfg(doc)] diff --git a/src/utils/async.rs b/src/utils/async.rs index 1dfb79745..a6791b313 100644 --- a/src/utils/async.rs +++ b/src/utils/async.rs @@ -434,9 +434,7 @@ struct AsyncFuture<'a, 'stream, T: BorrowMut, C: Completion> { } #[cfg(feature = "host")] -impl, C: Completion> Future - for AsyncFuture<'_, '_, T, C> -{ +impl, C: Completion> Future for AsyncFuture<'_, '_, T, C> { type Output = CudaResult; fn poll( @@ -517,9 +515,7 @@ impl<'a, 'stream, T: BorrowMut, C: Completion> IntoFuture } #[cfg(feature = "host")] -impl, C: Completion> Drop - for AsyncFuture<'_, '_, T, C> -{ +impl, C: Completion> Drop for AsyncFuture<'_, '_, T, C> { fn drop(&mut self) { let Some(mut value) = self.value.take() else { return; diff --git a/src/utils/exchange/buffer/device.rs b/src/utils/exchange/buffer/device.rs index 760fe4d35..047652186 100644 --- a/src/utils/exchange/buffer/device.rs +++ b/src/utils/exchange/buffer/device.rs @@ -9,7 +9,6 @@ use crate::{ use super::CudaExchangeItem; -#[expect(clippy::module_name_repetitions)] pub struct CudaExchangeBufferDevice< T: StackOnly + PortableBitSemantics + TypeGraphLayout, const M2D: bool, diff --git a/src/utils/exchange/buffer/host.rs b/src/utils/exchange/buffer/host.rs index 5c766fcae..05b8d1ead 100644 --- a/src/utils/exchange/buffer/host.rs +++ b/src/utils/exchange/buffer/host.rs @@ -22,7 +22,6 @@ use crate::{ use super::{common::CudaExchangeBufferCudaRepresentation, CudaExchangeItem}; -#[expect(clippy::module_name_repetitions)] pub struct CudaExchangeBufferHost< T: StackOnly + PortableBitSemantics + TypeGraphLayout, const M2D: bool, diff --git a/src/utils/shared/slice.rs b/src/utils/shared/slice.rs index a3df82d06..f239ce7a8 100644 --- a/src/utils/shared/slice.rs +++ b/src/utils/shared/slice.rs @@ -2,7 +2,6 @@ use core::alloc::Layout; use const_type_layout::TypeGraphLayout; -#[expect(clippy::module_name_repetitions)] #[repr(transparent)] pub struct ThreadBlockSharedSlice { shared: *mut [T], From ccad446f4e81d59b131d347f378ec4fe6a98aa78 Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Sun, 9 Feb 2025 06:17:42 +0000 Subject: [PATCH 04/10] Add back Arc<[T]> lending --- src/lend/impls/arced_slice.rs | 24 ++++++++++++------------ src/lend/impls/mod.rs | 2 +- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/lend/impls/arced_slice.rs b/src/lend/impls/arced_slice.rs index 1fc334589..672cc2a6b 100644 --- a/src/lend/impls/arced_slice.rs +++ b/src/lend/impls/arced_slice.rs @@ -33,7 +33,6 @@ use crate::{ }; #[doc(hidden)] -#[expect(clippy::module_name_repetitions)] #[derive(TypeLayout)] #[repr(C)] pub struct ArcedSliceCudaRepresentation { @@ -81,31 +80,32 @@ unsafe impl RustToCuda for Arc<[T]> { DeviceAccessible, CombinedCudaAlloc, )> { - use cust::memory::{CopyDestination, DevicePointer, DeviceSlice}; + use cust::memory::{CopyDestination, DeviceSlice}; let data_ptr: *const T = std::ptr::from_ref(&**self).as_ptr(); let offset = std::mem::offset_of!(_ArcInner<[T; 42]>, data); let arc_ptr: *const _ArcInner<[T; 42]> = data_ptr.byte_sub(offset).cast(); - let header_len = (offset + (std::mem::align_of::() - 1)) / std::mem::align_of::(); + let header_len = offset.div_ceil(std::mem::align_of::()); - let mut device_buffer = CudaDropWrapper::from(DeviceBuffer::< + let device_buffer = CudaDropWrapper::from(DeviceBuffer::< DeviceCopyWithPortableBitSemantics, >::uninitialized( header_len + self.len() )?); - let (header, buffer): (&mut DeviceSlice<_>, &mut DeviceSlice<_>) = - device_buffer.split_at_mut(header_len); + + let mut buffer: DeviceSlice<_> = device_buffer.index(header_len..); buffer.copy_from(std::slice::from_raw_parts(self.as_ptr().cast(), self.len()))?; + + let header: DeviceSlice<_> = device_buffer.index(..header_len); let header = DeviceSlice::from_raw_parts_mut( - DevicePointer::wrap(header.as_mut_ptr().cast::()), + header.as_device_ptr().cast::(), header.len() * std::mem::size_of::(), ); - let (_, header) = header.split_at_mut(header.len() - offset); - let (header, _) = header.split_at_mut(std::mem::size_of::<_ArcInnerHeader>()); - #[expect(clippy::cast_ptr_alignment)] + let header = header.index((header.len() - offset)..); + let header = header.index(..std::mem::size_of::<_ArcInnerHeader>()); let mut header: ManuallyDrop> = ManuallyDrop::new( - DeviceBox::from_raw(header.as_mut_ptr().cast::<_ArcInnerHeader>()), + DeviceBox::from_device(header.as_device_ptr().cast::<_ArcInnerHeader>()), ); header.copy_from(&*arc_ptr.cast::<_ArcInnerHeader>())?; @@ -152,7 +152,7 @@ unsafe impl RustToCudaAsync for Arc<[ let offset = std::mem::offset_of!(_ArcInner<[T; 42]>, data); let arc_ptr: *const _ArcInner<[T; 42]> = data_ptr.byte_sub(offset).cast(); - let header_len = (offset + (std::mem::align_of::() - 1)) / std::mem::align_of::(); + let header_len = offset.div_ceil(std::mem::align_of::()); let locked_buffer = unsafe { let mut locked_buffer = diff --git a/src/lend/impls/mod.rs b/src/lend/impls/mod.rs index 13ee7d6a0..7f7af6ad2 100644 --- a/src/lend/impls/mod.rs +++ b/src/lend/impls/mod.rs @@ -1,5 +1,5 @@ mod arc; -// mod arced_slice; +mod arced_slice; mod r#box; mod boxed_slice; #[cfg(feature = "final")] From e04c308e9677a2ad24a86f1bd014521fb7224395 Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Sun, 9 Feb 2025 09:36:54 +0000 Subject: [PATCH 05/10] Fix clippy lints --- src/kernel/param.rs | 8 ++++---- src/lend/impls/slice_ref.rs | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/kernel/param.rs b/src/kernel/param.rs index 34cd03f6b..9ab27fa1b 100644 --- a/src/kernel/param.rs +++ b/src/kernel/param.rs @@ -157,7 +157,7 @@ impl< { } -#[cfg_attr(feature = "device", expect(clippy::needless_lifetimes))] +#[cfg_attr(not(feature = "host"), expect(clippy::needless_lifetimes))] impl< 'a, T: Sync + crate::safety::StackOnly + crate::safety::PortableBitSemantics + TypeGraphLayout, @@ -373,7 +373,7 @@ impl< } } -#[cfg_attr(feature = "device", expect(clippy::needless_lifetimes))] +#[cfg_attr(not(feature = "host"), expect(clippy::needless_lifetimes))] impl< 'a, T: Sync @@ -617,7 +617,7 @@ impl< { } -#[cfg_attr(feature = "device", expect(clippy::needless_lifetimes))] +#[cfg_attr(not(feature = "host"), expect(clippy::needless_lifetimes))] impl<'a, T: Sync + RustToCuda> CudaKernelParameter for &'a DeepPerThreadBorrow { #[cfg(feature = "host")] type AsyncHostType<'stream, 'b> @@ -709,7 +709,7 @@ impl<'a, T: Sync + RustToCuda> CudaKernelParameter for &'a DeepPerThreadBorrow sealed::Sealed for &DeepPerThreadBorrow {} -#[cfg_attr(feature = "device", expect(clippy::needless_lifetimes))] +#[cfg_attr(not(feature = "host"), expect(clippy::needless_lifetimes))] impl<'a, T: Sync + RustToCuda + SafeMutableAliasing> CudaKernelParameter for &'a mut DeepPerThreadBorrow { diff --git a/src/lend/impls/slice_ref.rs b/src/lend/impls/slice_ref.rs index 062058668..400ef0669 100644 --- a/src/lend/impls/slice_ref.rs +++ b/src/lend/impls/slice_ref.rs @@ -73,7 +73,7 @@ unsafe impl<'a, T: PortableBitSemantics + TypeGraphLayout> RustToCuda for &'a [T } } -#[cfg_attr(feature = "device", expect(clippy::needless_lifetimes))] +#[cfg_attr(not(feature = "host"), expect(clippy::needless_lifetimes))] unsafe impl<'a, T: PortableBitSemantics + TypeGraphLayout> RustToCudaAsync for &'a [T] { #[cfg(all(feature = "host", not(doc)))] type CudaAllocationAsync = CombinedCudaAlloc< From 5b850130c5efeabe8d8dd818f44f38cb00538bbf Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Tue, 11 Feb 2025 08:07:12 +0000 Subject: [PATCH 06/10] Remove extraneous cust/bytemuck feature --- Cargo.toml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index c0e307352..ac41c8150 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -97,8 +97,7 @@ kernel = ["dep:rust-cuda-kernel"] [dependencies] const-type-layout = { version = "0.3.2", default-features = false, features = ["derive"] } -# FIXME: cust fails to compile without the `bytemuck` feature -cust = { git = "https://github.com/juntyr/Rust-GPU-CUDA.git", rev = "5365c14", version = "0.3.2", default-features = false, features = ["bytemuck"], optional = true } +cust = { git = "https://github.com/juntyr/Rust-GPU-CUDA.git", rev = "5365c14", version = "0.3.2", default-features = false, optional = true } cust_core = { git = "https://github.com/juntyr/Rust-GPU-CUDA.git", rev = "5365c14", version = "0.1", default-features = false } cust_derive = { git = "https://github.com/juntyr/Rust-GPU-CUDA.git", rev = "5365c14", version = "0.2", default-features = false, optional = true } final = { version = "0.1.1", default-features = false, optional = true } From a3fcdaad182034ad4d8f5007463e6a86b6e84c0a Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Tue, 11 Feb 2025 08:12:51 +0000 Subject: [PATCH 07/10] Update dependencies --- Cargo.toml | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index ac41c8150..4afadb7b3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -24,12 +24,13 @@ rust-cuda-derive = { version = "0.1", path = "rust-cuda-derive", default-feature rust-cuda-kernel = { version = "0.1", path = "rust-cuda-kernel", default-features = false } # third-party dependencies with unpublished patches -rustacuda = { git = "https://github.com/juntyr/RustaCUDA", rev = "c6ea7cc", default-features = false } -rustacuda_core = { git = "https://github.com/juntyr/RustaCUDA", rev = "c6ea7cc", default-features = false } +cust = { git = "https://github.com/juntyr/Rust-GPU-CUDA.git", rev = "5365c14", version = "0.3.2", default-features = false } +cust_core = { git = "https://github.com/juntyr/Rust-GPU-CUDA.git", rev = "5365c14", version = "0.1", default-features = false } +cust_derive = { git = "https://github.com/juntyr/Rust-GPU-CUDA.git", rev = "5365c14", version = "0.2", default-features = false } # crates.io third-party dependencies cargo_metadata = { version = "0.19", default-features = false } -cargo-util = { version = "=0.2.16", default-features = false } # TODO: keep in sync with toolchain +cargo-util = { version = "=0.2.17", default-features = false } # TODO: keep in sync with toolchain colored = { version = "3.0", default-features = false } const-type-layout = { version = "0.3.2", default-features = false } final = { version = "0.1.1", default-features = false } @@ -96,14 +97,13 @@ host = ["dep:cust", "dep:regex", "dep:oneshot", "dep:safer_owning_ref"] kernel = ["dep:rust-cuda-kernel"] [dependencies] -const-type-layout = { version = "0.3.2", default-features = false, features = ["derive"] } -cust = { git = "https://github.com/juntyr/Rust-GPU-CUDA.git", rev = "5365c14", version = "0.3.2", default-features = false, optional = true } -cust_core = { git = "https://github.com/juntyr/Rust-GPU-CUDA.git", rev = "5365c14", version = "0.1", default-features = false } -cust_derive = { git = "https://github.com/juntyr/Rust-GPU-CUDA.git", rev = "5365c14", version = "0.2", default-features = false, optional = true } -final = { version = "0.1.1", default-features = false, optional = true } -oneshot = { version = "0.1", default-features = false, features = ["std", "async"], optional = true } -regex = { version = "1.10", default-features = false, optional = true } -safer_owning_ref = { version = "0.5", default-features = false, optional = true } - -rust-cuda-derive = { path = "rust-cuda-derive", default-features = false, optional = true } -rust-cuda-kernel = { path = "rust-cuda-kernel", default-features = false, optional = true } +const-type-layout = { workspace = true, features = ["derive"] } +cust = { workspace = true, optional = true } +cust_core = { workspace = true } +cust_derive = { workspace = true, optional = true } +final = { workspace = true, optional = true } +oneshot = { workspace = true, features = ["std", "async"], optional = true } +regex = { workspace = true, optional = true } +rust-cuda-derive = { workspace = true, optional = true } +rust-cuda-kernel = { workspace = true, optional = true } +safer_owning_ref = { workspace = true, optional = true } From 3d2bf614a3dce39be1519f1dbcfdfc3438dae4fc Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Tue, 11 Feb 2025 08:23:55 +0000 Subject: [PATCH 08/10] Remove the extraneous cust-derive dependency --- .github/workflows/rustdoc.yml | 1 - Cargo.toml | 7 ++++--- src/host/mod.rs | 10 ++++------ src/lend/impls/arc.rs | 1 + src/safety/portable.rs | 9 ++++++--- 5 files changed, 15 insertions(+), 13 deletions(-) diff --git a/.github/workflows/rustdoc.yml b/.github/workflows/rustdoc.yml index f0d7b683b..f62503604 100644 --- a/.github/workflows/rustdoc.yml +++ b/.github/workflows/rustdoc.yml @@ -39,7 +39,6 @@ jobs: --extern-html-root-url final=https://docs.rs/final/0.1.1/ \ --extern-html-root-url cust=https://docs.rs/cust/0.3.2/ \ --extern-html-root-url cust_core=https://docs.rs/cust_core/0.1/ \ - --extern-html-root-url cust_derive=https://docs.rs/cust_derive/0.2/ \ -Zunstable-options \ " cargo doc \ --all-features \ diff --git a/Cargo.toml b/Cargo.toml index 4afadb7b3..79705b409 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -26,7 +26,6 @@ rust-cuda-kernel = { version = "0.1", path = "rust-cuda-kernel", default-feature # third-party dependencies with unpublished patches cust = { git = "https://github.com/juntyr/Rust-GPU-CUDA.git", rev = "5365c14", version = "0.3.2", default-features = false } cust_core = { git = "https://github.com/juntyr/Rust-GPU-CUDA.git", rev = "5365c14", version = "0.1", default-features = false } -cust_derive = { git = "https://github.com/juntyr/Rust-GPU-CUDA.git", rev = "5365c14", version = "0.2", default-features = false } # crates.io third-party dependencies cargo_metadata = { version = "0.19", default-features = false } @@ -90,7 +89,7 @@ rust-version = { workspace = true } [features] default = [] -derive = ["dep:cust_derive", "dep:rust-cuda-derive"] +derive = ["dep:rust-cuda-derive"] device = [] final = ["dep:final"] host = ["dep:cust", "dep:regex", "dep:oneshot", "dep:safer_owning_ref"] @@ -100,10 +99,12 @@ kernel = ["dep:rust-cuda-kernel"] const-type-layout = { workspace = true, features = ["derive"] } cust = { workspace = true, optional = true } cust_core = { workspace = true } -cust_derive = { workspace = true, optional = true } final = { workspace = true, optional = true } oneshot = { workspace = true, features = ["std", "async"], optional = true } regex = { workspace = true, optional = true } rust-cuda-derive = { workspace = true, optional = true } rust-cuda-kernel = { workspace = true, optional = true } safer_owning_ref = { workspace = true, optional = true } + +[lints] +workspace = true diff --git a/src/host/mod.rs b/src/host/mod.rs index c97452438..8c42d80cf 100644 --- a/src/host/mod.rs +++ b/src/host/mod.rs @@ -205,6 +205,7 @@ impl<'a, T: PortableBitSemantics + TypeGraphLayout> HostAndDeviceMutRef<'a, T> { } } + #[allow(clippy::needless_pass_by_ref_mut)] #[must_use] pub(crate) fn for_device<'b>(&'b mut self) -> DeviceMutRef<'a, T> where @@ -244,18 +245,15 @@ impl<'a, T: PortableBitSemantics + TypeGraphLayout> HostAndDeviceMutRef<'a, T> { } #[must_use] - pub fn into_mut<'b>(self) -> HostAndDeviceMutRef<'b, T> + pub const fn into_mut<'b>(self) -> HostAndDeviceMutRef<'b, T> where 'a: 'b, { - HostAndDeviceMutRef { - device_box: self.device_box, - host_ref: self.host_ref, - } + self } #[must_use] - pub fn into_async<'b, 'stream>( + pub const fn into_async<'b, 'stream>( self, stream: Stream<'stream>, ) -> Async<'b, 'stream, HostAndDeviceMutRef<'b, T>, NoCompletion> diff --git a/src/lend/impls/arc.rs b/src/lend/impls/arc.rs index ec5527330..b08ba6342 100644 --- a/src/lend/impls/arc.rs +++ b/src/lend/impls/arc.rs @@ -107,6 +107,7 @@ unsafe impl RustToCudaAsync for Arc { - /// Types whose in-memory bit representation on the CPU host is safe to copy - /// to and read back on the GPU device while maintaining the same semantics, - /// iff the type layout on the CPU matches the type layout on the GPU. + /// Types with a CPU-GPU-compatible memory representation. + /// + /// More specifically, types in-memory bit representation on the CPU host + /// is safe to copy to and read back on the GPU device while maintaining + /// the same semantics, iff the type layout on the CPU matches the type + /// layout on the GPU. /// /// For a type to implement [`PortableBitSemantics`], it /// From 8dc39c8a6bdc817cefd197300e537bad7bb7b882 Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Tue, 11 Feb 2025 08:42:24 +0000 Subject: [PATCH 09/10] Clean up the cust_core dependency --- src/host/mod.rs | 7 +++---- src/lend/impls/arced_slice.rs | 11 +++++------ 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/src/host/mod.rs b/src/host/mod.rs index 8c42d80cf..3ae4f0353 100644 --- a/src/host/mod.rs +++ b/src/host/mod.rs @@ -9,10 +9,9 @@ use cust::{ context::Context, error::CudaError, event::Event, - memory::{CopyDestination, DeviceBox, DeviceBuffer, LockedBox, LockedBuffer}, + memory::{CopyDestination, DeviceBox, DeviceBuffer, DeviceCopy, LockedBox, LockedBuffer}, module::Module, }; -use cust_core::DeviceCopy; use crate::{ safety::PortableBitSemantics, @@ -119,7 +118,7 @@ impl CudaDroppable for DeviceBox { } } -impl CudaDroppable for DeviceBuffer { +impl CudaDroppable for DeviceBuffer { fn drop(val: Self) -> Result<(), (CudaError, Self)> { Self::drop(val) } @@ -133,7 +132,7 @@ impl CudaDroppable for LockedBox { } } -impl CudaDroppable for LockedBuffer { +impl CudaDroppable for LockedBuffer { fn drop(val: Self) -> Result<(), (CudaError, Self)> { Self::drop(val) } diff --git a/src/lend/impls/arced_slice.rs b/src/lend/impls/arced_slice.rs index 672cc2a6b..76d3f15cd 100644 --- a/src/lend/impls/arced_slice.rs +++ b/src/lend/impls/arced_slice.rs @@ -8,9 +8,8 @@ use const_type_layout::{TypeGraphLayout, TypeLayout}; use cust::{ error::CudaResult, memory::LockedBuffer, - memory::{DeviceBox, DeviceBuffer}, + memory::{DeviceBox, DeviceBuffer, DeviceCopy}, }; -use cust_core::DeviceCopy; use crate::{ deps::alloc::sync::Arc, @@ -50,21 +49,21 @@ pub struct _ArcInner { data: T, } -#[derive(Copy, Clone)] +#[cfg(feature = "host")] +#[derive(Copy, Clone, DeviceCopy)] #[repr(C)] struct _ArcInnerHeader { strong: _AtomicUsize, weak: _AtomicUsize, } -#[derive(Copy, Clone)] +#[cfg(feature = "host")] +#[derive(Copy, Clone, DeviceCopy)] #[repr(C, align(8))] struct _AtomicUsize { v: usize, } -unsafe impl DeviceCopy for _ArcInnerHeader {} - unsafe impl RustToCuda for Arc<[T]> { #[cfg(all(feature = "host", not(doc)))] type CudaAllocation = CudaDropWrapper>>; From 43937b0a57090de54f325de1535544f91cd4dcb6 Mon Sep 17 00:00:00 2001 From: Juniper Tyree Date: Tue, 11 Feb 2025 09:14:20 +0000 Subject: [PATCH 10/10] Bump MSRV to 1.84-nightly --- Cargo.toml | 2 +- README.md | 2 +- rust-cuda-derive/src/lib.rs | 2 +- rust-cuda-kernel/build.rs | 2 +- rust-cuda-kernel/src/kernel/link/mod.rs | 5 ++--- rust-cuda-kernel/src/lib.rs | 2 +- rust-toolchain | 4 ++-- src/lib.rs | 2 +- src/utils/shared/slice.rs | 6 +++--- 9 files changed, 13 insertions(+), 14 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 79705b409..6ec09e455 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,7 +15,7 @@ edition = "2021" authors = ["Juniper Tyree "] repository = "https://github.com/juntyr/rust-cuda" license = "MIT OR Apache-2.0" -rust-version = "1.81" # nightly +rust-version = "1.84" # nightly [workspace.dependencies] # workspace-internal crates diff --git a/README.md b/README.md index 4140f9e4b..ebc3a5a0b 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ [CI Status]: https://img.shields.io/github/actions/workflow/status/juntyr/rust-cuda/ci.yml?branch=main [workflow]: https://github.com/juntyr/rust-cuda/actions/workflows/ci.yml?query=branch%3Amain -[MSRV]: https://img.shields.io/badge/MSRV-1.81.0--nightly-orange +[MSRV]: https://img.shields.io/badge/MSRV-1.84.0--nightly-orange [repo]: https://github.com/juntyr/rust-cuda [Rust Doc]: https://img.shields.io/badge/docs-main-blue diff --git a/rust-cuda-derive/src/lib.rs b/rust-cuda-derive/src/lib.rs index cc371f18c..2cfb62949 100644 --- a/rust-cuda-derive/src/lib.rs +++ b/rust-cuda-derive/src/lib.rs @@ -5,7 +5,7 @@ //! [CI Status]: https://img.shields.io/github/actions/workflow/status/juntyr/rust-cuda/ci.yml?branch=main //! [workflow]: https://github.com/juntyr/rust-cuda/actions/workflows/ci.yml?query=branch%3Amain //! -//! [MSRV]: https://img.shields.io/badge/MSRV-1.81.0--nightly-orange +//! [MSRV]: https://img.shields.io/badge/MSRV-1.84.0--nightly-orange //! [repo]: https://github.com/juntyr/rust-cuda //! //! [Rust Doc]: https://img.shields.io/badge/docs-main-blue diff --git a/rust-cuda-kernel/build.rs b/rust-cuda-kernel/build.rs index ecd3b29cb..65b149df9 100644 --- a/rust-cuda-kernel/build.rs +++ b/rust-cuda-kernel/build.rs @@ -5,7 +5,7 @@ //! [CI Status]: https://img.shields.io/github/actions/workflow/status/juntyr/rust-cuda/ci.yml?branch=main //! [workflow]: https://github.com/juntyr/rust-cuda/actions/workflows/ci.yml?query=branch%3Amain //! -//! [MSRV]: https://img.shields.io/badge/MSRV-1.81.0--nightly-orange +//! [MSRV]: https://img.shields.io/badge/MSRV-1.84.0--nightly-orange //! [repo]: https://github.com/juntyr/rust-cuda //! //! [Rust Doc]: https://img.shields.io/badge/docs-main-blue diff --git a/rust-cuda-kernel/src/kernel/link/mod.rs b/rust-cuda-kernel/src/kernel/link/mod.rs index 8b4a549bb..f0a6bd154 100644 --- a/rust-cuda-kernel/src/kernel/link/mod.rs +++ b/rust-cuda-kernel/src/kernel/link/mod.rs @@ -189,7 +189,6 @@ fn extract_ptx_kernel_layout(kernel_ptx: &mut String) -> proc_macro2::TokenStrea ); } - #[allow(clippy::literal_string_with_formatting_args)] // false positive if type_layout_metas .insert(String::from(param), bytes) .is_some() @@ -484,7 +483,7 @@ fn check_kernel_ptx( } if ptx_lint_levels .get(&PtxLint::DynamicStackSize) - .map_or(true, |level| *level <= LintLevel::Warn) + .is_none_or(|level| *level <= LintLevel::Warn) { options.push(c"--suppress-stack-size-warning"); } @@ -530,7 +529,7 @@ fn check_kernel_ptx( } if ptx_lint_levels .get(&PtxLint::DynamicStackSize) - .map_or(true, |level| *level < LintLevel::Warn) + .is_none_or(|level| *level < LintLevel::Warn) { options.push(c"--suppress-stack-size-warning"); } diff --git a/rust-cuda-kernel/src/lib.rs b/rust-cuda-kernel/src/lib.rs index 1714bddcd..e2b198153 100644 --- a/rust-cuda-kernel/src/lib.rs +++ b/rust-cuda-kernel/src/lib.rs @@ -5,7 +5,7 @@ //! [CI Status]: https://img.shields.io/github/actions/workflow/status/juntyr/rust-cuda/ci.yml?branch=main //! [workflow]: https://github.com/juntyr/rust-cuda/actions/workflows/ci.yml?query=branch%3Amain //! -//! [MSRV]: https://img.shields.io/badge/MSRV-1.81.0--nightly-orange +//! [MSRV]: https://img.shields.io/badge/MSRV-1.84.0--nightly-orange //! [repo]: https://github.com/juntyr/rust-cuda //! //! [Rust Doc]: https://img.shields.io/badge/docs-main-blue diff --git a/rust-toolchain b/rust-toolchain index 2404f256b..5b8ab7dea 100644 --- a/rust-toolchain +++ b/rust-toolchain @@ -1,5 +1,5 @@ [toolchain] -# Pin to final 1.85.0 nightly -channel = "nightly-2025-01-03" +# Pin to final 1.84.0 nightly +channel = "nightly-2024-11-22" components = [ "cargo", "rustfmt", "clippy", "llvm-bitcode-linker", "llvm-tools" ] targets = [ "nvptx64-nvidia-cuda" ] diff --git a/src/lib.rs b/src/lib.rs index 0511e0191..1a1bebd63 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -5,7 +5,7 @@ //! [CI Status]: https://img.shields.io/github/actions/workflow/status/juntyr/rust-cuda/ci.yml?branch=main //! [workflow]: https://github.com/juntyr/rust-cuda/actions/workflows/ci.yml?query=branch%3Amain //! -//! [MSRV]: https://img.shields.io/badge/MSRV-1.81.0--nightly-orange +//! [MSRV]: https://img.shields.io/badge/MSRV-1.84.0--nightly-orange //! [repo]: https://github.com/juntyr/rust-cuda //! //! [Rust Doc]: https://img.shields.io/badge/docs-main-blue diff --git a/src/utils/shared/slice.rs b/src/utils/shared/slice.rs index f239ce7a8..a691bd2ea 100644 --- a/src/utils/shared/slice.rs +++ b/src/utils/shared/slice.rs @@ -10,7 +10,7 @@ pub struct ThreadBlockSharedSlice { impl ThreadBlockSharedSlice { #[cfg(feature = "host")] #[must_use] - pub fn new_uninit_with_len(len: usize) -> Self { + pub const fn new_uninit_with_len(len: usize) -> Self { Self { shared: Self::dangling_slice_with_len(len), } @@ -18,7 +18,7 @@ impl ThreadBlockSharedSlice { #[cfg(feature = "host")] #[must_use] - pub fn with_len(mut self, len: usize) -> Self { + pub const fn with_len(mut self, len: usize) -> Self { self.shared = Self::dangling_slice_with_len(len); self } @@ -31,7 +31,7 @@ impl ThreadBlockSharedSlice { } #[cfg(feature = "host")] - fn dangling_slice_with_len(len: usize) -> *mut [T] { + const fn dangling_slice_with_len(len: usize) -> *mut [T] { core::ptr::slice_from_raw_parts_mut(core::ptr::NonNull::dangling().as_ptr(), len) }