From ba036b8b2c373b1f475475162b2628c31b9b7be4 Mon Sep 17 00:00:00 2001
From: Yichen Yan
Date: Mon, 16 Dec 2024 11:03:15 +0800
Subject: [PATCH 1/2] Wrap vec size 8 with `USE_ROCM`

---
 aten/src/ATen/native/cuda/CUDALoops.cuh | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/aten/src/ATen/native/cuda/CUDALoops.cuh b/aten/src/ATen/native/cuda/CUDALoops.cuh
index 94417bae44921..c844b4f1372e9 100644
--- a/aten/src/ATen/native/cuda/CUDALoops.cuh
+++ b/aten/src/ATen/native/cuda/CUDALoops.cuh
@@ -116,11 +116,13 @@ static inline void launch_vectorized_kernel(
   int vec_size = memory::can_vectorize_up_to<func_t>(data);
 
   switch (vec_size) {
+#ifdef USE_ROCM
     case 8:
       vectorized_elementwise_kernel<8, func_t, array_t>
           <<<grid, num_threads(), 0, stream>>>(N, f, data);
       C10_CUDA_KERNEL_LAUNCH_CHECK();
       break;
+#endif
     case 4:
       vectorized_elementwise_kernel<4, func_t, array_t>
           <<<grid, num_threads(), 0, stream>>>(N, f, data);

From f69cc7e407f3d5bae55a4e301bc241f494a690e3 Mon Sep 17 00:00:00 2001
From: Yichen Yan
Date: Mon, 16 Dec 2024 11:44:49 +0800
Subject: [PATCH 2/2] update

---
 aten/src/ATen/native/cuda/Dropout.cu | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/aten/src/ATen/native/cuda/Dropout.cu b/aten/src/ATen/native/cuda/Dropout.cu
index 9955a90b0b8d9..1a87fa993a6fa 100644
--- a/aten/src/ATen/native/cuda/Dropout.cu
+++ b/aten/src/ATen/native/cuda/Dropout.cu
@@ -281,6 +281,7 @@ inline void launcher(
 
   if (vec_size > 1) {
     switch (vec_size) {
+#ifdef USE_ROCM
       case 8:
         fused_dropout_kernel_vec<
             scalar_t,
@@ -297,6 +298,7 @@ inline void launcher(
                 rng_engine_inputs);
         C10_CUDA_KERNEL_LAUNCH_CHECK();
         break;
+#endif
       case 4:
         fused_dropout_kernel_vec<
             scalar_t,