diff --git a/aten/src/ATen/native/cuda/CUDALoops.cuh b/aten/src/ATen/native/cuda/CUDALoops.cuh index 94417bae44921..c844b4f1372e9 100644 --- a/aten/src/ATen/native/cuda/CUDALoops.cuh +++ b/aten/src/ATen/native/cuda/CUDALoops.cuh @@ -116,11 +116,13 @@ static inline void launch_vectorized_kernel( int vec_size = memory::can_vectorize_up_to(data); switch (vec_size) { +#ifdef USE_ROCM case 8: vectorized_elementwise_kernel<8, func_t, array_t> <<>>(N, f, data); C10_CUDA_KERNEL_LAUNCH_CHECK(); break; +#endif case 4: vectorized_elementwise_kernel<4, func_t, array_t> <<>>(N, f, data); diff --git a/aten/src/ATen/native/cuda/Dropout.cu b/aten/src/ATen/native/cuda/Dropout.cu index 9955a90b0b8d9..1a87fa993a6fa 100644 --- a/aten/src/ATen/native/cuda/Dropout.cu +++ b/aten/src/ATen/native/cuda/Dropout.cu @@ -281,6 +281,7 @@ inline void launcher( if (vec_size > 1) { switch (vec_size) { +#ifdef USE_ROCM case 8: fused_dropout_kernel_vec< scalar_t, @@ -297,6 +298,7 @@ inline void launcher( rng_engine_inputs); C10_CUDA_KERNEL_LAUNCH_CHECK(); break; +#endif case 4: fused_dropout_kernel_vec< scalar_t,