Skip to content

Commit 7c154da

Browse files
jhuber6 authored and tstellar committed
[Clang] Fix GPU intrinsic helpers incorrectly sign extending (#129560)
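Summary: The lane-read builtins return signed 32-bit values, so casting the result directly to uint64_t sign-extends it. When the top bit of the low half is set, the upper 32 bits all become one and overwrite the high half once the two halves are OR-ed together. Masking the low half with 0xFFFFFFFF after the cast fixes this. (cherry picked from commit 4ca8ea8)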
1 parent 9467804 commit 7c154da

File tree

2 files changed

+3
-2
lines changed

2 files changed

+3
-2
lines changed

clang/lib/Headers/amdgpuintrin.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -121,7 +121,7 @@ __gpu_read_first_lane_u64(uint64_t __lane_mask, uint64_t __x) {
121121
uint32_t __hi = (uint32_t)(__x >> 32ull);
122122
uint32_t __lo = (uint32_t)(__x & 0xFFFFFFFF);
123123
return ((uint64_t)__builtin_amdgcn_readfirstlane(__hi) << 32ull) |
124-
((uint64_t)__builtin_amdgcn_readfirstlane(__lo));
124+
((uint64_t)__builtin_amdgcn_readfirstlane(__lo) & 0xFFFFFFFF);
125125
}
126126

127127
// Returns a bitmask of threads in the current lane for which \p x is true.

clang/lib/Headers/nvptxintrin.h

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -127,7 +127,8 @@ __gpu_read_first_lane_u64(uint64_t __lane_mask, uint64_t __x) {
127127
__gpu_num_lanes() - 1)
128128
<< 32ull) |
129129
((uint64_t)__nvvm_shfl_sync_idx_i32(__mask, __lo, __id,
130-
__gpu_num_lanes() - 1));
130+
__gpu_num_lanes() - 1) &
131+
0xFFFFFFFF);
131132
}
132133

133134
// Returns a bitmask of threads in the current lane for which \p x is true.

0 commit comments

Comments
 (0)