@@ -1033,7 +1033,7 @@ define amdgpu_kernel void @dynamic_insertelement_v2i8(<2 x i8> addrspace(1)* %ou
1033
1033
; SI-NEXT: s_mov_b32 s2, -1
1034
1034
; SI-NEXT: s_waitcnt lgkmcnt(0)
1035
1035
; SI-NEXT: s_lshl_b32 s5, s6, 3
1036
- ; SI-NEXT: s_lshl_b32 s5, -1 , s5
1036
+ ; SI-NEXT: s_lshl_b32 s5, 0xff , s5
1037
1037
; SI-NEXT: s_andn2_b32 s4, s4, s5
1038
1038
; SI-NEXT: s_and_b32 s5, s5, 0x505
1039
1039
; SI-NEXT: s_or_b32 s4, s5, s4
@@ -1046,14 +1046,15 @@ define amdgpu_kernel void @dynamic_insertelement_v2i8(<2 x i8> addrspace(1)* %ou
1046
1046
; VI-NEXT: s_load_dword s6, s[4:5], 0x4c
1047
1047
; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
1048
1048
; VI-NEXT: s_load_dword s4, s[4:5], 0x28
1049
+ ; VI-NEXT: v_mov_b32_e32 v0, 0xff
1049
1050
; VI-NEXT: s_mov_b32 s3, 0x1100f000
1050
- ; VI-NEXT: s_mov_b32 s2, -1
1051
1051
; VI-NEXT: s_waitcnt lgkmcnt(0)
1052
1052
; VI-NEXT: s_lshl_b32 s5, s6, 3
1053
- ; VI-NEXT: v_lshlrev_b16_e64 v0, s5, -1
1053
+ ; VI-NEXT: v_lshlrev_b16_e32 v0, s5, v0
1054
1054
; VI-NEXT: v_not_b32_e32 v1, v0
1055
1055
; VI-NEXT: v_and_b32_e32 v1, s4, v1
1056
1056
; VI-NEXT: v_and_b32_e32 v0, 0x505, v0
1057
+ ; VI-NEXT: s_mov_b32 s2, -1
1057
1058
; VI-NEXT: v_or_b32_e32 v0, v0, v1
1058
1059
; VI-NEXT: buffer_store_short v0, off, s[0:3], 0
1059
1060
; VI-NEXT: s_endpgm
@@ -1074,7 +1075,7 @@ define amdgpu_kernel void @dynamic_insertelement_v3i8(<3 x i8> addrspace(1)* %ou
1074
1075
; SI-NEXT: s_mov_b32 s2, -1
1075
1076
; SI-NEXT: s_waitcnt lgkmcnt(0)
1076
1077
; SI-NEXT: s_lshl_b32 s5, s6, 3
1077
- ; SI-NEXT: s_lshl_b32 s5, 0xffff , s5
1078
+ ; SI-NEXT: s_lshl_b32 s5, 0xff , s5
1078
1079
; SI-NEXT: s_andn2_b32 s4, s4, s5
1079
1080
; SI-NEXT: s_and_b32 s5, s5, 0x5050505
1080
1081
; SI-NEXT: s_or_b32 s4, s5, s4
@@ -1094,7 +1095,7 @@ define amdgpu_kernel void @dynamic_insertelement_v3i8(<3 x i8> addrspace(1)* %ou
1094
1095
; VI-NEXT: s_mov_b32 s2, -1
1095
1096
; VI-NEXT: s_waitcnt lgkmcnt(0)
1096
1097
; VI-NEXT: s_lshl_b32 s5, s6, 3
1097
- ; VI-NEXT: s_lshl_b32 s5, 0xffff , s5
1098
+ ; VI-NEXT: s_lshl_b32 s5, 0xff , s5
1098
1099
; VI-NEXT: s_andn2_b32 s4, s4, s5
1099
1100
; VI-NEXT: s_and_b32 s5, s5, 0x5050505
1100
1101
; VI-NEXT: s_or_b32 s4, s5, s4
@@ -1119,7 +1120,7 @@ define amdgpu_kernel void @dynamic_insertelement_v4i8(<4 x i8> addrspace(1)* %ou
1119
1120
; SI-NEXT: s_mov_b32 s2, -1
1120
1121
; SI-NEXT: s_waitcnt lgkmcnt(0)
1121
1122
; SI-NEXT: s_lshl_b32 s5, s6, 3
1122
- ; SI-NEXT: s_lshl_b32 s5, 0xffff , s5
1123
+ ; SI-NEXT: s_lshl_b32 s5, 0xff , s5
1123
1124
; SI-NEXT: s_andn2_b32 s4, s4, s5
1124
1125
; SI-NEXT: s_and_b32 s5, s5, 0x5050505
1125
1126
; SI-NEXT: s_or_b32 s4, s5, s4
@@ -1136,7 +1137,7 @@ define amdgpu_kernel void @dynamic_insertelement_v4i8(<4 x i8> addrspace(1)* %ou
1136
1137
; VI-NEXT: s_mov_b32 s2, -1
1137
1138
; VI-NEXT: s_waitcnt lgkmcnt(0)
1138
1139
; VI-NEXT: s_lshl_b32 s5, s6, 3
1139
- ; VI-NEXT: s_lshl_b32 s5, 0xffff , s5
1140
+ ; VI-NEXT: s_lshl_b32 s5, 0xff , s5
1140
1141
; VI-NEXT: s_andn2_b32 s4, s4, s5
1141
1142
; VI-NEXT: s_and_b32 s5, s5, 0x5050505
1142
1143
; VI-NEXT: s_or_b32 s4, s5, s4
@@ -1160,7 +1161,7 @@ define amdgpu_kernel void @s_dynamic_insertelement_v8i8(<8 x i8> addrspace(1)* %
1160
1161
; SI-NEXT: s_mov_b32 s5, s1
1161
1162
; SI-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
1162
1163
; SI-NEXT: s_lshl_b32 s8, s8, 3
1163
- ; SI-NEXT: s_mov_b64 s[2:3], 0xffff
1164
+ ; SI-NEXT: s_mov_b64 s[2:3], 0xff
1164
1165
; SI-NEXT: s_lshl_b64 s[2:3], s[2:3], s8
1165
1166
; SI-NEXT: s_and_b32 s9, s3, 0x5050505
1166
1167
; SI-NEXT: s_and_b32 s8, s2, 0x5050505
@@ -1183,7 +1184,7 @@ define amdgpu_kernel void @s_dynamic_insertelement_v8i8(<8 x i8> addrspace(1)* %
1183
1184
; VI-NEXT: s_mov_b32 s5, s1
1184
1185
; VI-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
1185
1186
; VI-NEXT: s_lshl_b32 s8, s8, 3
1186
- ; VI-NEXT: s_mov_b64 s[2:3], 0xffff
1187
+ ; VI-NEXT: s_mov_b64 s[2:3], 0xff
1187
1188
; VI-NEXT: s_lshl_b64 s[2:3], s[2:3], s8
1188
1189
; VI-NEXT: s_and_b32 s9, s3, 0x5050505
1189
1190
; VI-NEXT: s_and_b32 s8, s2, 0x5050505
0 commit comments