|
4 | 4 |
|
5 | 5 | define void @trunc_through_one_add(ptr noalias %0, ptr noalias readonly %1) {
|
6 | 6 | ; SSE-LABEL: @trunc_through_one_add(
|
7 |
| -; SSE-NEXT: [[TMP4:%.*]] = load <8 x i8>, ptr [[TMP1:%.*]], align 1 |
8 |
| -; SSE-NEXT: [[TMP5:%.*]] = zext <8 x i8> [[TMP4]] to <8 x i16> |
9 |
| -; SSE-NEXT: [[TMP6:%.*]] = lshr <8 x i16> [[TMP5]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> |
10 |
| -; SSE-NEXT: [[TMP7:%.*]] = add nuw nsw <8 x i16> [[TMP6]], [[TMP5]] |
11 |
| -; SSE-NEXT: [[TMP8:%.*]] = lshr <8 x i16> [[TMP7]], <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2> |
12 |
| -; SSE-NEXT: store <8 x i16> [[TMP8]], ptr [[TMP0:%.*]], align 2 |
13 |
| -; SSE-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 8 |
14 |
| -; SSE-NEXT: [[TMP11:%.*]] = getelementptr inbounds i16, ptr [[TMP0]], i64 8 |
15 |
| -; SSE-NEXT: [[TMP13:%.*]] = load <8 x i8>, ptr [[TMP10]], align 1 |
16 |
| -; SSE-NEXT: [[TMP14:%.*]] = zext <8 x i8> [[TMP13]] to <8 x i16> |
17 |
| -; SSE-NEXT: [[TMP15:%.*]] = lshr <8 x i16> [[TMP14]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> |
18 |
| -; SSE-NEXT: [[TMP16:%.*]] = add nuw nsw <8 x i16> [[TMP15]], [[TMP14]] |
19 |
| -; SSE-NEXT: [[TMP17:%.*]] = lshr <8 x i16> [[TMP16]], <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2> |
20 |
| -; SSE-NEXT: store <8 x i16> [[TMP17]], ptr [[TMP11]], align 2 |
| 7 | +; SSE-NEXT: [[TMP3:%.*]] = load <8 x i8>, ptr [[TMP1:%.*]], align 1 |
| 8 | +; SSE-NEXT: [[TMP4:%.*]] = zext <8 x i8> [[TMP3]] to <8 x i16> |
| 9 | +; SSE-NEXT: [[TMP5:%.*]] = lshr <8 x i16> [[TMP4]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> |
| 10 | +; SSE-NEXT: [[TMP6:%.*]] = add nuw nsw <8 x i16> [[TMP5]], [[TMP4]] |
| 11 | +; SSE-NEXT: [[TMP7:%.*]] = lshr <8 x i16> [[TMP6]], <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2> |
| 12 | +; SSE-NEXT: store <8 x i16> [[TMP7]], ptr [[TMP0:%.*]], align 2 |
| 13 | +; SSE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 8 |
| 14 | +; SSE-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, ptr [[TMP0]], i64 8 |
| 15 | +; SSE-NEXT: [[TMP10:%.*]] = load <8 x i8>, ptr [[TMP8]], align 1 |
| 16 | +; SSE-NEXT: [[TMP11:%.*]] = zext <8 x i8> [[TMP10]] to <8 x i16> |
| 17 | +; SSE-NEXT: [[TMP12:%.*]] = lshr <8 x i16> [[TMP11]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> |
| 18 | +; SSE-NEXT: [[TMP13:%.*]] = add nuw nsw <8 x i16> [[TMP12]], [[TMP11]] |
| 19 | +; SSE-NEXT: [[TMP14:%.*]] = lshr <8 x i16> [[TMP13]], <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2> |
| 20 | +; SSE-NEXT: store <8 x i16> [[TMP14]], ptr [[TMP9]], align 2 |
21 | 21 | ; SSE-NEXT: ret void
|
22 | 22 | ;
|
23 | 23 | ; AVX-LABEL: @trunc_through_one_add(
|
24 |
| -; AVX-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr [[TMP1:%.*]], align 1 |
25 |
| -; AVX-NEXT: [[TMP5:%.*]] = zext <16 x i8> [[TMP4]] to <16 x i16> |
26 |
| -; AVX-NEXT: [[TMP6:%.*]] = lshr <16 x i16> [[TMP5]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> |
27 |
| -; AVX-NEXT: [[TMP7:%.*]] = add nuw nsw <16 x i16> [[TMP6]], [[TMP5]] |
28 |
| -; AVX-NEXT: [[TMP8:%.*]] = lshr <16 x i16> [[TMP7]], <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2> |
29 |
| -; AVX-NEXT: store <16 x i16> [[TMP8]], ptr [[TMP0:%.*]], align 2 |
| 24 | +; AVX-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr [[TMP1:%.*]], align 1 |
| 25 | +; AVX-NEXT: [[TMP4:%.*]] = zext <16 x i8> [[TMP3]] to <16 x i16> |
| 26 | +; AVX-NEXT: [[TMP5:%.*]] = lshr <16 x i16> [[TMP4]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> |
| 27 | +; AVX-NEXT: [[TMP6:%.*]] = add nuw nsw <16 x i16> [[TMP5]], [[TMP4]] |
| 28 | +; AVX-NEXT: [[TMP7:%.*]] = lshr <16 x i16> [[TMP6]], <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2> |
| 29 | +; AVX-NEXT: store <16 x i16> [[TMP7]], ptr [[TMP0:%.*]], align 2 |
30 | 30 | ; AVX-NEXT: ret void
|
31 | 31 | ;
|
32 | 32 | %3 = load i8, ptr %1, align 1
|
@@ -176,39 +176,39 @@ define void @trunc_through_one_add(ptr noalias %0, ptr noalias readonly %1) {
|
176 | 176 |
|
177 | 177 | define void @trunc_through_two_adds(ptr noalias %0, ptr noalias readonly %1, ptr noalias readonly %2) {
|
178 | 178 | ; SSE-LABEL: @trunc_through_two_adds(
|
179 |
| -; SSE-NEXT: [[TMP5:%.*]] = load <8 x i8>, ptr [[TMP1:%.*]], align 1 |
180 |
| -; SSE-NEXT: [[TMP6:%.*]] = zext <8 x i8> [[TMP5]] to <8 x i16> |
181 |
| -; SSE-NEXT: [[TMP8:%.*]] = load <8 x i8>, ptr [[TMP2:%.*]], align 1 |
182 |
| -; SSE-NEXT: [[TMP9:%.*]] = zext <8 x i8> [[TMP8]] to <8 x i16> |
183 |
| -; SSE-NEXT: [[TMP10:%.*]] = add nuw nsw <8 x i16> [[TMP9]], [[TMP6]] |
184 |
| -; SSE-NEXT: [[TMP11:%.*]] = lshr <8 x i16> [[TMP10]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> |
185 |
| -; SSE-NEXT: [[TMP12:%.*]] = add nuw nsw <8 x i16> [[TMP11]], [[TMP10]] |
186 |
| -; SSE-NEXT: [[TMP13:%.*]] = lshr <8 x i16> [[TMP12]], <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2> |
187 |
| -; SSE-NEXT: store <8 x i16> [[TMP13]], ptr [[TMP0:%.*]], align 2 |
188 |
| -; SSE-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 8 |
189 |
| -; SSE-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 8 |
190 |
| -; SSE-NEXT: [[TMP17:%.*]] = getelementptr inbounds i16, ptr [[TMP0]], i64 8 |
191 |
| -; SSE-NEXT: [[TMP19:%.*]] = load <8 x i8>, ptr [[TMP15]], align 1 |
192 |
| -; SSE-NEXT: [[TMP20:%.*]] = zext <8 x i8> [[TMP19]] to <8 x i16> |
193 |
| -; SSE-NEXT: [[TMP22:%.*]] = load <8 x i8>, ptr [[TMP16]], align 1 |
194 |
| -; SSE-NEXT: [[TMP23:%.*]] = zext <8 x i8> [[TMP22]] to <8 x i16> |
195 |
| -; SSE-NEXT: [[TMP24:%.*]] = add nuw nsw <8 x i16> [[TMP23]], [[TMP20]] |
196 |
| -; SSE-NEXT: [[TMP25:%.*]] = lshr <8 x i16> [[TMP24]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> |
197 |
| -; SSE-NEXT: [[TMP26:%.*]] = add nuw nsw <8 x i16> [[TMP25]], [[TMP24]] |
198 |
| -; SSE-NEXT: [[TMP27:%.*]] = lshr <8 x i16> [[TMP26]], <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2> |
199 |
| -; SSE-NEXT: store <8 x i16> [[TMP27]], ptr [[TMP17]], align 2 |
| 179 | +; SSE-NEXT: [[TMP4:%.*]] = load <8 x i8>, ptr [[TMP1:%.*]], align 1 |
| 180 | +; SSE-NEXT: [[TMP5:%.*]] = zext <8 x i8> [[TMP4]] to <8 x i16> |
| 181 | +; SSE-NEXT: [[TMP6:%.*]] = load <8 x i8>, ptr [[TMP2:%.*]], align 1 |
| 182 | +; SSE-NEXT: [[TMP7:%.*]] = zext <8 x i8> [[TMP6]] to <8 x i16> |
| 183 | +; SSE-NEXT: [[TMP8:%.*]] = add nuw nsw <8 x i16> [[TMP7]], [[TMP5]] |
| 184 | +; SSE-NEXT: [[TMP9:%.*]] = lshr <8 x i16> [[TMP8]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> |
| 185 | +; SSE-NEXT: [[TMP10:%.*]] = add nuw nsw <8 x i16> [[TMP9]], [[TMP8]] |
| 186 | +; SSE-NEXT: [[TMP11:%.*]] = lshr <8 x i16> [[TMP10]], <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2> |
| 187 | +; SSE-NEXT: store <8 x i16> [[TMP11]], ptr [[TMP0:%.*]], align 2 |
| 188 | +; SSE-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 8 |
| 189 | +; SSE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 8 |
| 190 | +; SSE-NEXT: [[TMP14:%.*]] = getelementptr inbounds i16, ptr [[TMP0]], i64 8 |
| 191 | +; SSE-NEXT: [[TMP15:%.*]] = load <8 x i8>, ptr [[TMP12]], align 1 |
| 192 | +; SSE-NEXT: [[TMP16:%.*]] = zext <8 x i8> [[TMP15]] to <8 x i16> |
| 193 | +; SSE-NEXT: [[TMP17:%.*]] = load <8 x i8>, ptr [[TMP13]], align 1 |
| 194 | +; SSE-NEXT: [[TMP18:%.*]] = zext <8 x i8> [[TMP17]] to <8 x i16> |
| 195 | +; SSE-NEXT: [[TMP19:%.*]] = add nuw nsw <8 x i16> [[TMP18]], [[TMP16]] |
| 196 | +; SSE-NEXT: [[TMP20:%.*]] = lshr <8 x i16> [[TMP19]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> |
| 197 | +; SSE-NEXT: [[TMP21:%.*]] = add nuw nsw <8 x i16> [[TMP20]], [[TMP19]] |
| 198 | +; SSE-NEXT: [[TMP22:%.*]] = lshr <8 x i16> [[TMP21]], <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2> |
| 199 | +; SSE-NEXT: store <8 x i16> [[TMP22]], ptr [[TMP14]], align 2 |
200 | 200 | ; SSE-NEXT: ret void
|
201 | 201 | ;
|
202 | 202 | ; AVX-LABEL: @trunc_through_two_adds(
|
203 |
| -; AVX-NEXT: [[TMP5:%.*]] = load <16 x i8>, ptr [[TMP1:%.*]], align 1 |
204 |
| -; AVX-NEXT: [[TMP6:%.*]] = zext <16 x i8> [[TMP5]] to <16 x i16> |
205 |
| -; AVX-NEXT: [[TMP8:%.*]] = load <16 x i8>, ptr [[TMP2:%.*]], align 1 |
206 |
| -; AVX-NEXT: [[TMP9:%.*]] = zext <16 x i8> [[TMP8]] to <16 x i16> |
207 |
| -; AVX-NEXT: [[TMP10:%.*]] = add nuw nsw <16 x i16> [[TMP9]], [[TMP6]] |
208 |
| -; AVX-NEXT: [[TMP11:%.*]] = lshr <16 x i16> [[TMP10]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> |
209 |
| -; AVX-NEXT: [[TMP12:%.*]] = add nuw nsw <16 x i16> [[TMP11]], [[TMP10]] |
210 |
| -; AVX-NEXT: [[TMP13:%.*]] = lshr <16 x i16> [[TMP12]], <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2> |
211 |
| -; AVX-NEXT: store <16 x i16> [[TMP13]], ptr [[TMP0:%.*]], align 2 |
| 203 | +; AVX-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr [[TMP1:%.*]], align 1 |
| 204 | +; AVX-NEXT: [[TMP5:%.*]] = zext <16 x i8> [[TMP4]] to <16 x i16> |
| 205 | +; AVX-NEXT: [[TMP6:%.*]] = load <16 x i8>, ptr [[TMP2:%.*]], align 1 |
| 206 | +; AVX-NEXT: [[TMP7:%.*]] = zext <16 x i8> [[TMP6]] to <16 x i16> |
| 207 | +; AVX-NEXT: [[TMP8:%.*]] = add nuw nsw <16 x i16> [[TMP7]], [[TMP5]] |
| 208 | +; AVX-NEXT: [[TMP9:%.*]] = lshr <16 x i16> [[TMP8]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> |
| 209 | +; AVX-NEXT: [[TMP10:%.*]] = add nuw nsw <16 x i16> [[TMP9]], [[TMP8]] |
| 210 | +; AVX-NEXT: [[TMP11:%.*]] = lshr <16 x i16> [[TMP10]], <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2> |
| 211 | +; AVX-NEXT: store <16 x i16> [[TMP11]], ptr [[TMP0:%.*]], align 2 |
212 | 212 | ; AVX-NEXT: ret void
|
213 | 213 | ;
|
214 | 214 | %4 = load i8, ptr %1, align 1
|
|
0 commit comments