Skip to content

Commit 80721e0

Browse files
authored
[X86][AVX10.2] Support AVX10.2-SATCVT new instructions. (#101599)
Ref.: https://cdrdv2.intel.com/v1/dl/getContent/828965
1 parent 59e1366 commit 80721e0

25 files changed

+13406
-24
lines changed

clang/include/clang/Basic/BuiltinsX86.def

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2179,6 +2179,44 @@ TARGET_BUILTIN(__builtin_ia32_vminmaxps512_round_mask, "V16fV16fV16fIiV16fUsIi",
21792179
TARGET_BUILTIN(__builtin_ia32_vminmaxsd_round_mask, "V2dV2dV2dIiV2dUcIi", "nV:128:", "avx10.2-256")
21802180
TARGET_BUILTIN(__builtin_ia32_vminmaxsh_round_mask, "V8xV8xV8xIiV8xUcIi", "nV:128:", "avx10.2-256")
21812181
TARGET_BUILTIN(__builtin_ia32_vminmaxss_round_mask, "V4fV4fV4fIiV4fUcIi", "nV:128:", "avx10.2-256")
2182+
2183+
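// AVX10.2 SATCVT
// Note: only the 256/512-bit vcvt[t]ph2i[u]bs / vcvt[t]ps2i[u]bs *_mask entries
// carry a trailing constant-int (Ii) operand; it is argument index 3, the one
// SemaX86::CheckBuiltinRoundingOrSAE validates for these builtins.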
2184+
TARGET_BUILTIN(__builtin_ia32_vcvtnebf162ibs128, "V8UsV8y", "nV:128:", "avx10.2-256")
2185+
TARGET_BUILTIN(__builtin_ia32_vcvtnebf162ibs256, "V16UsV16y", "nV:256:", "avx10.2-256")
2186+
TARGET_BUILTIN(__builtin_ia32_vcvtnebf162ibs512, "V32UsV32y", "nV:512:", "avx10.2-512")
2187+
TARGET_BUILTIN(__builtin_ia32_vcvtnebf162iubs128, "V8UsV8y", "nV:128:", "avx10.2-256")
2188+
TARGET_BUILTIN(__builtin_ia32_vcvtnebf162iubs256, "V16UsV16y", "nV:256:", "avx10.2-256")
2189+
TARGET_BUILTIN(__builtin_ia32_vcvtnebf162iubs512, "V32UsV32y", "nV:512:", "avx10.2-512")
2190+
TARGET_BUILTIN(__builtin_ia32_vcvtph2ibs128_mask, "V8UsV8xV8UsUc", "nV:128:", "avx10.2-256")
2191+
TARGET_BUILTIN(__builtin_ia32_vcvtph2ibs256_mask, "V16UsV16xV16UsUsIi", "nV:256:", "avx10.2-256")
2192+
TARGET_BUILTIN(__builtin_ia32_vcvtph2ibs512_mask, "V32UsV32xV32UsUiIi", "nV:512:", "avx10.2-512")
2193+
TARGET_BUILTIN(__builtin_ia32_vcvtph2iubs128_mask, "V8UsV8xV8UsUc", "nV:128:", "avx10.2-256")
2194+
TARGET_BUILTIN(__builtin_ia32_vcvtph2iubs256_mask, "V16UsV16xV16UsUsIi", "nV:256:", "avx10.2-256")
2195+
TARGET_BUILTIN(__builtin_ia32_vcvtph2iubs512_mask, "V32UsV32xV32UsUiIi", "nV:512:", "avx10.2-512")
2196+
TARGET_BUILTIN(__builtin_ia32_vcvtps2ibs128_mask, "V4UiV4fV4UiUc", "nV:128:", "avx10.2-256")
2197+
TARGET_BUILTIN(__builtin_ia32_vcvtps2ibs256_mask, "V8UiV8fV8UiUcIi", "nV:256:", "avx10.2-256")
2198+
TARGET_BUILTIN(__builtin_ia32_vcvtps2ibs512_mask, "V16UiV16fV16UiUsIi", "nV:512:", "avx10.2-512")
2199+
TARGET_BUILTIN(__builtin_ia32_vcvtps2iubs128_mask, "V4UiV4fV4UiUc", "nV:128:", "avx10.2-256")
2200+
TARGET_BUILTIN(__builtin_ia32_vcvtps2iubs256_mask, "V8UiV8fV8UiUcIi", "nV:256:", "avx10.2-256")
2201+
TARGET_BUILTIN(__builtin_ia32_vcvtps2iubs512_mask, "V16UiV16fV16UiUsIi", "nV:512:", "avx10.2-512")
2202+
TARGET_BUILTIN(__builtin_ia32_vcvttnebf162ibs128, "V8UsV8y", "nV:128:", "avx10.2-256")
2203+
TARGET_BUILTIN(__builtin_ia32_vcvttnebf162ibs256, "V16UsV16y", "nV:256:", "avx10.2-256")
2204+
TARGET_BUILTIN(__builtin_ia32_vcvttnebf162ibs512, "V32UsV32y", "nV:512:", "avx10.2-512")
2205+
TARGET_BUILTIN(__builtin_ia32_vcvttnebf162iubs128, "V8UsV8y", "nV:128:", "avx10.2-256")
2206+
TARGET_BUILTIN(__builtin_ia32_vcvttnebf162iubs256, "V16UsV16y", "nV:256:", "avx10.2-256")
2207+
TARGET_BUILTIN(__builtin_ia32_vcvttnebf162iubs512, "V32UsV32y", "nV:512:", "avx10.2-512")
2208+
TARGET_BUILTIN(__builtin_ia32_vcvttph2ibs128_mask, "V8UsV8xV8UsUc", "nV:128:", "avx10.2-256")
2209+
TARGET_BUILTIN(__builtin_ia32_vcvttph2ibs256_mask, "V16UsV16xV16UsUsIi", "nV:256:", "avx10.2-256")
2210+
TARGET_BUILTIN(__builtin_ia32_vcvttph2ibs512_mask, "V32UsV32xV32UsUiIi", "nV:512:", "avx10.2-512")
2211+
TARGET_BUILTIN(__builtin_ia32_vcvttph2iubs128_mask, "V8UsV8xV8UsUc", "nV:128:", "avx10.2-256")
2212+
TARGET_BUILTIN(__builtin_ia32_vcvttph2iubs256_mask, "V16UsV16xV16UsUsIi", "nV:256:", "avx10.2-256")
2213+
TARGET_BUILTIN(__builtin_ia32_vcvttph2iubs512_mask, "V32UsV32xV32UsUiIi", "nV:512:", "avx10.2-512")
2214+
TARGET_BUILTIN(__builtin_ia32_vcvttps2ibs128_mask, "V4UiV4fV4UiUc", "nV:128:", "avx10.2-256")
2215+
TARGET_BUILTIN(__builtin_ia32_vcvttps2ibs256_mask, "V8UiV8fV8UiUcIi", "nV:256:", "avx10.2-256")
2216+
TARGET_BUILTIN(__builtin_ia32_vcvttps2ibs512_mask, "V16UiV16fV16UiUsIi", "nV:512:", "avx10.2-512")
2217+
TARGET_BUILTIN(__builtin_ia32_vcvttps2iubs128_mask, "V4UiV4fV4UiUc", "nV:128:", "avx10.2-256")
2218+
TARGET_BUILTIN(__builtin_ia32_vcvttps2iubs256_mask, "V8UiV8fV8UiUcIi", "nV:256:", "avx10.2-256")
2219+
TARGET_BUILTIN(__builtin_ia32_vcvttps2iubs512_mask, "V16UiV16fV16UiUsIi", "nV:512:", "avx10.2-512")
21822220
#undef BUILTIN
21832221
#undef TARGET_BUILTIN
21842222
#undef TARGET_HEADER_BUILTIN

clang/lib/Headers/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,8 +149,10 @@ set(x86_files
149149
amxintrin.h
150150
avx10_2_512minmaxintrin.h
151151
avx10_2_512niintrin.h
152+
avx10_2_512satcvtintrin.h
152153
avx10_2minmaxintrin.h
153154
avx10_2niintrin.h
155+
avx10_2satcvtintrin.h
154156
avx2intrin.h
155157
avx512bf16intrin.h
156158
avx512bitalgintrin.h

clang/lib/Headers/avx10_2_512satcvtintrin.h

Lines changed: 301 additions & 0 deletions
Large diffs are not rendered by default.

clang/lib/Headers/avx10_2satcvtintrin.h

Lines changed: 444 additions & 0 deletions
Large diffs are not rendered by default.

clang/lib/Headers/immintrin.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -651,11 +651,13 @@ _storebe_i64(void * __P, long long __D) {
651651
#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX10_2__)
652652
#include <avx10_2minmaxintrin.h>
653653
#include <avx10_2niintrin.h>
654+
#include <avx10_2satcvtintrin.h>
654655
#endif
655656

656657
#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX10_2_512__)
657658
#include <avx10_2_512minmaxintrin.h>
658659
#include <avx10_2_512niintrin.h>
660+
#include <avx10_2_512satcvtintrin.h>
659661
#endif
660662

661663
#if !defined(__SCE__) || __has_feature(modules) || defined(__ENQCMD__)

clang/lib/Sema/SemaX86.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,14 @@ bool SemaX86::CheckBuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
8888
case X86::BI__builtin_ia32_vgetexppd256_round_mask:
8989
case X86::BI__builtin_ia32_vgetexpps256_round_mask:
9090
case X86::BI__builtin_ia32_vgetexpph256_round_mask:
91+
case X86::BI__builtin_ia32_vcvttph2ibs256_mask:
92+
case X86::BI__builtin_ia32_vcvttph2iubs256_mask:
93+
case X86::BI__builtin_ia32_vcvttps2ibs256_mask:
94+
case X86::BI__builtin_ia32_vcvttps2iubs256_mask:
95+
case X86::BI__builtin_ia32_vcvttph2ibs512_mask:
96+
case X86::BI__builtin_ia32_vcvttph2iubs512_mask:
97+
case X86::BI__builtin_ia32_vcvttps2ibs512_mask:
98+
case X86::BI__builtin_ia32_vcvttps2iubs512_mask:
9199
ArgNum = 3;
92100
break;
93101
case X86::BI__builtin_ia32_cmppd512_mask:
@@ -302,6 +310,14 @@ bool SemaX86::CheckBuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
302310
case X86::BI__builtin_ia32_vcvtph2uqq256_round_mask:
303311
case X86::BI__builtin_ia32_vcvtqq2ph256_round_mask:
304312
case X86::BI__builtin_ia32_vcvtuqq2ph256_round_mask:
313+
case X86::BI__builtin_ia32_vcvtph2ibs256_mask:
314+
case X86::BI__builtin_ia32_vcvtph2iubs256_mask:
315+
case X86::BI__builtin_ia32_vcvtps2ibs256_mask:
316+
case X86::BI__builtin_ia32_vcvtps2iubs256_mask:
317+
case X86::BI__builtin_ia32_vcvtph2ibs512_mask:
318+
case X86::BI__builtin_ia32_vcvtph2iubs512_mask:
319+
case X86::BI__builtin_ia32_vcvtps2ibs512_mask:
320+
case X86::BI__builtin_ia32_vcvtps2iubs512_mask:
305321
ArgNum = 3;
306322
HasRC = true;
307323
break;
Lines changed: 198 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,198 @@
1+
// RUN: %clang_cc1 %s -flax-vector-conversions=none -ffreestanding -triple=x86_64 -target-feature +avx10.2-512 \
2+
// RUN: -Wall -Werror -verify
3+
// RUN: %clang_cc1 %s -flax-vector-conversions=none -ffreestanding -triple=i386 -target-feature +avx10.2-512 \
4+
// RUN: -Wall -Werror -verify
5+
6+
#include <immintrin.h>
7+
8+
// Negative test: 22 is not accepted as the rounding argument of
// _mm512_ipcvt_roundph_epi8, so the call must be diagnosed.
__m512i test_mm512_ipcvt_roundph_epi8(__m512h __A) {
9+
return _mm512_ipcvt_roundph_epi8(__A, 22); // expected-error {{invalid rounding argument}}
10+
}
11+
12+
// Negative test: _mm512_mask_ipcvt_roundph_epi8 (mask form taking __S, __A,
// __B) must reject 22 as its trailing rounding argument.
__m512i test_mm512_mask_ipcvt_roundph_epi8(__m512i __S, __mmask32 __A, __m512h __B) {
13+
return _mm512_mask_ipcvt_roundph_epi8(__S, __A, __B, 22); // expected-error {{invalid rounding argument}}
14+
}
15+
16+
// Negative test: _mm512_maskz_ipcvt_roundph_epi8 (maskz form taking __A, __B)
// must reject 22 as its trailing rounding argument.
__m512i test_mm512_maskz_ipcvt_roundph_epi8(__mmask32 __A, __m512h __B) {
17+
return _mm512_maskz_ipcvt_roundph_epi8(__A, __B, 22); // expected-error {{invalid rounding argument}}
18+
}
19+
20+
// Negative test: 22 is not accepted as the rounding argument of
// _mm512_ipcvt_roundph_epu8, so the call must be diagnosed.
__m512i test_mm512_ipcvt_roundph_epu8(__m512h __A) {
21+
return _mm512_ipcvt_roundph_epu8(__A, 22); // expected-error {{invalid rounding argument}}
22+
}
23+
24+
// Negative test: _mm512_mask_ipcvt_roundph_epu8 (mask form taking __S, __A,
// __B) must reject 22 as its trailing rounding argument.
__m512i test_mm512_mask_ipcvt_roundph_epu8(__m512i __S, __mmask32 __A, __m512h __B) {
25+
return _mm512_mask_ipcvt_roundph_epu8(__S, __A, __B, 22); // expected-error {{invalid rounding argument}}
26+
}
27+
28+
// Negative test: _mm512_maskz_ipcvt_roundph_epu8 (maskz form taking __A, __B)
// must reject 22 as its trailing rounding argument.
__m512i test_mm512_maskz_ipcvt_roundph_epu8(__mmask32 __A, __m512h __B) {
29+
return _mm512_maskz_ipcvt_roundph_epu8(__A, __B, 22); // expected-error {{invalid rounding argument}}
30+
}
31+
32+
// Negative test: 22 is not accepted as the rounding argument of
// _mm512_ipcvt_roundps_epi8, so the call must be diagnosed.
__m512i test_mm512_ipcvt_roundps_epi8(__m512 __A) {
33+
return _mm512_ipcvt_roundps_epi8(__A, 22); // expected-error {{invalid rounding argument}}
34+
}
35+
36+
// Negative test: _mm512_mask_ipcvt_roundps_epi8 (mask form taking __S, __A,
// __B) must reject 22 as its trailing rounding argument.
__m512i test_mm512_mask_ipcvt_roundps_epi8(__m512i __S, __mmask16 __A, __m512 __B) {
37+
return _mm512_mask_ipcvt_roundps_epi8(__S, __A, __B, 22); // expected-error {{invalid rounding argument}}
38+
}
39+
40+
// Negative test: _mm512_maskz_ipcvt_roundps_epi8 (maskz form taking __A, __B)
// must reject 22 as its trailing rounding argument.
__m512i test_mm512_maskz_ipcvt_roundps_epi8(__mmask16 __A, __m512 __B) {
41+
return _mm512_maskz_ipcvt_roundps_epi8(__A, __B, 22); // expected-error {{invalid rounding argument}}
42+
}
43+
44+
// Negative test: 22 is not accepted as the rounding argument of
// _mm512_ipcvt_roundps_epu8, so the call must be diagnosed.
__m512i test_mm512_ipcvt_roundps_epu8(__m512 __A) {
45+
return _mm512_ipcvt_roundps_epu8(__A, 22); // expected-error {{invalid rounding argument}}
46+
}
47+
48+
// Negative test: _mm512_mask_ipcvt_roundps_epu8 (mask form taking __S, __A,
// __B) must reject 22 as its trailing rounding argument.
__m512i test_mm512_mask_ipcvt_roundps_epu8(__m512i __S, __mmask16 __A, __m512 __B) {
49+
return _mm512_mask_ipcvt_roundps_epu8(__S, __A, __B, 22); // expected-error {{invalid rounding argument}}
50+
}
51+
52+
// Negative test: _mm512_maskz_ipcvt_roundps_epu8 (maskz form taking __A, __B)
// must reject 22 as its trailing rounding argument.
__m512i test_mm512_maskz_ipcvt_roundps_epu8(__mmask16 __A, __m512 __B) {
53+
return _mm512_maskz_ipcvt_roundps_epu8(__A, __B, 22); // expected-error {{invalid rounding argument}}
54+
}
55+
56+
// Negative test: 22 is not accepted as the rounding argument of
// _mm512_ipcvtt_roundph_epi8, so the call must be diagnosed.
__m512i test_mm512_ipcvtt_roundph_epi8(__m512h __A) {
57+
return _mm512_ipcvtt_roundph_epi8(__A, 22); // expected-error {{invalid rounding argument}}
58+
}
59+
60+
// Negative test: _mm512_mask_ipcvtt_roundph_epi8 (mask form taking __S, __A,
// __B) must reject 22 as its trailing rounding argument.
__m512i test_mm512_mask_ipcvtt_roundph_epi8(__m512i __S, __mmask32 __A, __m512h __B) {
61+
return _mm512_mask_ipcvtt_roundph_epi8(__S, __A, __B, 22); // expected-error {{invalid rounding argument}}
62+
}
63+
64+
// Negative test: _mm512_maskz_ipcvtt_roundph_epi8 (maskz form taking __A, __B)
// must reject 22 as its trailing rounding argument.
__m512i test_mm512_maskz_ipcvtt_roundph_epi8(__mmask32 __A, __m512h __B) {
65+
return _mm512_maskz_ipcvtt_roundph_epi8(__A, __B, 22); // expected-error {{invalid rounding argument}}
66+
}
67+
68+
// Negative test: 22 is not accepted as the rounding argument of
// _mm512_ipcvtt_roundph_epu8, so the call must be diagnosed.
__m512i test_mm512_ipcvtt_roundph_epu8(__m512h __A) {
69+
return _mm512_ipcvtt_roundph_epu8(__A, 22); // expected-error {{invalid rounding argument}}
70+
}
71+
72+
// Negative test: _mm512_mask_ipcvtt_roundph_epu8 (mask form taking __S, __A,
// __B) must reject 22 as its trailing rounding argument.
__m512i test_mm512_mask_ipcvtt_roundph_epu8(__m512i __S, __mmask32 __A, __m512h __B) {
73+
return _mm512_mask_ipcvtt_roundph_epu8(__S, __A, __B, 22); // expected-error {{invalid rounding argument}}
74+
}
75+
76+
// Negative test: _mm512_maskz_ipcvtt_roundph_epu8 (maskz form taking __A, __B)
// must reject 22 as its trailing rounding argument.
__m512i test_mm512_maskz_ipcvtt_roundph_epu8(__mmask32 __A, __m512h __B) {
77+
return _mm512_maskz_ipcvtt_roundph_epu8(__A, __B, 22); // expected-error {{invalid rounding argument}}
78+
}
79+
80+
// Negative test: 22 is not accepted as the rounding argument of
// _mm512_ipcvtt_roundps_epi8, so the call must be diagnosed.
__m512i test_mm512_ipcvtt_roundps_epi8(__m512 __A) {
81+
return _mm512_ipcvtt_roundps_epi8(__A, 22); // expected-error {{invalid rounding argument}}
82+
}
83+
84+
// Negative test: _mm512_mask_ipcvtt_roundps_epi8 (mask form taking __S, __A,
// __B) must reject 22 as its trailing rounding argument.
__m512i test_mm512_mask_ipcvtt_roundps_epi8(__m512i __S, __mmask16 __A, __m512 __B) {
85+
return _mm512_mask_ipcvtt_roundps_epi8(__S, __A, __B, 22); // expected-error {{invalid rounding argument}}
86+
}
87+
88+
// Negative test: _mm512_maskz_ipcvtt_roundps_epi8 (maskz form taking __A, __B)
// must reject 22 as its trailing rounding argument.
__m512i test_mm512_maskz_ipcvtt_roundps_epi8(__mmask16 __A, __m512 __B) {
89+
return _mm512_maskz_ipcvtt_roundps_epi8(__A, __B, 22); // expected-error {{invalid rounding argument}}
90+
}
91+
92+
// Negative test: 22 is not accepted as the rounding argument of
// _mm512_ipcvtt_roundps_epu8, so the call must be diagnosed.
__m512i test_mm512_ipcvtt_roundps_epu8(__m512 __A) {
93+
return _mm512_ipcvtt_roundps_epu8(__A, 22); // expected-error {{invalid rounding argument}}
94+
}
95+
96+
// Negative test: _mm512_mask_ipcvtt_roundps_epu8 (mask form taking __S, __A,
// __B) must reject 22 as its trailing rounding argument.
__m512i test_mm512_mask_ipcvtt_roundps_epu8(__m512i __S, __mmask16 __A, __m512 __B) {
97+
return _mm512_mask_ipcvtt_roundps_epu8(__S, __A, __B, 22); // expected-error {{invalid rounding argument}}
98+
}
99+
100+
// Negative test: _mm512_maskz_ipcvtt_roundps_epu8 (maskz form taking __A, __B)
// must reject 22 as its trailing rounding argument.
__m512i test_mm512_maskz_ipcvtt_roundps_epu8(__mmask16 __A, __m512 __B) {
101+
return _mm512_maskz_ipcvtt_roundps_epu8(__A, __B, 22); // expected-error {{invalid rounding argument}}
102+
}
103+
104+
// Negative test: 22 is not accepted as the rounding argument of
// _mm256_ipcvt_roundph_epi8, so the call must be diagnosed.
__m256i test_mm256_ipcvt_roundph_epi8(__m256h __A) {
105+
return _mm256_ipcvt_roundph_epi8(__A, 22); // expected-error {{invalid rounding argument}}
106+
}
107+
108+
// Negative test: _mm256_mask_ipcvt_roundph_epi8 (mask form taking __S, __A,
// __B) must reject 22 as its trailing rounding argument.
__m256i test_mm256_mask_ipcvt_roundph_epi8(__m256i __S, __mmask16 __A, __m256h __B) {
109+
return _mm256_mask_ipcvt_roundph_epi8(__S, __A, __B, 22); // expected-error {{invalid rounding argument}}
110+
}
111+
112+
// Negative test: _mm256_maskz_ipcvt_roundph_epi8 (maskz form taking __A, __B)
// must reject 22 as its trailing rounding argument.
__m256i test_mm256_maskz_ipcvt_roundph_epi8(__mmask16 __A, __m256h __B) {
113+
return _mm256_maskz_ipcvt_roundph_epi8(__A, __B, 22); // expected-error {{invalid rounding argument}}
114+
}
115+
116+
// Negative test: 22 is not accepted as the rounding argument of
// _mm256_ipcvt_roundph_epu8, so the call must be diagnosed.
__m256i test_mm256_ipcvt_roundph_epu8(__m256h __A) {
117+
return _mm256_ipcvt_roundph_epu8(__A, 22); // expected-error {{invalid rounding argument}}
118+
}
119+
120+
// Negative test: _mm256_mask_ipcvt_roundph_epu8 (mask form taking __S, __A,
// __B) must reject 22 as its trailing rounding argument.
__m256i test_mm256_mask_ipcvt_roundph_epu8(__m256i __S, __mmask16 __A, __m256h __B) {
121+
return _mm256_mask_ipcvt_roundph_epu8(__S, __A, __B, 22); // expected-error {{invalid rounding argument}}
122+
}
123+
124+
// Negative test: _mm256_maskz_ipcvt_roundph_epu8 (maskz form taking __A, __B)
// must reject 22 as its trailing rounding argument.
__m256i test_mm256_maskz_ipcvt_roundph_epu8(__mmask16 __A, __m256h __B) {
125+
return _mm256_maskz_ipcvt_roundph_epu8(__A, __B, 22); // expected-error {{invalid rounding argument}}
126+
}
127+
128+
// Negative test: 22 is not accepted as the rounding argument of
// _mm256_ipcvt_roundps_epi8, so the call must be diagnosed.
__m256i test_mm256_ipcvt_roundps_epi8(__m256 __A) {
129+
return _mm256_ipcvt_roundps_epi8(__A, 22); // expected-error {{invalid rounding argument}}
130+
}
131+
132+
// Negative test: _mm256_mask_ipcvt_roundps_epi8 (mask form taking __S, __A,
// __B) must reject 22 as its trailing rounding argument.
__m256i test_mm256_mask_ipcvt_roundps_epi8(__m256i __S, __mmask8 __A, __m256 __B) {
133+
return _mm256_mask_ipcvt_roundps_epi8(__S, __A, __B, 22); // expected-error {{invalid rounding argument}}
134+
}
135+
136+
// Negative test: _mm256_maskz_ipcvt_roundps_epi8 (maskz form taking __A, __B)
// must reject 22 as its trailing rounding argument.
__m256i test_mm256_maskz_ipcvt_roundps_epi8(__mmask8 __A, __m256 __B) {
137+
return _mm256_maskz_ipcvt_roundps_epi8(__A, __B, 22); // expected-error {{invalid rounding argument}}
138+
}
139+
140+
// Negative test: 22 is not accepted as the rounding argument of
// _mm256_ipcvt_roundps_epu8, so the call must be diagnosed.
__m256i test_mm256_ipcvt_roundps_epu8(__m256 __A) {
141+
return _mm256_ipcvt_roundps_epu8(__A, 22); // expected-error {{invalid rounding argument}}
142+
}
143+
144+
// Negative test: _mm256_mask_ipcvt_roundps_epu8 (mask form taking __S, __A,
// __B) must reject 22 as its trailing rounding argument.
__m256i test_mm256_mask_ipcvt_roundps_epu8(__m256i __S, __mmask8 __A, __m256 __B) {
145+
return _mm256_mask_ipcvt_roundps_epu8(__S, __A, __B, 22); // expected-error {{invalid rounding argument}}
146+
}
147+
148+
// Negative test: _mm256_maskz_ipcvt_roundps_epu8 (maskz form taking __A, __B)
// must reject 22 as its trailing rounding argument.
__m256i test_mm256_maskz_ipcvt_roundps_epu8(__mmask8 __A, __m256 __B) {
149+
return _mm256_maskz_ipcvt_roundps_epu8(__A, __B, 22); // expected-error {{invalid rounding argument}}
150+
}
151+
152+
// Negative test: 22 is not accepted as the rounding argument of
// _mm256_ipcvtt_roundph_epi8, so the call must be diagnosed.
__m256i test_mm256_ipcvtt_roundph_epi8(__m256h __A) {
153+
return _mm256_ipcvtt_roundph_epi8(__A, 22); // expected-error {{invalid rounding argument}}
154+
}
155+
156+
// Negative test: _mm256_mask_ipcvtt_roundph_epi8 (mask form taking __S, __A,
// __B) must reject 22 as its trailing rounding argument.
__m256i test_mm256_mask_ipcvtt_roundph_epi8(__m256i __S, __mmask16 __A, __m256h __B) {
157+
return _mm256_mask_ipcvtt_roundph_epi8(__S, __A, __B, 22); // expected-error {{invalid rounding argument}}
158+
}
159+
160+
// Negative test: _mm256_maskz_ipcvtt_roundph_epi8 (maskz form taking __A, __B)
// must reject 22 as its trailing rounding argument.
__m256i test_mm256_maskz_ipcvtt_roundph_epi8(__mmask16 __A, __m256h __B) {
161+
return _mm256_maskz_ipcvtt_roundph_epi8(__A, __B, 22); // expected-error {{invalid rounding argument}}
162+
}
163+
164+
// Negative test: 22 is not accepted as the rounding argument of
// _mm256_ipcvtt_roundph_epu8, so the call must be diagnosed.
__m256i test_mm256_ipcvtt_roundph_epu8(__m256h __A) {
165+
return _mm256_ipcvtt_roundph_epu8(__A, 22); // expected-error {{invalid rounding argument}}
166+
}
167+
168+
// Negative test: _mm256_mask_ipcvtt_roundph_epu8 (mask form taking __S, __A,
// __B) must reject 22 as its trailing rounding argument.
__m256i test_mm256_mask_ipcvtt_roundph_epu8(__m256i __S, __mmask16 __A, __m256h __B) {
169+
return _mm256_mask_ipcvtt_roundph_epu8(__S, __A, __B, 22); // expected-error {{invalid rounding argument}}
170+
}
171+
172+
// Negative test: _mm256_maskz_ipcvtt_roundph_epu8 (maskz form taking __A, __B)
// must reject 22 as its trailing rounding argument.
__m256i test_mm256_maskz_ipcvtt_roundph_epu8(__mmask16 __A, __m256h __B) {
173+
return _mm256_maskz_ipcvtt_roundph_epu8(__A, __B, 22); // expected-error {{invalid rounding argument}}
174+
}
175+
176+
// Negative test: 22 is not accepted as the rounding argument of
// _mm256_ipcvtt_roundps_epi8, so the call must be diagnosed.
__m256i test_mm256_ipcvtt_roundps_epi8(__m256 __A) {
177+
return _mm256_ipcvtt_roundps_epi8(__A, 22); // expected-error {{invalid rounding argument}}
178+
}
179+
180+
// Negative test: _mm256_mask_ipcvtt_roundps_epi8 (mask form taking __S, __A,
// __B) must reject 22 as its trailing rounding argument.
__m256i test_mm256_mask_ipcvtt_roundps_epi8(__m256i __S, __mmask8 __A, __m256 __B) {
181+
return _mm256_mask_ipcvtt_roundps_epi8(__S, __A, __B, 22); // expected-error {{invalid rounding argument}}
182+
}
183+
184+
// Negative test: _mm256_maskz_ipcvtt_roundps_epi8 (maskz form taking __A, __B)
// must reject 22 as its trailing rounding argument.
__m256i test_mm256_maskz_ipcvtt_roundps_epi8(__mmask8 __A, __m256 __B) {
185+
return _mm256_maskz_ipcvtt_roundps_epi8(__A, __B, 22); // expected-error {{invalid rounding argument}}
186+
}
187+
188+
// Negative test: 22 is not accepted as the rounding argument of
// _mm256_ipcvtt_roundps_epu8, so the call must be diagnosed.
__m256i test_mm256_ipcvtt_roundps_epu8(__m256 __A) {
189+
return _mm256_ipcvtt_roundps_epu8(__A, 22); // expected-error {{invalid rounding argument}}
190+
}
191+
192+
// Negative test: _mm256_mask_ipcvtt_roundps_epu8 (mask form taking __S, __A,
// __B) must reject 22 as its trailing rounding argument.
__m256i test_mm256_mask_ipcvtt_roundps_epu8(__m256i __S, __mmask8 __A, __m256 __B) {
193+
return _mm256_mask_ipcvtt_roundps_epu8(__S, __A, __B, 22); // expected-error {{invalid rounding argument}}
194+
}
195+
196+
// Negative test: _mm256_maskz_ipcvtt_roundps_epu8 (maskz form taking __A, __B)
// must reject 22 as its trailing rounding argument.
__m256i test_mm256_maskz_ipcvtt_roundps_epu8(__mmask8 __A, __m256 __B) {
197+
return _mm256_maskz_ipcvtt_roundps_epu8(__A, __B, 22); // expected-error {{invalid rounding argument}}
198+
}

0 commit comments

Comments
 (0)