Skip to content

Commit f42f5ea

Browse files
authored
Add detection for Intel Advanced Matrix Extensions (AMX) instructions (#231)
Tested using intel SDE: https://www.intel.com/content/www/us/en/download/684897/intel-software-development-emulator.html Test scripts: ``` bash scripts/local-build.sh ISAS=() OPTIONS=() PLATFORMS=() OPTIONS+=(-quark); PLATFORMS+=("Quark") OPTIONS+=(-p4); PLATFORMS+=("Pentium4") OPTIONS+=(-p4p); PLATFORMS+=("Pentium4 Prescott") OPTIONS+=(-mrm); PLATFORMS+=("Merom") OPTIONS+=(-pnr); PLATFORMS+=("Penryn") OPTIONS+=(-nhm); PLATFORMS+=("Nehalem") OPTIONS+=(-wsm); PLATFORMS+=("Westmere") OPTIONS+=(-snb); PLATFORMS+=("Sandy Bridge") OPTIONS+=(-ivb); PLATFORMS+=("Ivy Bridge") OPTIONS+=(-hsw); PLATFORMS+=("Haswell") OPTIONS+=(-bdw); PLATFORMS+=("Broadwell") OPTIONS+=(-slt); PLATFORMS+=("Saltwell") OPTIONS+=(-slm); PLATFORMS+=("Silvermont") OPTIONS+=(-glm); PLATFORMS+=("Goldmont") OPTIONS+=(-glp); PLATFORMS+=("Goldmont Plus") OPTIONS+=(-tnt); PLATFORMS+=("Tremont") OPTIONS+=(-snr); PLATFORMS+=("Snow Ridge") OPTIONS+=(-skl); PLATFORMS+=("Skylake") OPTIONS+=(-cnl); PLATFORMS+=("Cannon Lake") OPTIONS+=(-icl); PLATFORMS+=("Ice Lake") OPTIONS+=(-skx); PLATFORMS+=("Skylake server") OPTIONS+=(-clx); PLATFORMS+=("Cascade Lake") OPTIONS+=(-cpx); PLATFORMS+=("Cooper Lake") OPTIONS+=(-icx); PLATFORMS+=("Ice Lake server") OPTIONS+=(-knl); PLATFORMS+=("Knights landing") OPTIONS+=(-knm); PLATFORMS+=("Knights mill") OPTIONS+=(-tgl); PLATFORMS+=("Tiger Lake") OPTIONS+=(-adl); PLATFORMS+=("Alder Lake") OPTIONS+=(-mtl); PLATFORMS+=("Meteor Lake") OPTIONS+=(-rpl); PLATFORMS+=("Raptor Lake") OPTIONS+=(-spr); PLATFORMS+=("Sapphire Rapids") OPTIONS+=(-gnr); PLATFORMS+=("Granite Rapids") OPTIONS+=(-gnr256); PLATFORMS+=("Granite Rapids (AVX10.1 / 256VL)") OPTIONS+=(-srf); PLATFORMS+=("Sierra Forest") OPTIONS+=(-arl); PLATFORMS+=("Arrow Lake") OPTIONS+=(-lnl); PLATFORMS+=("Lunar Lake") OPTIONS+=(-future); PLATFORMS+=("Future chip") ISAS+=("AMXBF16") ISAS+=("AMXTILE") ISAS+=("AMXINT8") ISAS+=("AMXFP16") SDE_BIN="/home/mingfeim/packages/sde-external-9.33.0-2024-01-07-lin/sde" for I in 
"${!PLATFORMS[@]}"; do echo "${PLATFORMS["${I}"]}" for J in "${!ISAS[@]}"; do "${SDE_BIN}" "${OPTIONS[$I]}" -- ./build/local/isa-info | grep ${ISAS[$J]} done done ``` Results: ``` Quark SDE-ERROR: 64 bits applications are not supported by input chip: PENTIUM or by the input cpuid definition file SDE-ERROR: 64 bits applications are not supported by input chip: PENTIUM or by the input cpuid definition file SDE-ERROR: 64 bits applications are not supported by input chip: PENTIUM or by the input cpuid definition file SDE-ERROR: 64 bits applications are not supported by input chip: PENTIUM or by the input cpuid definition file Pentium4 SDE-ERROR: 64 bits applications are not supported by input chip: PENTIUM4 or by the input cpuid definition file SDE-ERROR: 64 bits applications are not supported by input chip: PENTIUM4 or by the input cpuid definition file SDE-ERROR: 64 bits applications are not supported by input chip: PENTIUM4 or by the input cpuid definition file SDE-ERROR: 64 bits applications are not supported by input chip: PENTIUM4 or by the input cpuid definition file Pentium4 Prescott AMXBF16: no AMXTILE: no AMXINT8: no AMXFP16: no Merom AMXBF16: no AMXTILE: no AMXINT8: no AMXFP16: no Penryn AMXBF16: no AMXTILE: no AMXINT8: no AMXFP16: no Nehalem AMXBF16: no AMXTILE: no AMXINT8: no AMXFP16: no Westmere AMXBF16: no AMXTILE: no AMXINT8: no AMXFP16: no Sandy Bridge AMXBF16: no AMXTILE: no AMXINT8: no AMXFP16: no Ivy Bridge AMXBF16: no AMXTILE: no AMXINT8: no AMXFP16: no Haswell AMXBF16: no AMXTILE: no AMXINT8: no AMXFP16: no Broadwell AMXBF16: no AMXTILE: no AMXINT8: no AMXFP16: no Saltwell AMXBF16: no AMXTILE: no AMXINT8: no AMXFP16: no Silvermont AMXBF16: no AMXTILE: no AMXINT8: no AMXFP16: no Goldmont AMXBF16: no AMXTILE: no AMXINT8: no AMXFP16: no Goldmont Plus AMXBF16: no AMXTILE: no AMXINT8: no AMXFP16: no Tremont AMXBF16: no AMXTILE: no AMXINT8: no AMXFP16: no Snow Ridge AMXBF16: no AMXTILE: no AMXINT8: no AMXFP16: no Skylake AMXBF16: no AMXTILE: no AMXINT8: 
no AMXFP16: no Cannon Lake AMXBF16: no AMXTILE: no AMXINT8: no AMXFP16: no Ice Lake AMXBF16: no AMXTILE: no AMXINT8: no AMXFP16: no Skylake server AMXBF16: no AMXTILE: no AMXINT8: no AMXFP16: no Cascade Lake AMXBF16: no AMXTILE: no AMXINT8: no AMXFP16: no Cooper Lake AMXBF16: no AMXTILE: no AMXINT8: no AMXFP16: no Ice Lake server AMXBF16: no AMXTILE: no AMXINT8: no AMXFP16: no Knights landing AMXBF16: no AMXTILE: no AMXINT8: no AMXFP16: no Knights mill AMXBF16: no AMXTILE: no AMXINT8: no AMXFP16: no Tiger Lake AMXBF16: no AMXTILE: no AMXINT8: no AMXFP16: no Alder Lake AMXBF16: no AMXTILE: no AMXINT8: no AMXFP16: no Meteor Lake AMXBF16: no AMXTILE: no AMXINT8: no AMXFP16: no Raptor Lake AMXBF16: no AMXTILE: no AMXINT8: no AMXFP16: no Sapphire Rapids AMXBF16: yes AMXTILE: yes AMXINT8: yes AMXFP16: no Granite Rapids AMXBF16: yes AMXTILE: yes AMXINT8: yes AMXFP16: yes Granite Rapids (AVX10.1 / 256VL) AMXBF16: yes AMXTILE: yes AMXINT8: yes AMXFP16: yes Sierra Forest AMXBF16: no AMXTILE: no AMXINT8: no AMXFP16: no Arrow Lake AMXBF16: no AMXTILE: no AMXINT8: no AMXFP16: no Lunar Lake AMXBF16: no AMXTILE: no AMXINT8: no AMXFP16: no Future chip AMXBF16: yes AMXTILE: yes AMXINT8: yes AMXFP16: yes ```
1 parent 6543fec commit f42f5ea

File tree

3 files changed

+84
-0
lines changed

3 files changed

+84
-0
lines changed

include/cpuinfo.h

+56
Original file line numberDiff line numberDiff line change
@@ -812,6 +812,10 @@ struct cpuinfo_x86_isa {
812812
bool avx512vp2intersect;
813813
bool avx512_4vnniw;
814814
bool avx512_4fmaps;
815+
bool amx_bf16;
816+
bool amx_tile;
817+
bool amx_int8;
818+
bool amx_fp16;
815819
bool hle;
816820
bool rtm;
817821
bool xtest;
@@ -1328,6 +1332,58 @@ static inline bool cpuinfo_has_x86_avx512_4fmaps(void) {
13281332
#endif
13291333
}
13301334

1335+
/* [NOTE] Intel Advanced Matrix Extensions (AMX) detection
1336+
*
1337+
* I. AMX is a new extension to the x86 ISA for working on matrices, consisting of
1338+
* 1) 2-dimensional registers (tiles), which hold sub-matrices from larger matrices in memory
1339+
* 2) Accelerator called Tile Matrix Multiply (TMUL), contains instructions operating on tiles
1340+
*
1341+
* II. Platforms that support AMX:
1342+
* +-----------------+-----+----------+----------+----------+----------+
1343+
* | Platforms | Gen | amx-bf16 | amx-tile | amx-int8 | amx-fp16 |
1344+
* +-----------------+-----+----------+----------+----------+----------+
1345+
* | Sapphire Rapids | 4th | YES | YES | YES | NO |
1346+
* +-----------------+-----+----------+----------+----------+----------+
1347+
* | Emerald Rapids | 5th | YES | YES | YES | NO |
1348+
* +-----------------+-----+----------+----------+----------+----------+
1349+
* | Granite Rapids | 6th | YES | YES | YES | YES |
1350+
* +-----------------+-----+----------+----------+----------+----------+
1351+
*
1352+
* Reference: https://www.intel.com/content/www/us/en/products/docs
1353+
* /accelerator-engines/advanced-matrix-extensions/overview.html
1354+
*/
1355+
/*
 * Query for the AMX_BF16 feature flag.
 *
 * On x86 / x86-64 builds (CPUINFO_ARCH_X86 or CPUINFO_ARCH_X86_64 non-zero)
 * this returns the amx_bf16 field of the global cpuinfo_isa structure.
 * On every other architecture the x86 branch is compiled out and the
 * function always returns false.
 */
static inline bool cpuinfo_has_x86_amx_bf16(void) {
1356+
#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
1357+
return cpuinfo_isa.amx_bf16;
1358+
#else
1359+
return false;
1360+
#endif
1361+
}
1362+
1363+
/*
 * Query for the AMX_TILE feature flag.
 *
 * On x86 / x86-64 builds (CPUINFO_ARCH_X86 or CPUINFO_ARCH_X86_64 non-zero)
 * this returns the amx_tile field of the global cpuinfo_isa structure.
 * On every other architecture the x86 branch is compiled out and the
 * function always returns false.
 */
static inline bool cpuinfo_has_x86_amx_tile(void) {
1364+
#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
1365+
return cpuinfo_isa.amx_tile;
1366+
#else
1367+
return false;
1368+
#endif
1369+
}
1370+
1371+
/*
 * Query for the AMX_INT8 feature flag.
 *
 * On x86 / x86-64 builds (CPUINFO_ARCH_X86 or CPUINFO_ARCH_X86_64 non-zero)
 * this returns the amx_int8 field of the global cpuinfo_isa structure.
 * On every other architecture the x86 branch is compiled out and the
 * function always returns false.
 */
static inline bool cpuinfo_has_x86_amx_int8(void) {
1372+
#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
1373+
return cpuinfo_isa.amx_int8;
1374+
#else
1375+
return false;
1376+
#endif
1377+
}
1378+
1379+
/*
 * Query for the AMX_FP16 feature flag.
 *
 * On x86 / x86-64 builds (CPUINFO_ARCH_X86 or CPUINFO_ARCH_X86_64 non-zero)
 * this returns the amx_fp16 field of the global cpuinfo_isa structure.
 * On every other architecture the x86 branch is compiled out and the
 * function always returns false.
 */
static inline bool cpuinfo_has_x86_amx_fp16(void) {
1380+
#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
1381+
return cpuinfo_isa.amx_fp16;
1382+
#else
1383+
return false;
1384+
#endif
1385+
}
1386+
13311387
static inline bool cpuinfo_has_x86_hle(void) {
13321388
#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
13331389
return cpuinfo_isa.hle;

src/x86/isa.c

+24
Original file line numberDiff line numberDiff line change
@@ -537,6 +537,30 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa(
537537
*/
538538
isa.avx512bf16 = avx512_regs && !!(structured_feature_info1.eax & UINT32_C(0x00000020));
539539

540+
/*
541+
* AMX_BF16 instructions:
542+
* - Intel: edx[bit 22] in structured feature info (ecx = 0).
543+
*/
544+
isa.amx_bf16 = avx512_regs && !!(structured_feature_info0.edx & UINT32_C(0x00400000));
545+
546+
/*
547+
* AMX_TILE instructions:
548+
* - Intel: edx[bit 24] in structured feature info (ecx = 0).
549+
*/
550+
isa.amx_tile = avx512_regs && !!(structured_feature_info0.edx & UINT32_C(0x01000000));
551+
552+
/*
553+
* AMX_INT8 instructions:
554+
* - Intel: edx[bit 25] in structured feature info (ecx = 0).
555+
*/
556+
isa.amx_int8 = avx512_regs && !!(structured_feature_info0.edx & UINT32_C(0x02000000));
557+
558+
/*
559+
* AMX_FP16 instructions:
560+
* - Intel: eax[bit 21] in structured feature info (ecx = 1).
561+
*/
562+
isa.amx_fp16 = avx512_regs && !!(structured_feature_info1.eax & UINT32_C(0x00200000));
563+
540564
/*
541565
* HLE instructions:
542566
* - Intel: ebx[bit 4] in structured feature info (ecx = 0).

tools/isa-info.c

+4
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,10 @@ int main(int argc, char** argv) {
7070
printf("\tAVX512VP2INTERSECT: %s\n", cpuinfo_has_x86_avx512vp2intersect() ? "yes" : "no");
7171
printf("\tAVX512_4VNNIW: %s\n", cpuinfo_has_x86_avx512_4vnniw() ? "yes" : "no");
7272
printf("\tAVX512_4FMAPS: %s\n", cpuinfo_has_x86_avx512_4fmaps() ? "yes" : "no");
73+
printf("\tAMX_BF16: %s\n", cpuinfo_has_x86_amx_bf16() ? "yes" : "no");
74+
printf("\tAMX_TILE: %s\n", cpuinfo_has_x86_amx_tile() ? "yes" : "no");
75+
printf("\tAMX_INT8: %s\n", cpuinfo_has_x86_amx_int8() ? "yes" : "no");
76+
printf("\tAMX_FP16: %s\n", cpuinfo_has_x86_amx_fp16() ? "yes" : "no");
7377
printf("\tAVXVNNI: %s\n", cpuinfo_has_x86_avxvnni() ? "yes" : "no");
7478

7579
printf("Multi-threading extensions:\n");

0 commit comments

Comments
 (0)