Skip to content

Commit c708b8e

Browse files
committed
Merge remote-tracking branch 'upstream/main' into patch-1
2 parents f527e9b + 3c8b153 commit c708b8e

File tree

8 files changed

+211
-35
lines changed

8 files changed

+211
-35
lines changed

BUILD.bazel

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,11 @@ WINDOWS_X86_SRCS = [
9999
"src/x86/windows/init.c",
100100
]
101101

102+
WINDOWS_ARM_SRCS = [
103+
"src/arm/windows/init-by-logical-sys-info.c",
104+
"src/arm/windows/init.c",
105+
]
106+
102107
MACH_X86_SRCS = [
103108
"src/x86/mach/init.c",
104109
]
@@ -128,6 +133,7 @@ cc_library(
128133
":macos_x86_64_legacy": COMMON_SRCS + X86_SRCS + MACH_SRCS + MACH_X86_SRCS,
129134
":macos_arm64": COMMON_SRCS + MACH_SRCS + MACH_ARM_SRCS,
130135
":windows_x86_64": COMMON_SRCS + X86_SRCS + WINDOWS_X86_SRCS,
136+
":windows_arm64": COMMON_SRCS + ARM_SRCS + WINDOWS_ARM_SRCS,
131137
":android_armv7": COMMON_SRCS + ARM_SRCS + LINUX_SRCS + LINUX_ARM32_SRCS + ANDROID_ARM_SRCS,
132138
":android_arm64": COMMON_SRCS + ARM_SRCS + LINUX_SRCS + LINUX_ARM64_SRCS + ANDROID_ARM_SRCS,
133139
":android_x86": COMMON_SRCS + X86_SRCS + LINUX_SRCS + LINUX_X86_SRCS,
@@ -149,6 +155,7 @@ cc_library(
149155
}),
150156
copts = select({
151157
":windows_x86_64": [],
158+
":windows_arm64": [],
152159
"//conditions:default": C99OPTS,
153160
}) + [
154161
"-Iexternal/cpuinfo/include",
@@ -281,6 +288,11 @@ config_setting(
281288
values = {"cpu": "x64_windows"},
282289
)
283290

291+
config_setting(
292+
name = "windows_arm64",
293+
values = {"cpu": "arm64_windows"},
294+
)
295+
284296
config_setting(
285297
name = "android_armv7",
286298
values = {

MODULE.bazel

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
module(name = "cpuinfo")

WORKSPACE.bazel

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
workspace(name = "org_pytorch_cpuinfo")
1+
workspace(name = "org_pytorch_cpuinfo")

include/cpuinfo.h

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -813,6 +813,13 @@ struct cpuinfo_x86_isa {
813813
bool avx512vp2intersect;
814814
bool avx512_4vnniw;
815815
bool avx512_4fmaps;
816+
bool amx_bf16;
817+
bool amx_tile;
818+
bool amx_int8;
819+
bool amx_fp16;
820+
bool avx_vnni_int8;
821+
bool avx_vnni_int16;
822+
bool avx_ne_convert;
816823
bool hle;
817824
bool rtm;
818825
bool xtest;
@@ -1329,6 +1336,98 @@ static inline bool cpuinfo_has_x86_avx512_4fmaps(void) {
13291336
#endif
13301337
}
13311338

1339+
/* [NOTE] Intel Advanced Matrix Extensions (AMX) detection
1340+
*
1341+
* I. AMX is a new extension to the x86 ISA for working on matrices; it consists of
1342+
* 1) 2-dimensional registers (tiles), which hold sub-matrices from larger matrices in memory
1343+
* 2) An accelerator called Tile Matrix Multiply (TMUL), which contains instructions operating on tiles
1344+
*
1345+
* II. Platforms that support AMX:
1346+
* +-----------------+-----+----------+----------+----------+----------+
1347+
* | Platforms | Gen | amx-bf16 | amx-tile | amx-int8 | amx-fp16 |
1348+
* +-----------------+-----+----------+----------+----------+----------+
1349+
* | Sapphire Rapids | 4th | YES | YES | YES | NO |
1350+
* +-----------------+-----+----------+----------+----------+----------+
1351+
* | Emerald Rapids | 5th | YES | YES | YES | NO |
1352+
* +-----------------+-----+----------+----------+----------+----------+
1353+
* | Granite Rapids | 6th | YES | YES | YES | YES |
1354+
* +-----------------+-----+----------+----------+----------+----------+
1355+
*
1356+
* Reference: https://www.intel.com/content/www/us/en/products/docs
1357+
* /accelerator-engines/advanced-matrix-extensions/overview.html
1358+
*/
1359+
static inline bool cpuinfo_has_x86_amx_bf16(void) {
1360+
#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
1361+
return cpuinfo_isa.amx_bf16;
1362+
#else
1363+
return false;
1364+
#endif
1365+
}
1366+
1367+
static inline bool cpuinfo_has_x86_amx_tile(void) {
1368+
#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
1369+
return cpuinfo_isa.amx_tile;
1370+
#else
1371+
return false;
1372+
#endif
1373+
}
1374+
1375+
static inline bool cpuinfo_has_x86_amx_int8(void) {
1376+
#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
1377+
return cpuinfo_isa.amx_int8;
1378+
#else
1379+
return false;
1380+
#endif
1381+
}
1382+
1383+
static inline bool cpuinfo_has_x86_amx_fp16(void) {
1384+
#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
1385+
return cpuinfo_isa.amx_fp16;
1386+
#else
1387+
return false;
1388+
#endif
1389+
}
1390+
1391+
/*
1392+
* Intel AVX Vector Neural Network Instructions (VNNI) INT8
1393+
* Supported Platforms: Sierra Forest, Arrow Lake, Lunar Lake
1394+
*/
1395+
static inline bool cpuinfo_has_x86_avx_vnni_int8(void) {
1396+
#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
1397+
return cpuinfo_isa.avx_vnni_int8;
1398+
#else
1399+
return false;
1400+
#endif
1401+
}
1402+
1403+
/*
1404+
* Intel AVX Vector Neural Network Instructions (VNNI) INT16
1405+
* Supported Platforms: Arrow Lake, Lunar Lake
1406+
*/
1407+
static inline bool cpuinfo_has_x86_avx_vnni_int16(void) {
1408+
#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
1409+
return cpuinfo_isa.avx_vnni_int16;
1410+
#else
1411+
return false;
1412+
#endif
1413+
}
1414+
1415+
/*
1416+
* A new set of instructions, which can convert low precision floating point
1417+
* like BF16/FP16 to high precision floating point FP32, as well as convert FP32
1418+
* elements to BF16. These instructions allow the platform to have improved AI
1419+
* capabilities and better compatibility.
1420+
*
1421+
* Supported Platforms: Sierra Forest, Arrow Lake, Lunar Lake
1422+
*/
1423+
static inline bool cpuinfo_has_x86_avx_ne_convert(void) {
1424+
#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
1425+
return cpuinfo_isa.avx_ne_convert;
1426+
#else
1427+
return false;
1428+
#endif
1429+
}
1430+
13321431
static inline bool cpuinfo_has_x86_hle(void) {
13331432
#if CPUINFO_ARCH_X86 || CPUINFO_ARCH_X86_64
13341433
return cpuinfo_isa.hle;

src/arm/windows/init.c

Lines changed: 46 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -20,42 +20,56 @@ static struct woa_chip_info woa_chip_unknown = {
2020
{{cpuinfo_vendor_unknown, cpuinfo_uarch_unknown, 0}}};
2121

2222
/* Please add new SoC/chip info here! */
23-
static struct woa_chip_info woa_chips[] = {
23+
static struct woa_chip_info woa_chips[woa_chip_name_last] = {
2424
/* Microsoft SQ1 Kryo 495 4 + 4 cores (3 GHz + 1.80 GHz) */
25-
{L"Microsoft SQ1",
26-
woa_chip_name_microsoft_sq_1,
27-
{{
28-
cpuinfo_vendor_arm,
29-
cpuinfo_uarch_cortex_a55,
30-
1800000000,
31-
},
32-
{
33-
cpuinfo_vendor_arm,
34-
cpuinfo_uarch_cortex_a76,
35-
3000000000,
36-
}}},
25+
[woa_chip_name_microsoft_sq_1] =
26+
{L"Microsoft SQ1",
27+
woa_chip_name_microsoft_sq_1,
28+
{{
29+
cpuinfo_vendor_arm,
30+
cpuinfo_uarch_cortex_a55,
31+
1800000000,
32+
},
33+
{
34+
cpuinfo_vendor_arm,
35+
cpuinfo_uarch_cortex_a76,
36+
3000000000,
37+
}}},
3738
/* Microsoft SQ2 Kryo 495 4 + 4 cores (3.15 GHz + 2.42 GHz) */
38-
{L"Microsoft SQ2",
39-
woa_chip_name_microsoft_sq_2,
40-
{{
41-
cpuinfo_vendor_arm,
42-
cpuinfo_uarch_cortex_a55,
43-
2420000000,
44-
},
45-
{cpuinfo_vendor_arm, cpuinfo_uarch_cortex_a76, 3150000000}}},
39+
[woa_chip_name_microsoft_sq_2] =
40+
{L"Microsoft SQ2",
41+
woa_chip_name_microsoft_sq_2,
42+
{{
43+
cpuinfo_vendor_arm,
44+
cpuinfo_uarch_cortex_a55,
45+
2420000000,
46+
},
47+
{cpuinfo_vendor_arm, cpuinfo_uarch_cortex_a76, 3150000000}}},
48+
/* Snapdragon (TM) 8cx Gen 3 @ 3.0 GHz */
49+
[woa_chip_name_microsoft_sq_3] =
50+
{L"Snapdragon (TM) 8cx Gen 3",
51+
woa_chip_name_microsoft_sq_3,
52+
{{
53+
cpuinfo_vendor_arm,
54+
cpuinfo_uarch_cortex_a78,
55+
2420000000,
56+
},
57+
{cpuinfo_vendor_arm, cpuinfo_uarch_cortex_x1, 3000000000}}},
4658
/* Microsoft Windows Dev Kit 2023 */
47-
{L"Snapdragon Compute Platform",
48-
woa_chip_name_microsoft_sq_3,
49-
{{
50-
cpuinfo_vendor_arm,
51-
cpuinfo_uarch_cortex_a78,
52-
2420000000,
53-
},
54-
{cpuinfo_vendor_arm, cpuinfo_uarch_cortex_x1, 3000000000}}},
59+
[woa_chip_name_microsoft_sq_3_devkit] =
60+
{L"Snapdragon Compute Platform",
61+
woa_chip_name_microsoft_sq_3_devkit,
62+
{{
63+
cpuinfo_vendor_arm,
64+
cpuinfo_uarch_cortex_a78,
65+
2420000000,
66+
},
67+
{cpuinfo_vendor_arm, cpuinfo_uarch_cortex_x1, 3000000000}}},
5568
/* Ampere Altra */
56-
{L"Ampere(R) Altra(R) Processor",
57-
woa_chip_name_ampere_altra,
58-
{{cpuinfo_vendor_arm, cpuinfo_uarch_neoverse_n1, 3000000000}}}};
69+
[woa_chip_name_ampere_altra] = {
70+
L"Ampere(R) Altra(R) Processor",
71+
woa_chip_name_ampere_altra,
72+
{{cpuinfo_vendor_arm, cpuinfo_uarch_neoverse_n1, 3000000000}}}};
5973

6074
BOOL CALLBACK cpuinfo_arm_windows_init(PINIT_ONCE init_once, PVOID parameter, PVOID* context) {
6175
struct woa_chip_info* chip_info = NULL;

src/arm/windows/windows-arm-init.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,9 @@ enum woa_chip_name {
88
woa_chip_name_microsoft_sq_1 = 0,
99
woa_chip_name_microsoft_sq_2 = 1,
1010
woa_chip_name_microsoft_sq_3 = 2,
11-
woa_chip_name_ampere_altra = 3,
12-
woa_chip_name_unknown = 4,
11+
woa_chip_name_microsoft_sq_3_devkit = 3,
12+
woa_chip_name_ampere_altra = 4,
13+
woa_chip_name_unknown = 5,
1314
woa_chip_name_last = woa_chip_name_unknown
1415
};
1516

src/x86/isa.c

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -537,6 +537,48 @@ struct cpuinfo_x86_isa cpuinfo_x86_detect_isa(
537537
*/
538538
isa.avx512bf16 = avx512_regs && !!(structured_feature_info1.eax & UINT32_C(0x00000020));
539539

540+
/*
541+
* AMX_BF16 instructions:
542+
* - Intel: edx[bit 22] in structured feature info (ecx = 0).
543+
*/
544+
isa.amx_bf16 = avx512_regs && !!(structured_feature_info0.edx & UINT32_C(0x00400000));
545+
546+
/*
547+
* AMX_TILE instructions:
548+
* - Intel: edx[bit 24] in structured feature info (ecx = 0).
549+
*/
550+
isa.amx_tile = avx512_regs && !!(structured_feature_info0.edx & UINT32_C(0x01000000));
551+
552+
/*
553+
* AMX_INT8 instructions:
554+
* - Intel: edx[bit 25] in structured feature info (ecx = 0).
555+
*/
556+
isa.amx_int8 = avx512_regs && !!(structured_feature_info0.edx & UINT32_C(0x02000000));
557+
558+
/*
559+
* AMX_FP16 instructions:
560+
* - Intel: eax[bit 21] in structured feature info (ecx = 1).
561+
*/
562+
isa.amx_fp16 = avx512_regs && !!(structured_feature_info1.eax & UINT32_C(0x00200000));
563+
564+
/*
565+
* AVX_VNNI_INT8 instructions:
566+
* - Intel: edx[bit 4] in structured feature info (ecx = 1).
567+
*/
568+
isa.avx_vnni_int8 = avx_regs && !!(structured_feature_info1.edx & UINT32_C(0x00000010));
569+
570+
/*
571+
* AVX_VNNI_INT16 instructions:
572+
* - Intel: edx[bit 10] in structured feature info (ecx = 1).
573+
*/
574+
isa.avx_vnni_int16 = avx_regs && !!(structured_feature_info1.edx & UINT32_C(0x00000400));
575+
576+
/*
577+
* AVX_NE_CONVERT instructions:
578+
* - Intel: edx[bit 5] in structured feature info (ecx = 1).
579+
*/
580+
isa.avx_ne_convert = avx_regs && !!(structured_feature_info1.edx & UINT32_C(0x00000020));
581+
540582
/*
541583
* HLE instructions:
542584
* - Intel: ebx[bit 4] in structured feature info (ecx = 0).

tools/isa-info.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,14 @@ int main(int argc, char** argv) {
7070
printf("\tAVX512VP2INTERSECT: %s\n", cpuinfo_has_x86_avx512vp2intersect() ? "yes" : "no");
7171
printf("\tAVX512_4VNNIW: %s\n", cpuinfo_has_x86_avx512_4vnniw() ? "yes" : "no");
7272
printf("\tAVX512_4FMAPS: %s\n", cpuinfo_has_x86_avx512_4fmaps() ? "yes" : "no");
73+
printf("\tAMX_BF16: %s\n", cpuinfo_has_x86_amx_bf16() ? "yes" : "no");
74+
printf("\tAMX_TILE: %s\n", cpuinfo_has_x86_amx_tile() ? "yes" : "no");
75+
printf("\tAMX_INT8: %s\n", cpuinfo_has_x86_amx_int8() ? "yes" : "no");
76+
printf("\tAMX_FP16: %s\n", cpuinfo_has_x86_amx_fp16() ? "yes" : "no");
7377
printf("\tAVXVNNI: %s\n", cpuinfo_has_x86_avxvnni() ? "yes" : "no");
78+
printf("\tAVX_VNNI_INT8: %s\n", cpuinfo_has_x86_avx_vnni_int8() ? "yes" : "no");
79+
printf("\tAVX_VNNI_INT16: %s\n", cpuinfo_has_x86_avx_vnni_int16() ? "yes" : "no");
80+
printf("\tAVX_NE_CONVERT: %s\n", cpuinfo_has_x86_avx_ne_convert() ? "yes" : "no");
7481

7582
printf("Multi-threading extensions:\n");
7683
printf("\tMONITOR/MWAIT: %s\n", cpuinfo_has_x86_mwait() ? "yes" : "no");

0 commit comments

Comments
 (0)