Skip to content

Commit 0a6a421

Browse files
committed
Add f16 inline ASM support for 32-bit ARM
1 parent 12b33d3 commit 0a6a421

File tree

3 files changed

+149
-6
lines changed

3 files changed

+149
-6
lines changed

compiler/rustc_codegen_llvm/src/asm.rs

+58
Original file line number | Diff line number | Diff line change
@@ -1020,6 +1020,26 @@ fn llvm_fixup_input<'ll, 'tcx>(
10201020
value
10211021
}
10221022
}
1023+
(
1024+
InlineAsmRegClass::Arm(
1025+
ArmInlineAsmRegClass::dreg
1026+
| ArmInlineAsmRegClass::dreg_low8
1027+
| ArmInlineAsmRegClass::dreg_low16,
1028+
),
1029+
Abi::Vector { element, count: 4 },
1030+
) if element.primitive() == Primitive::Float(Float::F16) => {
1031+
bx.bitcast(value, bx.type_f64())
1032+
}
1033+
(
1034+
InlineAsmRegClass::Arm(
1035+
ArmInlineAsmRegClass::qreg
1036+
| ArmInlineAsmRegClass::qreg_low4
1037+
| ArmInlineAsmRegClass::qreg_low8,
1038+
),
1039+
Abi::Vector { element, count: 8 },
1040+
) if element.primitive() == Primitive::Float(Float::F16) => {
1041+
bx.bitcast(value, bx.type_vector(bx.type_i16(), 8))
1042+
}
10231043
(InlineAsmRegClass::Mips(MipsInlineAsmRegClass::reg), Abi::Scalar(s)) => {
10241044
match s.primitive() {
10251045
// MIPS only supports register-length arithmetics.
@@ -1130,6 +1150,26 @@ fn llvm_fixup_output<'ll, 'tcx>(
11301150
value
11311151
}
11321152
}
1153+
(
1154+
InlineAsmRegClass::Arm(
1155+
ArmInlineAsmRegClass::dreg
1156+
| ArmInlineAsmRegClass::dreg_low8
1157+
| ArmInlineAsmRegClass::dreg_low16,
1158+
),
1159+
Abi::Vector { element, count: 4 },
1160+
) if element.primitive() == Primitive::Float(Float::F16) => {
1161+
bx.bitcast(value, bx.type_vector(bx.type_f16(), 4))
1162+
}
1163+
(
1164+
InlineAsmRegClass::Arm(
1165+
ArmInlineAsmRegClass::qreg
1166+
| ArmInlineAsmRegClass::qreg_low4
1167+
| ArmInlineAsmRegClass::qreg_low8,
1168+
),
1169+
Abi::Vector { element, count: 8 },
1170+
) if element.primitive() == Primitive::Float(Float::F16) => {
1171+
bx.bitcast(value, bx.type_vector(bx.type_f16(), 8))
1172+
}
11331173
(InlineAsmRegClass::Mips(MipsInlineAsmRegClass::reg), Abi::Scalar(s)) => {
11341174
match s.primitive() {
11351175
// MIPS only supports register-length arithmetics.
@@ -1233,6 +1273,24 @@ fn llvm_fixup_output_type<'ll, 'tcx>(
12331273
layout.llvm_type(cx)
12341274
}
12351275
}
1276+
(
1277+
InlineAsmRegClass::Arm(
1278+
ArmInlineAsmRegClass::dreg
1279+
| ArmInlineAsmRegClass::dreg_low8
1280+
| ArmInlineAsmRegClass::dreg_low16,
1281+
),
1282+
Abi::Vector { element, count: 4 },
1283+
) if element.primitive() == Primitive::Float(Float::F16) => cx.type_f64(),
1284+
(
1285+
InlineAsmRegClass::Arm(
1286+
ArmInlineAsmRegClass::qreg
1287+
| ArmInlineAsmRegClass::qreg_low4
1288+
| ArmInlineAsmRegClass::qreg_low8,
1289+
),
1290+
Abi::Vector { element, count: 8 },
1291+
) if element.primitive() == Primitive::Float(Float::F16) => {
1292+
cx.type_vector(cx.type_i16(), 8)
1293+
}
12361294
(InlineAsmRegClass::Mips(MipsInlineAsmRegClass::reg), Abi::Scalar(s)) => {
12371295
match s.primitive() {
12381296
// MIPS only supports register-length arithmetics.

compiler/rustc_target/src/asm/arm.rs

+5 -5
Original file line number | Diff line number | Diff line change
@@ -47,16 +47,16 @@ impl ArmInlineAsmRegClass {
4747
_arch: InlineAsmArch,
4848
) -> &'static [(InlineAsmType, Option<Symbol>)] {
4949
match self {
50-
Self::reg => types! { _: I8, I16, I32, F32; },
51-
Self::sreg | Self::sreg_low16 => types! { vfp2: I32, F32; },
50+
Self::reg => types! { _: I8, I16, I32, F16, F32; },
51+
Self::sreg | Self::sreg_low16 => types! { vfp2: I32, F16, F32; },
5252
Self::dreg_low16 | Self::dreg_low8 => types! {
53-
vfp2: I64, F64, VecI8(8), VecI16(4), VecI32(2), VecI64(1), VecF32(2);
53+
vfp2: I64, F64, VecI8(8), VecI16(4), VecI32(2), VecI64(1), VecF16(4), VecF32(2);
5454
},
5555
Self::dreg => types! {
56-
d32: I64, F64, VecI8(8), VecI16(4), VecI32(2), VecI64(1), VecF32(2);
56+
d32: I64, F64, VecI8(8), VecI16(4), VecI32(2), VecI64(1), VecF16(4), VecF32(2);
5757
},
5858
Self::qreg | Self::qreg_low8 | Self::qreg_low4 => types! {
59-
neon: VecI8(16), VecI16(8), VecI32(4), VecI64(2), VecF32(4);
59+
neon: VecI8(16), VecI16(8), VecI32(4), VecI64(2), VecF16(8), VecF32(4);
6060
},
6161
}
6262
}

tests/assembly/asm/arm-types.rs

+86 -1
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@
44
//@ compile-flags: -C opt-level=0
55
//@ needs-llvm-components: arm
66

7-
#![feature(no_core, lang_items, rustc_attrs, repr_simd)]
7+
#![feature(no_core, lang_items, rustc_attrs, repr_simd, f16)]
88
#![crate_type = "rlib"]
99
#![no_core]
1010
#![allow(asm_sub_register, non_camel_case_types)]
@@ -38,6 +38,8 @@ pub struct i32x2(i32, i32);
3838
#[repr(simd)]
3939
pub struct i64x1(i64);
4040
#[repr(simd)]
41+
pub struct f16x4(f16, f16, f16, f16);
42+
#[repr(simd)]
4143
pub struct f32x2(f32, f32);
4244
#[repr(simd)]
4345
pub struct i8x16(i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8);
@@ -48,11 +50,14 @@ pub struct i32x4(i32, i32, i32, i32);
4850
#[repr(simd)]
4951
pub struct i64x2(i64, i64);
5052
#[repr(simd)]
53+
pub struct f16x8(f16, f16, f16, f16, f16, f16, f16, f16);
54+
#[repr(simd)]
5155
pub struct f32x4(f32, f32, f32, f32);
5256

5357
impl Copy for i8 {}
5458
impl Copy for i16 {}
5559
impl Copy for i32 {}
60+
impl Copy for f16 {}
5661
impl Copy for f32 {}
5762
impl Copy for i64 {}
5863
impl Copy for f64 {}
@@ -61,11 +66,13 @@ impl Copy for i8x8 {}
6166
impl Copy for i16x4 {}
6267
impl Copy for i32x2 {}
6368
impl Copy for i64x1 {}
69+
impl Copy for f16x4 {}
6470
impl Copy for f32x2 {}
6571
impl Copy for i8x16 {}
6672
impl Copy for i16x8 {}
6773
impl Copy for i32x4 {}
6874
impl Copy for i64x2 {}
75+
impl Copy for f16x8 {}
6976
impl Copy for f32x4 {}
7077

7178
extern "C" {
@@ -152,6 +159,12 @@ check!(reg_i16 i16 reg "mov");
152159
// CHECK: @NO_APP
153160
check!(reg_i32 i32 reg "mov");
154161

162+
// CHECK-LABEL: reg_f16:
163+
// CHECK: @APP
164+
// CHECK: mov {{[a-z0-9]+}}, {{[a-z0-9]+}}
165+
// CHECK: @NO_APP
166+
check!(reg_f16 f16 reg "mov");
167+
155168
// CHECK-LABEL: reg_f32:
156169
// CHECK: @APP
157170
// CHECK: mov {{[a-z0-9]+}}, {{[a-z0-9]+}}
@@ -170,6 +183,12 @@ check!(reg_ptr ptr reg "mov");
170183
// CHECK: @NO_APP
171184
check!(sreg_i32 i32 sreg "vmov.f32");
172185

186+
// CHECK-LABEL: sreg_f16:
187+
// CHECK: @APP
188+
// CHECK: vmov.f32 s{{[0-9]+}}, s{{[0-9]+}}
189+
// CHECK: @NO_APP
190+
check!(sreg_f16 f16 sreg "vmov.f32");
191+
173192
// CHECK-LABEL: sreg_f32:
174193
// CHECK: @APP
175194
// CHECK: vmov.f32 s{{[0-9]+}}, s{{[0-9]+}}
@@ -188,6 +207,12 @@ check!(sreg_ptr ptr sreg "vmov.f32");
188207
// CHECK: @NO_APP
189208
check!(sreg_low16_i32 i32 sreg_low16 "vmov.f32");
190209

210+
// CHECK-LABEL: sreg_low16_f16:
211+
// CHECK: @APP
212+
// CHECK: vmov.f32 s{{[0-9]+}}, s{{[0-9]+}}
213+
// CHECK: @NO_APP
214+
check!(sreg_low16_f16 f16 sreg_low16 "vmov.f32");
215+
191216
// CHECK-LABEL: sreg_low16_f32:
192217
// CHECK: @APP
193218
// CHECK: vmov.f32 s{{[0-9]+}}, s{{[0-9]+}}
@@ -230,6 +255,12 @@ check!(dreg_i32x2 i32x2 dreg "vmov.f64");
230255
// CHECK: @NO_APP
231256
check!(dreg_i64x1 i64x1 dreg "vmov.f64");
232257

258+
// CHECK-LABEL: dreg_f16x4:
259+
// CHECK: @APP
260+
// CHECK: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}}
261+
// CHECK: @NO_APP
262+
check!(dreg_f16x4 f16x4 dreg "vmov.f64");
263+
233264
// CHECK-LABEL: dreg_f32x2:
234265
// CHECK: @APP
235266
// CHECK: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}}
@@ -272,6 +303,12 @@ check!(dreg_low16_i32x2 i32x2 dreg_low16 "vmov.f64");
272303
// CHECK: @NO_APP
273304
check!(dreg_low16_i64x1 i64x1 dreg_low16 "vmov.f64");
274305

306+
// CHECK-LABEL: dreg_low16_f16x4:
307+
// CHECK: @APP
308+
// CHECK: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}}
309+
// CHECK: @NO_APP
310+
check!(dreg_low16_f16x4 f16x4 dreg_low16 "vmov.f64");
311+
275312
// CHECK-LABEL: dreg_low16_f32x2:
276313
// CHECK: @APP
277314
// CHECK: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}}
@@ -314,6 +351,12 @@ check!(dreg_low8_i32x2 i32x2 dreg_low8 "vmov.f64");
314351
// CHECK: @NO_APP
315352
check!(dreg_low8_i64x1 i64x1 dreg_low8 "vmov.f64");
316353

354+
// CHECK-LABEL: dreg_low8_f16x4:
355+
// CHECK: @APP
356+
// CHECK: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}}
357+
// CHECK: @NO_APP
358+
check!(dreg_low8_f16x4 f16x4 dreg_low8 "vmov.f64");
359+
317360
// CHECK-LABEL: dreg_low8_f32x2:
318361
// CHECK: @APP
319362
// CHECK: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}}
@@ -344,6 +387,12 @@ check!(qreg_i32x4 i32x4 qreg "vmov");
344387
// CHECK: @NO_APP
345388
check!(qreg_i64x2 i64x2 qreg "vmov");
346389

390+
// CHECK-LABEL: qreg_f16x8:
391+
// CHECK: @APP
392+
// CHECK: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
393+
// CHECK: @NO_APP
394+
check!(qreg_f16x8 f16x8 qreg "vmov");
395+
347396
// CHECK-LABEL: qreg_f32x4:
348397
// CHECK: @APP
349398
// CHECK: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
@@ -374,6 +423,12 @@ check!(qreg_low8_i32x4 i32x4 qreg_low8 "vmov");
374423
// CHECK: @NO_APP
375424
check!(qreg_low8_i64x2 i64x2 qreg_low8 "vmov");
376425

426+
// CHECK-LABEL: qreg_low8_f16x8:
427+
// CHECK: @APP
428+
// CHECK: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
429+
// CHECK: @NO_APP
430+
check!(qreg_low8_f16x8 f16x8 qreg_low8 "vmov");
431+
377432
// CHECK-LABEL: qreg_low8_f32x4:
378433
// CHECK: @APP
379434
// CHECK: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
@@ -404,6 +459,12 @@ check!(qreg_low4_i32x4 i32x4 qreg_low4 "vmov");
404459
// CHECK: @NO_APP
405460
check!(qreg_low4_i64x2 i64x2 qreg_low4 "vmov");
406461

462+
// CHECK-LABEL: qreg_low4_f16x8:
463+
// CHECK: @APP
464+
// CHECK: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
465+
// CHECK: @NO_APP
466+
check!(qreg_low4_f16x8 f16x8 qreg_low4 "vmov");
467+
407468
// CHECK-LABEL: qreg_low4_f32x4:
408469
// CHECK: @APP
409470
// CHECK: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
@@ -428,6 +489,12 @@ check_reg!(r0_i16 i16 "r0" "mov");
428489
// CHECK: @NO_APP
429490
check_reg!(r0_i32 i32 "r0" "mov");
430491

492+
// CHECK-LABEL: r0_f16:
493+
// CHECK: @APP
494+
// CHECK: mov r0, r0
495+
// CHECK: @NO_APP
496+
check_reg!(r0_f16 f16 "r0" "mov");
497+
431498
// CHECK-LABEL: r0_f32:
432499
// CHECK: @APP
433500
// CHECK: mov r0, r0
@@ -446,6 +513,12 @@ check_reg!(r0_ptr ptr "r0" "mov");
446513
// CHECK: @NO_APP
447514
check_reg!(s0_i32 i32 "s0" "vmov.f32");
448515

516+
// CHECK-LABEL: s0_f16:
517+
// CHECK: @APP
518+
// CHECK: vmov.f32 s0, s0
519+
// CHECK: @NO_APP
520+
check_reg!(s0_f16 f16 "s0" "vmov.f32");
521+
449522
// CHECK-LABEL: s0_f32:
450523
// CHECK: @APP
451524
// CHECK: vmov.f32 s0, s0
@@ -494,6 +567,12 @@ check_reg!(d0_i32x2 i32x2 "d0" "vmov.f64");
494567
// CHECK: @NO_APP
495568
check_reg!(d0_i64x1 i64x1 "d0" "vmov.f64");
496569

570+
// CHECK-LABEL: d0_f16x4:
571+
// CHECK: @APP
572+
// CHECK: vmov.f64 d0, d0
573+
// CHECK: @NO_APP
574+
check_reg!(d0_f16x4 f16x4 "d0" "vmov.f64");
575+
497576
// CHECK-LABEL: d0_f32x2:
498577
// CHECK: @APP
499578
// CHECK: vmov.f64 d0, d0
@@ -524,6 +603,12 @@ check_reg!(q0_i32x4 i32x4 "q0" "vmov");
524603
// CHECK: @NO_APP
525604
check_reg!(q0_i64x2 i64x2 "q0" "vmov");
526605

606+
// CHECK-LABEL: q0_f16x8:
607+
// CHECK: @APP
608+
// CHECK: vorr q0, q0, q0
609+
// CHECK: @NO_APP
610+
check_reg!(q0_f16x8 f16x8 "q0" "vmov");
611+
527612
// CHECK-LABEL: q0_f32x4:
528613
// CHECK: @APP
529614
// CHECK: vorr q0, q0, q0

0 commit comments

Comments
 (0)