Skip to content

Commit 26660a6

Browse files
committed
Atomic loads/stores, spill/reload, tests for __fp16 and half vectors.
1 parent d58853c commit 26660a6

File tree

14 files changed

+1297
-19
lines changed

14 files changed

+1297
-19
lines changed

clang/lib/Basic/Targets/SystemZ.h

+2-2
Original file line numberDiff line numberDiff line change
@@ -94,12 +94,12 @@ class LLVM_LIBRARY_VISIBILITY SystemZTargetInfo : public TargetInfo {
9494

9595
// True if the backend supports operations on the half LLVM IR type.
9696
// By setting this to false, conversions will happen for _Float16 around
97-
// a statement by default with operations done in float. However, if
97+
// a statement by default, with operations done in float. However, if
9898
// -ffloat16-excess-precision=none is given, no conversions will be made
9999
// and instead the backend will promote each half operation to float
100100
// individually.
101101
HasLegalHalfType = false;
102-
// Allow half arguments and return values.
102+
// Allow half arguments and return values (__fp16).
103103
HalfArgsAndReturns = true;
104104
// Support _Float16.
105105
HasFloat16 = true;

clang/lib/CodeGen/Targets/SystemZ.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,7 @@ bool SystemZABIInfo::isFPArgumentType(QualType Ty) const {
185185

186186
if (const BuiltinType *BT = Ty->getAs<BuiltinType>())
187187
switch (BT->getKind()) {
188-
// case BuiltinType::Half: // __fp16 Support __fp16??
188+
case BuiltinType::Half: // __fp16
189189
case BuiltinType::Float16: // _Float16
190190
case BuiltinType::Float:
191191
case BuiltinType::Double:

clang/test/CodeGen/SystemZ/fp16.c

+32
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
// RUN: %clang_cc1 -triple s390x-linux-gnu -emit-llvm -o - %s \
2+
// RUN: | FileCheck %s
3+
4+
__fp16 f(__fp16 a, __fp16 b, __fp16 c, __fp16 d) {
5+
return a * b + c * d;
6+
}
7+
8+
// CHECK-LABEL: define dso_local half @f(half noundef %a, half noundef %b, half noundef %c, half noundef %d) #0 {
9+
// CHECK-NEXT: entry:
10+
// CHECK-NEXT: %a.addr = alloca half, align 2
11+
// CHECK-NEXT: %b.addr = alloca half, align 2
12+
// CHECK-NEXT: %c.addr = alloca half, align 2
13+
// CHECK-NEXT: %d.addr = alloca half, align 2
14+
// CHECK-NEXT: store half %a, ptr %a.addr, align 2
15+
// CHECK-NEXT: store half %b, ptr %b.addr, align 2
16+
// CHECK-NEXT: store half %c, ptr %c.addr, align 2
17+
// CHECK-NEXT: store half %d, ptr %d.addr, align 2
18+
// CHECK-NEXT: %0 = load half, ptr %a.addr, align 2
19+
// CHECK-NEXT: %conv = fpext half %0 to float
20+
// CHECK-NEXT: %1 = load half, ptr %b.addr, align 2
21+
// CHECK-NEXT: %conv1 = fpext half %1 to float
22+
// CHECK-NEXT: %mul = fmul float %conv, %conv1
23+
// CHECK-NEXT: %2 = load half, ptr %c.addr, align 2
24+
// CHECK-NEXT: %conv2 = fpext half %2 to float
25+
// CHECK-NEXT: %3 = load half, ptr %d.addr, align 2
26+
// CHECK-NEXT: %conv3 = fpext half %3 to float
27+
// CHECK-NEXT: %mul4 = fmul float %conv2, %conv3
28+
// CHECK-NEXT: %add = fadd float %mul, %mul4
29+
// CHECK-NEXT: %4 = fptrunc float %add to half
30+
// CHECK-NEXT: ret half %4
31+
// CHECK-NEXT: }
32+

clang/test/CodeGen/SystemZ/systemz-abi.c

+43
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,9 @@ long long pass_longlong(long long arg) { return arg; }
4545
__int128 pass_int128(__int128 arg) { return arg; }
4646
// CHECK-LABEL: define{{.*}} void @pass_int128(ptr dead_on_unwind noalias writable sret(i128) align 8 %{{.*}}, ptr %0)
4747

48+
__fp16 pass___fp16(__fp16 arg) { return arg; }
49+
// CHECK-LABEL: define{{.*}} half @pass___fp16(half %{{.*}})
50+
4851
_Float16 pass__Float16(_Float16 arg) { return arg; }
4952
// CHECK-LABEL: define{{.*}} half @pass__Float16(half %{{.*}})
5053

@@ -75,6 +78,8 @@ _Complex long pass_complex_long(_Complex long arg) { return arg; }
7578
_Complex long long pass_complex_longlong(_Complex long long arg) { return arg; }
7679
// CHECK-LABEL: define{{.*}} void @pass_complex_longlong(ptr dead_on_unwind noalias writable sret({ i64, i64 }) align 8 %{{.*}}, ptr %{{.*}}arg)
7780

81+
// _Complex __fp16 is (currently?) not allowed.
82+
7883
_Complex _Float16 pass_complex__Float16(_Complex _Float16 arg) { return arg; }
7984
// CHECK-LABEL: define{{.*}} void @pass_complex__Float16(ptr dead_on_unwind noalias writable sret({ half, half }) align 2 %{{.*}}, ptr %{{.*}}arg)
8085

@@ -129,6 +134,11 @@ struct agg_16byte pass_agg_16byte(struct agg_16byte arg) { return arg; }
129134

130135
// Float-like aggregate types
131136

137+
struct agg___fp16 { __fp16 a; };
138+
struct agg___fp16 pass_agg___fp16(struct agg___fp16 arg) { return arg; }
139+
// HARD-FLOAT-LABEL: define{{.*}} void @pass_agg___fp16(ptr dead_on_unwind noalias writable sret(%struct.agg___fp16) align 2 %{{.*}}, half %{{.*}})
140+
// SOFT-FLOAT-LABEL: define{{.*}} void @pass_agg___fp16(ptr dead_on_unwind noalias writable sret(%struct.agg___fp16) align 2 %{{.*}}, i16 noext %{{.*}})
141+
132142
struct agg__Float16 { _Float16 a; };
133143
struct agg__Float16 pass_agg__Float16(struct agg__Float16 arg) { return arg; }
134144
// HARD-FLOAT-LABEL: define{{.*}} void @pass_agg__Float16(ptr dead_on_unwind noalias writable sret(%struct.agg__Float16) align 2 %{{.*}}, half %{{.*}})
@@ -148,6 +158,11 @@ struct agg_longdouble { long double a; };
148158
struct agg_longdouble pass_agg_longdouble(struct agg_longdouble arg) { return arg; }
149159
// CHECK-LABEL: define{{.*}} void @pass_agg_longdouble(ptr dead_on_unwind noalias writable sret(%struct.agg_longdouble) align 8 %{{.*}}, ptr %{{.*}})
150160

161+
struct agg___fp16_a8 { __fp16 a __attribute__((aligned (8))); };
162+
struct agg___fp16_a8 pass_agg___fp16_a8(struct agg___fp16_a8 arg) { return arg; }
163+
// HARD-FLOAT-LABEL: define{{.*}} void @pass_agg___fp16_a8(ptr dead_on_unwind noalias writable sret(%struct.agg___fp16_a8) align 8 %{{.*}}, double %{{.*}})
164+
// SOFT-FLOAT-LABEL: define{{.*}} void @pass_agg___fp16_a8(ptr dead_on_unwind noalias writable sret(%struct.agg___fp16_a8) align 8 %{{.*}}, i64 %{{.*}})
165+
151166
struct agg__Float16_a8 { _Float16 a __attribute__((aligned (8))); };
152167
struct agg__Float16_a8 pass_agg__Float16_a8(struct agg__Float16_a8 arg) { return arg; }
153168
// HARD-FLOAT-LABEL: define{{.*}} void @pass_agg__Float16_a8(ptr dead_on_unwind noalias writable sret(%struct.agg__Float16_a8) align 8 %{{.*}}, double %{{.*}})
@@ -180,6 +195,10 @@ struct agg_nofloat3 pass_agg_nofloat3(struct agg_nofloat3 arg) { return arg; }
180195

181196
// Union types likewise are *not* float-like aggregate types
182197

198+
union union___fp16 { __fp16 a; };
199+
union union___fp16 pass_union___fp16(union union___fp16 arg) { return arg; }
200+
// CHECK-LABEL: define{{.*}} void @pass_union___fp16(ptr dead_on_unwind noalias writable sret(%union.union___fp16) align 2 %{{.*}}, i16 noext %{{.*}})
201+
183202
union union__Float16 { _Float16 a; };
184203
union union__Float16 pass_union__Float16(union union__Float16 arg) { return arg; }
185204
// CHECK-LABEL: define{{.*}} void @pass_union__Float16(ptr dead_on_unwind noalias writable sret(%union.union__Float16) align 2 %{{.*}}, i16 noext %{{.*}})
@@ -461,6 +480,30 @@ struct agg_8byte va_agg_8byte(__builtin_va_list l) { return __builtin_va_arg(l,
461480
// CHECK: [[VA_ARG_ADDR:%[^ ]+]] = phi ptr [ [[RAW_REG_ADDR]], %{{.*}} ], [ [[RAW_MEM_ADDR]], %{{.*}} ]
462481
// CHECK: ret void
463482

483+
struct agg___fp16 va_agg___fp16(__builtin_va_list l) { return __builtin_va_arg(l, struct agg___fp16); }
484+
// CHECK-LABEL: define{{.*}} void @va_agg___fp16(ptr dead_on_unwind noalias writable sret(%struct.agg___fp16) align 2 %{{.*}}, ptr %{{.*}}
485+
// HARD-FLOAT: [[REG_COUNT_PTR:%[^ ]+]] = getelementptr inbounds nuw %struct.__va_list_tag, ptr %{{.*}}, i32 0, i32 1
486+
// SOFT-FLOAT: [[REG_COUNT_PTR:%[^ ]+]] = getelementptr inbounds nuw %struct.__va_list_tag, ptr %{{.*}}, i32 0, i32 0
487+
// CHECK: [[REG_COUNT:%[^ ]+]] = load i64, ptr [[REG_COUNT_PTR]]
488+
// HARD-FLOAT: [[FITS_IN_REGS:%[^ ]+]] = icmp ult i64 [[REG_COUNT]], 4
489+
// SOFT-FLOAT: [[FITS_IN_REGS:%[^ ]+]] = icmp ult i64 [[REG_COUNT]], 5
490+
// CHECK: br i1 [[FITS_IN_REGS]],
491+
// CHECK: [[SCALED_REG_COUNT:%[^ ]+]] = mul i64 [[REG_COUNT]], 8
492+
// HARD-FLOAT: [[REG_OFFSET:%[^ ]+]] = add i64 [[SCALED_REG_COUNT]], 128
493+
// SOFT-FLOAT: [[REG_OFFSET:%[^ ]+]] = add i64 [[SCALED_REG_COUNT]], 22
494+
// CHECK: [[REG_SAVE_AREA_PTR:%[^ ]+]] = getelementptr inbounds nuw %struct.__va_list_tag, ptr %{{.*}}, i32 0, i32 3
495+
// CHECK: [[REG_SAVE_AREA:%[^ ]+]] = load ptr, ptr [[REG_SAVE_AREA_PTR:[^ ]+]]
496+
// CHECK: [[RAW_REG_ADDR:%[^ ]+]] = getelementptr i8, ptr [[REG_SAVE_AREA]], i64 [[REG_OFFSET]]
497+
// CHECK: [[REG_COUNT1:%[^ ]+]] = add i64 [[REG_COUNT]], 1
498+
// CHECK: store i64 [[REG_COUNT1]], ptr [[REG_COUNT_PTR]]
499+
// CHECK: [[OVERFLOW_ARG_AREA_PTR:%[^ ]+]] = getelementptr inbounds nuw %struct.__va_list_tag, ptr %{{.*}}, i32 0, i32 2
500+
// CHECK: [[OVERFLOW_ARG_AREA:%[^ ]+]] = load ptr, ptr [[OVERFLOW_ARG_AREA_PTR]]
501+
// CHECK: [[RAW_MEM_ADDR:%[^ ]+]] = getelementptr i8, ptr [[OVERFLOW_ARG_AREA]], i64 6
502+
// CHECK: [[OVERFLOW_ARG_AREA2:%[^ ]+]] = getelementptr i8, ptr [[OVERFLOW_ARG_AREA]], i64 8
503+
// CHECK: store ptr [[OVERFLOW_ARG_AREA2]], ptr [[OVERFLOW_ARG_AREA_PTR]]
504+
// CHECK: [[VA_ARG_ADDR:%[^ ]+]] = phi ptr [ [[RAW_REG_ADDR]], %{{.*}} ], [ [[RAW_MEM_ADDR]], %{{.*}} ]
505+
// CHECK: ret void
506+
464507
struct agg__Float16 va_agg__Float16(__builtin_va_list l) { return __builtin_va_arg(l, struct agg__Float16); }
465508
// CHECK-LABEL: define{{.*}} void @va_agg__Float16(ptr dead_on_unwind noalias writable sret(%struct.agg__Float16) align 2 %{{.*}}, ptr %{{.*}}
466509
// HARD-FLOAT: [[REG_COUNT_PTR:%[^ ]+]] = getelementptr inbounds nuw %struct.__va_list_tag, ptr %{{.*}}, i32 0, i32 1

llvm/lib/Target/SystemZ/SystemZISelLowering.cpp

+47-13
Original file line numberDiff line numberDiff line change
@@ -523,7 +523,9 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
523523
setTruncStoreAction(VT, MVT::f16, Expand);
524524
}
525525
setOperationAction(ISD::LOAD, MVT::f16, Custom);
526+
setOperationAction(ISD::ATOMIC_LOAD, MVT::f16, Custom);
526527
setOperationAction(ISD::STORE, MVT::f16, Custom);
528+
setOperationAction(ISD::ATOMIC_STORE, MVT::f16, Custom);
527529
setOperationAction(ISD::FP_ROUND, MVT::f16, Custom);
528530
setOperationAction(ISD::FP_EXTEND, MVT::f32, Custom);
529531
setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom);
@@ -4596,6 +4598,22 @@ SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op,
45964598
return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
45974599
}
45984600

4601+
SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op,
4602+
SelectionDAG &DAG) const {
4603+
MVT RegVT = Op.getSimpleValueType();
4604+
if (RegVT.getSizeInBits() == 128)
4605+
return lowerATOMIC_LDST_I128(Op, DAG);
4606+
return lowerLoadF16(Op, DAG);
4607+
}
4608+
4609+
SDValue SystemZTargetLowering::lowerATOMIC_STORE(SDValue Op,
4610+
SelectionDAG &DAG) const {
4611+
auto *Node = cast<AtomicSDNode>(Op.getNode());
4612+
if (Node->getMemoryVT().getSizeInBits() == 128)
4613+
return lowerATOMIC_LDST_I128(Op, DAG);
4614+
return lowerStoreF16(Op, DAG);
4615+
}
4616+
45994617
SDValue SystemZTargetLowering::lowerATOMIC_LDST_I128(SDValue Op,
46004618
SelectionDAG &DAG) const {
46014619
auto *Node = cast<AtomicSDNode>(Op.getNode());
@@ -6217,15 +6235,25 @@ SDValue SystemZTargetLowering::lowerLoadF16(SDValue Op,
62176235
MVT RegVT = Op.getSimpleValueType();
62186236
if (RegVT != MVT::f16)
62196237
return SDValue();
6220-
LoadSDNode *Ld = cast<LoadSDNode>(Op.getNode());
6221-
SDLoc DL(Ld);
6222-
assert(EVT(RegVT) == Ld->getMemoryVT() && "Expected non-extending f16 load");
6238+
6239+
SDLoc DL(Op);
6240+
SDValue NewLd;
6241+
if (auto *AtomicLd = dyn_cast<AtomicSDNode>(Op.getNode())) {
6242+
assert(EVT(RegVT) == AtomicLd->getMemoryVT() && "Unhandled f16 load");
6243+
NewLd = DAG.getAtomic(ISD::ATOMIC_LOAD, DL, MVT::i16, MVT::i32,
6244+
AtomicLd->getChain(), AtomicLd->getBasePtr(),
6245+
AtomicLd->getMemOperand());
6246+
cast<AtomicSDNode>(NewLd)->setExtensionType(ISD::EXTLOAD);
6247+
} else {
6248+
LoadSDNode *Ld = cast<LoadSDNode>(Op.getNode());
6249+
assert(EVT(RegVT) == Ld->getMemoryVT() && "Unhandled f16 load");
6250+
NewLd = DAG.getExtLoad(ISD::EXTLOAD, DL, MVT::i32, Ld->getChain(),
6251+
Ld->getBasePtr(), Ld->getPointerInfo(),
6252+
MVT::i16, Ld->getOriginalAlign(),
6253+
Ld->getMemOperand()->getFlags());
6254+
}
62236255
// Load as integer, shift and insert into upper 2 bytes of the FP register.
62246256
// TODO: Use VLEH if available.
6225-
SDValue NewLd = DAG.getExtLoad(ISD::EXTLOAD, DL, MVT::i32, Ld->getChain(),
6226-
Ld->getBasePtr(), Ld->getPointerInfo(),
6227-
MVT::i16, Ld->getOriginalAlign(),
6228-
Ld->getMemOperand()->getFlags());
62296257
SDValue Shft = DAG.getNode(ISD::SHL, DL, MVT::i32, NewLd,
62306258
DAG.getConstant(16, DL, MVT::i32));
62316259
SDValue BCast = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Shft);
@@ -6236,20 +6264,25 @@ SDValue SystemZTargetLowering::lowerLoadF16(SDValue Op,
62366264

62376265
SDValue SystemZTargetLowering::lowerStoreF16(SDValue Op,
62386266
SelectionDAG &DAG) const {
6239-
StoreSDNode *St = cast<StoreSDNode>(Op.getNode());
6240-
SDLoc DL(St);
6241-
SDValue StoredVal = St->getValue();
6267+
SDValue StoredVal = Op->getOperand(1);
62426268
MVT StoreVT = StoredVal.getSimpleValueType();
62436269
if (StoreVT != MVT::f16)
62446270
return SDValue();
6245-
// Move into a GPR, shift and store the 2 bytes.
6246-
// TODO: Use VSTEH if available.
6271+
6272+
// Move into a GPR, shift and store the 2 bytes. TODO: Use VSTEH if available.
6273+
SDLoc DL(Op);
62476274
SDNode *U32 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f32);
62486275
SDValue In32 = DAG.getTargetInsertSubreg(SystemZ::subreg_h16, DL,
62496276
MVT::f32, SDValue(U32, 0), StoredVal);
62506277
SDValue BCast = DAG.getNode(ISD::BITCAST, DL, MVT::i32, In32);
62516278
SDValue Shft = DAG.getNode(ISD::SRL, DL, MVT::i32, BCast,
62526279
DAG.getConstant(16, DL, MVT::i32));
6280+
6281+
if (auto *AtomicSt = dyn_cast<AtomicSDNode>(Op.getNode()))
6282+
return DAG.getAtomic(ISD::ATOMIC_STORE, DL, MVT::i16, AtomicSt->getChain(),
6283+
Shft, AtomicSt->getBasePtr(), AtomicSt->getMemOperand());
6284+
6285+
StoreSDNode *St = cast<StoreSDNode>(Op.getNode());
62536286
return DAG.getTruncStore(St->getChain(), DL, Shft, St->getBasePtr(),
62546287
MVT::i16, St->getMemOperand());
62556288
}
@@ -6373,8 +6406,9 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
63736406
case ISD::ATOMIC_SWAP:
63746407
return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW);
63756408
case ISD::ATOMIC_STORE:
6409+
return lowerATOMIC_STORE(Op, DAG);
63766410
case ISD::ATOMIC_LOAD:
6377-
return lowerATOMIC_LDST_I128(Op, DAG);
6411+
return lowerATOMIC_LOAD(Op, DAG);
63786412
case ISD::ATOMIC_LOAD_ADD:
63796413
return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_ADD);
63806414
case ISD::ATOMIC_LOAD_SUB:

llvm/lib/Target/SystemZ/SystemZISelLowering.h

+2
Original file line numberDiff line numberDiff line change
@@ -698,6 +698,8 @@ class SystemZTargetLowering : public TargetLowering {
698698
SDValue lowerCTPOP(SDValue Op, SelectionDAG &DAG) const;
699699
SDValue lowerVECREDUCE_ADD(SDValue Op, SelectionDAG &DAG) const;
700700
SDValue lowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const;
701+
SDValue lowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG) const;
702+
SDValue lowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const;
701703
SDValue lowerATOMIC_LDST_I128(SDValue Op, SelectionDAG &DAG) const;
702704
SDValue lowerATOMIC_LOAD_OP(SDValue Op, SelectionDAG &DAG,
703705
unsigned Opcode) const;

llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp

+46
Original file line numberDiff line numberDiff line change
@@ -995,8 +995,31 @@ void SystemZInstrInfo::storeRegToStackSlot(
995995
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg,
996996
bool isKill, int FrameIdx, const TargetRegisterClass *RC,
997997
const TargetRegisterInfo *TRI, Register VReg) const {
998+
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
998999
DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
9991000

1001+
// There are no fp16 load/store instructions, so we need to save/restore via
1002+
// GPR (TODO: Use VSTEH in case of vector support).
1003+
if (RC == &SystemZ::FP16BitRegClass) {
1004+
assert(!MRI.isSSA() && MRI.getNumVirtRegs() &&
1005+
"Expected non-SSA form with virtual registers.");
1006+
Register GR64Reg = MRI.createVirtualRegister(&SystemZ::GR64BitRegClass);
1007+
Register FP64Reg = MRI.createVirtualRegister(&SystemZ::FP64BitRegClass);
1008+
BuildMI(MBB, MBBI, DL, get(SystemZ::COPY))
1009+
.addReg(FP64Reg, RegState::DefineNoRead, SystemZ::subreg_h16)
1010+
.addReg(SrcReg, getKillRegState(isKill));
1011+
BuildMI(MBB, MBBI, DL, get(SystemZ::LGDR), GR64Reg)
1012+
.addReg(FP64Reg, RegState::Kill);
1013+
BuildMI(MBB, MBBI, DL, get(SystemZ::SRLG), GR64Reg)
1014+
.addReg(GR64Reg)
1015+
.addReg(0)
1016+
.addImm(48);
1017+
addFrameReference(BuildMI(MBB, MBBI, DL, get(SystemZ::STH))
1018+
.addReg(GR64Reg, RegState::Kill, SystemZ::subreg_l32),
1019+
FrameIdx);
1020+
return;
1021+
}
1022+
10001023
// Callers may expect a single instruction, so keep 128-bit moves
10011024
// together for now and lower them after register allocation.
10021025
unsigned LoadOpcode, StoreOpcode;
@@ -1012,8 +1035,31 @@ void SystemZInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
10121035
const TargetRegisterClass *RC,
10131036
const TargetRegisterInfo *TRI,
10141037
Register VReg) const {
1038+
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
10151039
DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
10161040

1041+
// There are no fp16 load/store instructions, so we need to save/restore via
1042+
// GPR (TODO: Use VLEH in case of vector support).
1043+
if (RC == &SystemZ::FP16BitRegClass) {
1044+
assert(!MRI.isSSA() && MRI.getNumVirtRegs() &&
1045+
"Expected non-SSA form with virtual registers.");
1046+
Register GR64Reg = MRI.createVirtualRegister(&SystemZ::GR64BitRegClass);
1047+
Register FP64Reg = MRI.createVirtualRegister(&SystemZ::FP64BitRegClass);
1048+
addFrameReference(BuildMI(MBB, MBBI, DL, get(SystemZ::LH))
1049+
.addReg(GR64Reg, RegState::DefineNoRead,
1050+
SystemZ::subreg_l32),
1051+
FrameIdx);
1052+
BuildMI(MBB, MBBI, DL, get(SystemZ::SLLG), GR64Reg)
1053+
.addReg(GR64Reg)
1054+
.addReg(0)
1055+
.addImm(48);
1056+
BuildMI(MBB, MBBI, DL, get(SystemZ::LDGR), FP64Reg)
1057+
.addReg(GR64Reg, RegState::Kill);
1058+
BuildMI(MBB, MBBI, DL, get(SystemZ::COPY), DestReg)
1059+
.addReg(FP64Reg, RegState::Kill, SystemZ::subreg_h16);
1060+
return;
1061+
}
1062+
10171063
// Callers may expect a single instruction, so keep 128-bit moves
10181064
// together for now and lower them after register allocation.
10191065
unsigned LoadOpcode, StoreOpcode;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; Test fp16 atomic loads.
3+
;
4+
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
5+
6+
define half @f1(ptr %src) {
7+
; CHECK-LABEL: f1:
8+
; CHECK: # %bb.0:
9+
; CHECK-NEXT: lh %r0, 0(%r2)
10+
; CHECK-NEXT: sllg %r0, %r0, 48
11+
; CHECK-NEXT: ldgr %f0, %r0
12+
; CHECK-NEXT: # kill: def $f0h killed $f0h killed $f0d
13+
; CHECK-NEXT: br %r14
14+
%val = load atomic half, ptr %src seq_cst, align 2
15+
ret half %val
16+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; Test half atomic stores.
3+
;
4+
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
5+
6+
define void @f1(ptr %src, half %val) {
7+
; CHECK-LABEL: f1:
8+
; CHECK: # %bb.0:
9+
; CHECK-NEXT: # kill: def $f0h killed $f0h def $f0d
10+
; CHECK-NEXT: lgdr %r0, %f0
11+
; CHECK-NEXT: srlg %r0, %r0, 48
12+
; CHECK-NEXT: sth %r0, 0(%r2)
13+
; CHECK-NEXT: bcr 15, %r0
14+
; CHECK-NEXT: br %r14
15+
store atomic half %val, ptr %src seq_cst, align 2
16+
ret void
17+
}

0 commit comments

Comments
 (0)