Commit 681888e

[AArch64-SVE]: Force generating code compatible to streaming mode.
When streaming mode is enabled, lower some operations and disable some code paths to force generating code compatible with streaming mode. Add the streaming-mode flag to the new sve-fixed-length test files: build_vector.ll, concat.ll, extract-subvector.ll, extract-vector-elt.ll, int-shifts.ll, loads.ll, shuffle.ll, stores.ll.

Differential Revision: https://reviews.llvm.org/D135564
1 parent 325a308 commit 681888e

11 files changed: +1913 −23 lines
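To see the effect of the new subtarget flag end to end, here is a cut-down version of the build_vector_0_inc2_v16i16 case from the new build_vector.ll test included further down in this commit. It is only an illustrative sketch: the CHECK lines spot-check the key SVE instructions (INDEX plus plain 128-bit stores) rather than the full autogenerated sequence.

; RUN: llc -force-streaming-compatible-sve < %s | FileCheck %s

target triple = "aarch64-unknown-linux-gnu"

; With streaming-compatible codegen forced, the constant build_vector is
; materialised with SVE's INDEX instruction instead of NEON, so the output
; stays legal in streaming mode.
define void @build_vector_0_inc2_v16i16(ptr %a) #0 {
; CHECK-LABEL: build_vector_0_inc2_v16i16:
; CHECK: index z0.h, #0, #2
; CHECK: str q0, [x0]
  store <16 x i16> <i16 0, i16 2, i16 4, i16 6, i16 8, i16 10, i16 12, i16 14, i16 16, i16 18, i16 20, i16 22, i16 24, i16 26, i16 28, i16 30>, ptr %a, align 2
  ret void
}

attributes #0 = { "target-features"="+sve" }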

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 52 additions & 13 deletions
@@ -1391,6 +1391,16 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
   for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32, MVT::v2f64})
     setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);

+  if (Subtarget->forceStreamingCompatibleSVE()) {
+    for (MVT VT : {MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,
+                   MVT::v4i32, MVT::v2i64})
+      addTypeForStreamingSVE(VT);
+
+    for (MVT VT :
+         {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32, MVT::v2f64})
+      addTypeForStreamingSVE(VT);
+  }
+
   // NOTE: Currently this has to happen after computeRegisterProperties rather
   // than the preferred option of combining it with the addRegisterClass call.
   if (Subtarget->useSVEForFixedLengthVectors()) {
@@ -1597,6 +1607,14 @@ bool AArch64TargetLowering::shouldExpandGetActiveLaneMask(EVT ResVT,
   return false;
 }

+void AArch64TargetLowering::addTypeForStreamingSVE(MVT VT) {
+  setOperationAction(ISD::ANY_EXTEND, VT, Custom);
+  setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
+  setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
+  setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
+  setOperationAction(ISD::AND, VT, Custom);
+}
+
 void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
   assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");

@@ -5773,8 +5791,7 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
   case ISD::MLOAD:
     return LowerMLOAD(Op, DAG);
   case ISD::LOAD:
-    if (useSVEForFixedLengthVectorVT(Op.getValueType(),
-                                     Subtarget->forceStreamingCompatibleSVE()))
+    if (useSVEForFixedLengthVectorVT(Op.getValueType()))
       return LowerFixedLengthVectorLoadToSVE(Op, DAG);
     return LowerLOAD(Op, DAG);
   case ISD::ADD:
@@ -11400,9 +11417,13 @@ static SDValue tryAdvSIMDModImm64(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
 static SDValue tryAdvSIMDModImm32(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
                                   const APInt &Bits,
                                   const SDValue *LHS = nullptr) {
+  EVT VT = Op.getValueType();
+  if (VT.isFixedLengthVector() &&
+      DAG.getSubtarget<AArch64Subtarget>().forceStreamingCompatibleSVE())
+    return SDValue();
+
   if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
     uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
-    EVT VT = Op.getValueType();
     MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
     bool isAdvSIMDModImm = false;
     uint64_t Shift;
@@ -11448,9 +11469,13 @@ static SDValue tryAdvSIMDModImm32(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
 static SDValue tryAdvSIMDModImm16(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
                                   const APInt &Bits,
                                   const SDValue *LHS = nullptr) {
+  EVT VT = Op.getValueType();
+  if (VT.isFixedLengthVector() &&
+      DAG.getSubtarget<AArch64Subtarget>().forceStreamingCompatibleSVE())
+    return SDValue();
+
   if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
     uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
-    EVT VT = Op.getValueType();
     MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
     bool isAdvSIMDModImm = false;
     uint64_t Shift;
@@ -12128,7 +12153,8 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,

 SDValue AArch64TargetLowering::LowerCONCAT_VECTORS(SDValue Op,
                                                    SelectionDAG &DAG) const {
-  if (useSVEForFixedLengthVectorVT(Op.getValueType()))
+  if (useSVEForFixedLengthVectorVT(Op.getValueType(),
+                                   Subtarget->forceStreamingCompatibleSVE()))
     return LowerFixedLengthConcatVectorsToSVE(Op, DAG);

   assert(Op.getValueType().isScalableVector() &&
@@ -12234,7 +12260,8 @@ AArch64TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
     return DAG.getAnyExtOrTrunc(Extract, DL, Op.getValueType());
   }

-  if (useSVEForFixedLengthVectorVT(VT))
+  if (useSVEForFixedLengthVectorVT(VT,
+                                   Subtarget->forceStreamingCompatibleSVE()))
     return LowerFixedLengthExtractVectorElt(Op, DAG);

   // Check for non-constant or out of range lane.
@@ -12296,10 +12323,11 @@ SDValue AArch64TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
   // If this is extracting the upper 64-bits of a 128-bit vector, we match
   // that directly.
   if (Size == 64 && Idx * InVT.getScalarSizeInBits() == 64 &&
-      InVT.getSizeInBits() == 128)
+      InVT.getSizeInBits() == 128 && !Subtarget->forceStreamingCompatibleSVE())
     return Op;

-  if (useSVEForFixedLengthVectorVT(InVT)) {
+  if (useSVEForFixedLengthVectorVT(InVT,
+                                   Subtarget->forceStreamingCompatibleSVE())) {
     SDLoc DL(Op);

     EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT);
@@ -12487,7 +12515,8 @@ SDValue AArch64TargetLowering::LowerDIV(SDValue Op, SelectionDAG &DAG) const {

 bool AArch64TargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
   // Currently no fixed length shuffles that require SVE are legal.
-  if (useSVEForFixedLengthVectorVT(VT))
+  if (useSVEForFixedLengthVectorVT(VT,
+                                   Subtarget->forceStreamingCompatibleSVE()))
     return false;

   if (VT.getVectorNumElements() == 4 &&
@@ -12597,7 +12626,9 @@ SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op,

   switch (Op.getOpcode()) {
   case ISD::SHL:
-    if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT))
+    if (VT.isScalableVector() ||
+        useSVEForFixedLengthVectorVT(VT,
+                                     Subtarget->forceStreamingCompatibleSVE()))
       return LowerToPredicatedOp(Op, DAG, AArch64ISD::SHL_PRED);

     if (isVShiftLImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize)
@@ -12609,7 +12640,9 @@ SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op,
                        Op.getOperand(0), Op.getOperand(1));
   case ISD::SRA:
   case ISD::SRL:
-    if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT)) {
+    if (VT.isScalableVector() ||
+        useSVEForFixedLengthVectorVT(
+            VT, Subtarget->forceStreamingCompatibleSVE())) {
       unsigned Opc = Op.getOpcode() == ISD::SRA ? AArch64ISD::SRA_PRED
                                                 : AArch64ISD::SRL_PRED;
       return LowerToPredicatedOp(Op, DAG, Opc);
@@ -14008,6 +14041,11 @@ bool AArch64TargetLowering::lowerInterleavedLoad(
 bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
                                                   ShuffleVectorInst *SVI,
                                                   unsigned Factor) const {
+  // Skip if streaming compatible SVE is enabled, because it generates invalid
+  // code in streaming mode when SVE length is not specified.
+  if (Subtarget->forceStreamingCompatibleSVE())
+    return false;
+
   assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
          "Invalid interleave factor");

@@ -22489,7 +22527,7 @@ SDValue AArch64TargetLowering::LowerToPredicatedOp(SDValue Op,
 SDValue AArch64TargetLowering::LowerToScalableOp(SDValue Op,
                                                  SelectionDAG &DAG) const {
   EVT VT = Op.getValueType();
-  assert(useSVEForFixedLengthVectorVT(VT) &&
+  assert(VT.isFixedLengthVector() && isTypeLegal(VT) &&
          "Only expected to lower fixed length vector operation!");
   EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);

@@ -22505,7 +22543,8 @@ SDValue AArch64TargetLowering::LowerToScalableOp(SDValue Op,
     }

     // "cast" fixed length vector to a scalable vector.
-    assert(useSVEForFixedLengthVectorVT(V.getValueType()) &&
+    assert(V.getValueType().isFixedLengthVector() &&
+           isTypeLegal(V.getValueType()) &&
           "Only fixed length vectors are supported!");
     Ops.push_back(convertToScalableVector(DAG, ContainerVT, V));
   }

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Lines changed: 1 addition & 0 deletions
@@ -899,6 +899,7 @@ class AArch64TargetLowering : public TargetLowering {
   bool isExtFreeImpl(const Instruction *Ext) const override;

   void addTypeForNEON(MVT VT);
+  void addTypeForStreamingSVE(MVT VT);
   void addTypeForFixedLengthSVE(MVT VT);
   void addDRTypeForNEON(MVT VT);
   void addQRTypeForNEON(MVT VT);

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 4 additions & 2 deletions
@@ -3032,7 +3032,7 @@ let Predicates = [HasSVEorSME] in {
             (EXTRACT_SUBREG (DUP_ZZI_D ZPR:$vec, sve_elm_idx_extdup_d:$index), dsub)>;

   // Extract element from vector with immediate index that's within the bottom 128-bits.
-  let AddedComplexity = 1 in {
+  let Predicates = [NotInStreamingSVEMode], AddedComplexity = 1 in {
   def : Pat<(i32 (vector_extract (nxv16i8 ZPR:$vec), VectorIndexB:$index)),
             (i32 (UMOVvi8 (v16i8 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexB:$index))>;
   def : Pat<(i32 (vector_extract (nxv8i16 ZPR:$vec), VectorIndexH:$index)),
@@ -3041,8 +3041,9 @@ let Predicates = [HasSVEorSME] in {
             (i32 (UMOVvi32 (v4i32 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexS:$index))>;
   def : Pat<(i64 (vector_extract (nxv2i64 ZPR:$vec), VectorIndexD:$index)),
             (i64 (UMOVvi64 (v2i64 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexD:$index))>;
-  }
+  } // End NotInStreamingSVEMode

+  let Predicates = [NotInStreamingSVEMode] in {
   def : Pat<(sext_inreg (vector_extract (nxv16i8 ZPR:$vec), VectorIndexB:$index), i8),
             (i32 (SMOVvi8to32 (v16i8 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexB:$index))>;
   def : Pat<(sext_inreg (anyext (vector_extract (nxv16i8 ZPR:$vec), VectorIndexB:$index)), i8),
@@ -3055,6 +3056,7 @@ let Predicates = [HasSVEorSME] in {

   def : Pat<(sext (vector_extract (nxv4i32 ZPR:$vec), VectorIndexS:$index)),
             (i64 (SMOVvi32to64 (v4i32 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexS:$index))>;
+  } // End NotInStreamingSVEMode

   // Extract first element from vector.
   let AddedComplexity = 2 in {
build_vector.ll (new test file)

Lines changed: 138 additions & 0 deletions
@@ -0,0 +1,138 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -force-streaming-compatible-sve < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+define void @build_vector_7_inc1_v4i1(ptr %a) #0 {
+; CHECK-LABEL: build_vector_7_inc1_v4i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #5
+; CHECK-NEXT: strb w8, [x0]
+; CHECK-NEXT: ret
+  store <4 x i1> <i1 true, i1 false, i1 true, i1 false>, ptr %a, align 1
+  ret void
+}
+
+define void @build_vector_7_inc1_v32i8(ptr %a) #0 {
+; CHECK-LABEL: build_vector_7_inc1_v32i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: index z0.b, #0, #1
+; CHECK-NEXT: mov z1.d, z0.d
+; CHECK-NEXT: add z0.b, z0.b, #7 // =0x7
+; CHECK-NEXT: add z1.b, z1.b, #23 // =0x17
+; CHECK-NEXT: stp q0, q1, [x0]
+; CHECK-NEXT: ret
+  store <32 x i8> <i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31, i8 32, i8 33, i8 34, i8 35, i8 36, i8 37, i8 38>, ptr %a, align 1
+  ret void
+}
+
+define void @build_vector_0_inc2_v16i16(ptr %a) #0 {
+; CHECK-LABEL: build_vector_0_inc2_v16i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: index z0.h, #0, #2
+; CHECK-NEXT: str q0, [x0]
+; CHECK-NEXT: add z0.h, z0.h, #16 // =0x10
+; CHECK-NEXT: str q0, [x0, #16]
+; CHECK-NEXT: ret
+  store <16 x i16> <i16 0, i16 2, i16 4, i16 6, i16 8, i16 10, i16 12, i16 14, i16 16, i16 18, i16 20, i16 22, i16 24, i16 26, i16 28, i16 30>, ptr %a, align 2
+  ret void
+}
+
+; Negative const stride.
+define void @build_vector_0_dec3_v8i32(ptr %a) #0 {
+; CHECK-LABEL: build_vector_0_dec3_v8i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: index z0.s, #0, #-3
+; CHECK-NEXT: mov z1.s, #-12 // =0xfffffffffffffff4
+; CHECK-NEXT: add z1.s, z0.s, z1.s
+; CHECK-NEXT: stp q0, q1, [x0]
+; CHECK-NEXT: ret
+  store <8 x i32> <i32 0, i32 -3, i32 -6, i32 -9, i32 -12, i32 -15, i32 -18, i32 -21>, ptr %a, align 4
+  ret void
+}
+
+; Constant stride that's too big to be directly encoded into the index.
+define void @build_vector_minus2_dec32_v4i64(ptr %a) #0 {
+; CHECK-LABEL: build_vector_minus2_dec32_v4i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x8, #-32
+; CHECK-NEXT: mov z0.d, #-66 // =0xffffffffffffffbe
+; CHECK-NEXT: mov z2.d, #-2 // =0xfffffffffffffffe
+; CHECK-NEXT: index z1.d, #0, x8
+; CHECK-NEXT: add z0.d, z1.d, z0.d
+; CHECK-NEXT: add z1.d, z1.d, z2.d
+; CHECK-NEXT: stp q1, q0, [x0]
+; CHECK-NEXT: ret
+  store <4 x i64> <i64 -2, i64 -34, i64 -66, i64 -98>, ptr %a, align 8
+  ret void
+}
+
+; Constant but not a sequence.
+define void @build_vector_no_stride_v4i64(ptr %a) #0 {
+; CHECK-LABEL: build_vector_no_stride_v4i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: index z0.d, #1, #7
+; CHECK-NEXT: index z1.d, #0, #4
+; CHECK-NEXT: stp q1, q0, [x0]
+; CHECK-NEXT: ret
+  store <4 x i64> <i64 0, i64 4, i64 1, i64 8>, ptr %a, align 8
+  ret void
+}
+
+define void @build_vector_0_inc2_v16f16(ptr %a) #0 {
+; CHECK-LABEL: build_vector_0_inc2_v16f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, .LCPI6_0
+; CHECK-NEXT: adrp x9, .LCPI6_1
+; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI6_0]
+; CHECK-NEXT: ldr q1, [x9, :lo12:.LCPI6_1]
+; CHECK-NEXT: stp q1, q0, [x0]
+; CHECK-NEXT: ret
+  store <16 x half> <half 0.0, half 2.0, half 4.0, half 6.0, half 8.0, half 10.0, half 12.0, half 14.0, half 16.0, half 18.0, half 20.0, half 22.0, half 24.0, half 26.0, half 28.0, half 30.0>, ptr %a, align 2
+  ret void
+}
+
+; Negative const stride.
+define void @build_vector_0_dec3_v8f32(ptr %a) #0 {
+; CHECK-LABEL: build_vector_0_dec3_v8f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, .LCPI7_0
+; CHECK-NEXT: adrp x9, .LCPI7_1
+; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI7_0]
+; CHECK-NEXT: ldr q1, [x9, :lo12:.LCPI7_1]
+; CHECK-NEXT: stp q1, q0, [x0]
+; CHECK-NEXT: ret
+  store <8 x float> <float 0.0, float -3.0, float -6.0, float -9.0, float -12.0, float -15.0, float -18.0, float -21.0>, ptr %a, align 4
+  ret void
+}
+
+; Constant stride that's too big to be directly encoded into the index.
+define void @build_vector_minus2_dec32_v4f64(ptr %a) #0 {
+; CHECK-LABEL: build_vector_minus2_dec32_v4f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, .LCPI8_0
+; CHECK-NEXT: adrp x9, .LCPI8_1
+; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI8_0]
+; CHECK-NEXT: ldr q1, [x9, :lo12:.LCPI8_1]
+; CHECK-NEXT: stp q1, q0, [x0]
+; CHECK-NEXT: ret
+  store <4 x double> <double -2.0, double -34.0, double -66.0, double -98.0>, ptr %a, align 8
+  ret void
+}
+
+; Constant but not a sequence.
+define void @build_vector_no_stride_v4f64(ptr %a) #0 {
+; CHECK-LABEL: build_vector_no_stride_v4f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, .LCPI9_0
+; CHECK-NEXT: adrp x9, .LCPI9_1
+; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI9_0]
+; CHECK-NEXT: ldr q1, [x9, :lo12:.LCPI9_1]
+; CHECK-NEXT: stp q1, q0, [x0]
+; CHECK-NEXT: ret
+  store <4 x double> <double 0.0, double 4.0, double 1.0, double 8.0>, ptr %a, align 8
+  ret void
+}
+
+
+attributes #0 = { "target-features"="+sve" }
