@@ -1391,6 +1391,16 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
   for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32, MVT::v2f64})
     setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
 
+  if (Subtarget->forceStreamingCompatibleSVE()) {
+    for (MVT VT : {MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,
+                   MVT::v4i32, MVT::v2i64})
+      addTypeForStreamingSVE(VT);
+
+    for (MVT VT :
+         {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32, MVT::v2f64})
+      addTypeForStreamingSVE(VT);
+  }
+
   // NOTE: Currently this has to happen after computeRegisterProperties rather
   // than the preferred option of combining it with the addRegisterClass call.
   if (Subtarget->useSVEForFixedLengthVectors()) {
@@ -1597,6 +1607,14 @@ bool AArch64TargetLowering::shouldExpandGetActiveLaneMask(EVT ResVT,
   return false;
 }
 
+void AArch64TargetLowering::addTypeForStreamingSVE(MVT VT) {
+  setOperationAction(ISD::ANY_EXTEND, VT, Custom);
+  setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
+  setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
+  setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
+  setOperationAction(ISD::AND, VT, Custom);
+}
+
 void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
   assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
 
@@ -5773,8 +5791,7 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
   case ISD::MLOAD:
     return LowerMLOAD(Op, DAG);
   case ISD::LOAD:
-    if (useSVEForFixedLengthVectorVT(Op.getValueType(),
-                                     Subtarget->forceStreamingCompatibleSVE()))
+    if (useSVEForFixedLengthVectorVT(Op.getValueType()))
       return LowerFixedLengthVectorLoadToSVE(Op, DAG);
     return LowerLOAD(Op, DAG);
   case ISD::ADD:
@@ -11400,9 +11417,13 @@ static SDValue tryAdvSIMDModImm64(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
 static SDValue tryAdvSIMDModImm32(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
                                   const APInt &Bits,
                                   const SDValue *LHS = nullptr) {
+  EVT VT = Op.getValueType();
+  if (VT.isFixedLengthVector() &&
+      DAG.getSubtarget<AArch64Subtarget>().forceStreamingCompatibleSVE())
+    return SDValue();
+
   if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
     uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
-    EVT VT = Op.getValueType();
     MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
     bool isAdvSIMDModImm = false;
     uint64_t Shift;
@@ -11448,9 +11469,13 @@ static SDValue tryAdvSIMDModImm32(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
 static SDValue tryAdvSIMDModImm16(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
                                   const APInt &Bits,
                                   const SDValue *LHS = nullptr) {
+  EVT VT = Op.getValueType();
+  if (VT.isFixedLengthVector() &&
+      DAG.getSubtarget<AArch64Subtarget>().forceStreamingCompatibleSVE())
+    return SDValue();
+
   if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
     uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
-    EVT VT = Op.getValueType();
     MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
     bool isAdvSIMDModImm = false;
     uint64_t Shift;
@@ -12128,7 +12153,8 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
 
 SDValue AArch64TargetLowering::LowerCONCAT_VECTORS(SDValue Op,
                                                    SelectionDAG &DAG) const {
-  if (useSVEForFixedLengthVectorVT(Op.getValueType()))
+  if (useSVEForFixedLengthVectorVT(Op.getValueType(),
+                                   Subtarget->forceStreamingCompatibleSVE()))
     return LowerFixedLengthConcatVectorsToSVE(Op, DAG);
 
   assert(Op.getValueType().isScalableVector() &&
@@ -12234,7 +12260,8 @@ AArch64TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
     return DAG.getAnyExtOrTrunc(Extract, DL, Op.getValueType());
   }
 
-  if (useSVEForFixedLengthVectorVT(VT))
+  if (useSVEForFixedLengthVectorVT(VT,
+                                   Subtarget->forceStreamingCompatibleSVE()))
     return LowerFixedLengthExtractVectorElt(Op, DAG);
 
   // Check for non-constant or out of range lane.
@@ -12296,10 +12323,11 @@ SDValue AArch64TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
   // If this is extracting the upper 64-bits of a 128-bit vector, we match
   // that directly.
   if (Size == 64 && Idx * InVT.getScalarSizeInBits() == 64 &&
-      InVT.getSizeInBits() == 128)
+      InVT.getSizeInBits() == 128 && !Subtarget->forceStreamingCompatibleSVE())
     return Op;
 
-  if (useSVEForFixedLengthVectorVT(InVT)) {
+  if (useSVEForFixedLengthVectorVT(InVT,
+                                   Subtarget->forceStreamingCompatibleSVE())) {
     SDLoc DL(Op);
 
     EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT);
@@ -12487,7 +12515,8 @@ SDValue AArch64TargetLowering::LowerDIV(SDValue Op, SelectionDAG &DAG) const {
 
 bool AArch64TargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
   // Currently no fixed length shuffles that require SVE are legal.
-  if (useSVEForFixedLengthVectorVT(VT))
+  if (useSVEForFixedLengthVectorVT(VT,
+                                   Subtarget->forceStreamingCompatibleSVE()))
     return false;
 
   if (VT.getVectorNumElements() == 4 &&
@@ -12597,7 +12626,9 @@ SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op,
 
   switch (Op.getOpcode()) {
   case ISD::SHL:
-    if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT))
+    if (VT.isScalableVector() ||
+        useSVEForFixedLengthVectorVT(VT,
+                                     Subtarget->forceStreamingCompatibleSVE()))
       return LowerToPredicatedOp(Op, DAG, AArch64ISD::SHL_PRED);
 
     if (isVShiftLImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize)
@@ -12609,7 +12640,9 @@ SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op,
                        Op.getOperand(0), Op.getOperand(1));
   case ISD::SRA:
   case ISD::SRL:
-    if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT)) {
+    if (VT.isScalableVector() ||
+        useSVEForFixedLengthVectorVT(
+            VT, Subtarget->forceStreamingCompatibleSVE())) {
       unsigned Opc = Op.getOpcode() == ISD::SRA ? AArch64ISD::SRA_PRED
                                                 : AArch64ISD::SRL_PRED;
       return LowerToPredicatedOp(Op, DAG, Opc);
@@ -14008,6 +14041,11 @@ bool AArch64TargetLowering::lowerInterleavedLoad(
 bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
                                                   ShuffleVectorInst *SVI,
                                                   unsigned Factor) const {
+  // Skip if streaming compatible SVE is enabled, because it generates invalid
+  // code in streaming mode when SVE length is not specified.
+  if (Subtarget->forceStreamingCompatibleSVE())
+    return false;
+
   assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
          "Invalid interleave factor");
 
@@ -22489,7 +22527,7 @@ SDValue AArch64TargetLowering::LowerToPredicatedOp(SDValue Op,
 SDValue AArch64TargetLowering::LowerToScalableOp(SDValue Op,
                                                  SelectionDAG &DAG) const {
   EVT VT = Op.getValueType();
-  assert(useSVEForFixedLengthVectorVT(VT) &&
+  assert(VT.isFixedLengthVector() && isTypeLegal(VT) &&
          "Only expected to lower fixed length vector operation!");
   EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
 
@@ -22505,7 +22543,8 @@ SDValue AArch64TargetLowering::LowerToScalableOp(SDValue Op,
     }
 
     // "cast" fixed length vector to a scalable vector.
-    assert(useSVEForFixedLengthVectorVT(V.getValueType()) &&
+    assert(V.getValueType().isFixedLengthVector() &&
+           isTypeLegal(V.getValueType()) &&
            "Only fixed length vectors are supported!");
     Ops.push_back(convertToScalableVector(DAG, ContainerVT, V));
   }