@@ -23589,17 +23589,16 @@ static SDValue combineV3I8LoadExt(LoadSDNode *LD, SelectionDAG &DAG) {
2358923589 return DAG.getMergeValues({Extract, TokenFactor}, DL);
2359023590}
2359123591
23592- // Replace scalable loads with fixed loads when vscale_range(1, 1).
23592+ // Replace packed scalable loads with fixed loads when vscale_range(1, 1).
2359323593// This enables further optimisations such as LDP folds.
2359423594static SDValue combineVScale1Load(LoadSDNode *LD, SelectionDAG &DAG,
23595+ TargetLowering::DAGCombinerInfo &DCI,
2359523596 const AArch64Subtarget *Subtarget) {
2359623597 EVT MemVT = LD->getMemoryVT();
23597- if (!Subtarget->isNeonAvailable() || !MemVT.isScalableVector() ||
23598- Subtarget->getMaxSVEVectorSizeInBits() != AArch64::SVEBitsPerBlock)
23599- return SDValue();
23600-
23601- // Skip unpacked types given their different layouts between Neon and SVE.
23602- if (MemVT.getSizeInBits().getKnownMinValue() != AArch64::SVEBitsPerBlock)
23598+ if (!DCI.isBeforeLegalize() || !Subtarget->hasNEON() ||
23599+ !MemVT.isScalableVector() || LD->getExtensionType() != ISD::NON_EXTLOAD ||
23600+ MemVT.getSizeInBits().getKnownMinValue() != 128 ||
23601+ Subtarget->getMaxSVEVectorSizeInBits() != 128)
2360323602 return SDValue();
2360423603
2360523604 SDLoc DL(LD);
@@ -23609,9 +23608,7 @@ static SDValue combineVScale1Load(LoadSDNode *LD, SelectionDAG &DAG,
2360923608 NewVT, DL, LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(),
2361023609 LD->getOriginalAlign(), LD->getMemOperand()->getFlags(), LD->getAAInfo());
2361123610 SDValue Insert = convertToScalableVector(DAG, MemVT, NewLoad);
23612- SDValue TokenFactor = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
23613- {SDValue(cast<SDNode>(NewLoad), 1)});
23614- return DAG.getMergeValues({Insert, TokenFactor}, DL);
23611+ return DAG.getMergeValues({Insert, SDValue(cast<SDNode>(NewLoad), 1)}, DL);
2361523612}
2361623613
2361723614// Perform TBI simplification if supported by the target and try to break up
@@ -23651,7 +23648,7 @@ static SDValue performLOADCombine(SDNode *N,
2365123648 if (SDValue Res = combineV3I8LoadExt(LD, DAG))
2365223649 return Res;
2365323650
23654- if (SDValue Res = combineVScale1Load(LD, DAG, Subtarget))
23651+ if (SDValue Res = combineVScale1Load(LD, DAG, DCI, Subtarget))
2365523652 return Res;
2365623653
2365723654 if (!LD->isNonTemporal())
@@ -23912,18 +23909,17 @@ static SDValue combineI8TruncStore(StoreSDNode *ST, SelectionDAG &DAG,
2391223909 return Chain;
2391323910}
2391423911
23915- // Replace scalable stores with fixed stores when vscale_range(1, 1).
23912+ // Replace packed scalable stores with fixed stores when vscale_range(1, 1).
2391623913static SDValue combineVScale1Store(StoreSDNode *ST, SelectionDAG &DAG,
23914+ TargetLowering::DAGCombinerInfo &DCI,
2391723915 const AArch64Subtarget *Subtarget) {
2391823916 SDValue Value = ST->getValue();
2391923917 EVT ValueVT = Value.getValueType();
2392023918 if (ST->isVolatile() || !Subtarget->isLittleEndian() ||
23921- !Subtarget->isNeonAvailable() || !ValueVT.isScalableVector() ||
23922- Subtarget->getMaxSVEVectorSizeInBits() != AArch64::SVEBitsPerBlock)
23923- return SDValue();
23924-
23925- // Skip unpacked types given their different layouts between Neon and SVE.
23926- if (ValueVT.getSizeInBits().getKnownMinValue() != AArch64::SVEBitsPerBlock)
23919+ !DCI.isBeforeLegalize() || !Subtarget->hasNEON() ||
23920+ !ValueVT.isScalableVector() || ST->isTruncatingStore() ||
23921+ ValueVT.getSizeInBits().getKnownMinValue() != 128 ||
23922+ Subtarget->getMaxSVEVectorSizeInBits() != 128)
2392723923 return SDValue();
2392823924
2392923925 SDLoc DL(ST);
@@ -23970,7 +23966,7 @@ static SDValue performSTORECombine(SDNode *N,
2397023966 if (SDValue Res = combineI8TruncStore(ST, DAG, Subtarget))
2397123967 return Res;
2397223968
23973- if (SDValue Res = combineVScale1Store(ST, DAG, Subtarget))
23969+ if (SDValue Res = combineVScale1Store(ST, DAG, DCI, Subtarget))
2397423970 return Res;
2397523971
2397623972 // If this is an FP_ROUND followed by a store, fold this into a truncating
0 commit comments