@@ -494,6 +494,8 @@ static bool loadStoreBitcastWorkaround(const LLT Ty) {
494494 return false ;
495495
496496 const unsigned Size = Ty.getSizeInBits ();
497+ if (Ty.isPointerVector ())
498+ return true ;
497499 if (Size <= 64 )
498500 return false ;
499501 // Address space 8 pointers get their own workaround.
@@ -502,9 +504,6 @@ static bool loadStoreBitcastWorkaround(const LLT Ty) {
502504 if (!Ty.isVector ())
503505 return true ;
504506
505- if (Ty.isPointerVector ())
506- return true ;
507-
508507 unsigned EltSize = Ty.getScalarSizeInBits ();
509508 return EltSize != 32 && EltSize != 64 ;
510509}
@@ -5820,8 +5819,9 @@ Register AMDGPULegalizerInfo::handleD16VData(MachineIRBuilder &B,
58205819 return Reg;
58215820}
58225821
5823- Register AMDGPULegalizerInfo::fixStoreSourceType (
5824- MachineIRBuilder &B, Register VData, bool IsFormat) const {
5822+ Register AMDGPULegalizerInfo::fixStoreSourceType (MachineIRBuilder &B,
5823+ Register VData, LLT MemTy,
5824+ bool IsFormat) const {
58255825 MachineRegisterInfo *MRI = B.getMRI ();
58265826 LLT Ty = MRI->getType (VData);
58275827
@@ -5831,6 +5831,10 @@ Register AMDGPULegalizerInfo::fixStoreSourceType(
58315831 if (hasBufferRsrcWorkaround (Ty))
58325832 return castBufferRsrcToV4I32 (VData, B);
58335833
5834+ if (shouldBitcastLoadStoreType (ST, Ty, MemTy)) {
5835+ Ty = getBitcastRegisterType (Ty);
5836+ VData = B.buildBitcast (Ty, VData).getReg (0 );
5837+ }
58345838 // Fixup illegal register types for i8 stores.
58355839 if (Ty == LLT::scalar (8 ) || Ty == S16) {
58365840 Register AnyExt = B.buildAnyExt (LLT::scalar (32 ), VData).getReg (0 );
@@ -5848,22 +5852,26 @@ Register AMDGPULegalizerInfo::fixStoreSourceType(
58485852}
58495853
58505854bool AMDGPULegalizerInfo::legalizeBufferStore (MachineInstr &MI,
5851- MachineRegisterInfo &MRI,
5852- MachineIRBuilder &B,
5855+ LegalizerHelper &Helper,
58535856 bool IsTyped,
58545857 bool IsFormat) const {
5858+ MachineIRBuilder &B = Helper.MIRBuilder ;
5859+ MachineRegisterInfo &MRI = *B.getMRI ();
5860+
58555861 Register VData = MI.getOperand (1 ).getReg ();
58565862 LLT Ty = MRI.getType (VData);
58575863 LLT EltTy = Ty.getScalarType ();
58585864 const bool IsD16 = IsFormat && (EltTy.getSizeInBits () == 16 );
58595865 const LLT S32 = LLT::scalar (32 );
58605866
5861- VData = fixStoreSourceType (B, VData, IsFormat);
5862- castBufferRsrcArgToV4I32 (MI, B, 2 );
5863- Register RSrc = MI.getOperand (2 ).getReg ();
5864-
58655867 MachineMemOperand *MMO = *MI.memoperands_begin ();
58665868 const int MemSize = MMO->getSize ().getValue ();
5869+ LLT MemTy = MMO->getMemoryType ();
5870+
5871+ VData = fixStoreSourceType (B, VData, MemTy, IsFormat);
5872+
5873+ castBufferRsrcArgToV4I32 (MI, B, 2 );
5874+ Register RSrc = MI.getOperand (2 ).getReg ();
58675875
58685876 unsigned ImmOffset;
58695877
@@ -5956,10 +5964,13 @@ static void buildBufferLoad(unsigned Opc, Register LoadDstReg, Register RSrc,
59565964}
59575965
59585966bool AMDGPULegalizerInfo::legalizeBufferLoad (MachineInstr &MI,
5959- MachineRegisterInfo &MRI,
5960- MachineIRBuilder &B,
5967+ LegalizerHelper &Helper,
59615968 bool IsFormat,
59625969 bool IsTyped) const {
5970+ MachineIRBuilder &B = Helper.MIRBuilder ;
5971+ MachineRegisterInfo &MRI = *B.getMRI ();
5972+ GISelChangeObserver &Observer = Helper.Observer ;
5973+
59635974 // FIXME: Verifier should enforce 1 MMO for these intrinsics.
59645975 MachineMemOperand *MMO = *MI.memoperands_begin ();
59655976 const LLT MemTy = MMO->getMemoryType ();
@@ -6008,9 +6019,21 @@ bool AMDGPULegalizerInfo::legalizeBufferLoad(MachineInstr &MI,
60086019 // Make addrspace 8 pointers loads into 4xs32 loads here, so the rest of the
60096020 // logic doesn't have to handle that case.
60106021 if (hasBufferRsrcWorkaround (Ty)) {
6022+ Observer.changingInstr (MI);
60116023 Ty = castBufferRsrcFromV4I32 (MI, B, MRI, 0 );
6024+ Observer.changedInstr (MI);
60126025 Dst = MI.getOperand (0 ).getReg ();
6026+ B.setInsertPt (B.getMBB (), MI);
60136027 }
6028+ if (shouldBitcastLoadStoreType (ST, Ty, MemTy)) {
6029+ Ty = getBitcastRegisterType (Ty);
6030+ Observer.changingInstr (MI);
6031+ Helper.bitcastDst (MI, Ty, 0 );
6032+ Observer.changedInstr (MI);
6033+ Dst = MI.getOperand (0 ).getReg ();
6034+ B.setInsertPt (B.getMBB (), MI);
6035+ }
6036+
60146037 LLT EltTy = Ty.getScalarType ();
60156038 const bool IsD16 = IsFormat && (EltTy.getSizeInBits () == 16 );
60166039 const bool Unpacked = ST.hasUnpackedD16VMem ();
@@ -7390,17 +7413,17 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
73907413 case Intrinsic::amdgcn_raw_ptr_buffer_store:
73917414 case Intrinsic::amdgcn_struct_buffer_store:
73927415 case Intrinsic::amdgcn_struct_ptr_buffer_store:
7393- return legalizeBufferStore (MI, MRI, B , false , false );
7416+ return legalizeBufferStore (MI, Helper , false , false );
73947417 case Intrinsic::amdgcn_raw_buffer_store_format:
73957418 case Intrinsic::amdgcn_raw_ptr_buffer_store_format:
73967419 case Intrinsic::amdgcn_struct_buffer_store_format:
73977420 case Intrinsic::amdgcn_struct_ptr_buffer_store_format:
7398- return legalizeBufferStore (MI, MRI, B , false , true );
7421+ return legalizeBufferStore (MI, Helper , false , true );
73997422 case Intrinsic::amdgcn_raw_tbuffer_store:
74007423 case Intrinsic::amdgcn_raw_ptr_tbuffer_store:
74017424 case Intrinsic::amdgcn_struct_tbuffer_store:
74027425 case Intrinsic::amdgcn_struct_ptr_tbuffer_store:
7403- return legalizeBufferStore (MI, MRI, B , true , true );
7426+ return legalizeBufferStore (MI, Helper , true , true );
74047427 case Intrinsic::amdgcn_raw_buffer_load:
74057428 case Intrinsic::amdgcn_raw_ptr_buffer_load:
74067429 case Intrinsic::amdgcn_raw_atomic_buffer_load:
@@ -7409,17 +7432,17 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
74097432 case Intrinsic::amdgcn_struct_ptr_buffer_load:
74107433 case Intrinsic::amdgcn_struct_atomic_buffer_load:
74117434 case Intrinsic::amdgcn_struct_ptr_atomic_buffer_load:
7412- return legalizeBufferLoad (MI, MRI, B , false , false );
7435+ return legalizeBufferLoad (MI, Helper , false , false );
74137436 case Intrinsic::amdgcn_raw_buffer_load_format:
74147437 case Intrinsic::amdgcn_raw_ptr_buffer_load_format:
74157438 case Intrinsic::amdgcn_struct_buffer_load_format:
74167439 case Intrinsic::amdgcn_struct_ptr_buffer_load_format:
7417- return legalizeBufferLoad (MI, MRI, B , true , false );
7440+ return legalizeBufferLoad (MI, Helper , true , false );
74187441 case Intrinsic::amdgcn_raw_tbuffer_load:
74197442 case Intrinsic::amdgcn_raw_ptr_tbuffer_load:
74207443 case Intrinsic::amdgcn_struct_tbuffer_load:
74217444 case Intrinsic::amdgcn_struct_ptr_tbuffer_load:
7422- return legalizeBufferLoad (MI, MRI, B , true , true );
7445+ return legalizeBufferLoad (MI, Helper , true , true );
74237446 case Intrinsic::amdgcn_raw_buffer_atomic_swap:
74247447 case Intrinsic::amdgcn_raw_ptr_buffer_atomic_swap:
74257448 case Intrinsic::amdgcn_struct_buffer_atomic_swap:
0 commit comments