@@ -7850,83 +7850,18 @@ def repack_mxfp4(self, new_name: str, blocks: Tensor, scales: Tensor):
78507850 new_data = new_data .reshape (new_data .shape [0 ], new_data .shape [1 ], new_data .shape [2 ] * new_data .shape [3 ])
78517851 self .gguf_writer .add_tensor (new_name , new_data , raw_dtype = gguf .GGMLQuantizationType .MXFP4 )
78527852
def convert_moe_packed_tensors(
    self,
    new_name: str,
    blocks,
    scales,
    *,
    dtype: torch.dtype = torch.float32,
    rows_per_chunk: int = 32768 * 1024,
) -> tuple[str, Tensor]:
    """Unpack MXFP4-packed expert weights into a dense tensor of ``dtype``.

    Each byte of ``blocks`` holds two 4-bit codes that index a 16-entry table
    of FP4 values. The low nibble is written to the even output column and
    the high nibble to the odd one. Every row of decoded values is then
    multiplied by ``2 ** (scale - 127)`` using that row's entry in ``scales``.

    Args:
        new_name: Name returned alongside the unpacked tensor and used in the
            log message.
        blocks: Packed codes shaped ``(*prefix, G, B)``. Must be an integer
            tensor because bitwise ops are applied to it (presumably uint8;
            confirm against the caller).
        scales: Biased per-row exponents shaped ``(*prefix, G)``.
        dtype: Floating dtype of the lookup table and of the result.
        rows_per_chunk: Number of flattened rows decoded per loop iteration;
            bounds the size of the temporary index tensors.

    Returns:
        ``(new_name, out)`` where ``out`` is a CPU tensor of shape
        ``(*prefix, G * B * 2)``.

    Raises:
        ValueError: If ``rows_per_chunk`` is not positive.
        AssertionError: If ``blocks.shape[:-1]`` differs from ``scales.shape``.
    """
    import math

    # A negative step would make the chunk loop empty and hand back the
    # uninitialised ``torch.empty`` buffer as if it were real data.
    if rows_per_chunk < 1:
        raise ValueError(f"rows_per_chunk must be a positive integer, got {rows_per_chunk}")

    # Raised explicitly (instead of a bare ``assert``) so the check is not
    # stripped under ``python -O``; the exception type is unchanged.
    if blocks.shape[:-1] != scales.shape:
        raise AssertionError(f"{blocks.shape=} does not match {scales.shape=}")

    # Code -> value table: codes 0-7 are the non-negative values, codes 8-15
    # the same magnitudes with the sign flipped.
    fp4_values = (
        +0.0, +0.5, +1.0, +1.5, +2.0, +3.0, +4.0, +6.0,
        -0.0, -0.5, -1.0, -1.5, -2.0, -3.0, -4.0, -6.0,
    )

    # Remove the exponent bias, then do all remaining work on the CPU.
    scales = (scales.to(torch.int32) - 127).to(device="cpu")
    blocks = blocks.to(device="cpu")
    lut = torch.tensor(fp4_values, dtype=dtype, device=blocks.device)

    # Flatten every leading dimension so each row pairs with one exponent.
    *prefix_shape, G, B = blocks.shape
    rows_total = math.prod(prefix_shape) * G
    blocks = blocks.reshape(rows_total, B)
    scales = scales.reshape(rows_total, 1)

    out = torch.empty(rows_total, B * 2, dtype=dtype, device="cpu")

    for r0 in range(0, rows_total, rows_per_chunk):
        r1 = min(r0 + rows_per_chunk, rows_total)
        blk = blocks[r0:r1]
        sub = out[r0:r1]  # view into ``out``; writes below land in place

        # Index temporaries are not bound to names, so each is released as
        # soon as its lookup has been written.
        sub[:, 0::2] = lut[(blk & 0x0F).to(torch.long)]
        sub[:, 1::2] = lut[(blk >> 4).to(torch.long)]

        # sub <- sub * 2 ** exponent, broadcasting one exponent per row.
        torch.ldexp(sub, scales[r0:r1], out=sub)

    # ``out`` is contiguous, so a single reshape restores the prefix dims.
    out = out.reshape(*prefix_shape, G * B * 2)
    logger.info(f"Unpacked {new_name} with shape {out.shape} from MXFP4")
    return new_name, out
79197853 def generate_extra_tensors (self ) -> Iterable [tuple [str , Tensor ]]:
79207854 blocks0 : Tensor = torch .zeros (1 )
79217855 blocks1 : Tensor = torch .zeros (1 )
7856+ found_mxfp4_tensors = False
79227857 # we assume that tensors are loaded in the correct order
79237858 for name , data_torch in self .get_tensors ():
79247859 if "mlp.experts.down_proj_blocks" in name :
79257860 blocks0 = data_torch
79267861 elif "mlp.experts.down_proj_scales" in name :
79277862 new_name = self .map_tensor_name (name .replace ("_scales" , ".weight" ))
79287863 self .repack_mxfp4 (new_name , blocks0 , data_torch )
7929- # yield self.convert_moe_packed_tensors(new_name, blocks0, data_torch)
7864+ found_mxfp4_tensors = True
79307865 elif "mlp.experts.gate_up_proj_blocks" in name :
79317866 blocks0 , blocks1 = data_torch [:, ::2 , :, :], data_torch [:, 1 ::2 , :, :]
79327867 elif "mlp.experts.gate_up_proj_scales" in name :
@@ -7935,8 +7870,9 @@ def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
79357870 new_name_up = self .map_tensor_name (name .replace ("gate_up_proj_scales" , "up_proj.weight" ))
79367871 self .repack_mxfp4 (new_name_gate , blocks0 , scales0 )
79377872 self .repack_mxfp4 (new_name_up , blocks1 , scales1 )
7938- # yield self.convert_moe_packed_tensors(new_name_gate, blocks0, scales0)
7939- # yield self.convert_moe_packed_tensors(new_name_up, blocks1, scales1)
7873+ found_mxfp4_tensors = True
7874+ if not found_mxfp4_tensors :
7875+ raise ValueError ("No MXFP4 tensors found in the model. Please make sure you are using MXFP4 model." )
79407876 return []
79417877
79427878 def modify_tensors (self , data_torch : Tensor , name : str , bid : int | None ) -> Iterable [tuple [str , Tensor ]]:
0 commit comments