@@ -13,7 +13,7 @@ class TensorNameMap:
1313 "transformer.wte" , # gpt2 gpt-j mpt refact qwen dbrx jais exaone
1414 "transformer.word_embeddings" , # falcon
1515 "word_embeddings" , # bloom
16- "model.embed_tokens" , # llama-hf nemotron olmoe olmo2 rwkv6qwen2 glm4-0414 granite-hybrid
16+ "model.embed_tokens" , # llama-hf nemotron olmoe olmo2 rwkv6qwen2 glm4-0414 granite-hybrid exaone4
1717 "tok_embeddings" , # llama-pth
1818 "embeddings.word_embeddings" , # bert nomic-bert
1919 "language_model.embedding.word_embeddings" , # persimmon
@@ -62,7 +62,7 @@ class TensorNameMap:
         # Output
         MODEL_TENSOR.OUTPUT: (
             "embed_out",  # gptneox
-            "lm_head",  # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx jais nemotron exaone olmoe olmo2 phimoe
+            "lm_head",  # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx jais nemotron exaone exaone4 olmoe olmo2 phimoe
             "output",  # llama-pth bloom internlm2
             "word_embeddings_for_head",  # persimmon
             "lm_head.linear",  # phi2
@@ -76,7 +76,7 @@ class TensorNameMap:
         MODEL_TENSOR.OUTPUT_NORM: (
             "gpt_neox.final_layer_norm",  # gptneox
             "transformer.ln_f",  # gpt2 gpt-j falcon jais exaone
-            "model.norm",  # llama-hf baichuan internlm2 olmoe olmo2 phimoe
+            "model.norm",  # llama-hf baichuan internlm2 olmoe olmo2 phimoe exaone4
             "norm",  # llama-pth
             "transformer.norm_f",  # mpt dbrx
             "ln_f",  # refact bloom qwen gpt2
@@ -168,7 +168,7 @@ class TensorNameMap:

         # Attention query
         MODEL_TENSOR.ATTN_Q: (
-            "model.layers.{bid}.self_attn.q_proj",  # llama-hf nemotron olmoe olmo2 phimoe
+            "model.layers.{bid}.self_attn.q_proj",  # llama-hf nemotron olmoe olmo2 phimoe exaone4
             "model.layers.{bid}.self_attn.q_proj_no_perm",  # llama-custom
             "layers.{bid}.attention.wq",  # llama-pth
             "encoder.layer.{bid}.attention.self.query",  # bert
@@ -183,7 +183,7 @@ class TensorNameMap:

         # Attention key
         MODEL_TENSOR.ATTN_K: (
-            "model.layers.{bid}.self_attn.k_proj",  # llama-hf nemotron olmoe olmo2 phimoe
+            "model.layers.{bid}.self_attn.k_proj",  # llama-hf nemotron olmoe olmo2 phimoe exaone4
             "model.layers.{bid}.self_attn.k_proj_no_perm",  # llama-custom
             "layers.{bid}.attention.wk",  # llama-pth
             "encoder.layer.{bid}.attention.self.key",  # bert
@@ -199,7 +199,7 @@ class TensorNameMap:

         # Attention value
         MODEL_TENSOR.ATTN_V: (
-            "model.layers.{bid}.self_attn.v_proj",  # llama-hf nemotron olmoe olmo2 phimoe
+            "model.layers.{bid}.self_attn.v_proj",  # llama-hf nemotron olmoe olmo2 phimoe exaone4
             "layers.{bid}.attention.wv",  # llama-pth
             "encoder.layer.{bid}.attention.self.value",  # bert
             "transformer.layer.{bid}.attention.v_lin",  # distillbert
@@ -219,7 +219,7 @@ class TensorNameMap:
219219 "transformer.blocks.{bid}.attn.out_proj" , # mpt
220220 "transformer.h.{bid}.self_attention.dense" , # falcon
221221 "h.{bid}.self_attention.dense" , # bloom
222- "model.layers.{bid}.self_attn.o_proj" , # llama-hf nemotron olmoe olmo2 phimoe
222+ "model.layers.{bid}.self_attn.o_proj" , # llama-hf nemotron olmoe olmo2 phimoe exaone4
223223 "model.layers.{bid}.self_attn.linear_attn" , # deci
224224 "layers.{bid}.attention.wo" , # llama-pth
225225 "encoder.layer.{bid}.attention.output.dense" , # bert
@@ -252,7 +252,7 @@ class TensorNameMap:
         ),

         MODEL_TENSOR.ATTN_POST_NORM: (
-            "model.layers.{bid}.post_attention_layernorm",  # gemma2 olmo2  # ge
+            "model.layers.{bid}.post_attention_layernorm",  # gemma2 olmo2 exaone4  # ge
             "model.layers.{bid}.post_self_attn_layernorm",  # glm-4-0414
         ),

@@ -293,7 +293,7 @@ class TensorNameMap:

         # Post feed-forward norm
         MODEL_TENSOR.FFN_POST_NORM: (
-            "model.layers.{bid}.post_feedforward_layernorm",  # gemma2 olmo2
+            "model.layers.{bid}.post_feedforward_layernorm",  # gemma2 olmo2 exaone4
             "model.layers.{bid}.post_mlp_layernorm",  # glm-4-0414
             "model.layers.{bid}.feed_forward.up_proj",
         ),
@@ -325,7 +325,7 @@ class TensorNameMap:
325325 "transformer.blocks.{bid}.ffn.up_proj" , # mpt
326326 "transformer.h.{bid}.mlp.dense_h_to_4h" , # falcon
327327 "h.{bid}.mlp.dense_h_to_4h" , # bloom
328- "model.layers.{bid}.mlp.up_proj" , # llama-hf refact nemotron olmo2
328+ "model.layers.{bid}.mlp.up_proj" , # llama-hf refact nemotron olmo2 exaone4
329329 "layers.{bid}.feed_forward.w3" , # llama-pth
330330 "encoder.layer.{bid}.intermediate.dense" , # bert
331331 "transformer.layer.{bid}.ffn.lin1" , # distillbert
@@ -378,7 +378,7 @@ class TensorNameMap:

         # Feed-forward gate
         MODEL_TENSOR.FFN_GATE: (
-            "model.layers.{bid}.mlp.gate_proj",  # llama-hf refact olmo2
+            "model.layers.{bid}.mlp.gate_proj",  # llama-hf refact olmo2 exaone4
             "layers.{bid}.feed_forward.w1",  # llama-pth
             "transformer.h.{bid}.mlp.w2",  # qwen
             "transformer.h.{bid}.mlp.c_fc2",  # jais
@@ -415,7 +415,7 @@ class TensorNameMap:
415415 "transformer.blocks.{bid}.ffn.down_proj" , # mpt
416416 "transformer.h.{bid}.mlp.dense_4h_to_h" , # falcon
417417 "h.{bid}.mlp.dense_4h_to_h" , # bloom
418- "model.layers.{bid}.mlp.down_proj" , # llama-hf nemotron olmo2
418+ "model.layers.{bid}.mlp.down_proj" , # llama-hf nemotron olmo2 exaone4
419419 "layers.{bid}.feed_forward.w2" , # llama-pth
420420 "encoder.layer.{bid}.output.dense" , # bert
421421 "transformer.layer.{bid}.ffn.lin2" , # distillbert
@@ -462,7 +462,7 @@ class TensorNameMap:
462462 "language_model.encoder.layers.{bid}.self_attention.q_layernorm" ,
463463 "model.layers.{bid}.self_attn.q_layernorm" , # persimmon
464464 "model.layers.{bid}.self_attn.query_layernorm" , # hunyuan
465- "model.layers.{bid}.self_attn.q_norm" , # cohere olmoe chameleon olmo2
465+ "model.layers.{bid}.self_attn.q_norm" , # cohere olmoe chameleon olmo2 exaone4
466466 "transformer.blocks.{bid}.attn.q_ln" , # sea-lion
467467 "encoder.layer.{bid}.attention.self.layer_norm_q" , # jina-bert-v2
468468 "transformer.layers.{bid}.attn.q_norm" , # openelm
@@ -472,7 +472,7 @@ class TensorNameMap:
472472 "language_model.encoder.layers.{bid}.self_attention.k_layernorm" ,
473473 "model.layers.{bid}.self_attn.k_layernorm" , # persimmon
474474 "model.layers.{bid}.self_attn.key_layernorm" , # hunyuan
475- "model.layers.{bid}.self_attn.k_norm" , # cohere olmoe chameleon olmo2
475+ "model.layers.{bid}.self_attn.k_norm" , # cohere olmoe chameleon olmo2 exaone4
476476 "transformer.blocks.{bid}.attn.k_ln" , # sea-lion
477477 "encoder.layer.{bid}.attention.self.layer_norm_k" , # jina-bert-v2
478478 "transformer.layers.{bid}.attn.k_norm" , # openelm