@@ -152,6 +152,8 @@ class Tokenizer:
152152 MERGES = "tokenizer.ggml.merges"
153153 BOS_ID = "tokenizer.ggml.bos_token_id"
154154 EOS_ID = "tokenizer.ggml.eos_token_id"
155+ EOT_ID = "tokenizer.ggml.eot_token_id"
156+ EOM_ID = "tokenizer.ggml.eom_token_id"
155157 UNK_ID = "tokenizer.ggml.unknown_token_id"
156158 SEP_ID = "tokenizer.ggml.seperator_token_id"
157159 PAD_ID = "tokenizer.ggml.padding_token_id"
@@ -168,11 +170,16 @@ class Tokenizer:
168170 CHAT_TEMPLATE_N = "tokenizer.chat_template.{name}"
169171 CHAT_TEMPLATES = "tokenizer.chat_templates"
170172 # FIM/Infill special token constants
173+ FIM_PRE_ID = "tokenizer.ggml.fim_pre_token_id"
174+ FIM_SUF_ID = "tokenizer.ggml.fim_suf_token_id"
175+ FIM_MID_ID = "tokenizer.ggml.fim_mid_token_id"
176+ FIM_PAD_ID = "tokenizer.ggml.fim_pad_token_id"
177+ FIM_REP_ID = "tokenizer.ggml.fim_rep_token_id"
178+ FIM_SEP_ID = "tokenizer.ggml.fim_sep_token_id"
179+ # deprecated: superseded by FIM_PRE_ID / FIM_SUF_ID / FIM_MID_ID above
171180 PREFIX_ID = "tokenizer.ggml.prefix_token_id"
172181 SUFFIX_ID = "tokenizer.ggml.suffix_token_id"
173182 MIDDLE_ID = "tokenizer.ggml.middle_token_id"
174- EOT_ID = "tokenizer.ggml.eot_token_id"
175- EOM_ID = "tokenizer.ggml.eom_token_id"
176183
177184 class Adapter :
178185 TYPE = "adapter.type"
@@ -1579,15 +1586,24 @@ def get_type(val: Any) -> GGUFValueType:
15791586KEY_TOKENIZER_MERGES = Keys .Tokenizer .MERGES
15801587KEY_TOKENIZER_BOS_ID = Keys .Tokenizer .BOS_ID
15811588KEY_TOKENIZER_EOS_ID = Keys .Tokenizer .EOS_ID
1589+ KEY_TOKENIZER_EOT_ID = Keys .Tokenizer .EOT_ID
1590+ KEY_TOKENIZER_EOM_ID = Keys .Tokenizer .EOM_ID
15821591KEY_TOKENIZER_UNK_ID = Keys .Tokenizer .UNK_ID
15831592KEY_TOKENIZER_SEP_ID = Keys .Tokenizer .SEP_ID
15841593KEY_TOKENIZER_PAD_ID = Keys .Tokenizer .PAD_ID
15851594KEY_TOKENIZER_CLS_ID = Keys .Tokenizer .CLS_ID
15861595KEY_TOKENIZER_MASK_ID = Keys .Tokenizer .MASK_ID
15871596KEY_TOKENIZER_HF_JSON = Keys .Tokenizer .HF_JSON
15881597KEY_TOKENIZER_RWKV = Keys .Tokenizer .RWKV
1589- KEY_TOKENIZER_PRIFIX_ID = Keys .Tokenizer .PREFIX_ID
1598+
1599+ KEY_TOKENIZER_FIM_PRE_ID = Keys .Tokenizer .FIM_PRE_ID
1600+ KEY_TOKENIZER_FIM_SUF_ID = Keys .Tokenizer .FIM_SUF_ID
1601+ KEY_TOKENIZER_FIM_MID_ID = Keys .Tokenizer .FIM_MID_ID
1602+ KEY_TOKENIZER_FIM_PAD_ID = Keys .Tokenizer .FIM_PAD_ID
1603+ KEY_TOKENIZER_FIM_REP_ID = Keys .Tokenizer .FIM_REP_ID
1604+ KEY_TOKENIZER_FIM_SEP_ID = Keys .Tokenizer .FIM_SEP_ID
1605+
1606+ # deprecated: superseded by the KEY_TOKENIZER_FIM_* aliases above
1607+ KEY_TOKENIZER_PREFIX_ID = Keys .Tokenizer .PREFIX_ID
15901608KEY_TOKENIZER_SUFFIX_ID = Keys .Tokenizer .SUFFIX_ID
15911609KEY_TOKENIZER_MIDDLE_ID = Keys .Tokenizer .MIDDLE_ID
1592- KEY_TOKENIZER_EOT_ID = Keys .Tokenizer .EOT_ID
1593- KEY_TOKENIZER_EOM_ID = Keys .Tokenizer .EOM_ID
0 commit comments