@@ -975,14 +975,15 @@ static void llama_nop(struct ggml_tensor * tensor) { // don't offload by default
975975 (void ) tensor;
976976}
977977
978- static std::string llama_token_to_str (const struct llama_context * ctx, llama_token token) {
978+ static std::string llama_token_to_piece (const struct llama_context * ctx, llama_token token) {
979979 std::vector<char > result (8 , 0 );
980980 const int n_tokens = llama_token_to_piece (llama_get_model (ctx), token, result.data (), result.size ());
981981 if (n_tokens < 0 ) {
982982 result.resize (-n_tokens);
983983 int check = llama_token_to_piece (llama_get_model (ctx), token, result.data (), result.size ());
984984 GGML_ASSERT (check == -n_tokens);
985- } else {
985+ }
986+ else {
986987 result.resize (n_tokens);
987988 }
988989
@@ -1202,10 +1203,10 @@ struct llama_vocab {
12021203 id special_eot_id = 32010 ;
12031204
12041205 int find_bpe_rank (std::string token_left, std::string token_right) const {
1205- replace_all (token_left, " " , " \u0120 " );
1206- replace_all (token_left, " \n " , " \u010A " );
1207- replace_all (token_right, " " , " \u0120 " );
1208- replace_all (token_right, " \n " , " \u010A " );
1206+ GGML_ASSERT (token_left. find ( " " ) == std::string::npos );
1207+ GGML_ASSERT (token_left. find ( " \n " ) == std::string::npos );
1208+ GGML_ASSERT (token_right. find ( " " ) == std::string::npos );
1209+ GGML_ASSERT (token_right. find ( " \n " ) == std::string::npos );
12091210
12101211 auto it = bpe_ranks.find (std::make_pair (token_left, token_right));
12111212 if (it == bpe_ranks.end ()) {
@@ -7499,7 +7500,7 @@ void llama_sample_grammar(struct llama_context * ctx, llama_token_data_array * c
74997500
75007501 for (size_t i = 0 ; i < candidates->size ; ++i) {
75017502 const llama_token id = candidates->data [i].id ;
7502- const std::string piece = llama_token_to_str (ctx, id);
7503+ const std::string piece = llama_token_to_piece (ctx, id);
75037504 if (id == eos) {
75047505 if (!allow_eos) {
75057506 candidates->data [i].logit = -INFINITY;
@@ -7711,7 +7712,7 @@ void llama_grammar_accept_token(struct llama_context * ctx, struct llama_grammar
77117712 GGML_ASSERT (false );
77127713 }
77137714
7714- const std::string piece = llama_token_to_str (ctx, token);
7715+ const std::string piece = llama_token_to_piece (ctx, token);
77157716
77167717 // Note terminating 0 in decoded string
77177718 const auto decoded = decode_utf8 (piece.c_str (), grammar->partial_utf8 );
0 commit comments