@@ -383,6 +383,12 @@ int main(int argc, char ** argv) {
383383 if (!params.antiprompt .empty ()) {
384384 for (const auto & antiprompt : params.antiprompt ) {
385385 LOG_TEE (" Reverse prompt: '%s'\n " , antiprompt.c_str ());
386+ if (params.verbose_prompt ) {
387+ auto tmp = ::llama_tokenize (ctx, antiprompt, false , true );
388+ for (int i = 0 ; i < (int ) tmp.size (); i++) {
389+ LOG_TEE (" %6d -> '%s'\n " , tmp[i], llama_token_to_piece (ctx, tmp[i]).c_str ());
390+ }
391+ }
386392 }
387393 }
388394
@@ -392,10 +398,22 @@ int main(int argc, char ** argv) {
392398
393399 if (!params.input_prefix .empty ()) {
394400 LOG_TEE (" Input prefix: '%s'\n " , params.input_prefix .c_str ());
401+ if (params.verbose_prompt ) {
402+ auto tmp = ::llama_tokenize (ctx, params.input_prefix , true , true );
403+ for (int i = 0 ; i < (int ) tmp.size (); i++) {
404+ LOG_TEE (" %6d -> '%s'\n " , tmp[i], llama_token_to_piece (ctx, tmp[i]).c_str ());
405+ }
406+ }
395407 }
396408
397409 if (!params.input_suffix .empty ()) {
398410 LOG_TEE (" Input suffix: '%s'\n " , params.input_suffix .c_str ());
411+ if (params.verbose_prompt ) {
412+ auto tmp = ::llama_tokenize (ctx, params.input_suffix , false , true );
413+ for (int i = 0 ; i < (int ) tmp.size (); i++) {
414+ LOG_TEE (" %6d -> '%s'\n " , tmp[i], llama_token_to_piece (ctx, tmp[i]).c_str ());
415+ }
416+ }
399417 }
400418 }
401419 LOG_TEE (" sampling: repeat_last_n = %d, repeat_penalty = %f, presence_penalty = %f, frequency_penalty = %f, top_k = %d, tfs_z = %f, top_p = %f, typical_p = %f, temp = %f, mirostat = %d, mirostat_lr = %f, mirostat_ent = %f\n " ,
@@ -744,8 +762,7 @@ int main(int argc, char ** argv) {
744762 std::string buffer;
745763 if (!params.input_prefix .empty ()) {
746764 LOG (" appending input prefix: '%s'\n " , params.input_prefix .c_str ());
747- buffer += params.input_prefix ;
748- printf (" %s" , buffer.c_str ());
765+ printf (" %s" , params.input_prefix .c_str ());
749766 }
750767
751768 // color user input only
@@ -767,7 +784,6 @@ int main(int argc, char ** argv) {
767784 // append input suffix if any
768785 if (!params.input_suffix .empty ()) {
769786 LOG (" appending input suffix: '%s'\n " , params.input_suffix .c_str ());
770- buffer += params.input_suffix ;
771787 printf (" %s" , params.input_suffix .c_str ());
772788 }
773789
@@ -782,10 +798,14 @@ int main(int argc, char ** argv) {
782798 embd_inp.insert (embd_inp.end (), inp_pfx.begin (), inp_pfx.end ());
783799 }
784800
785- const auto line_inp = ::llama_tokenize (ctx, buffer, false , true );
801+ const auto line_pfx = ::llama_tokenize (ctx, params.input_prefix , false , true );
802+ const auto line_inp = ::llama_tokenize (ctx, buffer, false , false );
803+ const auto line_sfx = ::llama_tokenize (ctx, params.input_suffix , false , true );
786804 LOG (" input tokens: %s\n " , LOG_TOKENS_TOSTR_PRETTY (ctx, line_inp));
787805
806+ embd_inp.insert (embd_inp.end (), line_pfx.begin (), line_pfx.end ());
788807 embd_inp.insert (embd_inp.end (), line_inp.begin (), line_inp.end ());
808+ embd_inp.insert (embd_inp.end (), line_sfx.begin (), line_sfx.end ());
789809
790810 // instruct mode: insert response suffix
791811 if (params.instruct ) {
0 commit comments