@@ -106,23 +106,41 @@ class chat_formatter {
 
     std::string operator()(const std::string & role, const std::string & content, [[maybe_unused]] bool use_toolcalls = false) {
 
-        common_chat_msg new_msg;
-        new_msg.role = role;
-        new_msg.content = content;
-
-        common_chat_params cparams;
         common_chat_templates_inputs cinputs;
+        cinputs.use_jinja = params_.use_jinja;
+        cinputs.add_generation_prompt = (role == "user");
 #ifdef LLAMA_USE_TOOLCALL
         if (tc_client_ != nullptr && use_toolcalls) {
             cinputs.tool_choice = common_chat_tool_choice_parse_oaicompat(tc_client_->tool_choice());
             cinputs.tools = common_chat_tools_parse_oaicompat(tc_client_->tool_list());
         }
 #endif
-        bool add_ass = role == "user";
-        auto formatted =
-            common_chat_format_single(chat_templates_, chat_msgs_, new_msg, add_ass, params_.use_jinja,
-                                      &cinputs, &cparams);
+        for (const auto & msg : chat_msgs_) {
+            cinputs.messages.push_back(common_chat_msg(msg));
+        }
+
+        common_chat_msg new_msg = common_chat_parse(content, *chat_format_);
+        new_msg.role = role;
+
+#ifdef LLAMA_USE_TOOLCALL
+        if (!new_msg.tool_calls.empty()) {
+            nlohmann::json result_array = nlohmann::json::array();
+            for (const auto & tc : new_msg.tool_calls) {
+                toolcall::result_set res = tc_client_->call(tc.name, tc.arguments, tc.id);
+                if (!res.empty()) {
+                    for (const auto & r : res) {
+                        result_array.push_back(r.data);
+                    }
+                }
+            }
+            new_msg.content += result_array.dump(-1);
+        }
+#endif
+
+        cinputs.messages.push_back(new_msg);
+        common_chat_params cparams = common_chat_templates_apply(chat_templates_, cinputs);
 
+        auto formatted = cparams.prompt;
         chat_msgs_.push_back(new_msg);
         LOG_DBG("formatted: '%s'\n", formatted.c_str());
 
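To make the new control flow concrete, here is a minimal sketch of the one-shot formatting path this hunk introduces: the whole history is copied into `common_chat_templates_inputs` and rendered in a single `common_chat_templates_apply` call, whose `common_chat_params` result carries the prompt. The `common_chat_*` types and functions are the ones used in the diff (from llama.cpp's `common/chat.h`); the wrapper function and its arguments are illustrative, not part of the patch.

```cpp
// Illustrative sketch of the one-shot render path; names other than the
// common_chat_* API (which appears in the diff) are hypothetical.
#include "chat.h"  // common_chat_templates_inputs/_apply, common_chat_msg

std::string render_prompt(common_chat_templates * tmpls,
                          const std::vector<common_chat_msg> & history,
                          const std::string & role,
                          const std::string & content,
                          bool use_jinja) {
    common_chat_templates_inputs cinputs;
    cinputs.use_jinja             = use_jinja;
    cinputs.add_generation_prompt = (role == "user"); // open an assistant turn after user input

    cinputs.messages = history; // re-render the entire conversation each turn

    common_chat_msg new_msg;
    new_msg.role    = role;
    new_msg.content = content;
    cinputs.messages.push_back(new_msg);

    common_chat_params cparams = common_chat_templates_apply(tmpls, cinputs);
    return cparams.prompt; // fully rendered prompt, ready to tokenize
}
```

Unlike the removed `common_chat_format_single`, which returned only the delta for the newest message, this re-renders the full history, so tool definitions and tool-call results pass through template expansion together.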
@@ -145,42 +161,6 @@ class chat_formatter {
 #endif
 };
 
-#ifdef LLAMA_USE_TOOLCALL
-static bool call_tool(common_chat_format chat_format, const std::string & assistant_msg, llama_context * ctx,
-                      toolcall::client::ptr tc_client, std::vector<llama_token> & embd_inp)
-{
-    bool tool_was_called = false;
-    common_chat_msg msg = common_chat_parse(assistant_msg, chat_format);
-    if (!msg.tool_calls.empty()) {
-        for (const auto & tc : msg.tool_calls) {
-            nlohmann::json tc_oai_json {
-                {"type", "function"},
-                {"function", {
-                    {"name", tc.name},
-                    {"arguments", tc.arguments},
-                }},
-                {"id", tc.id},
-            };
-            toolcall::result_set res = tc_client->call(tc_oai_json);
-            if (!res.empty()) {
-                std::string toolcall_result_str;
-                for (const auto & r : res) {
-                    toolcall_result_str += ("\n" + r.data); // Although more complex results can be
-                                                            // returned (resources, images, etc.),
-                                                            // for now simply append the data. Later
-                                                            // on support for specific models may
-                                                            // allow for unpacking Base64 data.
-                }
-                auto toolcall_result_tok = common_tokenize(ctx, toolcall_result_str, false, true);
-                embd_inp.insert(embd_inp.end(), toolcall_result_tok.begin(), toolcall_result_tok.end());
-            }
-            tool_was_called = true;
-        }
-    }
-    return tool_was_called;
-}
-#endif
-
 int main(int argc, char ** argv) {
     common_params params;
     g_params = &params;
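The deleted helper above is superseded by the tool-execution block now inside `chat_formatter::operator()`. The observable change is in how results are aggregated: a compact JSON array appended to the assistant message (and therefore rendered through the chat template), instead of a newline-joined string tokenized straight into `embd_inp`. A sketch of the new aggregation, assuming `toolcall::result_set` is iterable with a string `data` field, as both versions of the code imply:

```cpp
// Sketch: collect tool results into one compact JSON array string, as the
// formatter now does. toolcall::result_set / r.data as used in the diff.
#include <string>
#include <nlohmann/json.hpp>

std::string aggregate_results(const toolcall::result_set & res) {
    nlohmann::json result_array = nlohmann::json::array();
    for (const auto & r : res) {
        result_array.push_back(r.data); // richer payloads (resources, images) remain future work
    }
    return result_array.dump(-1); // -1 indent: single-line, most compact form
}
```

Because the aggregated string is appended to `new_msg.content` before `common_chat_templates_apply` runs, tool results are rendered through the template like any other message text.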
@@ -943,16 +923,6 @@ int main(int argc, char ** argv) {
             }
         }
 
-#ifdef LLAMA_USE_TOOLCALL
-        if ((tc_client && n_past > 0) && (waiting_for_first_input || is_interacting)) {
-            size_t last_len = embd_inp.size();
-            bool was_toolcall = call_tool(chat_format, assistant_ss.str(), ctx, tc_client, embd_inp);
-            if (was_toolcall && last_len < embd_inp.size()) {
-                LOG("%s", common_token_to_piece(ctx, embd_inp[last_len]).c_str());
-            }
-        }
-#endif
-
         if ((n_past > 0 || waiting_for_first_input) && is_interacting) {
             LOG_DBG("waiting for user input\n");
 
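With the block above gone, the main loop no longer splices tool output into `embd_inp` itself; the parse-execute-append cycle lives entirely in `chat_formatter`. A sketch of what the consolidated call site could look like (the `format_chat` instance name and the exact invocation are assumptions, not shown in this diff):

```cpp
// Hypothetical call site after this change: route raw assistant output
// back through the formatter, which runs tool calls and re-renders.
std::string formatted = format_chat("assistant", assistant_ss.str(), /*use_toolcalls=*/true);

// Tokenize the rendered text as usual; no separate call_tool() splice.
auto toks = common_tokenize(ctx, formatted, /*add_special=*/false, /*parse_special=*/true);
embd_inp.insert(embd_inp.end(), toks.begin(), toks.end());
```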