|
1 | | -#include <chrono> |
| 1 | +#include "frontend.h" |
| 2 | +#include "common.h" |
| 3 | +#include "llama.h" |
| 4 | + |
2 | 5 | #include "../server/httplib.h" |
3 | 6 | #include "../server/json.hpp" |
| 7 | + |
4 | 8 | #include <iostream> |
5 | 9 | #include <sstream> |
6 | 10 | #include <thread> |
7 | 11 | #include <vector> |
8 | | -#include "frontend.h" |
9 | | -#include "common.h" |
10 | | -#include "llama.h" |
| 12 | +#include <chrono> |
11 | 13 |
|
12 | 14 | using namespace httplib; |
13 | 15 | using namespace std; |
@@ -241,9 +243,7 @@ struct server_parallel_context { |
241 | 243 | string prompt = data.value("prompt", ""); |
242 | 244 | for (llama_client_slot & slot : slots) |
243 | 245 | { |
244 | | - if ( |
245 | | - slot_id == -1 && slot.available() || |
246 | | - slot.id == slot_id) |
| 246 | + if ((slot_id == -1 && slot.available()) || slot.id == slot_id) |
247 | 247 | { |
248 | 248 | slot.start(prompt, temperature); |
249 | 249 | LOG_TEE("slot %i is processing\n", slot.id); |
@@ -429,8 +429,6 @@ struct server_parallel_context { |
429 | 429 | slot.generated_text += token_str; |
430 | 430 | slot.sampled = id; |
431 | 431 |
|
432 | | - size_t pos = 0; |
433 | | - |
434 | 432 | size_t stop_pos = |
435 | 433 | findStoppingStrings(slot.generated_text, token_str.size(), STOP_FULL); |
436 | 434 |
|
@@ -740,20 +738,34 @@ static void server_params_parse(int argc, char **argv, server_params &sparams, |
740 | 738 | else if (arg == "--numa") |
741 | 739 | { |
742 | 740 | params.numa = true; |
743 | | - } else if (arg == "-cb" || arg == "--cont-batching") { |
| 741 | + } else if (arg == "-cb" || arg == "--cont-batching") |
| 742 | + { |
744 | 743 | params.cont_batching = true; |
745 | | - } else if (arg == "-np" || arg == "--parallel") { |
746 | | - if (++i >= argc) { |
| 744 | + } |
| 745 | + else if (arg == "-np" || arg == "--parallel") |
| 746 | + { |
| 747 | + if (++i >= argc) |
| 748 | + { |
747 | 749 | invalid_param = true; |
748 | 750 | break; |
749 | 751 | } |
750 | 752 | params.n_parallel = std::stoi(argv[i]); |
751 | | - } else if (arg == "-n" || arg == "--n-predict") { |
752 | | - if (++i >= argc) { |
| 753 | + } else if (arg == "-n" || arg == "--n-predict") |
| 754 | + { |
| 755 | + if (++i >= argc) |
| 756 | + { |
753 | 757 | invalid_param = true; |
754 | 758 | break; |
755 | 759 | } |
756 | 760 | params.n_predict = std::stoi(argv[i]); |
| 761 | + } else if (arg == "-r" || arg == "--reverse-prompt") |
| 762 | + { |
| 763 | + if (++i >= argc) |
| 764 | + { |
| 765 | + invalid_param = true; |
| 766 | + break; |
| 767 | + } |
| 768 | + params.antiprompt.push_back(argv[i]); |
757 | 769 | } |
758 | 770 | else |
759 | 771 | { |
|
0 commit comments