@@ -249,6 +249,7 @@ struct cmd_params {
249249 ggml_sched_priority prio;
250250 int delay;
251251 bool verbose;
252+ bool progress;
252253 output_formats output_format;
253254 output_formats output_format_stderr;
254255};
@@ -280,6 +281,7 @@ static const cmd_params cmd_params_defaults = {
280281 /* prio */ GGML_SCHED_PRIO_NORMAL,
281282 /* delay */ 0 ,
282283 /* verbose */ false ,
284+ /* progress */ false ,
283285 /* output_format */ MARKDOWN,
284286 /* output_format_stderr */ NONE,
285287};
@@ -319,6 +321,7 @@ static void print_usage(int /* argc */, char ** argv) {
319321 printf (" -o, --output <csv|json|jsonl|md|sql> (default: %s)\n " , output_format_str (cmd_params_defaults.output_format ));
320322 printf (" -oe, --output-err <csv|json|jsonl|md|sql> (default: %s)\n " , output_format_str (cmd_params_defaults.output_format_stderr ));
321323 printf (" -v, --verbose (default: %s)\n " , cmd_params_defaults.verbose ? " 1" : " 0" );
324+ printf (" --progress (default: %s)\n " , cmd_params_defaults.progress ? " 1" : " 0" );
322325 printf (" \n " );
323326 printf (" Multiple values can be given for each parameter by separating them with ',' or by specifying the parameter multiple times.\n " );
324327}
@@ -364,6 +367,7 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
364367 params.numa = cmd_params_defaults.numa ;
365368 params.prio = cmd_params_defaults.prio ;
366369 params.delay = cmd_params_defaults.delay ;
370+ params.progress = cmd_params_defaults.progress ;
367371
368372 for (int i = 1 ; i < argc; i++) {
369373 arg = argv[i];
@@ -616,6 +620,8 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
616620 invalid_param = !output_format_from_str (argv[i], params.output_format_stderr );
617621 } else if (arg == " -v" || arg == " --verbose" ) {
618622 params.verbose = true ;
623+ } else if (arg == " --progress" ) {
624+ params.progress = true ;
619625 } else {
620626 invalid_param = true ;
621627 break ;
@@ -1523,7 +1529,13 @@ int main(int argc, char ** argv) {
15231529 llama_model * lmodel = nullptr ;
15241530 const cmd_params_instance * prev_inst = nullptr ;
15251531
1532+ int params_idx = 0 ;
1533+ auto params_count = params_instances.size ();
15261534 for (const auto & inst : params_instances) {
1535+ params_idx ++;
1536+ if (params.progress ) { // opt-in progress reporting: announce on stderr which benchmark instance is starting
1537+ fprintf (stderr, " llama-bench: benchmark %d/%zu: starting\n " , params_idx, params_count); // %zu: params_count comes from .size() (size_t); %ld mismatches where long is narrower than size_t
1538+ }
15271539 // keep the same model between tests when possible
15281540 if (!lmodel || !prev_inst || !inst.equal_mparams (*prev_inst)) {
15291541 if (lmodel) {
@@ -1556,7 +1568,7 @@ int main(int argc, char ** argv) {
15561568
15571569 struct ggml_threadpool_params tpp = ggml_threadpool_params_default (t.n_threads );
15581570 if (!parse_cpu_mask (t.cpu_mask , tpp.cpumask )) {
1559- LOG_TEE ( " %s: failed to parse cpu-mask: %s\n " , __func__, t.cpu_mask .c_str ());
1571+ fprintf (stderr, " %s: failed to parse cpu-mask: %s\n " , __func__, t.cpu_mask .c_str ());
15601572 exit (1 );
15611573 }
15621574 tpp.strict_cpu = t.cpu_strict ;
@@ -1565,18 +1577,24 @@ int main(int argc, char ** argv) {
15651577
15661578 struct ggml_threadpool * threadpool = ggml_threadpool_new (&tpp);
15671579 if (!threadpool) {
1568- LOG_TEE ( " %s: threadpool create failed : n_threads %d\n " , __func__, tpp.n_threads );
1580+ fprintf (stderr, " %s: threadpool create failed : n_threads %d\n " , __func__, tpp.n_threads );
15691581 exit (1 );
15701582 }
15711583
15721584 llama_attach_threadpool (ctx, threadpool, NULL );
15731585
15741586 // warmup run
15751587 if (t.n_prompt > 0 ) {
1588+ if (params.progress ) { // opt-in progress reporting: announce the warmup prompt pass on stderr
1589+ fprintf (stderr, " llama-bench: benchmark %d/%zu: warmup prompt run\n " , params_idx, params_count); // %zu: params_count comes from .size() (size_t); %ld mismatches where long is narrower than size_t
1590+ }
15761591 // test_prompt(ctx, std::min(t.n_batch, std::min(t.n_prompt, 32)), 0, t.n_batch, t.n_threads);
15771592 test_prompt (ctx, t.n_prompt , 0 , t.n_batch , t.n_threads );
15781593 }
15791594 if (t.n_gen > 0 ) {
1595+ if (params.progress ) { // opt-in progress reporting: announce the warmup generation pass on stderr
1596+ fprintf (stderr, " llama-bench: benchmark %d/%zu: warmup generation run\n " , params_idx, params_count); // %zu: params_count comes from .size() (size_t); %ld mismatches where long is narrower than size_t
1597+ }
15801598 test_gen (ctx, 1 , 0 , t.n_threads );
15811599 }
15821600
@@ -1586,9 +1604,15 @@ int main(int argc, char ** argv) {
15861604 uint64_t t_start = get_time_ns ();
15871605
15881606 if (t.n_prompt > 0 ) {
1607+ if (params.progress ) { // opt-in progress reporting: announce the current timed prompt repetition (1-based) on stderr
1608+ fprintf (stderr, " llama-bench: benchmark %d/%zu: prompt run %d/%d\n " , params_idx, params_count, i + 1 , params.reps ); // %zu: params_count comes from .size() (size_t); %ld mismatches where long is narrower than size_t
1609+ }
15891610 test_prompt (ctx, t.n_prompt , 0 , t.n_batch , t.n_threads );
15901611 }
15911612 if (t.n_gen > 0 ) {
1613+ if (params.progress ) { // opt-in progress reporting: announce the current timed generation repetition (1-based) on stderr
1614+ fprintf (stderr, " llama-bench: benchmark %d/%zu: generation run %d/%d\n " , params_idx, params_count, i + 1 , params.reps ); // %zu: params_count comes from .size() (size_t); %ld mismatches where long is narrower than size_t
1615+ }
15921616 test_gen (ctx, t.n_gen , t.n_prompt , t.n_threads );
15931617 }
15941618
0 commit comments