@@ -291,7 +291,6 @@ static void print_usage(int /* argc */, char ** argv) {
291291 printf (" -fa, --flash-attn <0|1> (default: %s)\n " , join (cmd_params_defaults.flash_attn , " ," ).c_str ());
292292 printf (" -mmp, --mmap <0|1> (default: %s)\n " , join (cmd_params_defaults.use_mmap , " ," ).c_str ());
293293 printf (" --numa <distribute|isolate|numactl> (default: disabled)\n " );
294- printf (" -mt, --max-threads <n> (default: %d)\n " , cmd_params_defaults.cpuparams .n_threads );
295294 printf (" -C, --cpu-mask <hex> (default: 0x0)\n " );
296295 printf (" --cpu-strict <0|1> (default: %d)\n " , cmd_params_defaults.cpuparams .strict_cpu );
297296 printf (" --priority <0|1|2|3> (default: %d)\n " , cmd_params_defaults.cpuparams .priority );
@@ -499,12 +498,6 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
499498 else if (value == " numactl" ) { params.numa = GGML_NUMA_STRATEGY_NUMACTL; }
500499 else { invalid_param = true ; break ; }
501500 }
502- } else if (arg == " -mt" || arg == " --max-threads" ) {
503- if (++i >= argc) {
504- invalid_param = true ;
505- break ;
506- }
507- params.cpuparams .n_threads = std::stoi (argv[i]);
508501 } else if (arg == " -C" || arg == " --cpu-mask" ) {
509502 if (++i >= argc) {
510503 invalid_param = true ;
@@ -1435,21 +1428,6 @@ int main(int argc, char ** argv) {
14351428
14361429 postprocess_cpu_params (params.cpuparams );
14371430
1438- struct ggml_threadpool_params tpp;
1439- tpp.n_threads = params.cpuparams .n_threads ;
1440- tpp.mask_specified = params.cpuparams .mask_valid ;
1441- tpp.strict_cpu = params.cpuparams .strict_cpu ;
1442- tpp.prio = params.cpuparams .priority ;
1443- tpp.poll = params.cpuparams .poll ;
1444-
1445- std::memcpy (&tpp.cpumask [0 ], &params.cpuparams .cpumask [0 ], GGML_MAX_N_THREADS);
1446-
1447- struct ggml_compute_threadpool * threadpool = ggml_create_threadpool (&tpp);
1448- if (!threadpool) {
1449- LOG_TEE (" %s: threadpool create failed : n_threads %d\n " , __func__, tpp.n_threads );
1450- exit (1 );
1451- }
1452-
14531431 for (const auto & inst : params_instances) {
14541432 // keep the same model between tests when possible
14551433 if (!lmodel || !prev_inst || !inst.equal_mparams (*prev_inst)) {
@@ -1475,6 +1453,22 @@ int main(int argc, char ** argv) {
14751453 test t (inst, lmodel, ctx);
14761454
14771455 llama_kv_cache_clear (ctx);
1456+
1457+ struct ggml_threadpool_params tpp;
1458+ tpp.n_threads = t.n_threads ;
1459+ tpp.mask_specified = params.cpuparams .mask_valid ;
1460+ tpp.strict_cpu = params.cpuparams .strict_cpu ;
1461+ tpp.prio = params.cpuparams .priority ;
1462+ tpp.poll = params.cpuparams .poll ;
1463+
1464+ std::memcpy (&tpp.cpumask [0 ], &params.cpuparams .cpumask [0 ], GGML_MAX_N_THREADS);
1465+
1466+ struct ggml_compute_threadpool * threadpool = ggml_create_threadpool (&tpp);
1467+ if (!threadpool) {
1468+ LOG_TEE (" %s: threadpool create failed : n_threads %d\n " , __func__, tpp.n_threads );
1469+ exit (1 );
1470+ }
1471+
14781472 llama_attach_threadpool (ctx, threadpool);
14791473
14801474 // warmup run
@@ -1515,9 +1509,9 @@ int main(int argc, char ** argv) {
15151509 llama_print_timings (ctx);
15161510
15171511 llama_free (ctx);
1518- }
15191512
1520- ggml_release_threadpool (threadpool);
1513+ ggml_release_threadpool (threadpool);
1514+ }
15211515
15221516 llama_free_model (lmodel);
15231517
0 commit comments