@@ -17814,7 +17814,7 @@ static void ggml_graph_compute_perf_stats_node(struct ggml_tensor * node, const
1781417814 node->perf_time_us += time_us_cur;
1781517815}
1781617816
17817- static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
17817+ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads, int n_cur_threads) {
1781817818 int n_tasks = 0;
1781917819
1782017820 switch (node->op) {
@@ -17899,7 +17899,7 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
1789917899 {
1790017900 // FIXME: the cost of launching additional threads decreases performance with GPU offloading
1790117901 //n_tasks = MIN(n_threads, ggml_nelements(node->src[1]));
17902- n_tasks = 1 ;
17902+ n_tasks = MIN(n_cur_threads, ggml_nelements(node->src[1]));
1790317903 } break;
1790417904 case GGML_OP_SCALE:
1790517905 case GGML_OP_SET:
@@ -18125,7 +18125,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
1812518125 /* FINALIZE */
1812618126 struct ggml_tensor * node = cgraph->nodes[node_n];
1812718127 if (GGML_OP_HAS_FINALIZE[node->op]) {
18128- params.nth = ggml_get_n_tasks(node, n_threads);
18128+ params.nth = ggml_get_n_tasks(node, n_threads, state->shared->n_threads);
1812918129 ggml_compute_forward(&params, node);
1813018130 }
1813118131 ggml_graph_compute_perf_stats_node(node, state->shared);
@@ -18135,7 +18135,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
1813518135 while (++node_n < cgraph->n_nodes) {
1813618136 GGML_PRINT_DEBUG_5("%s: %d/%d\n", __func__, node_n, cgraph->n_nodes);
1813718137 struct ggml_tensor * node = cgraph->nodes[node_n];
18138- const int n_tasks = ggml_get_n_tasks(node, n_threads);
18138+ const int n_tasks = ggml_get_n_tasks(node, n_threads, state->shared->n_threads);
1813918139
1814018140 state->shared->perf_node_start_cycles = ggml_perf_cycles();
1814118141 state->shared->perf_node_start_time_us = ggml_perf_time_us();
@@ -18183,7 +18183,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
1818318183
1818418184 /* INIT & COMPUTE */
1818518185 struct ggml_tensor * node = cgraph->nodes[node_n];
18186- const int n_tasks = ggml_get_n_tasks(node, n_threads);
18186+ const int n_tasks = ggml_get_n_tasks(node, n_threads, state->shared->n_threads);
1818718187
1818818188 struct ggml_compute_params params = {
1818918189 /*.type =*/ GGML_TASK_TYPE_INIT,
@@ -18248,7 +18248,7 @@ struct ggml_cplan ggml_graph_plan(const struct ggml_cgraph * cgraph, int n_threa
1824818248 for (int i = 0; i < cgraph->n_nodes; i++) {
1824918249 struct ggml_tensor * node = cgraph->nodes[i];
1825018250
18251- const int n_tasks = ggml_get_n_tasks(node, n_threads);
18251+ const int n_tasks = ggml_get_n_tasks(node, n_threads, 1);
1825218252
1825318253 max_tasks = MAX(max_tasks, n_tasks);
1825418254
0 commit comments