@@ -1330,60 +1330,68 @@ static void aclnn_pow_tensor_tensor(ggml_backend_cann_context& ctx,
 }


-static void aclnn_get_slope_inner(ggml_backend_cann_context& ctx, void * slope_buffer, float m, int64_t size, float start, float stop, float step){
+static void aclnn_get_slope_inner(ggml_backend_cann_context& ctx, void * slope_buffer,
+                                  float m, int64_t size, float start, float stop, float step){
     int64_t ne[] = {size};
     size_t nb[] = {sizeof(float)};

-    ggml_cann_pool_alloc arange_allocator(ctx.pool(),size * sizeof(float));
-    void * arange_buffer = arange_allocator.get();
-
-    aclTensor* arange_tensor = ggml_cann_create_tensor(
-        arange_buffer, ACL_FLOAT,
-        sizeof(float), ne, nb, 1);
+    ggml_cann_pool_alloc arange_allocator(ctx.pool(), size * sizeof(float));
+    void * arange_buffer = arange_allocator.get();
+
+    aclTensor * arange_tensor = ggml_cann_create_tensor(
+        arange_buffer, ACL_FLOAT, sizeof(float), ne, nb, 1);
     aclnn_arange(ctx, arange_tensor, start, stop, step, size);

-    aclTensor* slope_tensor = ggml_cann_create_tensor(
-        slope_buffer, ACL_FLOAT,
-        sizeof(float), ne, nb, 1);
+    aclTensor * slope_tensor = ggml_cann_create_tensor(
+        slope_buffer, ACL_FLOAT, sizeof(float), ne, nb, 1);

-    aclScalar* sc = aclCreateScalar(&m, aclDataType::ACL_FLOAT);
+    aclScalar * sc = aclCreateScalar(&m, aclDataType::ACL_FLOAT);

     GGML_CANN_CALL_ACLNN_OP(ctx, PowScalarTensor, sc, arange_tensor, slope_tensor);
     ggml_cann_release_resources(ctx, sc, arange_tensor, slope_tensor);
 }

-static void aclnn_get_slope(ggml_backend_cann_context& ctx, int64_t n_head, void * slope_buffer, float max_bias) {
-    const int n_head_log2 = 1u << (uint32_t)floor(log2(n_head));
+static void aclnn_get_slope(ggml_backend_cann_context & ctx, int64_t n_head,
+                            void * slope_buffer, float max_bias) {
+    const int n_head_log2 = 1u << (uint32_t) floor(log2(n_head));

     float m0 = powf(2.0f, -(max_bias) / n_head_log2);
     float m1 = powf(2.0f, -(max_bias / 2.0f) / n_head_log2);

-    // const float slope = (max_bias > 0.0f) ? h < n_head_log2 ? powf(m0, h + 1) : powf(m1, 2*(h - n_head_log2) + 1) : 1.0f;
+    // const float slope = (max_bias > 0.0f) ?
+    //                         h < n_head_log2 ?
+    //                             powf(m0, h + 1) :
+    //                             powf(m1, 2*(h - n_head_log2) + 1) :
+    //                         1.0f;
     // arange1
     float start = 0 + 1;
-    float end = (n_head_log2 - 1) + 1;
-    float step = 1;
+    float end   = (n_head_log2 - 1) + 1;
+    float step  = 1;
     float count = n_head_log2;
     // end needs to be +1 because aclnn uses a left-closed, right-open interval.
     aclnn_get_slope_inner(ctx, slope_buffer, m0, count, start, end + 1, step);
     if (n_head_log2 < n_head) {
         // arange2
-        start = 2 * (n_head_log2 - n_head_log2) + 1;
-        end = 2 * ((n_head - 1) - n_head_log2) + 1;
-        step = 2;
+        start = 2 * (n_head_log2 - n_head_log2) + 1;
+        end   = 2 * ((n_head - 1) - n_head_log2) + 1;
+        step  = 2;
         count = n_head - n_head_log2;
-        aclnn_get_slope_inner(ctx, (char *)slope_buffer + n_head_log2 * sizeof(float), m1, count, start, end + 1, step);
+        aclnn_get_slope_inner(
+            ctx, (char *) slope_buffer + n_head_log2 * sizeof(float),
+            m1, count, start, end + 1, step);
     }
 }

-static void aclnn_add_alibi(ggml_backend_cann_context& ctx, ggml_tensor* mask, ggml_tensor* dst, void * dst_ptr, float max_bias) {
+static void aclnn_add_alibi(ggml_backend_cann_context& ctx, ggml_tensor* mask,
+                            ggml_tensor* dst, void * dst_ptr, float max_bias) {
     void * slope_buffer = nullptr;
     void * bias_buffer = nullptr;

     int64_t n_heads = dst->ne[2];
     ggml_cann_pool_alloc slope_allocator(ctx.pool(), n_heads * sizeof(float));
     slope_buffer = slope_allocator.get();
-    ggml_cann_pool_alloc bias_allocator(ctx.pool(), ggml_nelements(dst) * ggml_element_size(dst));
+    ggml_cann_pool_alloc bias_allocator(
+        ctx.pool(), ggml_nelements(dst) * ggml_element_size(dst));
     bias_buffer = bias_allocator.get();

     if (max_bias > 0.0f) {
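
For reference, the two arange + PowScalarTensor calls in aclnn_get_slope above build the standard ALiBi per-head slopes described by the commented-out formula. The host-side helper below is a minimal sketch of the same computation for illustration only; the name ref_alibi_slopes and its loop structure are not part of this file.

// Illustration-only host-side sketch of the slopes aclnn_get_slope builds on device.
#include <cmath>
#include <cstdint>
#include <vector>

static std::vector<float> ref_alibi_slopes(int64_t n_head, float max_bias) {
    const int n_head_log2 = 1u << (uint32_t) std::floor(std::log2((float) n_head));
    const float m0 = std::pow(2.0f, -max_bias / n_head_log2);
    const float m1 = std::pow(2.0f, -(max_bias / 2.0f) / n_head_log2);

    std::vector<float> slopes(n_head, 1.0f);  // slope == 1.0f when max_bias <= 0
    for (int64_t h = 0; h < n_head && max_bias > 0.0f; h++) {
        slopes[h] = h < n_head_log2
                        ? std::pow(m0, (float) (h + 1))                       // arange1: 1, 2, ..., n_head_log2
                        : std::pow(m1, (float) (2 * (h - n_head_log2) + 1));  // arange2: 1, 3, 5, ...
    }
    return slopes;
}

The first n_head_log2 heads take powers of m0 with exponents 1..n_head_log2 (arange1); any remaining heads take powers of m1 with odd exponents 1, 3, 5, ... (arange2), which is why the second range starts at 1 and steps by 2.
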
@@ -1396,44 +1404,46 @@ static void aclnn_add_alibi(ggml_backend_cann_context& ctx, ggml_tensor* mask, g
     int64_t nr3 = dst->ne[3] / mask->ne[3];

     // broadcast the mask across rows
-    int64_t mask_ne[] = {mask->ne[0], dst->ne[1], mask->ne[2], 1, mask->ne[3], 1};
-    size_t mask_nb[GGML_MAX_DIMS + 2];
-    mask_nb[0] = mask->nb[0];
-    mask_nb[1] = mask->nb[1];
-    mask_nb[2] = mask->nb[2];
-    mask_nb[3] = mask->nb[2];
-    mask_nb[4] = mask->nb[3];
-    mask_nb[5] = mask->nb[3];
-
-    // ne2 and ne3 may be integer multiples of the mask.
-    int64_t dst_ne[] = {dst->ne[0], dst->ne[1], mask->ne[2], nr2, mask->ne[3], nr3};
-    size_t dst_nb[GGML_MAX_DIMS + 2];
-    dst_nb[0] = ggml_element_size(dst);
-    for (int i = 1;i<GGML_MAX_DIMS + 2;i++) {
-        dst_nb[i] = dst_nb[i-1]* dst_ne[i-1];
-    }
+    int64_t mask_ne[] = { mask->ne[0], dst->ne[1], mask->ne[2], 1, mask->ne[3], 1 };
+    size_t  mask_nb[] = {
+        mask_nb[0] = mask->nb[0], mask_nb[1] = mask->nb[1], mask_nb[2] = mask->nb[2],
+        mask_nb[3] = mask->nb[2], mask_nb[4] = mask->nb[3], mask_nb[5] = mask->nb[3]
+    };
+
+    int64_t dst_ne[] = { dst->ne[0], dst->ne[1], mask->ne[2], nr2, mask->ne[3], nr3 };
+    size_t  dst_nb[] = {
+        dst_nb[0] = dst->nb[0], dst_nb[1] = dst->nb[1], dst_nb[2] = dst->nb[2],
+        dst_nb[3] = dst->nb[2], dst_nb[4] = dst->nb[3], dst_nb[5] = dst->nb[3]
+    };

     // slope is a 1 dim tensor, slope.ne2 == dst.ne2
-    int64_t slope_ne[] = {1, 1, mask->ne[2], nr2, 1, 1};
-    size_t slope_nb[GGML_MAX_DIMS + 2];
+    int64_t slope_ne[] = { 1, 1, mask->ne[2], nr2, 1, 1 };
+    size_t  slope_nb[GGML_MAX_DIMS + 2];
     slope_nb[0] = sizeof(float);
-    for (int i = 1;i<GGML_MAX_DIMS + 2;i++) {
-        slope_nb[i] = slope_nb[i-1] * slope_ne[i-1];
+    for (int i = 1; i < GGML_MAX_DIMS + 2; i++) {
+        slope_nb[i] = slope_nb[i - 1] * slope_ne[i - 1];
     }

-    aclTensor* acl_slope = ggml_cann_create_tensor(slope_buffer, ACL_FLOAT, sizeof(float), slope_ne, slope_nb, GGML_MAX_DIMS + 2);
-    aclTensor* acl_mask = ggml_cann_create_tensor(mask, mask_ne, mask_nb, GGML_MAX_DIMS + 2);
-    aclTensor* acl_dst = ggml_cann_create_tensor(dst_ptr, ggml_cann_type_mapping(dst->type),
-                                                 ggml_type_size(dst->type), dst_ne, dst_nb, GGML_MAX_DIMS + 2);
-
+    aclTensor * acl_slope = ggml_cann_create_tensor(
+        slope_buffer, ACL_FLOAT, sizeof(float),
+        slope_ne, slope_nb, GGML_MAX_DIMS + 2);
+    aclTensor * acl_mask = ggml_cann_create_tensor(
+        mask, mask_ne, mask_nb, GGML_MAX_DIMS + 2);
+    aclTensor * acl_dst = ggml_cann_create_tensor(
+        dst_ptr, ggml_cann_type_mapping(dst->type),
+        ggml_type_size(dst->type), dst_ne, dst_nb,
+        GGML_MAX_DIMS + 2);
+
     if (max_bias > 0.0f) {
-        int64_t bias_ne[] = {mask->ne[0], dst->ne[1], mask->ne[2], nr2, mask->ne[3], 1};
-        size_t bias_nb[GGML_MAX_DIMS + 2];
+        int64_t bias_ne[] = { mask->ne[0], dst->ne[1], mask->ne[2], nr2, mask->ne[3], 1 };
+        size_t  bias_nb[GGML_MAX_DIMS + 2];
         bias_nb[0] = sizeof(float);
-        for (int i = 1;i<GGML_MAX_DIMS + 2;i++) {
-            bias_nb[i] = bias_nb[i-1] * bias_ne[i-1];
+        for (int i = 1; i < GGML_MAX_DIMS + 2; i++) {
+            bias_nb[i] = bias_nb[i - 1] * bias_ne[i - 1];
         }
-        aclTensor* bias_tensor = ggml_cann_create_tensor(bias_buffer, ACL_FLOAT, sizeof(float), bias_ne, bias_nb, GGML_MAX_DIMS + 2);
+        aclTensor * bias_tensor = ggml_cann_create_tensor(
+            bias_buffer, ACL_FLOAT, sizeof(float),
+            bias_ne, bias_nb, GGML_MAX_DIMS + 2);

         aclnn_mul(ctx, acl_slope, acl_mask, bias_tensor);
         aclnn_add(ctx, acl_dst, bias_tensor);
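
As a worked example of the 6-dimensional views above (values chosen for illustration): with dst->ne = {S, T, 32, 1} (32 heads) and a shared mask with mask->ne = {S, T, 1, 1}, we get nr2 = 32 and nr3 = 1. The dst view becomes {S, T, 1, 32, 1, 1}, the mask view {S, T, 1, 1, 1, 1} (its size-1 dims broadcast the same mask across all 32 heads), and the slope view {1, 1, 1, 32, 1, 1} supplies one slope per head, so a single broadcast Mul produces bias = slope * mask and a single Add applies it to dst.
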
@@ -1444,7 +1454,7 @@ static void aclnn_add_alibi(ggml_backend_cann_context& ctx, ggml_tensor* mask, g
     ggml_cann_release_resources(ctx, acl_slope, acl_mask, acl_dst);
 }

-void ggml_cann_cpy(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
+void ggml_cann_cpy(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
     ggml_cann_dup(ctx, dst);
 }

@@ -1462,31 +1472,31 @@ void ggml_cann_cpy(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
  * @param acl_dst The destination tensor where the softmax results will be
  *                stored.
  */
-static void aclnn_softmax(ggml_backend_cann_context& ctx, aclTensor* acl_src,
-                          int64_t dim, aclTensor* acl_dst) {
+static void aclnn_softmax(ggml_backend_cann_context & ctx,
+                          aclTensor * acl_src, int64_t dim, aclTensor * acl_dst) {
     GGML_CANN_CALL_ACLNN_OP(ctx, Softmax, acl_src, dim, acl_dst);
 }

-void ggml_cann_softmax(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
-    ggml_tensor* src0 = dst->src[0];
-    ggml_tensor* src1 = dst->src[1];  // mask
+void ggml_cann_softmax(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
+    ggml_tensor * src0 = dst->src[0];
+    ggml_tensor * src1 = dst->src[1];  // mask

-    aclTensor* acl_src0 = ggml_cann_create_tensor(src0);
-    aclTensor* acl_dst = ggml_cann_create_tensor(dst);
+    aclTensor * acl_src0 = ggml_cann_create_tensor(src0);
+    aclTensor * acl_dst  = ggml_cann_create_tensor(dst);

-    float scale = 1.0f;
+    float scale    = 1.0f;
     float max_bias = 0.0f;

-    memcpy(&scale, (float *) dst->op_params + 0, sizeof(float));
-    memcpy(&max_bias, (float *) dst->op_params + 1, sizeof(float));
+    memcpy(&scale,    (float *) dst->op_params + 0, sizeof(float));
+    memcpy(&max_bias, (float *) dst->op_params + 1, sizeof(float));

     // input mul scale
-    aclScalar* acl_scale = aclCreateScalar(&scale, aclDataType::ACL_FLOAT);
+    aclScalar * acl_scale = aclCreateScalar(&scale, aclDataType::ACL_FLOAT);
     ggml_cann_pool_alloc src_tensor_allocator(ctx.pool(), ggml_nbytes(src0));
     void * src_tensor_buffer = src_tensor_allocator.get();
     aclTensor* softmax_tensor = ggml_cann_create_tensor(
-        src_tensor_buffer, ggml_cann_type_mapping(src0->type), ggml_element_size(src0), src0->ne,
-        src0->nb, GGML_MAX_DIMS);
+        src_tensor_buffer, ggml_cann_type_mapping(src0->type),
+        ggml_element_size(src0), src0->ne, src0->nb, GGML_MAX_DIMS);

     aclnn_muls(ctx, acl_src0, scale, softmax_tensor, false);

@@ -1496,8 +1506,7 @@ void ggml_cann_softmax(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
     }
     // softmax
     aclnn_softmax(ctx, softmax_tensor, 3, acl_dst);
-    ggml_cann_release_resources(ctx, acl_src0, acl_dst,
-        acl_scale, softmax_tensor);
+    ggml_cann_release_resources(ctx, acl_src0, acl_dst, acl_scale, softmax_tensor);
 }

 /**
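
Taken together, ggml_cann_softmax computes dst = softmax(scale * src0 + bias) along the innermost dimension, where bias is the mask scaled per head by the ALiBi slope (slope == 1 when max_bias is 0). The sketch below restates that math on the host for illustration only; the function name and loop structure are not from this file.

// Illustration-only reference of the math performed by the CANN kernels above:
// one row of dst = softmax(scale * src + slope * mask).
#include <cmath>
#include <cstddef>

static void ref_softmax_row(const float * src, const float * mask, float slope,
                            float scale, float * dst, size_t n) {
    float max_val = -INFINITY;
    for (size_t i = 0; i < n; i++) {
        dst[i]  = scale * src[i] + (mask ? slope * mask[i] : 0.0f);  // scaled logits + ALiBi bias
        max_val = std::fmax(max_val, dst[i]);
    }
    float sum = 0.0f;
    for (size_t i = 0; i < n; i++) {
        dst[i] = std::exp(dst[i] - max_val);  // max-subtraction for numerical stability
        sum   += dst[i];
    }
    for (size_t i = 0; i < n; i++) {
        dst[i] /= sum;
    }
}

On the device this corresponds to aclnn_muls for the scale, aclnn_add_alibi for the bias, and the Softmax operator over dim 3.
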