@@ -1329,30 +1329,74 @@ static void aclnn_pow_tensor_tensor(ggml_backend_cann_context& ctx,
13291329 GGML_CANN_CALL_ACLNN_OP (ctx, InplacePowTensorTensor, acl_dst, acl_exp);
13301330}
13311331
1332-
1332+ /* *
1333+ * @brief Generate a range of values and apply a scalar base exponentiation.
1334+ *
1335+ * This function creates an evenly spaced sequence from `start` to `stop` (exclusive),
1336+ * with step size `step`, stores it in a temporary buffer, and then computes:
1337+ *
1338+ * @f[
1339+ * slope[i] = m^{\left( start + i \cdot step \right)}, \quad 0 \le i < size
1340+ * @f]
1341+ *
1342+ * The results are written to the provided @p slope_buffer.
1343+ *
1344+ * @param ctx CANN backend context for memory allocation and operator execution.
1345+ * @param slope_buffer Pointer to the output buffer (float array) for the computed slope values.
1346+ * @param m Scalar base for the exponentiation.
1347+ * @param size Number of elements in the generated sequence.
1348+ * @param start Starting exponent offset.
1349+ * @param stop Stopping exponent offset (exclusive).
1350+ * @param step Step size for the exponent increment.
1351+ */
13331352static void aclnn_get_slope_inner (ggml_backend_cann_context& ctx, void * slope_buffer,
13341353 float m, int64_t size, float start, float stop, float step){
13351354 int64_t ne[] = {size};
13361355 size_t nb[] = {sizeof (float )};
13371356
13381357 ggml_cann_pool_alloc arange_allocator (ctx.pool (), size * sizeof (float ));
1339- void * arange_buffer = arange_allocator.get ();
1358+ void * arange_buffer = arange_allocator.get ();
13401359
1341- aclTensor * arange_tensor = ggml_cann_create_tensor (
1360+ aclTensor* arange_tensor = ggml_cann_create_tensor (
13421361 arange_buffer, ACL_FLOAT, sizeof (float ), ne, nb, 1 );
13431362 aclnn_arange (ctx, arange_tensor, start, stop, step, size);
13441363
1345- aclTensor * slope_tensor = ggml_cann_create_tensor (
1364+ aclTensor* slope_tensor = ggml_cann_create_tensor (
13461365 slope_buffer, ACL_FLOAT, sizeof (float ), ne, nb, 1 );
13471366
1348- aclScalar * sc = aclCreateScalar (&m, aclDataType::ACL_FLOAT);
1367+ aclScalar* sc = aclCreateScalar (&m, aclDataType::ACL_FLOAT);
13491368
13501369 GGML_CANN_CALL_ACLNN_OP (ctx, PowScalarTensor, sc, arange_tensor, slope_tensor);
13511370 ggml_cann_release_resources (ctx, sc, arange_tensor, slope_tensor);
13521371}
13531372
/**
 * @brief Compute slope values for multiple attention heads based on ALiBi bias parameters.
 *
 * This function generates slope values for each attention head according to the ALiBi
 * (Attention with Linear Biases) method. It splits the computation into two ranges depending
 * on whether the head index is less than @p n_head_log2 or not, and uses different base values
 * (`m0` and `m1`) for the exponentiation.
 *
 * @f[
 * slope[h] =
 * \begin{cases}
 * m_0^{(h + 1)}, & h < n\_head\_log2 \\
 * m_1^{\left( 2 \cdot (h - n\_head\_log2) + 1 \right)}, & h \geq n\_head\_log2
 * \end{cases}
 * \quad , \quad \text{if } max\_bias > 0
 * @f]
 *
 * If @p max_bias <= 0, all slope values are set to 1.0.
 *
 * @param ctx          CANN backend context for memory allocation and operator execution.
 * @param n_head       Total number of attention heads.
 * @param slope_buffer Pointer to the output buffer (float array) for storing slopes.
 * @param max_bias     Maximum bias value for slope computation.
 */
13541398static void aclnn_get_slope (ggml_backend_cann_context & ctx, int64_t n_head,
1355- void * slope_buffer, float max_bias) {
1399+ void * slope_buffer, float max_bias) {
13561400 const int n_head_log2 = 1u << (uint32_t ) floor (log2 (n_head));
13571401
13581402 float m0 = powf (2 .0f , -(max_bias) / n_head_log2);
@@ -1382,6 +1426,27 @@ static void aclnn_get_slope(ggml_backend_cann_context & ctx, int64_t n_head,
13821426 }
13831427}
13841428
/**
 * @brief Add ALiBi (Attention with Linear Biases) positional biases to the attention mask.
 *
 * This function computes the ALiBi slopes for each attention head (if max_bias > 0),
 * multiplies them with the attention mask to produce bias tensors, and adds these biases
 * to the destination tensor (@p dst).
 *
 * The function performs the necessary broadcasting of the mask and slope tensors to match
 * the shape of the destination tensor, then applies element-wise multiplication and addition
 * using CANN operators.
 *
 * @param ctx      CANN backend context for memory management and operator execution.
 * @param mask     Input attention mask tensor, assumed to be contiguous.
 * @param dst      Destination tensor to which the ALiBi biases will be added.
 * @param dst_ptr  Pointer to the memory of the destination tensor.
 * @param max_bias Maximum bias value controlling the slope scaling.
 *
 * @note
 * - Data is written into dst_ptr using only the shape information of the dst tensor.
 * - `GGML_MAX_DIMS + 2` is used to extend tensor dimensions for broadcasting.
 */
13851450static void aclnn_add_alibi (ggml_backend_cann_context& ctx, ggml_tensor* mask,
13861451 ggml_tensor* dst, void * dst_ptr, float max_bias) {
13871452 void * slope_buffer = nullptr ;
@@ -1399,7 +1464,6 @@ static void aclnn_add_alibi(ggml_backend_cann_context& ctx, ggml_tensor* mask,
13991464 }
14001465
14011466 // broadcast for mask, slop and dst;
1402- GGML_ASSERT (ggml_is_contiguous (mask));
14031467 int64_t nr2 = dst->ne [2 ] / mask->ne [2 ];
14041468 int64_t nr3 = dst->ne [3 ] / mask->ne [3 ];
14051469
@@ -1424,12 +1488,14 @@ static void aclnn_add_alibi(ggml_backend_cann_context& ctx, ggml_tensor* mask,
14241488 slope_nb[i] = slope_nb[i - 1 ] * slope_ne[i - 1 ];
14251489 }
14261490
1427- aclTensor * acl_slope = ggml_cann_create_tensor (
1491+ aclTensor* acl_slope = ggml_cann_create_tensor (
14281492 slope_buffer, ACL_FLOAT, sizeof (float ),
14291493 slope_ne, slope_nb, GGML_MAX_DIMS + 2 );
1430- aclTensor * acl_mask = ggml_cann_create_tensor (
1494+ aclTensor* acl_mask = ggml_cann_create_tensor (
14311495 mask, mask_ne, mask_nb, GGML_MAX_DIMS + 2 );
1432- aclTensor * acl_dst = ggml_cann_create_tensor (
1496+
1497+ // write data into dst_ptr using only the shape information of the dst tensor.
1498+ aclTensor* acl_dst = ggml_cann_create_tensor (
14331499 dst_ptr, ggml_cann_type_mapping (dst->type ),
14341500 ggml_type_size (dst->type ), dst_ne, dst_nb,
14351501 GGML_MAX_DIMS + 2 );
@@ -1441,7 +1507,7 @@ static void aclnn_add_alibi(ggml_backend_cann_context& ctx, ggml_tensor* mask,
14411507 for (int i = 1 ; i < GGML_MAX_DIMS + 2 ; i++) {
14421508 bias_nb[i] = bias_nb[i - 1 ] * bias_ne[i - 1 ];
14431509 }
1444- aclTensor * bias_tensor = ggml_cann_create_tensor (
1510+ aclTensor* bias_tensor = ggml_cann_create_tensor (
14451511 bias_buffer, ACL_FLOAT, sizeof (float ),
14461512 bias_ne, bias_nb, GGML_MAX_DIMS + 2 );
14471513
@@ -1473,16 +1539,16 @@ void ggml_cann_cpy(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
14731539 * stored.
14741540 */
14751541static void aclnn_softmax (ggml_backend_cann_context & ctx,
1476- aclTensor * acl_src, int64_t dim, aclTensor * acl_dst) {
1542+ aclTensor* acl_src, int64_t dim, aclTensor * acl_dst) {
14771543 GGML_CANN_CALL_ACLNN_OP (ctx, Softmax, acl_src, dim, acl_dst);
14781544}
14791545
14801546void ggml_cann_softmax (ggml_backend_cann_context & ctx, ggml_tensor * dst) {
1481- ggml_tensor * src0 = dst->src [0 ];
1482- ggml_tensor * src1 = dst->src [1 ]; // mask
1547+ ggml_tensor* src0 = dst->src [0 ];
1548+ ggml_tensor* src1 = dst->src [1 ]; // mask
14831549
1484- aclTensor * acl_src0 = ggml_cann_create_tensor (src0);
1485- aclTensor * acl_dst = ggml_cann_create_tensor (dst);
1550+ aclTensor* acl_src0 = ggml_cann_create_tensor (src0);
1551+ aclTensor* acl_dst = ggml_cann_create_tensor (dst);
14861552
14871553 float scale = 1 .0f ;
14881554 float max_bias = 0 .0f ;
@@ -1491,7 +1557,7 @@ void ggml_cann_softmax(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
14911557 memcpy (&max_bias, (float *) dst->op_params + 1 , sizeof (float ));
14921558
14931559 // input mul scale
1494- aclScalar * acl_scale = aclCreateScalar (&scale, aclDataType::ACL_FLOAT);
1560+ aclScalar* acl_scale = aclCreateScalar (&scale, aclDataType::ACL_FLOAT);
14951561 ggml_cann_pool_alloc src_tensor_allocator (ctx.pool (), ggml_nbytes (src0));
14961562 void * src_tensor_buffer = src_tensor_allocator.get ();
14971563 aclTensor* softmax_tensor = ggml_cann_create_tensor (
0 commit comments