File tree Expand file tree Collapse file tree 1 file changed +21
-1
lines changed Expand file tree Collapse file tree 1 file changed +21
-1
lines changed Original file line number Diff line number Diff line change @@ -477,7 +477,7 @@ typedef struct {
477477} block_q6_K;
478478static_assert (sizeof (block_q6_K) == sizeof (ggml_fp16_t ) + 13 *QK_K/16 , " wrong q6_K block size/padding" );
479479
480- #define QR2_XXS 4
480+ #define QR2_XXS 8
481481#define QI2_XXS (QK_K / (4 *QR2_XXS))
482482typedef struct {
483483 half d;
@@ -3955,6 +3955,25 @@ static __device__ __forceinline__ float vec_dot_iq2_xxs_q8_1(
39553955#if QK_K == 256
39563956 const block_iq2_xxs * bq2 = (const block_iq2_xxs *) vbq;
39573957
3958+ #if QR2_XXS == 8
3959+ const int ib32 = iqs;
3960+ const uint16_t * q2 = bq2->qs + 4 *ib32;
3961+ const uint8_t * aux8 = (const uint8_t *)q2;
3962+ const int8_t * q8 = bq8_1[ib32].qs ;
3963+ uint32_t aux32 = q2[2 ] | (q2[3 ] << 16 );
3964+ int sumi = 0 ;
3965+ for (int l = 0 ; l < 4 ; ++l) {
3966+ const uint8_t * grid = (const uint8_t *)(kgrid_iq2xxs + aux8[l]);
3967+ const uint8_t signs = ksigns_iq2xs[aux32 & 127 ];
3968+ for (int j = 0 ; j < 8 ; ++j) {
3969+ sumi += q8[j] * grid[j] * (signs & kmask_iq2xs[j] ? -1 : 1 );
3970+ }
3971+ q8 += 8 ;
3972+ aux32 >>= 7 ;
3973+ }
3974+ const float d = (float )bq2->d * (0 .5f + aux32) * (float )bq8_1[ib32].ds .x * 0 .25f ;
3975+ return d * sumi;
3976+ #else
39583977 // iqs is 0...15
39593978 const int ib32 = iqs/2 ;
39603979 const int il = iqs%2 ;
@@ -3973,6 +3992,7 @@ static __device__ __forceinline__ float vec_dot_iq2_xxs_q8_1(
39733992 sumi2 += q8[j+8 ] * grid2[j] * (signs2 & kmask_iq2xs[j] ? -1 : 1 );
39743993 }
39753994 return d * (sumi1 + sumi2);
3995+ #endif
39763996#else
39773997 assert (false );
39783998 return 0 .f ;
You can’t perform that action at this time.
0 commit comments