@@ -414,8 +414,8 @@ TEST_F(KVCacheManagerTest, BlockManagerTestWindowSizeToShare)
414
414
{
415
415
std::map<SizeType32, std::vector<SizeType32>> windowSizeToLayers{
416
416
{1024 , {1 }}, // contribution = 1024*1 = 1024
417
- {4096 , {0 , 4 , 5 }}, // contribution = 4096*3 = 12288
418
- {8192 , {2 , 3 }}, // contribution = 8192*2 = 16384
417
+ {4096 , {0 , 4 , 5 }}, // contribution = 4096*1 = 4096
418
+ {8192 , {2 , 3 }}, // contribution = 8192*1 = 8192
419
419
};
420
420
// Use identical cache size per token across window sizes for simplicity.
421
421
std::map<SizeType32, SizeType32> cacheSizePerTokenPerWindow{{1024 , 1 }, {4096 , 1 }, {8192 , 1 }};
@@ -431,9 +431,9 @@ TEST_F(KVCacheManagerTest, BlockManagerTestWindowSizeToShare)
431
431
// Calculate expected shares based on contributions.
432
432
std::map<SizeType32, float > expectedShares;
433
433
std::map<SizeType32, SizeType32> contributions;
434
- for (auto const & [windowSize, layers ] : windowSizeToLayers)
434
+ for (auto const & [windowSize, _ ] : windowSizeToLayers)
435
435
{
436
- contributions[windowSize] = windowSize * static_cast <SizeType32>(layers. size ()) ;
436
+ contributions[windowSize] = windowSize * 1 . 0f ;
437
437
}
438
438
auto const totalContribution = std::accumulate (contributions.begin (), contributions.end (), 0 .0f ,
439
439
[](float sum, auto const & kv) { return sum + kv.second ; });
@@ -445,27 +445,28 @@ TEST_F(KVCacheManagerTest, BlockManagerTestWindowSizeToShare)
445
445
}
446
446
447
447
// Verify the exact hard-coded values mentioned in the comment, i.e. each window's share of the total contribution (1024 + 4096 + 8192 = 13312).
448
- EXPECT_NEAR (result.at (1024 ), 0 .0345f , 1e-4f );
449
- EXPECT_NEAR (result.at (4096 ), 0 .4138f , 1e-4f );
450
- EXPECT_NEAR (result.at (8192 ), 0 .5517f , 1e-4f );
448
+ EXPECT_NEAR (result.at (1024 ), 0 .0769f , 1e-4f );
449
+ EXPECT_NEAR (result.at (4096 ), 0 .3077f , 1e-4f );
450
+ EXPECT_NEAR (result.at (8192 ), 0 .6154f , 1e-4f );
451
451
452
452
// Verify that when shares are converted to actual block counts, they match expected values.
453
453
auto getRoundedBlocks
454
454
= [&](float share) { return static_cast <SizeType32>(std::round (share * numPrimaryBlocks)); };
455
- EXPECT_EQ (getRoundedBlocks (result.at (1024 )), 565 );
456
- EXPECT_EQ (getRoundedBlocks (result.at (4096 )), 6780 );
457
- EXPECT_EQ (getRoundedBlocks (result.at (8192 )), 9039 );
455
+ EXPECT_EQ (getRoundedBlocks (result.at (1024 )), 1260 );
456
+ EXPECT_EQ (getRoundedBlocks (result.at (4096 )), 5041 );
457
+ EXPECT_EQ (getRoundedBlocks (result.at (8192 )), 10082 );
458
458
}
459
459
460
460
// Variable window size with different cache sizes per token per window
461
461
{
462
462
std::map<SizeType32, std::vector<SizeType32>> windowSizeToLayers{
463
- {1024 , {1 }}, // contribution = 1024*1*2 = 2048 (cache size per token = 2)
464
- {4096 , {0 , 4 , 5 }}, // contribution = 4096*3*4 = 49152 (cache size per token = 4)
465
- {8192 , {2 , 3 }}, // contribution = 8192*2*1 = 16384 (cache size per token = 1)
463
+ {1024 , {1 }}, // contribution = 1024*( 1*2) = 2048 (cache size per token per layer = 2)
464
+ {4096 , {0 , 4 , 5 }}, // contribution = 4096*( 3*4) = 49152 (cache size per token per layer = 4)
465
+ {8192 , {2 , 3 }}, // contribution = 8192*( 2*1) = 16384 (cache size per token per layer = 1)
466
466
};
467
- // Different cache sizes per token per window
468
- std::map<SizeType32, SizeType32> cacheSizePerTokenPerWindow{{1024 , 2 }, {4096 , 4 }, {8192 , 1 }};
467
+ // Different cache sizes per token per window.
468
+ // Each cacheSizePerTokenPerWindow value is the per-layer cache size per token summed over all layers that share the given window size.
469
+ std::map<SizeType32, SizeType32> cacheSizePerTokenPerWindow{{1024 , 2 }, {4096 , 12 }, {8192 , 2 }};
469
470
470
471
auto result = BlockManager::calculateWindowSizeToShare (windowSizeToLayers, cacheSizePerTokenPerWindow);
471
472
EXPECT_EQ (result.size (), 3 );
@@ -478,10 +479,10 @@ TEST_F(KVCacheManagerTest, BlockManagerTestWindowSizeToShare)
478
479
// Calculate expected shares based on contributions with different cache sizes per token.
479
480
std::map<SizeType32, float > expectedShares;
480
481
std::map<SizeType32, SizeType32> contributions;
481
- for (auto const & [windowSize, layers ] : windowSizeToLayers)
482
+ for (auto const & [windowSize, _ ] : windowSizeToLayers)
482
483
{
483
484
auto const cacheSizePerToken = cacheSizePerTokenPerWindow.at (windowSize);
484
- contributions[windowSize] = windowSize * static_cast <SizeType32>(layers. size ()) * cacheSizePerToken;
485
+ contributions[windowSize] = windowSize * cacheSizePerToken;
485
486
}
486
487
auto const totalContribution = std::accumulate (contributions.begin (), contributions.end (), 0 .0f ,
487
488
[](float sum, auto const & kv) { return sum + kv.second ; });
0 commit comments