Skip to content

Commit 069fc1f

Browse files
authored
Bucketization for IN (#36370)
1 parent d60be54 commit 069fc1f

13 files changed

+464
-148
lines changed

src/EFCore.Relational/Query/SqlNullabilityProcessor.cs

Lines changed: 76 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -131,23 +131,18 @@ protected override Expression VisitExtension(Expression node)
131131
case ParameterTranslationMode.MultipleParameters:
132132
{
133133
var expandedParameters = _collectionParameterExpansionMap.GetOrAddNew(valuesParameter);
134-
for (var i = 0; i < values.Count; i++)
134+
var expandedParametersCounter = 0;
135+
foreach (var value in values)
135136
{
136137
// Create parameter for value if we didn't create it yet,
137138
// otherwise reuse it.
138-
if (expandedParameters.Count <= i)
139-
{
140-
var parameterName = Uniquifier.Uniquify(valuesParameter.Name, queryParameters, int.MaxValue);
141-
queryParameters.Add(parameterName, values[i]);
142-
var parameterExpression = new SqlParameterExpression(parameterName, values[i]?.GetType() ?? typeof(object), elementTypeMapping);
143-
expandedParameters.Add(parameterExpression);
144-
}
139+
ExpandParameterIfNeeded(valuesParameter.Name, expandedParameters, queryParameters, expandedParametersCounter, value, elementTypeMapping);
145140

146141
processedValues.Add(
147142
new RowValueExpression(
148143
ProcessValuesOrderingColumn(
149144
valuesExpression,
150-
[expandedParameters[i]],
145+
[expandedParameters[expandedParametersCounter++]],
151146
intTypeMapping,
152147
ref valuesOrderingCounter)));
153148
}
@@ -814,41 +809,35 @@ InExpression ProcessInExpressionValues(
814809

815810
processedValues = [];
816811

812+
var translationMode = valuesParameter.TranslationMode ?? CollectionParameterTranslationMode;
817813
var expandedParameters = _collectionParameterExpansionMap.GetOrAddNew(valuesParameter);
818814
var expandedParametersCounter = 0;
819-
for (var i = 0; i < values.Count; i++)
815+
foreach (var value in values)
820816
{
821-
if (values[i] is null && removeNulls)
817+
if (value is null && removeNulls)
822818
{
823819
hasNull = true;
824820
continue;
825821
}
826822

827-
switch (valuesParameter.TranslationMode ?? CollectionParameterTranslationMode)
823+
switch (translationMode)
828824
{
829825
case ParameterTranslationMode.MultipleParameters:
830826
// see #36311 for more info
831827
case ParameterTranslationMode.Parameter:
832828
{
833829
// Create parameter for value if we didn't create it yet,
834830
// otherwise reuse it.
835-
if (expandedParameters.Count <= i)
836-
{
837-
var parameterName = Uniquifier.Uniquify(valuesParameter.Name, parameters, int.MaxValue);
838-
parameters.Add(parameterName, values[i]);
839-
var parameterExpression = new SqlParameterExpression(parameterName, values[i]?.GetType() ?? typeof(object), elementTypeMapping);
840-
expandedParameters.Add(parameterExpression);
841-
}
831+
ExpandParameterIfNeeded(valuesParameter.Name, expandedParameters, parameters, expandedParametersCounter, value, elementTypeMapping);
842832

843-
// Use separate counter, because we may skip nulls.
844833
processedValues.Add(expandedParameters[expandedParametersCounter++]);
845834

846835
break;
847836
}
848837

849838
case ParameterTranslationMode.Constant:
850839
{
851-
processedValues.Add(_sqlExpressionFactory.Constant(values[i], values[i]?.GetType() ?? typeof(object), sensitive: true, elementTypeMapping));
840+
processedValues.Add(_sqlExpressionFactory.Constant(value, value?.GetType() ?? typeof(object), sensitive: true, elementTypeMapping));
852841

853842
break;
854843
}
@@ -857,6 +846,38 @@ InExpression ProcessInExpressionValues(
857846
throw new UnreachableException();
858847
}
859848
}
849+
850+
// Bucketization is a process used to group parameters into "buckets" of a fixed size when generating parameterized collections.
851+
// This helps mitigate query plan bloat by reducing the number of unique query plans generated for queries with varying numbers
852+
// of parameters. Instead of creating a new query plan for every possible parameter count, bucketization ensures that queries
853+
// with similar parameter counts share the same query plan.
854+
//
855+
// The size of each bucket is determined by the CalculateParameterBucketSize method, which dynamically calculates the bucket size
856+
// based on the total number of parameters and the type mapping of the collection elements. For example, smaller collections may
857+
// use smaller bucket sizes, while larger collections may use larger bucket sizes to balance performance and memory usage.
858+
//
859+
// If the number of parameters in the collection is not a multiple of the bucket size, padding is added to ensure the collection
860+
// fits into the nearest bucket. This padding uses the last value in the collection to fill the remaining slots.
861+
//
862+
// Providers can effectively disable bucketization by overriding the CalculateParameterBucketSize method to always return 1.
863+
//
864+
// Example:
865+
// Suppose a query has 12 parameters, and the bucket size is calculated as 10. The query will be padded with 8 additional
866+
// parameters (using the last value) to fit into the next bucket size of 20. This ensures that queries with 12, 13, or 19
867+
// parameters all share the same query plan, reducing query plan fragmentation.
868+
if (translationMode is ParameterTranslationMode.MultipleParameters)
869+
{
870+
var padFactor = CalculateParameterBucketSize(values.Count, elementTypeMapping);
871+
var padding = (padFactor - (values.Count % padFactor)) % padFactor;
872+
for (var i = 0; i < padding; i++)
873+
{
874+
// Create parameter for value if we didn't create it yet,
875+
// otherwise reuse it.
876+
ExpandParameterIfNeeded(valuesParameter.Name, expandedParameters, parameters, values.Count + i, values[^1], elementTypeMapping);
877+
878+
processedValues.Add(expandedParameters[expandedParametersCounter++]);
879+
}
880+
}
860881
}
861882
else
862883
{
@@ -1488,6 +1509,23 @@ protected virtual SqlExpression VisitJsonScalar(
14881509
protected virtual bool PreferExistsToInWithCoalesce
14891510
=> false;
14901511

1512+
/// <summary>
1513+
/// Gets the bucket size into which the parameters are padded when generating a parameterized collection
1514+
/// when using multiple parameters. This helps with query plan bloat.
1515+
/// </summary>
1516+
/// <param name="count">Number of value parameters.</param>
1517+
/// <param name="elementTypeMapping">The type mapping for the collection element.</param>
1518+
[EntityFrameworkInternal]
1519+
protected virtual int CalculateParameterBucketSize(int count, RelationalTypeMapping elementTypeMapping)
1520+
=> count switch
1521+
{
1522+
<= 5 => 1,
1523+
<= 150 => 10,
1524+
<= 750 => 50,
1525+
<= 2000 => 100,
1526+
_ => 200,
1527+
};
1528+
14911529
// Note that we can check parameter values for null since we cache by the parameter nullability; but we cannot do the same for bool.
14921530
private bool IsNull(SqlExpression? expression)
14931531
=> expression is SqlConstantExpression { Value: null }
@@ -2121,4 +2159,21 @@ private SqlExpression ProcessNullNotNull(SqlExpression sqlExpression, bool opera
21212159

21222160
private static bool IsLogicalNot(SqlUnaryExpression? sqlUnaryExpression)
21232161
=> sqlUnaryExpression is { OperatorType: ExpressionType.Not } && sqlUnaryExpression.Type == typeof(bool);
2162+
2163+
/// <summary>
///     Appends a new <see cref="SqlParameterExpression" /> to <paramref name="expandedParameters" /> when the
///     list has no entry at <paramref name="index" /> yet (its count is less than or equal to the index);
///     otherwise does nothing. At most one parameter is appended per call.
/// </summary>
/// <param name="valuesParameterName">Name of the collection parameter; used as the base for the new, uniquified name.</param>
/// <param name="expandedParameters">The parameter expressions already created for the collection parameter.</param>
/// <param name="parameters">The query parameter values; the new parameter's value is registered here under its new name.</param>
/// <param name="index">The position in <paramref name="expandedParameters" /> the caller is about to read.</param>
/// <param name="value">The value for the new parameter.</param>
/// <param name="typeMapping">The type mapping assigned to the new parameter expression.</param>
private static void ExpandParameterIfNeeded(
    string valuesParameterName,
    List<SqlParameterExpression> expandedParameters,
    Dictionary<string, object?> parameters,
    int index,
    object? value,
    RelationalTypeMapping typeMapping)
{
    // An expression already exists at this position; the caller reuses it.
    if (index < expandedParameters.Count)
    {
        return;
    }

    // Register the value under a name that does not clash with any existing query parameter.
    var uniqueName = Uniquifier.Uniquify(valuesParameterName, parameters, int.MaxValue);
    parameters.Add(uniqueName, value);

    // A null value carries no runtime type, so fall back to object.
    var clrType = value?.GetType() ?? typeof(object);
    expandedParameters.Add(new SqlParameterExpression(uniqueName, clrType, typeMapping));
}
21242179
}

src/EFCore.SqlServer/Query/Internal/SqlServerSqlNullabilityProcessor.cs

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ namespace Microsoft.EntityFrameworkCore.SqlServer.Query.Internal;
1616
/// </summary>
1717
public class SqlServerSqlNullabilityProcessor : SqlNullabilityProcessor
1818
{
19+
private const int MaxParameterCount = 2100;
20+
1921
/// <summary>
2022
/// This is an internal API that supports the Entity Framework Core infrastructure and not subject to
2123
/// the same compatibility standards as public APIs. It may be changed or removed without notice in
@@ -280,6 +282,19 @@ protected override SqlExpression VisitIn(InExpression inExpression, bool allowOp
280282
}
281283
}
282284

285+
/// <inheritdoc />
/// <remarks>
///     Uses the same buckets as the relational default up to 2000 values, then pads less aggressively (and
///     finally not at all) as the count approaches <c>MaxParameterCount</c>.
/// </remarks>
protected override int CalculateParameterBucketSize(int count, RelationalTypeMapping elementTypeMapping)
    => count switch
    {
        <= 5 => 1,
        <= 150 => 10,
        <= 750 => 50,
        <= 2000 => 100,
        <= 2070 => 10, // try not to over-pad as we approach the parameter limit

        // Don't pad at all between 2070 and the limit. The bucket size is used as a divisor when the padding
        // is calculated, so "no padding" has to be expressed as 1; returning 0 would throw
        // DivideByZeroException.
        <= MaxParameterCount => 1,
        _ => 200,
    };
297+
283298
private bool TryHandleOverLimitParameters(
284299
SqlParameterExpression valuesParameter,
285300
RelationalTypeMapping typeMapping,
@@ -294,7 +309,7 @@ private bool TryHandleOverLimitParameters(
294309
// SQL Server has limit on number of parameters in a query.
295310
// If we're over that limit, we switch to using single parameter
296311
// and processing it through JSON functions.
297-
if (values.Count > 2098)
312+
if (values.Count > MaxParameterCount)
298313
{
299314
if (_sqlServerSingletonOptions.SupportsJsonFunctions)
300315
{

test/EFCore.Relational.Specification.Tests/Query/NonSharedPrimitiveCollectionsQueryRelationalTestBase.cs

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,27 @@ public virtual async Task Parameter_collection_Contains_with_default_mode_EF_Mul
215215
Assert.Equivalent(new[] { 2 }, result);
216216
}
217217

218+
[ConditionalFact]
public virtual async Task Parameter_collection_Contains_parameter_bucketization()
{
    // Arrange: three entities, with collection parameters translated as multiple parameters.
    var factory = await InitializeAsync<TestContext>(
        onConfiguring: optionsBuilder => SetParameterizedCollectionMode(optionsBuilder, ParameterTranslationMode.MultipleParameters),
        seed: seedContext =>
        {
            seedContext.AddRange(
                new TestEntity { Id = 1 },
                new TestEntity { Id = 2 },
                new TestEntity { Id = 100 });
            return seedContext.SaveChangesAsync();
        });

    await using var context = factory.CreateContext();

    // Act: 12 values; with the default relational bucket sizes this count falls into the bucket of 10,
    // so the parameter list is padded. Only Id 2 exists in the seeded data.
    var ints = new[] { 2, 999, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 };
    var matchingIdsQuery = context.Set<TestEntity>()
        .Where(c => ints.Contains(c.Id))
        .Select(c => c.Id);
    var matchingIds = await matchingIdsQuery.ToListAsync();

    // Assert: padding must not change the result.
    Assert.Equivalent(new[] { 2 }, matchingIds);
}
238+
218239
protected class TestOwner
219240
{
220241
public int Id { get; set; }

0 commit comments

Comments
 (0)