Skip to content

Commit 7134a49

Browse files
committed
Use ordered reduction intrinsics for integer reductions
Only the ordered reduction intrinsics have an implementation in rustc_const_eval.
1 parent 50134e1 commit 7134a49

File tree

2 files changed

+30
-32
lines changed

2 files changed

+30
-32
lines changed

crates/core_arch/src/x86/avx512bw.rs

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -4810,7 +4810,7 @@ pub fn _mm_mask_cmp_epi8_mask<const IMM8: i32>(k1: __mmask16, a: __m128i, b: __m
48104810
#[target_feature(enable = "avx512bw,avx512vl")]
48114811
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
48124812
pub fn _mm256_reduce_add_epi16(a: __m256i) -> i16 {
4813-
unsafe { simd_reduce_add_unordered(a.as_i16x16()) }
4813+
unsafe { simd_reduce_add_ordered(a.as_i16x16(), 0) }
48144814
}
48154815

48164816
/// Reduce the packed 16-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
@@ -4820,7 +4820,7 @@ pub fn _mm256_reduce_add_epi16(a: __m256i) -> i16 {
48204820
#[target_feature(enable = "avx512bw,avx512vl")]
48214821
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
48224822
pub fn _mm256_mask_reduce_add_epi16(k: __mmask16, a: __m256i) -> i16 {
4823-
unsafe { simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i16x16(), i16x16::ZERO)) }
4823+
unsafe { simd_reduce_add_ordered(simd_select_bitmask(k, a.as_i16x16(), i16x16::ZERO), 0) }
48244824
}
48254825

48264826
/// Reduce the packed 16-bit integers in a by addition. Returns the sum of all elements in a.
@@ -4830,7 +4830,7 @@ pub fn _mm256_mask_reduce_add_epi16(k: __mmask16, a: __m256i) -> i16 {
48304830
#[target_feature(enable = "avx512bw,avx512vl")]
48314831
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
48324832
pub fn _mm_reduce_add_epi16(a: __m128i) -> i16 {
4833-
unsafe { simd_reduce_add_unordered(a.as_i16x8()) }
4833+
unsafe { simd_reduce_add_ordered(a.as_i16x8(), 0) }
48344834
}
48354835

48364836
/// Reduce the packed 16-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
@@ -4840,7 +4840,7 @@ pub fn _mm_reduce_add_epi16(a: __m128i) -> i16 {
48404840
#[target_feature(enable = "avx512bw,avx512vl")]
48414841
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
48424842
pub fn _mm_mask_reduce_add_epi16(k: __mmask8, a: __m128i) -> i16 {
4843-
unsafe { simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i16x8(), i16x8::ZERO)) }
4843+
unsafe { simd_reduce_add_ordered(simd_select_bitmask(k, a.as_i16x8(), i16x8::ZERO), 0) }
48444844
}
48454845

48464846
/// Reduce the packed 8-bit integers in a by addition. Returns the sum of all elements in a.
@@ -4850,7 +4850,7 @@ pub fn _mm_mask_reduce_add_epi16(k: __mmask8, a: __m128i) -> i16 {
48504850
#[target_feature(enable = "avx512bw,avx512vl")]
48514851
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
48524852
pub fn _mm256_reduce_add_epi8(a: __m256i) -> i8 {
4853-
unsafe { simd_reduce_add_unordered(a.as_i8x32()) }
4853+
unsafe { simd_reduce_add_ordered(a.as_i8x32(), 0) }
48544854
}
48554855

48564856
/// Reduce the packed 8-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
@@ -4860,7 +4860,7 @@ pub fn _mm256_reduce_add_epi8(a: __m256i) -> i8 {
48604860
#[target_feature(enable = "avx512bw,avx512vl")]
48614861
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
48624862
pub fn _mm256_mask_reduce_add_epi8(k: __mmask32, a: __m256i) -> i8 {
4863-
unsafe { simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i8x32(), i8x32::ZERO)) }
4863+
unsafe { simd_reduce_add_ordered(simd_select_bitmask(k, a.as_i8x32(), i8x32::ZERO), 0) }
48644864
}
48654865

48664866
/// Reduce the packed 8-bit integers in a by addition. Returns the sum of all elements in a.
@@ -4870,7 +4870,7 @@ pub fn _mm256_mask_reduce_add_epi8(k: __mmask32, a: __m256i) -> i8 {
48704870
#[target_feature(enable = "avx512bw,avx512vl")]
48714871
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
48724872
pub fn _mm_reduce_add_epi8(a: __m128i) -> i8 {
4873-
unsafe { simd_reduce_add_unordered(a.as_i8x16()) }
4873+
unsafe { simd_reduce_add_ordered(a.as_i8x16(), 0) }
48744874
}
48754875

48764876
/// Reduce the packed 8-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
@@ -4880,7 +4880,7 @@ pub fn _mm_reduce_add_epi8(a: __m128i) -> i8 {
48804880
#[target_feature(enable = "avx512bw,avx512vl")]
48814881
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
48824882
pub fn _mm_mask_reduce_add_epi8(k: __mmask16, a: __m128i) -> i8 {
4883-
unsafe { simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i8x16(), i8x16::ZERO)) }
4883+
unsafe { simd_reduce_add_ordered(simd_select_bitmask(k, a.as_i8x16(), i8x16::ZERO), 0) }
48844884
}
48854885

48864886
/// Reduce the packed 16-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
@@ -5314,7 +5314,7 @@ pub fn _mm_mask_reduce_min_epu8(k: __mmask16, a: __m128i) -> u8 {
53145314
#[target_feature(enable = "avx512bw,avx512vl")]
53155315
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
53165316
pub fn _mm256_reduce_mul_epi16(a: __m256i) -> i16 {
5317-
unsafe { simd_reduce_mul_unordered(a.as_i16x16()) }
5317+
unsafe { simd_reduce_mul_ordered(a.as_i16x16(), 1) }
53185318
}
53195319

53205320
/// Reduce the packed 16-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
@@ -5324,7 +5324,7 @@ pub fn _mm256_reduce_mul_epi16(a: __m256i) -> i16 {
53245324
#[target_feature(enable = "avx512bw,avx512vl")]
53255325
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
53265326
pub fn _mm256_mask_reduce_mul_epi16(k: __mmask16, a: __m256i) -> i16 {
5327-
unsafe { simd_reduce_mul_unordered(simd_select_bitmask(k, a.as_i16x16(), i16x16::splat(1))) }
5327+
unsafe { simd_reduce_mul_ordered(simd_select_bitmask(k, a.as_i16x16(), i16x16::splat(1)), 1) }
53285328
}
53295329

53305330
/// Reduce the packed 16-bit integers in a by multiplication. Returns the product of all elements in a.
@@ -5334,7 +5334,7 @@ pub fn _mm256_mask_reduce_mul_epi16(k: __mmask16, a: __m256i) -> i16 {
53345334
#[target_feature(enable = "avx512bw,avx512vl")]
53355335
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
53365336
pub fn _mm_reduce_mul_epi16(a: __m128i) -> i16 {
5337-
unsafe { simd_reduce_mul_unordered(a.as_i16x8()) }
5337+
unsafe { simd_reduce_mul_ordered(a.as_i16x8(), 1) }
53385338
}
53395339

53405340
/// Reduce the packed 16-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
@@ -5344,7 +5344,7 @@ pub fn _mm_reduce_mul_epi16(a: __m128i) -> i16 {
53445344
#[target_feature(enable = "avx512bw,avx512vl")]
53455345
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
53465346
pub fn _mm_mask_reduce_mul_epi16(k: __mmask8, a: __m128i) -> i16 {
5347-
unsafe { simd_reduce_mul_unordered(simd_select_bitmask(k, a.as_i16x8(), i16x8::splat(1))) }
5347+
unsafe { simd_reduce_mul_ordered(simd_select_bitmask(k, a.as_i16x8(), i16x8::splat(1)), 1) }
53485348
}
53495349

53505350
/// Reduce the packed 8-bit integers in a by multiplication. Returns the product of all elements in a.
@@ -5354,7 +5354,7 @@ pub fn _mm_mask_reduce_mul_epi16(k: __mmask8, a: __m128i) -> i16 {
53545354
#[target_feature(enable = "avx512bw,avx512vl")]
53555355
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
53565356
pub fn _mm256_reduce_mul_epi8(a: __m256i) -> i8 {
5357-
unsafe { simd_reduce_mul_unordered(a.as_i8x32()) }
5357+
unsafe { simd_reduce_mul_ordered(a.as_i8x32(), 1) }
53585358
}
53595359

53605360
/// Reduce the packed 8-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
@@ -5364,7 +5364,7 @@ pub fn _mm256_reduce_mul_epi8(a: __m256i) -> i8 {
53645364
#[target_feature(enable = "avx512bw,avx512vl")]
53655365
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
53665366
pub fn _mm256_mask_reduce_mul_epi8(k: __mmask32, a: __m256i) -> i8 {
5367-
unsafe { simd_reduce_mul_unordered(simd_select_bitmask(k, a.as_i8x32(), i8x32::splat(1))) }
5367+
unsafe { simd_reduce_mul_ordered(simd_select_bitmask(k, a.as_i8x32(), i8x32::splat(1)), 1) }
53685368
}
53695369

53705370
/// Reduce the packed 8-bit integers in a by multiplication. Returns the product of all elements in a.
@@ -5374,7 +5374,7 @@ pub fn _mm256_mask_reduce_mul_epi8(k: __mmask32, a: __m256i) -> i8 {
53745374
#[target_feature(enable = "avx512bw,avx512vl")]
53755375
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
53765376
pub fn _mm_reduce_mul_epi8(a: __m128i) -> i8 {
5377-
unsafe { simd_reduce_mul_unordered(a.as_i8x16()) }
5377+
unsafe { simd_reduce_mul_ordered(a.as_i8x16(), 1) }
53785378
}
53795379

53805380
/// Reduce the packed 8-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
@@ -5384,7 +5384,7 @@ pub fn _mm_reduce_mul_epi8(a: __m128i) -> i8 {
53845384
#[target_feature(enable = "avx512bw,avx512vl")]
53855385
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
53865386
pub fn _mm_mask_reduce_mul_epi8(k: __mmask16, a: __m128i) -> i8 {
5387-
unsafe { simd_reduce_mul_unordered(simd_select_bitmask(k, a.as_i8x16(), i8x16::splat(1))) }
5387+
unsafe { simd_reduce_mul_ordered(simd_select_bitmask(k, a.as_i8x16(), i8x16::splat(1)), 1) }
53885388
}
53895389

53905390
/// Reduce the packed 16-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.

crates/core_arch/src/x86/avx512f.rs

Lines changed: 14 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -33171,7 +33171,7 @@ pub fn _mm_mask_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(
3317133171
#[target_feature(enable = "avx512f")]
3317233172
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3317333173
pub fn _mm512_reduce_add_epi32(a: __m512i) -> i32 {
33174-
unsafe { simd_reduce_add_unordered(a.as_i32x16()) }
33174+
unsafe { simd_reduce_add_ordered(a.as_i32x16(), 0) }
3317533175
}
3317633176

3317733177
/// Reduce the packed 32-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
@@ -33181,7 +33181,7 @@ pub fn _mm512_reduce_add_epi32(a: __m512i) -> i32 {
3318133181
#[target_feature(enable = "avx512f")]
3318233182
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3318333183
pub fn _mm512_mask_reduce_add_epi32(k: __mmask16, a: __m512i) -> i32 {
33184-
unsafe { simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i32x16(), i32x16::ZERO)) }
33184+
unsafe { simd_reduce_add_ordered(simd_select_bitmask(k, a.as_i32x16(), i32x16::ZERO), 0) }
3318533185
}
3318633186

3318733187
/// Reduce the packed 64-bit integers in a by addition. Returns the sum of all elements in a.
@@ -33191,7 +33191,7 @@ pub fn _mm512_mask_reduce_add_epi32(k: __mmask16, a: __m512i) -> i32 {
3319133191
#[target_feature(enable = "avx512f")]
3319233192
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3319333193
pub fn _mm512_reduce_add_epi64(a: __m512i) -> i64 {
33194-
unsafe { simd_reduce_add_unordered(a.as_i64x8()) }
33194+
unsafe { simd_reduce_add_ordered(a.as_i64x8(), 0) }
3319533195
}
3319633196

3319733197
/// Reduce the packed 64-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
@@ -33201,7 +33201,7 @@ pub fn _mm512_reduce_add_epi64(a: __m512i) -> i64 {
3320133201
#[target_feature(enable = "avx512f")]
3320233202
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3320333203
pub fn _mm512_mask_reduce_add_epi64(k: __mmask8, a: __m512i) -> i64 {
33204-
unsafe { simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i64x8(), i64x8::ZERO)) }
33204+
unsafe { simd_reduce_add_ordered(simd_select_bitmask(k, a.as_i64x8(), i64x8::ZERO), 0) }
3320533205
}
3320633206

3320733207
/// Reduce the packed single-precision (32-bit) floating-point elements in a by addition. Returns the sum of all elements in a.
@@ -33267,7 +33267,7 @@ pub fn _mm512_mask_reduce_add_pd(k: __mmask8, a: __m512d) -> f64 {
3326733267
#[target_feature(enable = "avx512f")]
3326833268
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3326933269
pub fn _mm512_reduce_mul_epi32(a: __m512i) -> i32 {
33270-
unsafe { simd_reduce_mul_unordered(a.as_i32x16()) }
33270+
unsafe { simd_reduce_mul_ordered(a.as_i32x16(), 1) }
3327133271
}
3327233272

3327333273
/// Reduce the packed 32-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
@@ -33278,11 +33278,10 @@ pub fn _mm512_reduce_mul_epi32(a: __m512i) -> i32 {
3327833278
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3327933279
pub fn _mm512_mask_reduce_mul_epi32(k: __mmask16, a: __m512i) -> i32 {
3328033280
unsafe {
33281-
simd_reduce_mul_unordered(simd_select_bitmask(
33282-
k,
33283-
a.as_i32x16(),
33284-
_mm512_set1_epi32(1).as_i32x16(),
33285-
))
33281+
simd_reduce_mul_ordered(
33282+
simd_select_bitmask(k, a.as_i32x16(), _mm512_set1_epi32(1).as_i32x16()),
33283+
1,
33284+
)
3328633285
}
3328733286
}
3328833287

@@ -33293,7 +33292,7 @@ pub fn _mm512_mask_reduce_mul_epi32(k: __mmask16, a: __m512i) -> i32 {
3329333292
#[target_feature(enable = "avx512f")]
3329433293
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3329533294
pub fn _mm512_reduce_mul_epi64(a: __m512i) -> i64 {
33296-
unsafe { simd_reduce_mul_unordered(a.as_i64x8()) }
33295+
unsafe { simd_reduce_mul_ordered(a.as_i64x8(), 1) }
3329733296
}
3329833297

3329933298
/// Reduce the packed 64-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
@@ -33304,11 +33303,10 @@ pub fn _mm512_reduce_mul_epi64(a: __m512i) -> i64 {
3330433303
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3330533304
pub fn _mm512_mask_reduce_mul_epi64(k: __mmask8, a: __m512i) -> i64 {
3330633305
unsafe {
33307-
simd_reduce_mul_unordered(simd_select_bitmask(
33308-
k,
33309-
a.as_i64x8(),
33310-
_mm512_set1_epi64(1).as_i64x8(),
33311-
))
33306+
simd_reduce_mul_ordered(
33307+
simd_select_bitmask(k, a.as_i64x8(), _mm512_set1_epi64(1).as_i64x8()),
33308+
1,
33309+
)
3331233310
}
3331333311
}
3331433312

0 commit comments

Comments
 (0)