@@ -18,6 +18,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
1818 /// <summary>
1919 /// Represents a Jpeg block with <see cref="float"/> coefficients.
2020 /// </summary>
21+ [ StructLayout ( LayoutKind . Sequential ) ]
2122 internal partial struct Block8x8F : IEquatable < Block8x8F >
2223 {
2324 /// <summary>
@@ -51,9 +52,6 @@ internal partial struct Block8x8F : IEquatable<Block8x8F>
5152 public Vector4 V7R ;
5253#pragma warning restore SA1600 // ElementsMustBeDocumented
5354
54- private static readonly Vector4 NegativeOne = new Vector4 ( - 1 ) ;
55- private static readonly Vector4 Offset = new Vector4 ( .5F ) ;
56-
5755 /// <summary>
5856 /// Get/Set scalar elements at a given index
5957 /// </summary>
@@ -155,10 +153,7 @@ public static Block8x8F Load(Span<int> data)
155153 /// </summary>
156154 [ MethodImpl ( InliningOptions . ShortMethod ) ]
157155 public void Clear ( )
158- {
159- // The cheapest way to do this in C#:
160- this = default ;
161- }
// Overwrites the whole struct with its default value, which resets every field in one assignment.
156+ => this = default ; // The cheapest way to do this in C#.
162157
163158 /// <summary>
164159 /// Load raw 32bit floating point data from source.
@@ -180,9 +175,7 @@ public void LoadFrom(Span<float> source)
180175 /// <param name="source">Source</param>
181176 [ MethodImpl ( InliningOptions . ShortMethod ) ]
182177 public static unsafe void LoadFrom ( Block8x8F * blockPtr , Span < float > source )
183- {
184- blockPtr ->LoadFrom ( source ) ;
185- }
// Pointer-based overload: forwards to the instance LoadFrom of the block that blockPtr points to.
// blockPtr is dereferenced unconditionally, so it must not be null.
178+ => blockPtr ->LoadFrom ( source ) ;
186179
187180 /// <summary>
188181 /// Load raw 32bit floating point data from source
@@ -236,9 +229,7 @@ public static unsafe void ScaledCopyTo(Block8x8F* blockPtr, Span<byte> dest)
236229 /// <param name="dest">The destination.</param>
237230 [ MethodImpl ( InliningOptions . ShortMethod ) ]
238231 public static unsafe void ScaledCopyTo ( Block8x8F * blockPtr , Span < float > dest )
239- {
240- blockPtr ->ScaledCopyTo ( dest ) ;
241- }
// Pointer-based overload: forwards to the instance ScaledCopyTo of the block that blockPtr points to.
// blockPtr is dereferenced unconditionally, so it must not be null.
232+ => blockPtr ->ScaledCopyTo ( dest ) ;
242233
243234 /// <summary>
244235 /// Copy raw 32bit floating point data to dest
@@ -439,7 +430,6 @@ public void AddInPlace(float value)
439430 /// <param name="blockPtr">The block pointer.</param>
440431 /// <param name="qtPtr">The qt pointer.</param>
441432 /// <param name="unzigPtr">Unzig pointer</param>
442- // [MethodImpl(MethodImplOptions.AggressiveInlining)]
443433 public static unsafe void DequantizeBlock ( Block8x8F * blockPtr , Block8x8F * qtPtr , byte * unzigPtr )
444434 {
445435 float * b = ( float * ) blockPtr ;
@@ -556,22 +546,60 @@ private static unsafe void Scale16X16To8X8Scalar(ref Block8x8F destination, Read
556546 [ MethodImpl ( InliningOptions . ShortMethod ) ]
557547 private static void DivideRoundAll ( ref Block8x8F a , ref Block8x8F b )
558548 {
// Replaces every element of 'a' with (a / b) + 0.5 * clamp(a, -1, 1); 'b' is only read.
// Two implementations follow: an AVX path that processes 8 floats per step and a
// Vector4 fallback that applies DivideRound to each of the sixteen fields.
559- a . V0L = DivideRound ( a . V0L , b . V0L ) ;
560- a . V0R = DivideRound ( a . V0R , b . V0R ) ;
561- a . V1L = DivideRound ( a . V1L , b . V1L ) ;
562- a . V1R = DivideRound ( a . V1R , b . V1R ) ;
563- a . V2L = DivideRound ( a . V2L , b . V2L ) ;
564- a . V2R = DivideRound ( a . V2R , b . V2R ) ;
565- a . V3L = DivideRound ( a . V3L , b . V3L ) ;
566- a . V3R = DivideRound ( a . V3R , b . V3R ) ;
567- a . V4L = DivideRound ( a . V4L , b . V4L ) ;
568- a . V4R = DivideRound ( a . V4R , b . V4R ) ;
569- a . V5L = DivideRound ( a . V5L , b . V5L ) ;
570- a . V5R = DivideRound ( a . V5R , b . V5R ) ;
571- a . V6L = DivideRound ( a . V6L , b . V6L ) ;
572- a . V6R = DivideRound ( a . V6R , b . V6R ) ;
573- a . V7L = DivideRound ( a . V7L , b . V7L ) ;
574- a . V7R = DivideRound ( a . V7R , b . V7R ) ;
549+ #if SUPPORTS_RUNTIME_INTRINSICS
550+ if ( Avx . IsSupported )
551+ {
// Broadcast constants: lower clamp bound (-1), rounding offset (0.5) and upper clamp bound (1).
552+ var vnegOne = Vector256 . Create ( - 1f ) ;
553+ var vadd = Vector256 . Create ( .5F ) ;
554+ var vone = Vector256 . Create ( 1f ) ;
555+
// Reinterpret each block as a run of Vector256<float> starting at its first field.
// This relies on the sixteen Vector4 fields V0L..V7R being contiguous in declaration
// order (the struct is marked LayoutKind.Sequential), so that 8 x Vector256<float>
// covers exactly the same 64 floats as 16 x Vector4.
556+ ref Vector256 < float > aBase = ref Unsafe . AsRef ( Unsafe . As < Vector4 , Vector256 < float > > ( ref a . V0L ) ) ;
557+ ref Vector256 < float > bBase = ref Unsafe . AsRef ( Unsafe . As < Vector4 , Vector256 < float > > ( ref b . V0L ) ) ;
// One-past-the-end reference used only for the address comparison below; it is never dereferenced.
558+ ref Vector256 < float > aEnd = ref Unsafe . Add ( ref aBase , 8 ) ;
559+
560+ do
561+ {
// voff = clamp(a, -1, 1) * 0.5 -- the same offset the Vector4 DivideRound helper computes.
562+ Vector256 < float > voff = Avx . Multiply ( Avx . Min ( Avx . Max ( vnegOne , aBase ) , vone ) , vadd ) ;
// Unsafe.Add(ref aBase, 0) refers to the same location as aBase, so this stores
// (a / b) + voff back over the 8 floats that were just read.
563+ Unsafe . Add ( ref aBase , 0 ) = Avx. Add ( Avx . Divide ( aBase , bBase ) , voff ) ;
564+
// Ref reassignment: advance both cursors to the next group of 8 floats.
565+ aBase = ref Unsafe . Add ( ref aBase , 1 ) ;
566+ bBase = ref Unsafe . Add ( ref bBase , 1 ) ;
567+ }
568+ while ( Unsafe . IsAddressLessThan ( ref aBase , ref aEnd ) ) ;
569+ }
570+ else
571+ #endif
572+ {
// Fallback when AVX is unavailable or intrinsics are compiled out: one DivideRound per Vector4 field.
573+ a . V0L = DivideRound ( a . V0L , b . V0L ) ;
574+ a . V0R = DivideRound ( a . V0R , b . V0R ) ;
575+ a . V1L = DivideRound ( a . V1L , b . V1L ) ;
576+ a . V1R = DivideRound ( a . V1R , b . V1R ) ;
577+ a . V2L = DivideRound ( a . V2L , b . V2L ) ;
578+ a . V2R = DivideRound ( a . V2R , b . V2R ) ;
579+ a . V3L = DivideRound ( a . V3L , b . V3L ) ;
580+ a . V3R = DivideRound ( a . V3R , b . V3R ) ;
581+ a . V4L = DivideRound ( a . V4L , b . V4L ) ;
582+ a . V4R = DivideRound ( a . V4R , b . V4R ) ;
583+ a . V5L = DivideRound ( a . V5L , b . V5L ) ;
584+ a . V5R = DivideRound ( a . V5R , b . V5R ) ;
585+ a . V6L = DivideRound ( a . V6L , b . V6L ) ;
586+ a . V6R = DivideRound ( a . V6R , b . V6R ) ;
587+ a . V7L = DivideRound ( a . V7L , b . V7L ) ;
588+ a . V7R = DivideRound ( a . V7R , b . V7R ) ;
589+ }
590+ }
591+
/// <summary>
/// Divides <paramref name="dividend"/> by <paramref name="divisor"/> component-wise and adds
/// half of the dividend clamped to [-1, 1] as a rounding offset.
/// </summary>
/// <param name="dividend">The values to divide; also determines the direction of the offset.</param>
/// <param name="divisor">The values to divide by.</param>
/// <returns>The quotient plus the offset, still as floating point values.</returns>
592+ [ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
593+ private static Vector4 DivideRound ( Vector4 dividend , Vector4 divisor )
594+ {
// Clamp bounds and offset are built locally on each call rather than read from static fields.
595+ var neg = new Vector4 ( - 1 ) ;
596+ var add = new Vector4 ( .5F ) ;
597+
// Note: the clamp only yields exactly +/-1 when |dividend| >= 1; for smaller magnitudes
// the "sign" (and therefore the offset) is proportionally smaller, and it is 0 for 0.
598+ // sign(dividend) = max(min(dividend, 1), -1)
599+ Vector4 sign = Numerics . Clamp ( dividend , neg , Vector4 . One ) ;
600+
// NOTE(review): nothing is truncated here; presumably the caller converts to integers
// afterwards, which is what turns the +/-0.5 offset into rounding -- confirm against callers.
601+ // AlmostRound(dividend/divisor) = dividend/divisor + 0.5*sign(dividend)
602+ return ( dividend / divisor ) + ( sign * add ) ;
575603 }
576604
577605 public void RoundInto ( ref Block8x8 dest )
@@ -673,8 +701,7 @@ public void LoadFromInt16ExtendedAvx2(ref Block8x8 source)
673701
674702 /// <inheritdoc />
675703 public bool Equals ( Block8x8F other )
676- {
677- return this . V0L == other . V0L
704+ => this . V0L == other . V0L
678705 && this . V0R == other . V0R
679706 && this . V1L == other . V1L
680707 && this . V1R == other . V1R
@@ -690,7 +717,6 @@ public bool Equals(Block8x8F other)
690717 && this . V6R == other . V6R
691718 && this . V7L == other . V7L
692719 && this . V7R == other . V7R ;
693- }
694720
695721 /// <inheritdoc />
696722 public override string ToString ( )
@@ -718,16 +744,6 @@ private static Vector<float> NormalizeAndRound(Vector<float> row, Vector<float>
718744 return row . FastRound ( ) ;
719745 }
720746
721- [ MethodImpl ( InliningOptions . ShortMethod ) ]
722- private static Vector4 DivideRound ( Vector4 dividend , Vector4 divisor )
723- {
724- // sign(dividend) = max(min(dividend, 1), -1)
725- Vector4 sign = Numerics . Clamp ( dividend , NegativeOne , Vector4 . One ) ;
726-
727- // AlmostRound(dividend/divisor) = dividend/divisor + 0.5*sign(dividend)
728- return ( dividend / divisor ) + ( sign * Offset ) ;
729- }
730-
731747 [ Conditional ( "DEBUG" ) ]
732748 private static void GuardBlockIndex ( int idx )
733749 {
0 commit comments