Skip to content

Commit 27135a0

Browse files
Merge pull request #1533 from SixLabors/js/jpeg-encoder-perf
JpegEncoder - Optimize Some Low Hanging Fruit
2 parents edff37d + 461e59d commit 27135a0

File tree

10 files changed

+189
-125
lines changed

10 files changed

+189
-125
lines changed

src/ImageSharp/Formats/Jpeg/Components/Block8x8F.cs

Lines changed: 59 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
1818
/// <summary>
1919
/// Represents a Jpeg block with <see cref="float"/> coefficients.
2020
/// </summary>
21+
[StructLayout(LayoutKind.Sequential)]
2122
internal partial struct Block8x8F : IEquatable<Block8x8F>
2223
{
2324
/// <summary>
@@ -51,9 +52,6 @@ internal partial struct Block8x8F : IEquatable<Block8x8F>
5152
public Vector4 V7R;
5253
#pragma warning restore SA1600 // ElementsMustBeDocumented
5354

54-
private static readonly Vector4 NegativeOne = new Vector4(-1);
55-
private static readonly Vector4 Offset = new Vector4(.5F);
56-
5755
/// <summary>
5856
/// Get/Set scalar elements at a given index
5957
/// </summary>
@@ -155,10 +153,7 @@ public static Block8x8F Load(Span<int> data)
155153
/// </summary>
156154
[MethodImpl(InliningOptions.ShortMethod)]
157155
public void Clear()
158-
{
159-
// The cheapest way to do this in C#:
160-
this = default;
161-
}
156+
=> this = default; // Assigning default resets every field at once; noted by the authors as the cheapest way to clear the struct in C#.
162157

163158
/// <summary>
164159
/// Load raw 32bit floating point data from source.
@@ -180,9 +175,7 @@ public void LoadFrom(Span<float> source)
180175
/// <param name="source">Source</param>
181176
[MethodImpl(InliningOptions.ShortMethod)]
182177
public static unsafe void LoadFrom(Block8x8F* blockPtr, Span<float> source)
183-
{
184-
blockPtr->LoadFrom(source);
185-
}
// Pointer-based convenience overload: simply forwards to the instance
// LoadFrom(Span<float>) of the block that blockPtr points to.
178+
=> blockPtr->LoadFrom(source);
186179

187180
/// <summary>
188181
/// Load raw 32bit floating point data from source
@@ -236,9 +229,7 @@ public static unsafe void ScaledCopyTo(Block8x8F* blockPtr, Span<byte> dest)
236229
/// <param name="dest">The destination.</param>
237230
[MethodImpl(InliningOptions.ShortMethod)]
238231
public static unsafe void ScaledCopyTo(Block8x8F* blockPtr, Span<float> dest)
239-
{
240-
blockPtr->ScaledCopyTo(dest);
241-
}
// Pointer-based convenience overload: simply forwards to the instance
// ScaledCopyTo of the block that blockPtr points to.
232+
=> blockPtr->ScaledCopyTo(dest);
242233

243234
/// <summary>
244235
/// Copy raw 32bit floating point data to dest
@@ -439,7 +430,6 @@ public void AddInPlace(float value)
439430
/// <param name="blockPtr">The block pointer.</param>
440431
/// <param name="qtPtr">The qt pointer.</param>
441432
/// <param name="unzigPtr">Unzig pointer</param>
442-
// [MethodImpl(MethodImplOptions.AggressiveInlining)]
443433
public static unsafe void DequantizeBlock(Block8x8F* blockPtr, Block8x8F* qtPtr, byte* unzigPtr)
444434
{
445435
float* b = (float*)blockPtr;
@@ -556,22 +546,60 @@ private static unsafe void Scale16X16To8X8Scalar(ref Block8x8F destination, Read
556546
[MethodImpl(InliningOptions.ShortMethod)]
557547
private static void DivideRoundAll(ref Block8x8F a, ref Block8x8F b)
558548
{
559-
a.V0L = DivideRound(a.V0L, b.V0L);
560-
a.V0R = DivideRound(a.V0R, b.V0R);
561-
a.V1L = DivideRound(a.V1L, b.V1L);
562-
a.V1R = DivideRound(a.V1R, b.V1R);
563-
a.V2L = DivideRound(a.V2L, b.V2L);
564-
a.V2R = DivideRound(a.V2R, b.V2R);
565-
a.V3L = DivideRound(a.V3L, b.V3L);
566-
a.V3R = DivideRound(a.V3R, b.V3R);
567-
a.V4L = DivideRound(a.V4L, b.V4L);
568-
a.V4R = DivideRound(a.V4R, b.V4R);
569-
a.V5L = DivideRound(a.V5L, b.V5L);
570-
a.V5R = DivideRound(a.V5R, b.V5R);
571-
a.V6L = DivideRound(a.V6L, b.V6L);
572-
a.V6R = DivideRound(a.V6R, b.V6R);
573-
a.V7L = DivideRound(a.V7L, b.V7L);
574-
a.V7R = DivideRound(a.V7R, b.V7R);
// Overwrites every element of 'a' in place with a / b plus a rounding offset.
// Two code paths: a 256-bit AVX loop when available, otherwise one Vector4 at a time.
549+
#if SUPPORTS_RUNTIME_INTRINSICS
550+
if (Avx.IsSupported)
551+
{
// Broadcast constants: clamp bounds (-1, 1) and the 0.5 rounding factor.
552+
var vnegOne = Vector256.Create(-1f);
553+
var vadd = Vector256.Create(.5F);
554+
var vone = Vector256.Create(1f);
555+
// Reinterpret each block, starting at its V0L field, as a run of Vector256<float> lanes.
// NOTE(review): presumably relies on the Vector4 fields being laid out contiguously
// (the struct is marked LayoutKind.Sequential) - confirm.
556+
ref Vector256<float> aBase = ref Unsafe.AsRef(Unsafe.As<Vector4, Vector256<float>>(ref a.V0L));
557+
ref Vector256<float> bBase = ref Unsafe.AsRef(Unsafe.As<Vector4, Vector256<float>>(ref b.V0L));
// End sentinel: 8 lanes of 8 floats past the start of 'a'.
558+
ref Vector256<float> aEnd = ref Unsafe.Add(ref aBase, 8);
559+
560+
do
561+
{
// Offset = 0.5 * min(max(-1, a), 1); computed from the current 'a' lane before it is overwritten.
562+
Vector256<float> voff = Avx.Multiply(Avx.Min(Avx.Max(vnegOne, aBase), vone), vadd);
// Store a / b + offset back into the current lane of 'a'.
563+
Unsafe.Add(ref aBase, 0) = Avx.Add(Avx.Divide(aBase, bBase), voff);
564+
// Advance both references by one Vector256 lane.
565+
aBase = ref Unsafe.Add(ref aBase, 1);
566+
bBase = ref Unsafe.Add(ref bBase, 1);
567+
}
568+
while (Unsafe.IsAddressLessThan(ref aBase, ref aEnd));
569+
}
570+
else
571+
#endif
572+
{
// Fallback: apply DivideRound to each of the sixteen Vector4 fields in turn.
573+
a.V0L = DivideRound(a.V0L, b.V0L);
574+
a.V0R = DivideRound(a.V0R, b.V0R);
575+
a.V1L = DivideRound(a.V1L, b.V1L);
576+
a.V1R = DivideRound(a.V1R, b.V1R);
577+
a.V2L = DivideRound(a.V2L, b.V2L);
578+
a.V2R = DivideRound(a.V2R, b.V2R);
579+
a.V3L = DivideRound(a.V3L, b.V3L);
580+
a.V3R = DivideRound(a.V3R, b.V3R);
581+
a.V4L = DivideRound(a.V4L, b.V4L);
582+
a.V4R = DivideRound(a.V4R, b.V4R);
583+
a.V5L = DivideRound(a.V5L, b.V5L);
584+
a.V5R = DivideRound(a.V5R, b.V5R);
585+
a.V6L = DivideRound(a.V6L, b.V6L);
586+
a.V6R = DivideRound(a.V6R, b.V6R);
587+
a.V7L = DivideRound(a.V7L, b.V7L);
588+
a.V7R = DivideRound(a.V7R, b.V7R);
589+
}
590+
}
591+
592+
/// <summary>
/// Divides <paramref name="dividend"/> by <paramref name="divisor"/> element-wise and adds
/// 0.5 times the result of <c>Numerics.Clamp(dividend, -1, 1)</c> as a rounding offset.
/// </summary>
/// <param name="dividend">The values to divide; also determines the offset that is added.</param>
/// <param name="divisor">The values to divide by.</param>
/// <returns>The quotient plus the offset.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
593+
private static Vector4 DivideRound(Vector4 dividend, Vector4 divisor)
594+
{
// Constants are created locally on each call rather than read from static fields.
595+
var neg = new Vector4(-1);
596+
var add = new Vector4(.5F);
597+
598+
// sign(dividend) = max(min(dividend, 1), -1)
// NOTE(review): presumably Numerics.Clamp is an element-wise clamp, in which case the
// "sign" is the dividend itself (not +/-1) whenever |dividend| < 1 - confirm.
599+
Vector4 sign = Numerics.Clamp(dividend, neg, Vector4.One);
600+
601+
// AlmostRound(dividend/divisor) = dividend/divisor + 0.5*sign(dividend)
602+
return (dividend / divisor) + (sign * add);
575603
}
576604

577605
public void RoundInto(ref Block8x8 dest)
@@ -673,8 +701,7 @@ public void LoadFromInt16ExtendedAvx2(ref Block8x8 source)
673701

674702
/// <inheritdoc />
675703
public bool Equals(Block8x8F other)
676-
{
677-
return this.V0L == other.V0L
704+
=> this.V0L == other.V0L
678705
&& this.V0R == other.V0R
679706
&& this.V1L == other.V1L
680707
&& this.V1R == other.V1R
@@ -690,7 +717,6 @@ public bool Equals(Block8x8F other)
690717
&& this.V6R == other.V6R
691718
&& this.V7L == other.V7L
692719
&& this.V7R == other.V7R;
693-
}
694720

695721
/// <inheritdoc />
696722
public override string ToString()
@@ -718,16 +744,6 @@ private static Vector<float> NormalizeAndRound(Vector<float> row, Vector<float>
718744
return row.FastRound();
719745
}
720746

721-
[MethodImpl(InliningOptions.ShortMethod)]
722-
private static Vector4 DivideRound(Vector4 dividend, Vector4 divisor)
723-
{
724-
// sign(dividend) = max(min(dividend, 1), -1)
725-
Vector4 sign = Numerics.Clamp(dividend, NegativeOne, Vector4.One);
726-
727-
// AlmostRound(dividend/divisor) = dividend/divisor + 0.5*sign(dividend)
728-
return (dividend / divisor) + (sign * Offset);
729-
}
730-
731747
[Conditional("DEBUG")]
732748
private static void GuardBlockIndex(int idx)
733749
{

src/ImageSharp/Formats/Jpeg/Components/Encoder/YCbCrForwardConverter{TPixel}.cs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -59,9 +59,9 @@ public static YCbCrForwardConverter<TPixel> Create()
5959
/// <summary>
6060
/// Converts a 8x8 image area inside 'pixels' at position (x,y) placing the result members of the structure (<see cref="Y"/>, <see cref="Cb"/>, <see cref="Cr"/>)
6161
/// </summary>
62-
public void Convert(ImageFrame<TPixel> frame, int x, int y, in RowOctet<TPixel> currentRows)
62+
public void Convert(ImageFrame<TPixel> frame, int x, int y, ref RowOctet<TPixel> currentRows)
6363
{
64-
this.pixelBlock.LoadAndStretchEdges(frame.PixelBuffer, x, y, currentRows);
64+
this.pixelBlock.LoadAndStretchEdges(frame.PixelBuffer, x, y, ref currentRows);
6565

6666
Span<Rgb24> rgbSpan = this.rgbBlock.AsSpanUnsafe();
6767
PixelOperations<TPixel>.Instance.ToRgb24(frame.GetConfiguration(), this.pixelBlock.AsSpanUnsafe(), rgbSpan);
@@ -76,7 +76,7 @@ public void Convert(ImageFrame<TPixel> frame, int x, int y, in RowOctet<TPixel>
7676
}
7777
else
7878
{
79-
this.colorTables.Convert(rgbSpan, ref yBlock, ref cbBlock, ref crBlock);
79+
this.colorTables.Convert(rgbSpan, ref yBlock, ref cbBlock, ref crBlock);
8080
}
8181
}
8282
}

src/ImageSharp/Formats/Jpeg/Components/GenericBlock8x8.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ public T this[int idx]
5757
/// Load a 8x8 region of an image into the block.
5858
/// The "outlying" area of the block will be stretched out with pixels on the right and bottom edge of the image.
5959
/// </summary>
60-
public void LoadAndStretchEdges(Buffer2D<T> source, int sourceX, int sourceY, in RowOctet<T> currentRows)
60+
public void LoadAndStretchEdges(Buffer2D<T> source, int sourceX, int sourceY, ref RowOctet<T> currentRows)
6161
{
6262
int width = Math.Min(8, source.Width - sourceX);
6363
int height = Math.Min(8, source.Height - sourceY);
Lines changed: 61 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright (c) Six Labors.
1+
// Copyright (c) Six Labors.
22
// Licensed under the Apache License, Version 2.0.
33

44
using System;
@@ -12,39 +12,24 @@ namespace SixLabors.ImageSharp.Formats.Jpeg.Components
1212
/// Cache 8 pixel rows on the stack, which may originate from different buffers of a <see cref="MemoryGroup{T}"/>.
1313
/// </summary>
1414
[StructLayout(LayoutKind.Sequential)]
15-
internal readonly ref struct RowOctet<T>
15+
internal ref struct RowOctet<T>
1616
where T : struct
1717
{
18-
private readonly Span<T> row0;
19-
private readonly Span<T> row1;
20-
private readonly Span<T> row2;
21-
private readonly Span<T> row3;
22-
private readonly Span<T> row4;
23-
private readonly Span<T> row5;
24-
private readonly Span<T> row6;
25-
private readonly Span<T> row7;
26-
27-
public RowOctet(Buffer2D<T> buffer, int startY)
28-
{
29-
int y = startY;
30-
int height = buffer.Height;
31-
this.row0 = y < height ? buffer.GetRowSpan(y++) : default;
32-
this.row1 = y < height ? buffer.GetRowSpan(y++) : default;
33-
this.row2 = y < height ? buffer.GetRowSpan(y++) : default;
34-
this.row3 = y < height ? buffer.GetRowSpan(y++) : default;
35-
this.row4 = y < height ? buffer.GetRowSpan(y++) : default;
36-
this.row5 = y < height ? buffer.GetRowSpan(y++) : default;
37-
this.row6 = y < height ? buffer.GetRowSpan(y++) : default;
38-
this.row7 = y < height ? buffer.GetRowSpan(y) : default;
39-
}
18+
private Span<T> row0;
19+
private Span<T> row1;
20+
private Span<T> row2;
21+
private Span<T> row3;
22+
private Span<T> row4;
23+
private Span<T> row5;
24+
private Span<T> row6;
25+
private Span<T> row7;
4026

27+
// No unsafe tricks, since Span<T> can't be used as a generic argument
4128
public Span<T> this[int y]
4229
{
43-
[MethodImpl(InliningOptions.ShortMethod)]
44-
get
45-
{
46-
// No unsafe tricks, since Span<T> can't be used as a generic argument
47-
return y switch
30+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
31+
get =>
32+
y switch
4833
{
4934
0 => this.row0,
5035
1 => this.row1,
@@ -56,13 +41,57 @@ public Span<T> this[int y]
5641
7 => this.row7,
5742
_ => ThrowIndexOutOfRangeException()
5843
};
44+
45+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
46+
private set
47+
{
48+
switch (y)
49+
{
50+
case 0:
51+
this.row0 = value;
52+
break;
53+
case 1:
54+
this.row1 = value;
55+
break;
56+
case 2:
57+
this.row2 = value;
58+
break;
59+
case 3:
60+
this.row3 = value;
61+
break;
62+
case 4:
63+
this.row4 = value;
64+
break;
65+
case 5:
66+
this.row5 = value;
67+
break;
68+
case 6:
69+
this.row6 = value;
70+
break;
71+
default:
72+
this.row7 = value;
73+
break;
74+
}
5975
}
6076
}
6177

62-
[MethodImpl(InliningOptions.ColdPath)]
63-
private static Span<T> ThrowIndexOutOfRangeException()
78+
/// <summary>
/// Refreshes the cached row spans from <paramref name="buffer"/>, storing row
/// <paramref name="startY"/> in slot 0, the next row in slot 1, and so on, for at most 8 rows.
/// Stops at the last row of the buffer; slots that are not reached are not written and keep
/// whatever value they held before the call.
/// </summary>
/// <param name="buffer">The buffer whose rows are cached.</param>
/// <param name="startY">The index of the first row to cache.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
79+
public void Update(Buffer2D<T> buffer, int startY)
6480
{
65-
throw new IndexOutOfRangeException();
81+
// We don't actually have to assign values outside of the
82+
// frame pixel buffer since they are never requested.
83+
int y = startY;
// Exclusive end row: 8 rows past the start, capped at the buffer height.
84+
int yEnd = Math.Min(y + 8, buffer.Height);
85+
86+
int i = 0;
87+
while (y < yEnd)
88+
{
// Uses the private indexer setter; i is the slot index, y the buffer row.
89+
this[i++] = buffer.GetRowSpan(y++);
90+
}
6691
}
92+
93+
// Throw helper used as the default arm of the indexer getter's switch expression.
// It never returns; the Span<T> return type only lets it appear as a switch arm.
// Marked NoInlining so the throw stays out of the inlined getter.
[MethodImpl(MethodImplOptions.NoInlining)]
94+
private static Span<T> ThrowIndexOutOfRangeException()
95+
=> throw new IndexOutOfRangeException();
6796
}
6897
}

0 commit comments

Comments
 (0)