diff --git a/PhotoLocator/BitmapOperations/FloatBitmap.cs b/PhotoLocator/BitmapOperations/FloatBitmap.cs index 66f12e2..5194dc9 100644 --- a/PhotoLocator/BitmapOperations/FloatBitmap.cs +++ b/PhotoLocator/BitmapOperations/FloatBitmap.cs @@ -227,6 +227,8 @@ public BitmapSource ToBitmapSource(double dpiX, double dpiY, double gamma, Pixel private byte[] ToPixels8(double gamma) { + if (Elements is null) + throw new InvalidOperationException("Bitmap not initialized"); var pixels = ArrayPool.Shared.Rent(Height * Stride); var gammaLut = CreateGammaLookupFloatToByte(gamma); unsafe diff --git a/PhotoLocator/BitmapOperations/IIRMinMaxOperation.cs b/PhotoLocator/BitmapOperations/IIRMinMaxOperation.cs index 0433ed5..4c66fb3 100644 --- a/PhotoLocator/BitmapOperations/IIRMinMaxOperation.cs +++ b/PhotoLocator/BitmapOperations/IIRMinMaxOperation.cs @@ -48,7 +48,59 @@ static public void MinFilter(FloatBitmap plane, float filterSize) } }); // Vertical - Parallel.For(0, plane.Stride, x => + int quarterWidth = plane.Stride / 4; + Parallel.For(0, quarterWidth, x4 => + { + int x = x4 * 4; + var width = plane.Stride; + var height = plane.Height; + fixed (float* pixels = plane.Elements) + { + var pix0 = &pixels[x]; + var prev0 = *pix0; + var pix1 = pix0 + 1; + var prev1 = *pix1; + var pix2 = pix0 + 2; + var prev2 = *pix2; + var pix3 = pix0 + 3; + var prev3 = *pix3; + for (var y = 1; y < height; y++) + { + pix0 += width; + if (prev0 < *pix0) { prev0 = (prev0 * filterSize + *pix0) * scale; *pix0 = prev0; } + else prev0 = *pix0; + pix1 += width; + if (prev1 < *pix1) { prev1 = (prev1 * filterSize + *pix1) * scale; *pix1 = prev1; } + else prev1 = *pix1; + pix2 += width; + if (prev2 < *pix2) { prev2 = (prev2 * filterSize + *pix2) * scale; *pix2 = prev2; } + else prev2 = *pix2; + pix3 += width; + if (prev3 < *pix3) { prev3 = (prev3 * filterSize + *pix3) * scale; *pix3 = prev3; } + else prev3 = *pix3; + } + prev0 = *pix0; + prev1 = *pix1; + prev2 = *pix2; + prev3 = *pix3; + for (var y = 1; y < height; y++) + { + pix0 -= width; + if (prev0 < *pix0) { prev0 = (prev0 * filterSize + *pix0) * scale; *pix0 = prev0; } + else prev0 = *pix0; + pix1 -= width; + if (prev1 < *pix1) { prev1 = (prev1 * filterSize + *pix1) * scale; *pix1 = prev1; } + else prev1 = *pix1; + pix2 -= width; + if (prev2 < *pix2) { prev2 = (prev2 * filterSize + *pix2) * scale; *pix2 = prev2; } + else prev2 = *pix2; + pix3 -= width; + if (prev3 < *pix3) { prev3 = (prev3 * filterSize + *pix3) * scale; *pix3 = prev3; } + else prev3 = *pix3; + } + } + }); + Parallel.For(quarterWidth * 4, plane.Stride, x => { var width = plane.Stride; var height = plane.Height; @@ -59,25 +111,15 @@ static public void MinFilter(FloatBitmap plane, float filterSize) for (var y = 1; y < height; y++) { pix += width; - if (prev < *pix) - { - prev = (prev * filterSize + *pix) * scale; - *pix = prev; - } - else - prev = *pix; + if (prev < *pix) { prev = (prev * filterSize + *pix) * scale; *pix = prev; } + else prev = *pix; } prev = *pix; for (var y = 1; y < height; y++) { pix -= width; - if (prev < *pix) - { - prev = (prev * filterSize + *pix) * scale; - *pix = prev; - } - else - prev = *pix; + if (prev < *pix) { prev = (prev * filterSize + *pix) * scale; *pix = prev; } + else prev = *pix; } } }); @@ -129,7 +171,59 @@ static public void MaxFilter(FloatBitmap plane, float filterSize) } }); // Vertical - Parallel.For(0, plane.Stride, x => + int quarterWidth = plane.Stride / 4; + Parallel.For(0, quarterWidth, x4 => + { + int x = x4 * 4; + var width = plane.Stride; + var height = plane.Height; + fixed (float* pixels = plane.Elements) + { + var pix0 = &pixels[x]; + var prev0 = *pix0; + var pix1 = pix0 + 1; + var prev1 = *pix1; + var pix2 = pix0 + 2; + var prev2 = *pix2; + var pix3 = pix0 + 3; + var prev3 = *pix3; + for (var y = 1; y < height; y++) + { + pix0 += width; + if (prev0 > *pix0) { prev0 = (prev0 * filterSize + *pix0) * scale; *pix0 = prev0; } + else prev0 = *pix0; + pix1 += width; + if (prev1 > *pix1) { prev1 = (prev1 * filterSize + *pix1) * scale; *pix1 = prev1; } + else prev1 = *pix1; + pix2 += width; + if (prev2 > *pix2) { prev2 = (prev2 * filterSize + *pix2) * scale; *pix2 = prev2; } + else prev2 = *pix2; + pix3 += width; + if (prev3 > *pix3) { prev3 = (prev3 * filterSize + *pix3) * scale; *pix3 = prev3; } + else prev3 = *pix3; + } + prev0 = *pix0; + prev1 = *pix1; + prev2 = *pix2; + prev3 = *pix3; + for (var y = 1; y < height; y++) + { + pix0 -= width; + if (prev0 > *pix0) { prev0 = (prev0 * filterSize + *pix0) * scale; *pix0 = prev0; } + else prev0 = *pix0; + pix1 -= width; + if (prev1 > *pix1) { prev1 = (prev1 * filterSize + *pix1) * scale; *pix1 = prev1; } + else prev1 = *pix1; + pix2 -= width; + if (prev2 > *pix2) { prev2 = (prev2 * filterSize + *pix2) * scale; *pix2 = prev2; } + else prev2 = *pix2; + pix3 -= width; + if (prev3 > *pix3) { prev3 = (prev3 * filterSize + *pix3) * scale; *pix3 = prev3; } + else prev3 = *pix3; + } + } + }); + Parallel.For(quarterWidth * 4, plane.Stride, x => { var width = plane.Stride; var height = plane.Height; @@ -140,25 +234,15 @@ static public void MaxFilter(FloatBitmap plane, float filterSize) for (var y = 1; y < height; y++) { pix += width; - if (prev > *pix) - { - prev = (prev * filterSize + *pix) * scale; - *pix = prev; - } - else - prev = *pix; + if (prev > *pix) { prev = (prev * filterSize + *pix) * scale; *pix = prev; } + else prev = *pix; } prev = *pix; for (var y = 1; y < height; y++) { pix -= width; - if (prev > *pix) - { - prev = (prev * filterSize + *pix) * scale; - *pix = prev; - } - else - prev = *pix; + if (prev > *pix) { prev = (prev * filterSize + *pix) * scale; *pix = prev; } + else prev = *pix; } } }); diff --git a/PhotoLocator/BitmapOperations/IIRSmoothOperation.cs b/PhotoLocator/BitmapOperations/IIRSmoothOperation.cs index 790af72..180c696 100644 --- a/PhotoLocator/BitmapOperations/IIRSmoothOperation.cs +++ b/PhotoLocator/BitmapOperations/IIRSmoothOperation.cs @@ -1,5 +1,8 @@ using System.Diagnostics; using System.Threading.Tasks; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +using System.Numerics; namespace PhotoLocator.BitmapOperations { @@ -12,23 +15,24 @@ public static void Apply(FloatBitmap plane, float filterSize) Debug.Assert(plane.PlaneCount == 1); filterSize /= 4f; var scale = 1f / (1f + filterSize); + var height = plane.Height; unsafe { // Horizontal smooth - Parallel.For(0, plane.Height, y => + Parallel.For(0, height, y => { - var width = plane.Stride; + var stride = plane.Stride; fixed (float* pixels = plane.Elements) { - var pix = &pixels[y * width]; + var pix = &pixels[y * stride]; var value = *pix; - for (var x = width; x > 1; x--) + for (var x = stride; x > 1; x--) { pix++; value = (value * filterSize + *pix) * scale; *pix = value; } - for (var x = width; x > 1; x--) + for (var x = stride; x > 1; x--) { pix--; value = (value * filterSize + *pix) * scale; @@ -36,23 +40,54 @@ public static void Apply(FloatBitmap plane, float filterSize) } } }); - // Vertical smooth - Parallel.For(0, plane.Stride, x => + + // Vertical smooth - vectorized over columns when possible + int vectorSize = Vector.Count; + int vectorizedSegments = plane.Stride / vectorSize; + var filterSizeV = Vector.Create(filterSize); + var scaleV = Vector.Create(scale); + Parallel.For(0, vectorizedSegments, vi => + { + int x = vi * vectorSize; + var stride = plane.Stride; + fixed (float* pixels = plane.Elements) + { + float* colPtr = &pixels[x]; + var v = Vector.Load(colPtr); + for (var y = height; y > 1; y--) + { + colPtr += stride; + var inV = Vector.Load(colPtr); + v = Vector.Multiply(Vector.Add(Vector.Multiply(v, filterSizeV), inV), scaleV); + v.Store(colPtr); + } + for (var y = height; y > 1; y--) + { + colPtr -= stride; + var inV = Vector.Load(colPtr); + v = Vector.Multiply(Vector.Add(Vector.Multiply(v, filterSizeV), inV), scaleV); + v.Store(colPtr); + } + } + }); + + // Remaining columns - columns not divisible by vectorSize + Parallel.For(vectorizedSegments * vectorSize, plane.Stride, x => { - var width = plane.Stride; + var stride = plane.Stride; fixed (float* pixels = plane.Elements) { var pix = &pixels[x]; var value = *pix; - for (var y = plane.Height; y > 1; y--) + for (var y = height; y > 1; y--) { - pix += width; + pix += stride; value = (value * filterSize + *pix) * scale; *pix = value; } - for (var y = plane.Height; y > 1; y--) + for (var y = height; y > 1; y--) { - pix -= width; + pix -= stride; value = (value * filterSize + *pix) * scale; *pix = value; } diff --git a/PhotoLocatorTest/BenchmarkHelper.cs b/PhotoLocatorTest/BenchmarkHelper.cs new file mode 100644 index 0000000..a3d0750 --- /dev/null +++ b/PhotoLocatorTest/BenchmarkHelper.cs @@ -0,0 +1,33 @@ +using System.Diagnostics; +using System.Linq; + +namespace PhotoLocator +{ + public static class BenchmarkHelper + { + public static void Run(Action action, int innerLoops = 1, int outerIterations = 5) + { +#if DEBUG + Console.WriteLine("WARNING: Running benchmark in DEBUG mode. Results may not reflect release performance."); +#endif + var iterationTimes = new long[outerIterations]; + for (int i = 0; i < outerIterations; i++) + { + GC.Collect(); + GC.TryStartNoGCRegion(1024 * 1024 * 100); + var sw = Stopwatch.StartNew(); + for (int j = 0; j < innerLoops; j++) + action(); + sw.Stop(); + Console.WriteLine($"Iteration {i + 1}: {sw.ElapsedMilliseconds} ms"); + GC.EndNoGCRegion(); + iterationTimes[i] = sw.ElapsedMilliseconds; + } + var median = iterationTimes.Order().Skip(outerIterations / 2).First(); + var min = iterationTimes.Min(); + Console.WriteLine($"Median time: {median} ms"); + Console.WriteLine($"Minimum time: {min} ms"); + throw new AssertInconclusiveException($"Min={min} ms, median={median} ms"); + } + } +} diff --git a/PhotoLocatorTest/BitmapOperations/IncreaseLocalContrastOperationTest.cs b/PhotoLocatorTest/BitmapOperations/IncreaseLocalContrastOperationTest.cs index 229546e..b3fa651 100644 --- a/PhotoLocatorTest/BitmapOperations/IncreaseLocalContrastOperationTest.cs +++ b/PhotoLocatorTest/BitmapOperations/IncreaseLocalContrastOperationTest.cs @@ -32,6 +32,7 @@ public void Apply_IncreaseLocalContrast() #if DEBUG GeneralFileFormatHandler.SaveToFile(result, "localContrast.png"); #endif + Assert.AreEqual(0.23394798570186837, op.DstBitmap.Mean(), 1e-5); } } }