From dc682df55cc51b03dadff5d9df89c2ef007d4e17 Mon Sep 17 00:00:00 2001 From: Paul Toffoloni Date: Mon, 19 May 2025 11:27:29 +0200 Subject: [PATCH 1/3] Unroll loop on sum if possible --- Sources/AccelerateLinux/VectorOps/vDSP.swift | 45 +++++++++++++++++--- 1 file changed, 39 insertions(+), 6 deletions(-) diff --git a/Sources/AccelerateLinux/VectorOps/vDSP.swift b/Sources/AccelerateLinux/VectorOps/vDSP.swift index 0151f5a..916f0e1 100644 --- a/Sources/AccelerateLinux/VectorOps/vDSP.swift +++ b/Sources/AccelerateLinux/VectorOps/vDSP.swift @@ -57,15 +57,48 @@ public enum vDSP { @inlinable @inline(__always) public static func sum(_ vector: U) -> Double where U: AccelerateBuffer, U.Element == Double { - vector.withUnsafeBufferPointer { ptr in - var sum: Double = 0 - var i = ptr.startIndex - while i < ptr.endIndex { - sum += ptr[i] - i += 1 + if vector.count == 0 { return 0.0 } + if vector.count <= 8 { + var sum: Double = 0.0 + vector.withUnsafeBufferPointer { buffer in + for i in buffer.indices { + sum += buffer[i] + } } return sum } + vector.withUnsafeBufferPointer { buffer in + guard let baseAddress = buffer.baseAddress else { return 0.0 } + + let count = buffer.count + var sum1: Double = 0.0 + var sum2: Double = 0.0 + var sum3: Double = 0.0 + var sum4: Double = 0.0 + var sum5: Double = 0.0 + var sum6: Double = 0.0 + var sum7: Double = 0.0 + var sum8: Double = 0.0 + + let vectorCount = count - (count % 8) + for i in stride(from: 0, to: vectorCount, by: 8) { + sum1 += baseAddress[i] + sum2 += baseAddress[i + 1] + sum3 += baseAddress[i + 2] + sum4 += baseAddress[i + 3] + sum5 += baseAddress[i + 4] + sum6 += baseAddress[i + 5] + sum7 += baseAddress[i + 6] + sum8 += baseAddress[i + 7] + } + + var remainingSum: Double = 0.0 + for i in vectorCount.. Date: Mon, 19 May 2025 11:28:31 +0200 Subject: [PATCH 2/3] Clean up --- Sources/AccelerateLinux/VectorOps/vDSP.swift | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Sources/AccelerateLinux/VectorOps/vDSP.swift b/Sources/AccelerateLinux/VectorOps/vDSP.swift index 916f0e1..0e77041 100644 --- a/Sources/AccelerateLinux/VectorOps/vDSP.swift +++ b/Sources/AccelerateLinux/VectorOps/vDSP.swift @@ -58,15 +58,19 @@ public enum vDSP { @inline(__always) public static func sum(_ vector: U) -> Double where U: AccelerateBuffer, U.Element == Double { if vector.count == 0 { return 0.0 } + if vector.count <= 8 { var sum: Double = 0.0 vector.withUnsafeBufferPointer { buffer in - for i in buffer.indices { + var i = 0 + while i < vector.count { sum += buffer[i] + i += 1 } } return sum } + vector.withUnsafeBufferPointer { buffer in guard let baseAddress = buffer.baseAddress else { return 0.0 } From 8e623306e8270976bb510c79561a8af4aa1abd17 Mon Sep 17 00:00:00 2001 From: Paul Toffoloni Date: Mon, 19 May 2025 09:31:17 +0000 Subject: [PATCH 3/3] Fix error --- Sources/AccelerateLinux/VectorOps/vDSP.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Sources/AccelerateLinux/VectorOps/vDSP.swift b/Sources/AccelerateLinux/VectorOps/vDSP.swift index 0e77041..1157a45 100644 --- a/Sources/AccelerateLinux/VectorOps/vDSP.swift +++ b/Sources/AccelerateLinux/VectorOps/vDSP.swift @@ -71,7 +71,7 @@ public enum vDSP { return sum } - vector.withUnsafeBufferPointer { buffer in + return vector.withUnsafeBufferPointer { buffer in guard let baseAddress = buffer.baseAddress else { return 0.0 } let count = buffer.count