diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazySparseByteArray.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazySparseByteArray.java new file mode 100644 index 00000000..da610227 --- /dev/null +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazySparseByteArray.java @@ -0,0 +1,58 @@ +package io.github.dfa1.vortex.reader.array; + +import io.github.dfa1.vortex.core.DType; + +import java.util.function.LongBinaryOperator; + +/// Lazy Sparse-encoded {@link ByteArray}. See {@link LazySparseLongArray} for semantics. +/// +/// @param dtype logical element type +/// @param length total logical row count +/// @param fillValue value at every unpatched position (raw signed byte) +/// @param fillInt value at every unpatched position widened to int (unsigned-aware for U8) +/// @param patchValues values for patched positions +/// @param patchIndices sorted absolute positions of patches +/// @param offset starting absolute position +public record LazySparseByteArray( + DType dtype, long length, byte fillValue, int fillInt, + ByteArray patchValues, Array patchIndices, long offset) + implements ByteArray { + + @Override + public byte getByte(long i) { + int p = SparseArrays.findPatch(patchIndices, patchValues.length(), i + offset); + return p >= 0 ? patchValues.getByte(p) : fillValue; + } + + @Override + public int getInt(long i) { + int p = SparseArrays.findPatch(patchIndices, patchValues.length(), i + offset); + return p >= 0 ? patchValues.getInt(p) : fillInt; + } + + @Override + public long fold(long identity, LongBinaryOperator op) { + long[] acc = {identity}; + long numPatches = patchValues.length(); + long absStart = offset; + long absEnd = offset + length; + int p = SparseArrays.findFirstAtOrAfter(patchIndices, numPatches, absStart); + long pos = absStart; + while (pos < absEnd && p < numPatches) { + long patchAbs = SparseArrays.readPatchIdx(patchIndices, p); + if (patchAbs >= absEnd) { + break; + } + for (long r = pos; r < patchAbs; r++) { + acc[0] = op.applyAsLong(acc[0], fillInt); + } + acc[0] = op.applyAsLong(acc[0], patchValues.getInt(p)); + pos = patchAbs + 1; + p++; + } + for (long r = pos; r < absEnd; r++) { + acc[0] = op.applyAsLong(acc[0], fillInt); + } + return acc[0]; + } +} diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazySparseDoubleArray.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazySparseDoubleArray.java new file mode 100644 index 00000000..ab8116e4 --- /dev/null +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazySparseDoubleArray.java @@ -0,0 +1,57 @@ +package io.github.dfa1.vortex.reader.array; + +import io.github.dfa1.vortex.core.DType; + +import java.util.function.DoubleBinaryOperator; +import java.util.function.DoubleConsumer; + +/// Lazy Sparse-encoded {@link DoubleArray}. See {@link LazySparseLongArray} for semantics. +/// +/// @param dtype logical element type +/// @param length total logical row count +/// @param fillValue value at every unpatched position +/// @param patchValues values for patched positions +/// @param patchIndices sorted absolute positions of patches +/// @param offset starting absolute position +public record LazySparseDoubleArray( + DType dtype, long length, double fillValue, + DoubleArray patchValues, Array patchIndices, long offset) + implements DoubleArray { + + @Override + public double getDouble(long i) { + int p = SparseArrays.findPatch(patchIndices, patchValues.length(), i + offset); + return p >= 0 ? patchValues.getDouble(p) : fillValue; + } + + @Override + public void forEachDouble(DoubleConsumer c) { + long numPatches = patchValues.length(); + long absStart = offset; + long absEnd = offset + length; + int p = SparseArrays.findFirstAtOrAfter(patchIndices, numPatches, absStart); + long pos = absStart; + while (pos < absEnd && p < numPatches) { + long patchAbs = SparseArrays.readPatchIdx(patchIndices, p); + if (patchAbs >= absEnd) { + break; + } + for (long r = pos; r < patchAbs; r++) { + c.accept(fillValue); + } + c.accept(patchValues.getDouble(p)); + pos = patchAbs + 1; + p++; + } + for (long r = pos; r < absEnd; r++) { + c.accept(fillValue); + } + } + + @Override + public double fold(double identity, DoubleBinaryOperator op) { + double[] acc = {identity}; + forEachDouble(v -> acc[0] = op.applyAsDouble(acc[0], v)); + return acc[0]; + } +} diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazySparseFloatArray.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazySparseFloatArray.java new file mode 100644 index 00000000..656d74a7 --- /dev/null +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazySparseFloatArray.java @@ -0,0 +1,51 @@ +package io.github.dfa1.vortex.reader.array; + +import io.github.dfa1.vortex.core.DType; + +import java.util.function.DoubleBinaryOperator; + +/// Lazy Sparse-encoded {@link FloatArray}. See {@link LazySparseLongArray} for semantics. +/// +/// @param dtype logical element type +/// @param length total logical row count +/// @param fillValue value at every unpatched position +/// @param patchValues values for patched positions +/// @param patchIndices sorted absolute positions of patches +/// @param offset starting absolute position +public record LazySparseFloatArray( + DType dtype, long length, float fillValue, + FloatArray patchValues, Array patchIndices, long offset) + implements FloatArray { + + @Override + public float getFloat(long i) { + int p = SparseArrays.findPatch(patchIndices, patchValues.length(), i + offset); + return p >= 0 ? patchValues.getFloat(p) : fillValue; + } + + @Override + public double fold(double identity, DoubleBinaryOperator op) { + double[] acc = {identity}; + long numPatches = patchValues.length(); + long absStart = offset; + long absEnd = offset + length; + int p = SparseArrays.findFirstAtOrAfter(patchIndices, numPatches, absStart); + long pos = absStart; + while (pos < absEnd && p < numPatches) { + long patchAbs = SparseArrays.readPatchIdx(patchIndices, p); + if (patchAbs >= absEnd) { + break; + } + for (long r = pos; r < patchAbs; r++) { + acc[0] = op.applyAsDouble(acc[0], fillValue); + } + acc[0] = op.applyAsDouble(acc[0], patchValues.getFloat(p)); + pos = patchAbs + 1; + p++; + } + for (long r = pos; r < absEnd; r++) { + acc[0] = op.applyAsDouble(acc[0], fillValue); + } + return acc[0]; + } +} diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazySparseIntArray.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazySparseIntArray.java new file mode 100644 index 00000000..97dcde3c --- /dev/null +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazySparseIntArray.java @@ -0,0 +1,57 @@ +package io.github.dfa1.vortex.reader.array; + +import io.github.dfa1.vortex.core.DType; + +import java.util.function.IntBinaryOperator; +import java.util.function.IntConsumer; + +/// Lazy Sparse-encoded {@link IntArray}. See {@link LazySparseLongArray} for semantics. +/// +/// @param dtype logical element type +/// @param length total logical row count +/// @param fillValue value at every unpatched position +/// @param patchValues values for patched positions +/// @param patchIndices sorted absolute positions of patches +/// @param offset starting absolute position +public record LazySparseIntArray( + DType dtype, long length, int fillValue, + IntArray patchValues, Array patchIndices, long offset) + implements IntArray { + + @Override + public int getInt(long i) { + int p = SparseArrays.findPatch(patchIndices, patchValues.length(), i + offset); + return p >= 0 ? patchValues.getInt(p) : fillValue; + } + + @Override + public void forEachInt(IntConsumer c) { + long numPatches = patchValues.length(); + long absStart = offset; + long absEnd = offset + length; + int p = SparseArrays.findFirstAtOrAfter(patchIndices, numPatches, absStart); + long pos = absStart; + while (pos < absEnd && p < numPatches) { + long patchAbs = SparseArrays.readPatchIdx(patchIndices, p); + if (patchAbs >= absEnd) { + break; + } + for (long r = pos; r < patchAbs; r++) { + c.accept(fillValue); + } + c.accept(patchValues.getInt(p)); + pos = patchAbs + 1; + p++; + } + for (long r = pos; r < absEnd; r++) { + c.accept(fillValue); + } + } + + @Override + public int fold(int identity, IntBinaryOperator op) { + int[] acc = {identity}; + forEachInt(v -> acc[0] = op.applyAsInt(acc[0], v)); + return acc[0]; + } +} diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazySparseLongArray.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazySparseLongArray.java new file mode 100644 index 00000000..84f35774 --- /dev/null +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazySparseLongArray.java @@ -0,0 +1,65 @@ +package io.github.dfa1.vortex.reader.array; + +import io.github.dfa1.vortex.core.DType; + +import java.util.function.LongBinaryOperator; +import java.util.function.LongConsumer; + +/// Lazy Sparse-encoded {@link LongArray}: {@code getLong(i) = patches[binSearch(i + offset)] +/// or fillValue}. The {@code patchIndices} array is typed as {@link Array} because the +/// indices ptype varies — backed by one of {@link ByteArray}, {@link ShortArray}, +/// {@link IntArray}, {@link LongArray}. +/// +/// {@code forEachLong} / {@code fold} walk the patches in order, emitting {@code fillValue} +/// for runs of unpatched positions — O(numPatches) binary-search-equivalent steps plus +/// {@code length} emissions, not O(length × log(numPatches)). +/// +/// @param dtype logical element type +/// @param length total logical row count +/// @param fillValue value at every unpatched position +/// @param patchValues values for patched positions; length = {@code numPatches} +/// @param patchIndices sorted absolute positions of patches; length = {@code numPatches} +/// @param offset starting absolute position; logical row {@code i} maps to +/// absolute {@code i + offset} +public record LazySparseLongArray( + DType dtype, long length, long fillValue, + LongArray patchValues, Array patchIndices, long offset) + implements LongArray { + + @Override + public long getLong(long i) { + int p = SparseArrays.findPatch(patchIndices, patchValues.length(), i + offset); + return p >= 0 ? patchValues.getLong(p) : fillValue; + } + + @Override + public void forEachLong(LongConsumer c) { + long numPatches = patchValues.length(); + long absStart = offset; + long absEnd = offset + length; + int p = SparseArrays.findFirstAtOrAfter(patchIndices, numPatches, absStart); + long pos = absStart; + while (pos < absEnd && p < numPatches) { + long patchAbs = SparseArrays.readPatchIdx(patchIndices, p); + if (patchAbs >= absEnd) { + break; + } + for (long r = pos; r < patchAbs; r++) { + c.accept(fillValue); + } + c.accept(patchValues.getLong(p)); + pos = patchAbs + 1; + p++; + } + for (long r = pos; r < absEnd; r++) { + c.accept(fillValue); + } + } + + @Override + public long fold(long identity, LongBinaryOperator op) { + long[] acc = {identity}; + forEachLong(v -> acc[0] = op.applyAsLong(acc[0], v)); + return acc[0]; + } +} diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazySparseShortArray.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazySparseShortArray.java new file mode 100644 index 00000000..882ef3ea --- /dev/null +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/LazySparseShortArray.java @@ -0,0 +1,58 @@ +package io.github.dfa1.vortex.reader.array; + +import io.github.dfa1.vortex.core.DType; + +import java.util.function.LongBinaryOperator; + +/// Lazy Sparse-encoded {@link ShortArray}. See {@link LazySparseLongArray} for semantics. +/// +/// @param dtype logical element type +/// @param length total logical row count +/// @param fillValue value at every unpatched position (raw signed short) +/// @param fillInt value at every unpatched position widened to int (unsigned-aware for U16) +/// @param patchValues values for patched positions +/// @param patchIndices sorted absolute positions of patches +/// @param offset starting absolute position +public record LazySparseShortArray( + DType dtype, long length, short fillValue, int fillInt, + ShortArray patchValues, Array patchIndices, long offset) + implements ShortArray { + + @Override + public short getShort(long i) { + int p = SparseArrays.findPatch(patchIndices, patchValues.length(), i + offset); + return p >= 0 ? patchValues.getShort(p) : fillValue; + } + + @Override + public int getInt(long i) { + int p = SparseArrays.findPatch(patchIndices, patchValues.length(), i + offset); + return p >= 0 ? patchValues.getInt(p) : fillInt; + } + + @Override + public long fold(long identity, LongBinaryOperator op) { + long[] acc = {identity}; + long numPatches = patchValues.length(); + long absStart = offset; + long absEnd = offset + length; + int p = SparseArrays.findFirstAtOrAfter(patchIndices, numPatches, absStart); + long pos = absStart; + while (pos < absEnd && p < numPatches) { + long patchAbs = SparseArrays.readPatchIdx(patchIndices, p); + if (patchAbs >= absEnd) { + break; + } + for (long r = pos; r < patchAbs; r++) { + acc[0] = op.applyAsLong(acc[0], fillInt); + } + acc[0] = op.applyAsLong(acc[0], patchValues.getInt(p)); + pos = patchAbs + 1; + p++; + } + for (long r = pos; r < absEnd; r++) { + acc[0] = op.applyAsLong(acc[0], fillInt); + } + return acc[0]; + } +} diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/array/SparseArrays.java b/reader/src/main/java/io/github/dfa1/vortex/reader/array/SparseArrays.java new file mode 100644 index 00000000..4e4896f3 --- /dev/null +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/array/SparseArrays.java @@ -0,0 +1,67 @@ +package io.github.dfa1.vortex.reader.array; + +import io.github.dfa1.vortex.core.VortexException; + +/// Package-private helpers shared by the {@code LazySparseXxxArray} records. +/// +/// Centralises: +/// - the patch-indices array-type switch in [#readPatchIdx(Array, long)] so all six +/// records agree on supported patch-index Array types (U8/U16/U32/U64 backed by +/// {@link ByteArray}/{@link ShortArray}/{@link IntArray}/{@link LongArray}); and +/// - the two binary-search variants used by scalar / sequential accessors: +/// [#findPatch(Array, long, long)] for exact hit-or-miss and +/// [#findFirstAtOrAfter(Array, long, long)] for the forEach run-walker. +final class SparseArrays { + + private SparseArrays() { + } + + /// Reads patch-index {@code k} from {@code idxArr} as an unsigned long. + static long readPatchIdx(Array idxArr, long k) { + return switch (idxArr) { + case ByteArray ba -> Byte.toUnsignedLong(ba.getByte(k)); + case ShortArray sa -> Short.toUnsignedLong(sa.getShort(k)); + case IntArray ia -> Integer.toUnsignedLong(ia.getInt(k)); + case LongArray la -> la.getLong(k); + default -> throw new VortexException( + "Sparse patch-indices: unsupported array type: " + idxArr.getClass().getSimpleName()); + }; + } + + /// Returns the index of the patch whose stored position equals {@code absPos}, or + /// {@code -1} if no such patch exists. Binary searches over {@code [0, numPatches)}. + static int findPatch(Array idxArr, long numPatches, long absPos) { + int lo = 0; + int hi = (int) numPatches - 1; + while (lo <= hi) { + int mid = (lo + hi) >>> 1; + long v = readPatchIdx(idxArr, mid); + if (v == absPos) { + return mid; + } + if (v < absPos) { + lo = mid + 1; + } else { + hi = mid - 1; + } + } + return -1; + } + + /// Returns the smallest index {@code k} such that {@code idxArr[k] >= absPos}, or + /// {@code (int) numPatches} when no such {@code k} exists. Used by the forEach + /// run-walker to locate the first patch that could fall inside the iteration range. + static int findFirstAtOrAfter(Array idxArr, long numPatches, long absPos) { + int lo = 0; + int hi = (int) numPatches; + while (lo < hi) { + int mid = (lo + hi) >>> 1; + if (readPatchIdx(idxArr, mid) < absPos) { + lo = mid + 1; + } else { + hi = mid; + } + } + return lo; + } +} diff --git a/reader/src/main/java/io/github/dfa1/vortex/reader/decode/SparseEncodingDecoder.java b/reader/src/main/java/io/github/dfa1/vortex/reader/decode/SparseEncodingDecoder.java index 9596b72f..01b32fed 100644 --- a/reader/src/main/java/io/github/dfa1/vortex/reader/decode/SparseEncodingDecoder.java +++ b/reader/src/main/java/io/github/dfa1/vortex/reader/decode/SparseEncodingDecoder.java @@ -10,13 +10,20 @@ import io.github.dfa1.vortex.proto.SparseMetadata; import io.github.dfa1.vortex.reader.array.Array; import io.github.dfa1.vortex.reader.array.BoolArray; +import io.github.dfa1.vortex.reader.array.ByteArray; +import io.github.dfa1.vortex.reader.array.DoubleArray; +import io.github.dfa1.vortex.reader.array.FloatArray; +import io.github.dfa1.vortex.reader.array.IntArray; +import io.github.dfa1.vortex.reader.array.LazySparseByteArray; +import io.github.dfa1.vortex.reader.array.LazySparseDoubleArray; +import io.github.dfa1.vortex.reader.array.LazySparseFloatArray; +import io.github.dfa1.vortex.reader.array.LazySparseIntArray; +import io.github.dfa1.vortex.reader.array.LazySparseLongArray; +import io.github.dfa1.vortex.reader.array.LazySparseShortArray; +import io.github.dfa1.vortex.reader.array.LongArray; +import io.github.dfa1.vortex.reader.array.MaskedArray; import io.github.dfa1.vortex.reader.array.MaterializedBoolArray; -import io.github.dfa1.vortex.reader.array.MaterializedByteArray; -import io.github.dfa1.vortex.reader.array.MaterializedDoubleArray; -import io.github.dfa1.vortex.reader.array.MaterializedFloatArray; -import io.github.dfa1.vortex.reader.array.MaterializedIntArray; -import io.github.dfa1.vortex.reader.array.MaterializedLongArray; -import io.github.dfa1.vortex.reader.array.MaterializedShortArray; +import io.github.dfa1.vortex.reader.array.ShortArray; import io.github.dfa1.vortex.reader.array.VarBinArray; import java.io.IOException; @@ -83,26 +90,36 @@ public Array decode(DecodeContext ctx) { } catch (IOException e) { throw new VortexException(EncodingId.VORTEX_SPARSE, "invalid fill value", e); } + long fillBits = scalarToLong(fillScalar); - int elemBytes = valuePtype.byteSize(); - MemorySegment out = ctx.arena().allocate(n * elemBytes); - fillSegment(out, n, valuePtype, fillScalar); - - if (numPatches > 0) { - DType indicesDtype = new DType.Primitive(indicesPtype, false); - applyPatches(out, n, valuePtype, - ctx.decodeChildSegment(0, indicesDtype, numPatches), - ctx.decodeChildSegment(1, ctx.dtype(), numPatches), - indicesPtype, numPatches, offset); - } + // Lazy path: keep fill bits + decoded patches; no n-sized buffer allocated. + DType indicesDtype = new DType.Primitive(indicesPtype, false); + Array patchIndices = numPatches > 0 + ? ctx.decodeChild(0, indicesDtype, numPatches) + : null; + Array patchValues = numPatches > 0 + ? ctx.decodeChild(1, ctx.dtype(), numPatches) + : null; + Array idxData = patchIndices instanceof MaskedArray m ? m.inner() : patchIndices; + Array valData = patchValues instanceof MaskedArray m ? m.inner() : patchValues; return switch (valuePtype) { - case I64, U64 -> new MaterializedLongArray(ctx.dtype(), n, out); - case I32, U32 -> new MaterializedIntArray(ctx.dtype(), n, out); - case F64 -> new MaterializedDoubleArray(ctx.dtype(), n, out); - case F32 -> new MaterializedFloatArray(ctx.dtype(), n, out); - case I16, U16 -> new MaterializedShortArray(ctx.dtype(), n, out); - case I8, U8 -> new MaterializedByteArray(ctx.dtype(), n, out); + case I64, U64 -> new LazySparseLongArray(ctx.dtype(), n, fillBits, + (LongArray) valData, idxData, offset); + case I32, U32 -> new LazySparseIntArray(ctx.dtype(), n, (int) fillBits, + (IntArray) valData, idxData, offset); + case F64 -> new LazySparseDoubleArray(ctx.dtype(), n, Double.longBitsToDouble(fillBits), + (DoubleArray) valData, idxData, offset); + case F32 -> new LazySparseFloatArray(ctx.dtype(), n, Float.intBitsToFloat((int) fillBits), + (FloatArray) valData, idxData, offset); + case I16 -> new LazySparseShortArray(ctx.dtype(), n, (short) fillBits, (short) fillBits, + (ShortArray) valData, idxData, offset); + case U16 -> new LazySparseShortArray(ctx.dtype(), n, (short) fillBits, (int) (fillBits & 0xFFFFL), + (ShortArray) valData, idxData, offset); + case I8 -> new LazySparseByteArray(ctx.dtype(), n, (byte) fillBits, (byte) fillBits, + (ByteArray) valData, idxData, offset); + case U8 -> new LazySparseByteArray(ctx.dtype(), n, (byte) fillBits, (int) (fillBits & 0xFFL), + (ByteArray) valData, idxData, offset); default -> throw new VortexException(EncodingId.VORTEX_SPARSE, "unsupported ptype " + valuePtype); }; } @@ -183,34 +200,6 @@ private static long readVarBinOffset(MemorySegment seg, long i, PType ptype) { }; } - private static void fillSegment(MemorySegment out, long n, PType ptype, ScalarValue scalar) { - long fillLong = scalarToLong(scalar); - ByteBuffer bb = out.asByteBuffer().order(ByteOrder.LITTLE_ENDIAN); - for (long i = 0; i < n; i++) { - writeElem(bb, ptype, fillLong); - } - } - - private static void applyPatches( - MemorySegment out, long n, PType valuePtype, - MemorySegment idxSeg, MemorySegment valSeg, - PType idxPtype, long numPatches, long offset - ) { - int elemBytes = valuePtype.byteSize(); - int idxBytes = idxPtype.byteSize(); - ByteBuffer outBuf = out.asByteBuffer().order(ByteOrder.LITTLE_ENDIAN); - for (long i = 0; i < numPatches; i++) { - long idx = readUnsignedIdx(idxSeg, SegmentBroadcast.elementOffset(idxSeg, i, idxBytes), idxPtype) - offset; - if (idx < 0 || idx >= n) { - throw new VortexException(EncodingId.VORTEX_SPARSE, - "patch index " + idx + " out of range [0," + n + ")"); - } - long val = readElem(valSeg, SegmentBroadcast.elementOffset(valSeg, i, elemBytes), valuePtype); - outBuf.position((int) (idx * elemBytes)); - writeElem(outBuf, valuePtype, val); - } - } - private static long readUnsignedIdx(MemorySegment seg, long off, PType ptype) { return switch (ptype) { case U8 -> Byte.toUnsignedLong(seg.get(ValueLayout.JAVA_BYTE, off)); @@ -221,26 +210,6 @@ private static long readUnsignedIdx(MemorySegment seg, long off, PType ptype) { }; } - private static long readElem(MemorySegment seg, long off, PType ptype) { - return switch (ptype) { - case I8, U8 -> Byte.toUnsignedLong(seg.get(ValueLayout.JAVA_BYTE, off)); - case I16, U16 -> Short.toUnsignedLong(seg.get(PTypeIO.LE_SHORT, off)); - case I32, U32 -> Integer.toUnsignedLong(seg.get(PTypeIO.LE_INT, off)); - case I64, U64, F32, F64 -> seg.get(PTypeIO.LE_LONG, off); - default -> throw new UnsupportedOperationException("vortex.sparse: unsupported ptype " + ptype); - }; - } - - private static void writeElem(ByteBuffer bb, PType ptype, long bits) { - switch (ptype) { - case I8, U8 -> bb.put((byte) bits); - case I16, U16 -> bb.putShort((short) bits); - case I32, U32 -> bb.putInt((int) bits); - case I64, U64, F32, F64 -> bb.putLong(bits); - default -> throw new UnsupportedOperationException("vortex.sparse: unsupported ptype " + ptype); - } - } - private static long scalarToLong(ScalarValue scalar) { if (scalar.int64_value() != null) { return scalar.int64_value(); diff --git a/reader/src/test/java/io/github/dfa1/vortex/reader/array/LazySparseArrayTest.java b/reader/src/test/java/io/github/dfa1/vortex/reader/array/LazySparseArrayTest.java new file mode 100644 index 00000000..c9962ab2 --- /dev/null +++ b/reader/src/test/java/io/github/dfa1/vortex/reader/array/LazySparseArrayTest.java @@ -0,0 +1,160 @@ +package io.github.dfa1.vortex.reader.array; + +import io.github.dfa1.vortex.core.DType; +import io.github.dfa1.vortex.core.PType; +import org.junit.jupiter.api.Nested; +import org.junit.jupiter.api.Test; + +import java.lang.foreign.Arena; +import java.lang.foreign.MemorySegment; +import java.lang.foreign.ValueLayout; +import java.util.ArrayList; + +import static org.assertj.core.api.Assertions.assertThat; + +/// Unit tests for the lazy Sparse records. Covers fill vs patch dispatch, ordered +/// forEach iteration, fold reduction, and offset slicing semantics. +class LazySparseArrayTest { + + private static final DType I64 = new DType.Primitive(PType.I64, false); + private static final DType I32 = new DType.Primitive(PType.I32, false); + private static final DType F64 = new DType.Primitive(PType.F64, false); + + @Nested + class Long { + + @Test + void unpatchedPositionsReturnFill() { + try (Arena arena = Arena.ofConfined()) { + // length=5, fill=99, patches at index 1 → 7, index 3 → 11 + LongArray values = longArray(arena, 7L, 11L); + Array indices = intArray(arena, 1, 3); + var sut = new LazySparseLongArray(I64, 5, 99L, values, indices, 0L); + + assertThat(sut.getLong(0)).isEqualTo(99L); + assertThat(sut.getLong(1)).isEqualTo(7L); + assertThat(sut.getLong(2)).isEqualTo(99L); + assertThat(sut.getLong(3)).isEqualTo(11L); + assertThat(sut.getLong(4)).isEqualTo(99L); + } + } + + @Test + void forEachEmitsInOrder() { + try (Arena arena = Arena.ofConfined()) { + LongArray values = longArray(arena, 7L, 11L); + Array indices = intArray(arena, 1, 3); + var sut = new LazySparseLongArray(I64, 5, 99L, values, indices, 0L); + + var seen = new ArrayList(); + sut.forEachLong(seen::add); + + assertThat(seen).containsExactly(99L, 7L, 99L, 11L, 99L); + } + } + + @Test + void foldSumsFillAndPatches() { + try (Arena arena = Arena.ofConfined()) { + LongArray values = longArray(arena, 7L, 11L); + Array indices = intArray(arena, 1, 3); + var sut = new LazySparseLongArray(I64, 5, 99L, values, indices, 0L); + + long sum = sut.fold(0L, java.lang.Long::sum); + + // 99 + 7 + 99 + 11 + 99 = 315 + assertThat(sum).isEqualTo(315L); + } + } + + @Test + void offsetSkipsLeadingPatches() { + try (Arena arena = Arena.ofConfined()) { + // length=3 covering abs [4..7), fill=1, patches at abs 4 and 6 + LongArray values = longArray(arena, 10L, 11L, 12L); + Array indices = intArray(arena, 1, 4, 6); + var sut = new LazySparseLongArray(I64, 3, 1L, values, indices, 4L); + + assertThat(sut.getLong(0)).isEqualTo(11L); + assertThat(sut.getLong(1)).isEqualTo(1L); + assertThat(sut.getLong(2)).isEqualTo(12L); + } + } + + @Test + void noPatchesIsAllFill() { + try (Arena arena = Arena.ofConfined()) { + LongArray values = longArray(arena); + Array indices = intArray(arena); + var sut = new LazySparseLongArray(I64, 3, 42L, values, indices, 0L); + + var seen = new ArrayList(); + sut.forEachLong(seen::add); + + assertThat(seen).containsExactly(42L, 42L, 42L); + } + } + } + + @Nested + class IntAndDouble { + + @Test + void intPatchDispatches() { + try (Arena arena = Arena.ofConfined()) { + IntArray values = intArray(arena, 100, 200); + Array indices = intArray(arena, 0, 2); + var sut = new LazySparseIntArray(I32, 3, 5, values, indices, 0L); + + assertThat(sut.getInt(0)).isEqualTo(100); + assertThat(sut.getInt(1)).isEqualTo(5); + assertThat(sut.getInt(2)).isEqualTo(200); + } + } + + @Test + void doublePatchDispatches() { + try (Arena arena = Arena.ofConfined()) { + DoubleArray values = doubleArray(arena, 1.5, 2.5); + Array indices = intArray(arena, 0, 2); + var sut = new LazySparseDoubleArray(F64, 3, 0.0, values, indices, 0L); + + assertThat(sut.getDouble(0)).isEqualTo(1.5); + assertThat(sut.getDouble(1)).isEqualTo(0.0); + assertThat(sut.getDouble(2)).isEqualTo(2.5); + } + } + } + + private static LongArray longArray(Arena arena, long... vs) { + if (vs.length == 0) { + return new MaterializedLongArray(I64, 0, + arena.allocate(1L, 8).asReadOnly().asSlice(0, 0)); + } + MemorySegment seg = arena.allocate(vs.length * 8L, 8); + for (int i = 0; i < vs.length; i++) { + seg.setAtIndex(ValueLayout.JAVA_LONG, i, vs[i]); + } + return new MaterializedLongArray(I64, vs.length, seg.asReadOnly()); + } + + private static IntArray intArray(Arena arena, int... vs) { + if (vs.length == 0) { + return new MaterializedIntArray(I32, 0, + arena.allocate(1L, 4).asReadOnly().asSlice(0, 0)); + } + MemorySegment seg = arena.allocate(vs.length * 4L, 4); + for (int i = 0; i < vs.length; i++) { + seg.setAtIndex(ValueLayout.JAVA_INT, i, vs[i]); + } + return new MaterializedIntArray(I32, vs.length, seg.asReadOnly()); + } + + private static DoubleArray doubleArray(Arena arena, double... vs) { + MemorySegment seg = arena.allocate(vs.length * 8L, 8); + for (int i = 0; i < vs.length; i++) { + seg.setAtIndex(ValueLayout.JAVA_DOUBLE, i, vs[i]); + } + return new MaterializedDoubleArray(F64, vs.length, seg.asReadOnly()); + } +}