Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,19 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

## [0.8.0] — 2026-06-18

Variant encode/decode and the Materialized-fallback sweep.

### Added

- Writer: `vortex.variant` encoder. Encodes a variant column as the canonical `vortex.variant` container over `core_storage` — an all-equal column becomes a single `vortex.constant`, a row-varying column a `vortex.chunked` of per-run constants — with an optional row-aligned typed `shredded` child recorded in `VariantMetadata.shredded_dtype`. Input is `VariantData(List<Scalar>)` with `.constant(n, v)` / `.shredded(...)` factories. Java↔Rust (JNI) round-trip verified for constant, row-varying, and shredded columns. Scalar values only — arbitrary nested objects need `vortex.parquet.variant` (deferred, [ADR 0014](docs/adr/0014-variant-encoding-strategy.md)).
- Reader: variant columns now decode Java-side. `ConstantEncodingDecoder` and `ChunkedEncodingDecoder` handle `DType.Variant` (materialising the inner-typed array); `VariantEncodingDecoder` wraps the result as `VariantArray`, exposing `coreStorage()` and `shredded()`.

### Changed

- Decode shape: transform encodings now decode **lazy-only**. The eager `Materialized*Array` fallbacks were removed from `vortex.zigzag` (all PTypes + broadcast), `fastlanes.for` (all integer PTypes), `vortex.alp` (broadcast-without-patches), `vortex.constant` (Decimal → `LazyConstantDecimalArray`), `vortex.runend` (Bool → `LazyRunEndBoolArray`), `vortex.sparse` (Bool → `LazySparseBoolArray`), and `fastlanes.rle` (validity → `OffsetBoolArray`, empty → `LazyConstantXxxArray`). Decompression encodings (`bitpacked`, `pco`, `zstd`, `fsst`, `delta`, `patched`), the primitive base, the `vortex.dict` encoding-level path, and the `vortex.alp` patches path stay Materialized by design. See [ADR 0015](docs/adr/0015-drop-materialized-fallbacks.md).

## [0.7.3] — 2026-06-17

Parquet ZSTD support, `vortex.patched` encoder, constant-encoding selection fix, Windows TUI raw-mode fix.
Expand Down
106 changes: 106 additions & 0 deletions cli/src/main/java/io/github/dfa1/vortex/cli/tui/GridRender.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
package io.github.dfa1.vortex.cli.tui;

import io.github.dfa1.vortex.core.DType;
import io.github.dfa1.vortex.extension.ExtensionId;
import io.github.dfa1.vortex.reader.array.Array;
import io.github.dfa1.vortex.reader.array.BoolArray;
import io.github.dfa1.vortex.reader.array.ByteArray;
import io.github.dfa1.vortex.reader.array.DecimalArray;
import io.github.dfa1.vortex.reader.array.DoubleArray;
import io.github.dfa1.vortex.reader.array.FloatArray;
import io.github.dfa1.vortex.reader.array.IntArray;
import io.github.dfa1.vortex.reader.array.LongArray;
import io.github.dfa1.vortex.reader.array.MaskedArray;
import io.github.dfa1.vortex.reader.array.ShortArray;
import io.github.dfa1.vortex.reader.array.VarBinArray;
import io.github.dfa1.vortex.reader.extension.DateExtensionDecoder;
import io.github.dfa1.vortex.reader.extension.TimeExtensionDecoder;
import io.github.dfa1.vortex.reader.extension.TimestampExtensionDecoder;
import io.github.dfa1.vortex.reader.extension.UuidExtensionDecoder;

import java.util.Optional;

/// Pure cell-formatting helpers for the grid viewer. Stateless: turns one decoded
/// array element into the display string shown in a grid cell. Extracted from
/// [LazyGridSource] so the per-type and per-extension branches can be unit-tested
/// against in-memory arrays without a terminal or an encoded fixture.
final class GridRender {

    /// Upper bound on the number of leading bytes rendered for a binary cell.
    private static final int MAX_HEX_BYTES = 16;

    private GridRender() {
    }

    /// Formats the element at logical index `i` of `array` for display.
    ///
    /// Returns the empty string for a `null` array, for out-of-range indices
    /// (negative, or at/after the array length) and for masked-out (null) cells.
    /// Extension types (date/time/timestamp/uuid) are rendered via their
    /// decoders. Any `RuntimeException` raised while checking validity or
    /// decoding the value is converted into an angle-bracketed diagnostic
    /// instead of propagating to the caller.
    ///
    /// @param array the column array (may be a [MaskedArray])
    /// @param i logical element index
    /// @param declared declared logical type (drives extension rendering)
    /// @return formatted cell text
    static String formatCell(Array array, long i, DType declared) {
        if (array == null || i < 0 || i >= array.length()) {
            return "";
        }
        try {
            Array inner = array;
            if (array instanceof MaskedArray masked) {
                // The validity lookup sits inside the try so that a failing
                // mask renders as a diagnostic like any other decode failure.
                if (!masked.isValid(i)) {
                    return "";
                }
                inner = masked.inner();
            }
            if (i >= inner.length()) {
                return "";
            }
            if (declared instanceof DType.Extension ext) {
                Optional<String> extFormatted = formatExtension(ext, inner, i);
                if (extFormatted.isPresent()) {
                    return extFormatted.get();
                }
            }
            return formatStorage(inner, i);
        } catch (RuntimeException e) {
            return describeFailure(e);
        }
    }

    /// Renders element `i` of an already-unwrapped storage array by its concrete
    /// array type. Array types without a dedicated branch are shown as their
    /// simple class name in angle brackets.
    ///
    /// @param inner storage array (mask already removed)
    /// @param i element index
    /// @return formatted value
    private static String formatStorage(Array inner, long i) {
        return switch (inner) {
            case LongArray a -> Long.toString(a.getLong(i));
            case IntArray a -> Integer.toString(a.getInt(i));
            case ShortArray a -> Short.toString(a.getShort(i));
            case ByteArray a -> Byte.toString(a.getByte(i));
            case DoubleArray a -> Double.toString(a.getDouble(i));
            case FloatArray a -> Float.toString(a.getFloat(i));
            case BoolArray a -> Boolean.toString(a.getBoolean(i));
            case VarBinArray a -> a.dtype() instanceof DType.Utf8
                    ? a.getString(i)
                    : bytesToHex(a.getBytes(i));
            case DecimalArray a -> a.getDecimal(i).toPlainString();
            default -> "<" + inner.getClass().getSimpleName() + ">";
        };
    }

    /// Builds the `<ExceptionType: first line of message>` diagnostic shown in
    /// place of a value that could not be rendered. Only the first line of the
    /// message is kept (cut at the first `\n` or `\r`) so the text stays on one
    /// terminal row; a missing or empty first line yields just `<ExceptionType>`.
    ///
    /// @param e the failure raised while rendering
    /// @return single-line diagnostic text
    private static String describeFailure(RuntimeException e) {
        String type = e.getClass().getSimpleName();
        String msg = e.getMessage();
        if (msg == null) {
            return "<" + type + ">";
        }
        int cut = msg.length();
        for (int k = 0; k < msg.length(); k++) {
            char c = msg.charAt(k);
            if (c == '\n' || c == '\r') {
                cut = k;
                break;
            }
        }
        String firstLine = msg.substring(0, cut);
        return firstLine.isEmpty()
                ? "<" + type + ">"
                : "<" + type + ": " + firstLine + ">";
    }

    /// Renders an extension-typed element through the decoder matching its
    /// extension id.
    ///
    /// @param ext declared extension type
    /// @param storage storage array backing the extension column
    /// @param i element index
    /// @return the decoded value's string form, or empty when
    ///         `ExtensionId.parse` does not recognise the extension id
    private static Optional<String> formatExtension(DType.Extension ext, Array storage, long i) {
        Optional<ExtensionId> idOpt = ExtensionId.parse(ext.extensionId());
        if (idOpt.isEmpty()) {
            return Optional.empty();
        }
        return Optional.of(switch (idOpt.get()) {
            case VORTEX_DATE -> DateExtensionDecoder.INSTANCE.decode(storage, i).toString();
            case VORTEX_TIME -> TimeExtensionDecoder.INSTANCE.decode(ext, storage, i).toString();
            case VORTEX_TIMESTAMP -> TimestampExtensionDecoder.INSTANCE.instant(ext, storage, i).toString();
            case VORTEX_UUID -> UuidExtensionDecoder.INSTANCE.decode(storage, i).toString();
        });
    }

    /// Renders up to the first [#MAX_HEX_BYTES] bytes as lowercase hex prefixed
    /// with `0x`, appending `...` when the input is longer than that.
    ///
    /// @param bytes raw cell bytes
    /// @return hex preview such as `0x00ff` or `0x0011...`
    private static String bytesToHex(byte[] bytes) {
        int shown = Math.min(bytes.length, MAX_HEX_BYTES);
        boolean truncated = bytes.length > shown;
        StringBuilder sb = new StringBuilder(2 + shown * 2 + (truncated ? 3 : 0));
        sb.append("0x");
        for (int k = 0; k < shown; k++) {
            int b = bytes[k] & 0xff;
            // Two nibble lookups instead of a String.format call per byte.
            sb.append(Character.forDigit(b >>> 4, 16));
            sb.append(Character.forDigit(b & 0x0f, 16));
        }
        if (truncated) {
            sb.append("...");
        }
        return sb.toString();
    }
}
199 changes: 199 additions & 0 deletions cli/src/main/java/io/github/dfa1/vortex/cli/tui/InspectorRender.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,199 @@
package io.github.dfa1.vortex.cli.tui;

import io.github.dfa1.vortex.core.DType;
import io.github.dfa1.vortex.extension.ExtensionId;
import io.github.dfa1.vortex.reader.array.Array;
import io.github.dfa1.vortex.reader.array.BoolArray;
import io.github.dfa1.vortex.reader.array.ByteArray;
import io.github.dfa1.vortex.reader.array.DecimalArray;
import io.github.dfa1.vortex.reader.array.DoubleArray;
import io.github.dfa1.vortex.reader.array.FloatArray;
import io.github.dfa1.vortex.reader.array.GenericArray;
import io.github.dfa1.vortex.reader.array.IntArray;
import io.github.dfa1.vortex.reader.array.LongArray;
import io.github.dfa1.vortex.reader.array.MaskedArray;
import io.github.dfa1.vortex.reader.array.ShortArray;
import io.github.dfa1.vortex.reader.array.StructArray;
import io.github.dfa1.vortex.reader.array.VarBinArray;
import io.github.dfa1.vortex.reader.extension.DateExtensionDecoder;

import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.function.LongFunction;

/// Pure rendering helpers for [VortexInspectorTui]. Stateless string formatters
/// extracted from the event loop so they can be unit-tested against in-memory
/// arrays without a terminal, an [IoWorker], or an encoded fixture file.
final class InspectorRender {

    /// Upper bound on the number of leading bytes rendered for a binary value.
    private static final int MAX_HEX_BYTES = 16;

    /// Number of bytes shown on one hex-dump row.
    private static final int HEX_ROW_WIDTH = 16;

    private InspectorRender() {
    }

    /// Formats one array element for the detail/data preview.
    ///
    /// A declared date extension is rendered through [DateExtensionDecoder];
    /// if that decoder throws, rendering falls back to the generic per-array-type
    /// formatting. UTF-8 strings are wrapped in double quotes and other binary
    /// values are shown as a short hex preview.
    ///
    /// @param array array holding the value
    /// @param i element index
    /// @param declared declared logical type (drives extension rendering)
    /// @return formatted value, or an angle-bracketed fallback for unknown shapes
    static String formatValue(Array array, int i, DType declared) {
        if (declared instanceof DType.Extension ext
                && ExtensionId.parse(ext.extensionId())
                        .filter(id -> id == ExtensionId.VORTEX_DATE)
                        .isPresent()) {
            try {
                return DateExtensionDecoder.INSTANCE.decode(array, i).toString();
            } catch (RuntimeException ignored) {
                // Deliberate best-effort: on a storage-shape mismatch fall
                // through to the generic rendering below.
            }
        }
        return switch (array) {
            case LongArray a -> Long.toString(a.getLong(i));
            case IntArray a -> Integer.toString(a.getInt(i));
            case ShortArray a -> Short.toString(a.getShort(i));
            case ByteArray a -> Byte.toString(a.getByte(i));
            case DoubleArray a -> Double.toString(a.getDouble(i));
            case FloatArray a -> Float.toString(a.getFloat(i));
            case BoolArray a -> Boolean.toString(a.getBoolean(i));
            case VarBinArray a -> a.dtype() instanceof DType.Utf8
                    ? "\"" + a.getString(i) + "\""
                    : bytesToShortHex(a.getBytes(i));
            case GenericArray a when a.dtype() instanceof DType.Decimal ->
                    tryDecimal(a::getDecimal, a, i);
            case DecimalArray a -> tryDecimal(a::getDecimal, a, i);
            default -> "<" + array.getClass().getSimpleName() + " " + array.dtype() + ">";
        };
    }

    /// Formats decoded zone-map statistics into one display row per struct element.
    ///
    /// Field names and declared types are taken positionally from `statsDtype`
    /// for each of the struct array's fields.
    ///
    /// @param arr stats array (possibly wrapped in a [MaskedArray])
    /// @param statsDtype struct schema describing the stats fields
    /// @return one `"field=value, ..."` string per stats row
    /// @throws IllegalStateException if `arr` is `null` or is not a struct array
    /// @throws ArithmeticException if the row count does not fit in an `int`
    static List<String> formatStatsArray(Array arr, DType.Struct statsDtype) {
        Array unwrapped = arr instanceof MaskedArray m ? m.inner() : arr;
        if (!(unwrapped instanceof StructArray sa)) {
            throw new IllegalStateException(
                    "stats array is not a struct: "
                            + (arr == null ? "null" : arr.getClass().getSimpleName()));
        }
        // Fail loudly rather than silently truncating an oversized length.
        int rowCount = Math.toIntExact(sa.length());
        List<String> rows = new ArrayList<>(rowCount);
        if (rowCount == 0) {
            return rows;
        }

        // Per-field lookups do not depend on the row, so resolve them once.
        int fieldCount = sa.fieldCount();
        String[] names = new String[fieldCount];
        DType[] types = new DType[fieldCount];
        Array[] fields = new Array[fieldCount];
        for (int f = 0; f < fieldCount; f++) {
            names[f] = statsDtype.fieldNames().get(f);
            types[f] = statsDtype.fieldTypes().get(f);
            fields[f] = sa.field(f);
        }

        for (int row = 0; row < rowCount; row++) {
            StringBuilder sb = new StringBuilder();
            for (int f = 0; f < fieldCount; f++) {
                if (f > 0) {
                    sb.append(", ");
                }
                sb.append(names[f]).append('=').append(formatStatsCell(fields[f], row, types[f]));
            }
            rows.add(sb.toString());
        }
        return rows;
    }

    /// Formats one statistics cell, rendering a masked-out entry as `null` and
    /// otherwise delegating to [#formatValue] on the unwrapped array.
    ///
    /// @param field field array (possibly a [MaskedArray])
    /// @param row row index
    /// @param declared declared type of the field
    /// @return formatted cell text
    private static String formatStatsCell(Array field, int row, DType declared) {
        if (field instanceof MaskedArray m) {
            if (!m.isValid(row)) {
                return "null";
            }
            return formatValue(m.inner(), row, declared);
        }
        return formatValue(field, row, declared);
    }

    /// Reads a decimal through `reader` and renders it in plain notation.
    ///
    /// A `RuntimeException` whose message contains `"null cell"` is rendered as
    /// `null`; any other `RuntimeException` yields the angle-bracketed
    /// class-and-dtype fallback.
    ///
    /// @param reader accessor returning the decimal at an index
    /// @param a array being read (used only for the fallback text)
    /// @param i element index
    /// @return plain decimal string, `null`, or the fallback
    private static String tryDecimal(LongFunction<BigDecimal> reader, Array a, int i) {
        try {
            return reader.apply(i).toPlainString();
        } catch (RuntimeException e) {
            // NOTE(review): null detection relies on the wording of the
            // exception message; confirm the decoders keep using "null cell".
            String msg = e.getMessage();
            if (msg != null && msg.contains("null cell")) {
                return "null";
            }
            return "<" + a.getClass().getSimpleName() + " " + a.dtype() + ">";
        }
    }

    /// Renders up to the first [#MAX_HEX_BYTES] bytes as lowercase hex prefixed
    /// with `0x`, appending `...` when the input is longer than that.
    ///
    /// @param bytes raw value bytes
    /// @return hex preview such as `0x00ff` or `0x0011...`
    private static String bytesToShortHex(byte[] bytes) {
        int shown = Math.min(bytes.length, MAX_HEX_BYTES);
        boolean truncated = bytes.length > shown;
        StringBuilder sb = new StringBuilder(2 + shown * 2 + (truncated ? 3 : 0));
        sb.append("0x");
        for (int k = 0; k < shown; k++) {
            appendHexByte(sb, bytes[k]);
        }
        if (truncated) {
            sb.append("...");
        }
        return sb.toString();
    }

    /// Appends the two lowercase hex digits of `value` (treated as unsigned).
    private static void appendHexByte(StringBuilder sb, byte value) {
        int b = value & 0xff;
        sb.append(Character.forDigit(b >>> 4, 16));
        sb.append(Character.forDigit(b & 0x0f, 16));
    }

    /// Formats one 16-byte row of a hex dump: offset, hex columns, ASCII gutter.
    ///
    /// Positions outside `data` (past the end, or before the start for a
    /// negative `offset`) are rendered as blanks of the same width as a real
    /// byte, so the ASCII gutter lines up on partial rows.
    ///
    /// @param data the bytes being dumped
    /// @param offset start offset of this row within `data`
    /// @return the formatted `"%08x .. .. | .... |"` line
    static String formatHexRow(byte[] data, int offset) {
        StringBuilder sb = new StringBuilder(80);
        sb.append(String.format("%08x ", offset));
        for (int col = 0; col < HEX_ROW_WIDTH; col++) {
            int idx = offset + col;
            if (idx >= 0 && idx < data.length) {
                appendHexByte(sb, data[idx]);
                sb.append(' ');
            } else {
                // Same width as "xx " so later columns stay aligned.
                sb.append("   ");
            }
            if (col == 7) {
                // Extra gap between the two groups of eight bytes.
                sb.append(' ');
            }
        }
        sb.append(" |");
        for (int col = 0; col < HEX_ROW_WIDTH; col++) {
            int idx = offset + col;
            if (idx < 0 || idx >= data.length) {
                sb.append(' ');
                continue;
            }
            int b = data[idx] & 0xff;
            // Printable ASCII is shown verbatim; everything else as a dot.
            sb.append(b >= 0x20 && b < 0x7f ? (char) b : '.');
        }
        sb.append('|');
        return sb.toString();
    }

    /// Formats a byte count as B / KB / MB.
    ///
    /// Uses [Locale#ROOT] so the decimal separator is always `.` regardless of
    /// the JVM's default locale.
    ///
    /// @param bytes raw byte count
    /// @return human-readable size string
    static String formatBytes(long bytes) {
        if (bytes < 1024) {
            return bytes + " B";
        }
        if (bytes < 1024 * 1024) {
            return String.format(Locale.ROOT, "%.1f KB", bytes / 1024.0);
        }
        return String.format(Locale.ROOT, "%.1f MB", bytes / (1024.0 * 1024.0));
    }

    /// Pads or truncates `s` to exactly `width` characters.
    ///
    /// A negative `width` is treated as zero and yields the empty string.
    ///
    /// @param s source string
    /// @param width target width
    /// @return a string of length `max(0, width)`
    static String pad(String s, int width) {
        int target = Math.max(0, width);
        if (s.length() >= target) {
            return s.substring(0, target);
        }
        return s + " ".repeat(target - s.length());
    }

    /// Truncates `s` to at most `width` characters.
    ///
    /// A negative `width` is treated as zero and yields the empty string.
    ///
    /// @param s source string
    /// @param width maximum width
    /// @return `s` unchanged if short enough, otherwise its first `width` characters
    static String truncate(String s, int width) {
        int limit = Math.max(0, width);
        return s.length() > limit ? s.substring(0, limit) : s;
    }
}
Loading