From 3f14cbe3f1ccbb0cbb91794aac4d9ae7cda833d6 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Wed, 20 May 2026 00:01:28 -0700 Subject: [PATCH 01/14] feat(spark): add BatchedOrphanFilesDeletionSparkApp MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Multi-table OFD app that amortizes Spark-session startup across many tables in a single job, using a driver-side thread pool so each table's deletion runs concurrently. When --resultsEndpoint and --operationIds are supplied, each table's outcome (SUCCESS/FAILED with file count + bytes deleted, or errorMessage+errorType) is POSTed to the optimizer's complete-operation endpoint as it completes — letting the optimizer track per-table status independently of the overall job. Operations.java grows a deleteOrphanFilesWithMetrics() overload that delegates from the existing deleteOrphanFiles() and additionally tracks bytes deleted via an AtomicLong inside the deleteWith callback. The existing single-table OrphanFilesDeletionSparkApp is unchanged. Co-Authored-By: Claude Opus 4.7 --- .../BatchedOrphanFilesDeletionSparkApp.java | 377 ++++++++++++++++++ .../openhouse/jobs/spark/Operations.java | 56 ++- ...atchedOrphanFilesDeletionSparkAppTest.java | 338 ++++++++++++++++ 3 files changed, 770 insertions(+), 1 deletion(-) create mode 100644 apps/spark/src/main/java/com/linkedin/openhouse/jobs/spark/BatchedOrphanFilesDeletionSparkApp.java create mode 100644 apps/spark/src/test/java/com/linkedin/openhouse/jobs/spark/BatchedOrphanFilesDeletionSparkAppTest.java diff --git a/apps/spark/src/main/java/com/linkedin/openhouse/jobs/spark/BatchedOrphanFilesDeletionSparkApp.java b/apps/spark/src/main/java/com/linkedin/openhouse/jobs/spark/BatchedOrphanFilesDeletionSparkApp.java new file mode 100644 index 000000000..0908e7afa --- /dev/null +++ b/apps/spark/src/main/java/com/linkedin/openhouse/jobs/spark/BatchedOrphanFilesDeletionSparkApp.java @@ -0,0 +1,377 @@ +package com.linkedin.openhouse.jobs.spark; + +import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.collect.Lists; +import com.linkedin.openhouse.common.metrics.DefaultOtelConfig; +import com.linkedin.openhouse.common.metrics.OtelEmitter; +import com.linkedin.openhouse.jobs.spark.state.StateManager; +import com.linkedin.openhouse.jobs.util.AppConstants; +import com.linkedin.openhouse.jobs.util.AppsOtelEmitter; +import io.opentelemetry.api.common.AttributeKey; +import io.opentelemetry.api.common.Attributes; +import java.io.Serializable; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; +import lombok.Builder; +import lombok.Value; +import lombok.extern.slf4j.Slf4j; +import okhttp3.MediaType; +import okhttp3.OkHttpClient; +import okhttp3.Request; +import okhttp3.RequestBody; +import okhttp3.Response; +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.Option; +import org.apache.commons.lang3.math.NumberUtils; +import org.apache.iceberg.Table; + +/** + * Runs orphan file deletion for a batch of tables in one Spark session. + * + *

Spinning up a Spark session per table is expensive. This app amortizes that cost by processing + * many tables in a single job, using a driver-side thread pool so each table's deletion runs + * concurrently without competing for executors. + * + *

When {@code --resultsEndpoint} is supplied, each table's outcome is POSTed to the optimizer + * service's complete-operation endpoint as it completes, letting the service track per-table status + * independently of the overall job. + */ +@Slf4j +public class BatchedOrphanFilesDeletionSparkApp extends BaseSparkApp { + private static final int DEFAULT_MAX_ORPHAN_FILE_SAMPLE_SIZE = 20000; + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + private final List tableNames; + private final int parallelism; + private final long ttlSeconds; + private final String backupDir; + private final int concurrentDeletes; + private final boolean streamResults; + private final int maxOrphanFileSampleSize; + private final List operationIds; + private final String resultsEndpoint; + + public BatchedOrphanFilesDeletionSparkApp( + String jobId, + StateManager stateManager, + List tableNames, + int parallelism, + long ttlSeconds, + OtelEmitter otelEmitter, + String backupDir, + int concurrentDeletes, + boolean streamResults, + int maxOrphanFileSampleSize, + List operationIds, + String resultsEndpoint) { + super(jobId, stateManager, otelEmitter); + this.tableNames = tableNames; + this.parallelism = parallelism; + this.ttlSeconds = ttlSeconds; + this.backupDir = backupDir; + this.concurrentDeletes = concurrentDeletes; + this.streamResults = streamResults; + this.maxOrphanFileSampleSize = maxOrphanFileSampleSize; + this.operationIds = operationIds; + this.resultsEndpoint = resultsEndpoint; + } + + @Override + protected void runInner(Operations ops) throws Exception { + if (resultsEndpoint != null && operationIds.size() != tableNames.size()) { + throw new IllegalArgumentException( + "operationIds count (" + + operationIds.size() + + ") must equal tableNames count (" + + tableNames.size() + + ") when resultsEndpoint is provided"); + } + + Map tableToOperationId = new HashMap<>(); + if (resultsEndpoint != null) { + for (int i = 0; i < tableNames.size(); i++) { + tableToOperationId.put(tableNames.get(i), operationIds.get(i)); + } + } + + long olderThanTimestampMillis = + System.currentTimeMillis() - TimeUnit.SECONDS.toMillis(ttlSeconds); + + log.info( + "Starting batched orphan files deletion for {} tables with parallelism {}", + tableNames.size(), + parallelism); + + int poolSize = Math.min(parallelism, Math.max(1, tableNames.size())); + ExecutorService executor = Executors.newFixedThreadPool(poolSize); + List> futures = new ArrayList<>(); + + for (String tableName : tableNames) { + futures.add( + executor.submit( + () -> { + long startTime = System.currentTimeMillis(); + try { + Table table = ops.getTable(tableName); + boolean backupEnabled = + Boolean.parseBoolean( + table + .properties() + .getOrDefault(AppConstants.BACKUP_ENABLED_KEY, "false")); + + log.info("Processing orphan files for table: {}", tableName); + Operations.OrphanFilesResult result = + ops.deleteOrphanFilesWithMetrics( + table, + olderThanTimestampMillis, + backupEnabled, + backupDir, + concurrentDeletes, + streamResults, + maxOrphanFileSampleSize); + + List orphanFiles = + Lists.newArrayList(result.orphanFileLocations().iterator()); + long durationMs = System.currentTimeMillis() - startTime; + + log.info( + "Successfully processed table {}: {} orphan files deleted in {}ms", + tableName, + orphanFiles.size(), + durationMs); + + return OrphanDeletionResult.success( + tableName, orphanFiles.size(), result.getBytesDeleted(), durationMs); + + } catch (Exception e) { + long durationMs = System.currentTimeMillis() - startTime; + log.error("Failed to process table: {}", tableName, e); + return OrphanDeletionResult.failure(tableName, e, durationMs); + } + })); + } + + executor.shutdown(); + executor.awaitTermination(Long.MAX_VALUE, TimeUnit.MILLISECONDS); + + List allResults = new ArrayList<>(); + for (Future future : futures) { + try { + allResults.add(future.get()); + } catch (ExecutionException e) { + throw new RuntimeException("Unexpected exception in table processing", e.getCause()); + } + } + + reportResults(allResults, tableToOperationId); + } + + private void reportResults( + List results, Map tableToOperationId) throws Exception { + OkHttpClient client = + resultsEndpoint != null + ? new OkHttpClient.Builder() + .connectTimeout(10, TimeUnit.SECONDS) + .readTimeout(30, TimeUnit.SECONDS) + .build() + : null; + + int failureCount = 0; + for (OrphanDeletionResult result : results) { + if (result.isSuccess()) { + otelEmitter.count( + METRICS_SCOPE, + AppConstants.ORPHAN_FILE_COUNT, + result.getOrphanFilesDeleted(), + Attributes.of(AttributeKey.stringKey(AppConstants.TABLE_NAME), result.getTableName())); + } else { + failureCount++; + } + + if (client != null) { + String opId = tableToOperationId.get(result.getTableName()); + if (opId != null) { + completeOperation(client, opId, result); + } + } + } + + if (client == null && failureCount > 0) { + throw new RuntimeException(failureCount + " table(s) failed in batch"); + } + if (client != null && failureCount == results.size()) { + throw new RuntimeException("All tables failed in batch"); + } + } + + private void completeOperation(OkHttpClient client, String id, OrphanDeletionResult result) + throws Exception { + OperationResult opResult = + result.isSuccess() + ? OperationResult.success(result.getOrphanFilesDeleted(), result.getBytesDeleted()) + : OperationResult.failure(result.getErrorMessage(), result.getErrorType()); + + String json = OBJECT_MAPPER.writeValueAsString(opResult); + RequestBody body = RequestBody.create(json, MediaType.parse("application/json; charset=utf-8")); + Request request = + new Request.Builder().url(resultsEndpoint + "/" + id + "/complete").post(body).build(); + try (Response response = client.newCall(request).execute()) { + int code = response.code(); + if (code < 200 || code >= 300) { + throw new RuntimeException("POST operation/" + id + "/complete returned HTTP " + code); + } + } + } + + /** Result of orphan deletion for a single table. */ + @Value + @Builder + public static class OrphanDeletionResult implements Serializable { + String tableName; + boolean success; + int orphanFilesDeleted; + long bytesDeleted; + long durationMs; + String errorMessage; + String errorType; + + public static OrphanDeletionResult success( + String tableName, int orphanFileCount, long bytesDeleted, long durationMs) { + return OrphanDeletionResult.builder() + .tableName(tableName) + .success(true) + .orphanFilesDeleted(orphanFileCount) + .bytesDeleted(bytesDeleted) + .durationMs(durationMs) + .build(); + } + + public static OrphanDeletionResult failure(String tableName, Throwable error, long durationMs) { + return OrphanDeletionResult.builder() + .tableName(tableName) + .success(false) + .orphanFilesDeleted(0) + .bytesDeleted(0) + .durationMs(durationMs) + .errorMessage(error.getMessage()) + .errorType(error.getClass().getSimpleName()) + .build(); + } + } + + /** POST payload sent to the optimizer service's complete-operation endpoint. */ + @JsonInclude(JsonInclude.Include.NON_NULL) + static class OperationResult { + public final String status; + public final ResultPayload result; + public final Integer orphanFilesDeleted; + public final Long orphanBytesDeleted; + + private OperationResult( + String status, ResultPayload result, Integer orphanFilesDeleted, Long orphanBytesDeleted) { + this.status = status; + this.result = result; + this.orphanFilesDeleted = orphanFilesDeleted; + this.orphanBytesDeleted = orphanBytesDeleted; + } + + static OperationResult success(int orphanFilesDeleted, long orphanBytesDeleted) { + return new OperationResult("SUCCESS", null, orphanFilesDeleted, orphanBytesDeleted); + } + + static OperationResult failure(String errorMessage, String errorType) { + return new OperationResult("FAILED", new ResultPayload(errorMessage, errorType), null, null); + } + } + + /** Error details nested inside {@link OperationResult}. */ + @JsonInclude(JsonInclude.Include.NON_NULL) + static class ResultPayload { + public final String errorMessage; + public final String errorType; + + ResultPayload(String errorMessage, String errorType) { + this.errorMessage = errorMessage; + this.errorType = errorType; + } + } + + public static void main(String[] args) { + OtelEmitter otelEmitter = + new AppsOtelEmitter(Arrays.asList(DefaultOtelConfig.getOpenTelemetry())); + createApp(args, otelEmitter).run(); + } + + public static BatchedOrphanFilesDeletionSparkApp createApp( + String[] args, OtelEmitter otelEmitter) { + List

These tests run the app's logic in-process against a real Spark session backed by an in-memory + * Iceberg catalog ({@link OpenHouseSparkITest}). The app is never submitted as an external Spark + * job; {@code runInner(ops)} is called directly. + * + *

When testing the per-table complete-operation callback path, the Optimizer service is replaced + * by an embedded {@link com.sun.net.httpserver.HttpServer} that captures request bodies. This + * verifies the shape and content of the JSON payloads without requiring a live service. + */ +@Slf4j +public class BatchedOrphanFilesDeletionSparkAppTest extends OpenHouseSparkITest { + private static final int MAX_SAMPLE_SIZE = 20000; + + private final OtelEmitter otelEmitter = + new AppsOtelEmitter(Arrays.asList(DefaultOtelConfig.getOpenTelemetry())); + + @Test + public void testSuccessfulOrphanFilesDeletionForMultipleTables() throws Exception { + final List tableNames = Arrays.asList("db.test_batch1", "db.test_batch2"); + final int parallelism = 2; + + try (Operations ops = Operations.withCatalog(getSparkSession(), otelEmitter)) { + for (String tableName : tableNames) { + prepareTable(ops, tableName); + populateTable(ops, tableName, 2); + } + + BatchedOrphanFilesDeletionSparkApp app = + newApp( + tableNames, parallelism, TimeUnit.DAYS.toSeconds(1), Collections.emptyList(), null); + + app.runInner(ops); + + for (String tableName : tableNames) { + Table table = ops.getTable(tableName); + Assertions.assertNotNull(table); + } + } + } + + @Test + public void testBatchedDeletionWithSingleTable() throws Exception { + final List tableNames = Arrays.asList("db.test_single"); + + try (Operations ops = Operations.withCatalog(getSparkSession(), otelEmitter)) { + prepareTable(ops, tableNames.get(0)); + populateTable(ops, tableNames.get(0), 3); + + BatchedOrphanFilesDeletionSparkApp app = + newApp(tableNames, 1, TimeUnit.DAYS.toSeconds(1), Collections.emptyList(), null); + + app.runInner(ops); + + Assertions.assertNotNull(ops.getTable(tableNames.get(0))); + } + } + + @Test + public void testBatchedDeletionWithInvalidTable() throws Exception { + final List tableNames = + Arrays.asList("db.test_valid1", "db.nonexistent_table", "db.test_valid2"); + + try (Operations ops = Operations.withCatalog(getSparkSession(), otelEmitter)) { + prepareTable(ops, "db.test_valid1"); + populateTable(ops, "db.test_valid1", 2); + prepareTable(ops, "db.test_valid2"); + populateTable(ops, "db.test_valid2", 2); + + BatchedOrphanFilesDeletionSparkApp app = + newApp(tableNames, 3, TimeUnit.DAYS.toSeconds(1), Collections.emptyList(), null); + + Assertions.assertThrows(RuntimeException.class, () -> app.runInner(ops)); + } + } + + @Test + public void testBatchedDeletionWithEmptyTables() throws Exception { + final List tableNames = Arrays.asList("db.test_empty1", "db.test_empty2"); + + try (Operations ops = Operations.withCatalog(getSparkSession(), otelEmitter)) { + for (String tableName : tableNames) { + prepareTable(ops, tableName); + } + + BatchedOrphanFilesDeletionSparkApp app = + newApp(tableNames, 2, TimeUnit.DAYS.toSeconds(1), Collections.emptyList(), null); + + app.runInner(ops); + + for (String tableName : tableNames) { + Assertions.assertNotNull(ops.getTable(tableName)); + } + } + } + + @Test + public void testOrphanDeletionResultSuccess() { + BatchedOrphanFilesDeletionSparkApp.OrphanDeletionResult result = + BatchedOrphanFilesDeletionSparkApp.OrphanDeletionResult.success( + "db.test_result", 42, 1024L, 5000L); + + Assertions.assertTrue(result.isSuccess()); + Assertions.assertEquals("db.test_result", result.getTableName()); + Assertions.assertEquals(42, result.getOrphanFilesDeleted()); + Assertions.assertEquals(1024L, result.getBytesDeleted()); + Assertions.assertEquals(5000L, result.getDurationMs()); + Assertions.assertNull(result.getErrorMessage()); + Assertions.assertNull(result.getErrorType()); + } + + @Test + public void testOrphanDeletionResultFailure() { + BatchedOrphanFilesDeletionSparkApp.OrphanDeletionResult result = + BatchedOrphanFilesDeletionSparkApp.OrphanDeletionResult.failure( + "db.test_failure", new RuntimeException("Test error"), 1000L); + + Assertions.assertFalse(result.isSuccess()); + Assertions.assertEquals("db.test_failure", result.getTableName()); + Assertions.assertEquals(0, result.getOrphanFilesDeleted()); + Assertions.assertEquals(0, result.getBytesDeleted()); + Assertions.assertEquals(1000L, result.getDurationMs()); + Assertions.assertEquals("Test error", result.getErrorMessage()); + Assertions.assertEquals("RuntimeException", result.getErrorType()); + } + + @Test + public void testCreateAppFromCommandLineArgs() { + String[] args = { + "--jobId", "test-job-123", + "--storageURL", "http://localhost:8080", + "--tableNames", "db.table1,db.table2,db.table3", + "--parallelism", "5", + "--ttl", "86400", + "--backupDir", "/backup", + "--concurrentDeletes", "20", + "--operationIds", "uuid-1,uuid-2,uuid-3", + "--resultsEndpoint", "http://localhost:8083/v1/optimizer/operations" + }; + + Assertions.assertNotNull(BatchedOrphanFilesDeletionSparkApp.createApp(args, otelEmitter)); + } + + @Test + public void testCreateAppWithDefaultValues() { + String[] args = { + "--jobId", "test-job-456", + "--storageURL", "http://localhost:8080", + "--tableNames", "db.table1" + }; + + Assertions.assertNotNull(BatchedOrphanFilesDeletionSparkApp.createApp(args, otelEmitter)); + } + + @Test + public void testCreateAppWithOperationIdsAndEndpoint() { + String[] args = { + "--jobId", "test-job-789", + "--storageURL", "http://localhost:8080", + "--tableNames", "db.table1,db.table2", + "--operationIds", "uuid-1,uuid-2", + "--resultsEndpoint", "http://localhost:8083/v1/optimizer/operations" + }; + + Assertions.assertNotNull(BatchedOrphanFilesDeletionSparkApp.createApp(args, otelEmitter)); + } + + @Test + public void testBatchedDeletionWithActualOrphanFiles() throws Exception { + final String tableName = "db.test_orphans"; + final List tableNames = Arrays.asList(tableName); + + try (Operations ops = Operations.withCatalog(getSparkSession(), otelEmitter)) { + prepareTable(ops, tableName); + populateTable(ops, tableName, 3); + + Assertions.assertNotNull(ops.getTable(tableName).currentSnapshot()); + + // TTL=0 bypasses the minimum-age guard so seeded orphan files are eligible. + BatchedOrphanFilesDeletionSparkApp app = + newApp(tableNames, 1, 0L, Collections.emptyList(), null); + + app.runInner(ops); + + Assertions.assertNotNull(ops.getTable(tableName).currentSnapshot()); + } + } + + @Test + public void testBatchedDeletionPartialSuccessWithEndpoint() throws Exception { + final List tableNames = Arrays.asList("db.test_ep_valid", "db.nonexistent_ep"); + final List operationIds = Arrays.asList("op-100", "op-200"); + + List receivedBodies = Collections.synchronizedList(new ArrayList<>()); + HttpServer httpServer = HttpServer.create(new InetSocketAddress(0), 0); + httpServer.createContext( + "/ops", + exchange -> { + byte[] body = exchange.getRequestBody().readAllBytes(); + receivedBodies.add(new String(body, StandardCharsets.UTF_8)); + exchange.sendResponseHeaders(200, 0); + exchange.close(); + }); + httpServer.start(); + String endpoint = "http://localhost:" + httpServer.getAddress().getPort() + "/ops"; + + try (Operations ops = Operations.withCatalog(getSparkSession(), otelEmitter)) { + prepareTable(ops, "db.test_ep_valid"); + populateTable(ops, "db.test_ep_valid", 2); + + BatchedOrphanFilesDeletionSparkApp app = + newApp(tableNames, 2, TimeUnit.DAYS.toSeconds(1), operationIds, endpoint); + + app.runInner(ops); + + Assertions.assertEquals(2, receivedBodies.size()); + long successCount = receivedBodies.stream().filter(b -> b.contains("\"SUCCESS\"")).count(); + long failureCount = receivedBodies.stream().filter(b -> b.contains("\"FAILED\"")).count(); + Assertions.assertEquals(1, successCount); + Assertions.assertEquals(1, failureCount); + + String successBody = + receivedBodies.stream().filter(b -> b.contains("\"SUCCESS\"")).findFirst().get(); + Assertions.assertFalse(successBody.contains("\"result\"")); + + String failureBody = + receivedBodies.stream().filter(b -> b.contains("\"FAILED\"")).findFirst().get(); + Assertions.assertTrue(failureBody.contains("\"errorMessage\"")); + Assertions.assertTrue(failureBody.contains("\"errorType\"")); + } finally { + httpServer.stop(0); + } + } + + @Test + public void testBatchedDeletionAllFailWithEndpoint() throws Exception { + final List tableNames = Arrays.asList("db.nonexistent_ep1", "db.nonexistent_ep2"); + final List operationIds = Arrays.asList("op-300", "op-400"); + + List receivedBodies = Collections.synchronizedList(new ArrayList<>()); + HttpServer httpServer = HttpServer.create(new InetSocketAddress(0), 0); + httpServer.createContext( + "/ops", + exchange -> { + byte[] body = exchange.getRequestBody().readAllBytes(); + receivedBodies.add(new String(body, StandardCharsets.UTF_8)); + exchange.sendResponseHeaders(200, 0); + exchange.close(); + }); + httpServer.start(); + String endpoint = "http://localhost:" + httpServer.getAddress().getPort() + "/ops"; + + try (Operations ops = Operations.withCatalog(getSparkSession(), otelEmitter)) { + BatchedOrphanFilesDeletionSparkApp app = + newApp(tableNames, 2, TimeUnit.DAYS.toSeconds(1), operationIds, endpoint); + + Assertions.assertThrows(RuntimeException.class, () -> app.runInner(ops)); + + Assertions.assertEquals(2, receivedBodies.size()); + long failures = receivedBodies.stream().filter(b -> b.contains("\"FAILED\"")).count(); + Assertions.assertEquals(2, failures); + for (String body : receivedBodies) { + Assertions.assertTrue(body.contains("\"errorMessage\"")); + Assertions.assertTrue(body.contains("\"errorType\"")); + } + } finally { + httpServer.stop(0); + } + } + + @Test + public void testBatchedDeletionMismatchedOperationIdsThrows() throws Exception { + final List tableNames = Arrays.asList("db.table1", "db.table2"); + final List operationIds = Arrays.asList("op-1", "op-2", "op-3"); + + BatchedOrphanFilesDeletionSparkApp app = + newApp( + tableNames, 2, TimeUnit.DAYS.toSeconds(1), operationIds, "http://localhost:9999/ops"); + + try (Operations ops = Operations.withCatalog(getSparkSession(), otelEmitter)) { + Assertions.assertThrows(IllegalArgumentException.class, () -> app.runInner(ops)); + } + } + + private BatchedOrphanFilesDeletionSparkApp newApp( + List tableNames, + int parallelism, + long ttlSeconds, + List operationIds, + String resultsEndpoint) { + return new BatchedOrphanFilesDeletionSparkApp( + "test-job", + null, + tableNames, + parallelism, + ttlSeconds, + otelEmitter, + ".backup", + 10, + false, + MAX_SAMPLE_SIZE, + operationIds, + resultsEndpoint); + } + + private static void prepareTable(Operations ops, String tableName) { + ops.spark().sql(String.format("DROP TABLE IF EXISTS %s", tableName)).show(); + ops.spark().sql(String.format("CREATE TABLE %s (data string, ts timestamp)", tableName)).show(); + } + + private static void populateTable(Operations ops, String tableName, int numRows) { + for (int row = 0; row < numRows; ++row) { + ops.spark() + .sql(String.format("INSERT INTO %s VALUES ('v%d', current_timestamp())", tableName, row)) + .show(); + } + } +} From d3e01b4798852bb72e4d108deb0bfc81c95fc179 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Wed, 20 May 2026 09:02:28 -0700 Subject: [PATCH 02/14] feat(optimizer): carry per-table OFD metrics on operation-complete MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - table_operations_history grows four nullable columns: orphan_files_deleted BIGINT orphan_bytes_deleted BIGINT error_message VARCHAR(1024) error_type VARCHAR(256) - Row, model, DTO, history-DTO all gain the four fields. - CompleteOperationRequestDto drops the body-based operationId — id moves to the URL path. New endpoint shape is POST /v1/optimizer/operations/{id}/complete. - OptimizerDataService.completeOperation grows four trailing nullable parameters carrying the metrics / error details. - Scheduler config default + test fixture URLs migrate from the stale /v1/table-operations to the correct /v1/optimizer/operations. - BatchedOFD Spark app's OperationResult payload flattens the nested ResultPayload into peer fields so the body matches the flat DTO. Co-Authored-By: Claude Opus 4.7 --- .../src/main/resources/application.properties | 2 +- .../scheduler/SchedulerRunnerTest.java | 2 +- .../resources/application-test.properties | 2 +- .../BatchedOrphanFilesDeletionSparkApp.java | 28 ++++++-------- ...atchedOrphanFilesDeletionSparkAppTest.java | 9 ++++- .../controller/TableOperationsController.java | 20 ++++++---- .../api/spec/CompleteOperationRequestDto.java | 30 +++++++++------ .../api/spec/TableOperationsHistoryDto.java | 20 ++++++++++ .../db/TableOperationsHistoryRow.java | 16 ++++++++ .../model/TableOperationsHistory.java | 20 ++++++++++ .../service/OptimizerDataService.java | 16 ++++++-- .../service/OptimizerDataServiceImpl.java | 11 +++++- .../main/resources/db/optimizer-schema.sql | 18 +++++---- .../service/OptimizerDataServiceImplTest.java | 37 ++++++++++++++++++- 14 files changed, 178 insertions(+), 53 deletions(-) diff --git a/apps/optimizer-scheduler/src/main/resources/application.properties b/apps/optimizer-scheduler/src/main/resources/application.properties index 4d849797c..bdaf4cbe6 100644 --- a/apps/optimizer-scheduler/src/main/resources/application.properties +++ b/apps/optimizer-scheduler/src/main/resources/application.properties @@ -6,5 +6,5 @@ spring.datasource.password=${OPTIMIZER_DB_PASSWORD:} spring.jpa.hibernate.ddl-auto=none jobs.base-uri=${JOBS_BASE_URI:http://localhost:8002} scheduler.ofd.max-files-per-bin=${SCHEDULER_OFD_MAX_FILES_PER_BIN:1000000} -scheduler.results-endpoint=${SCHEDULER_RESULTS_ENDPOINT:http://openhouse-optimizer:8080/v1/table-operations} +scheduler.results-endpoint=${SCHEDULER_RESULTS_ENDPOINT:http://openhouse-optimizer:8080/v1/optimizer/operations} scheduler.cluster-id=${SCHEDULER_CLUSTER_ID:LocalHadoopCluster} diff --git a/apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/SchedulerRunnerTest.java b/apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/SchedulerRunnerTest.java index 7722a6156..aadb3c68e 100644 --- a/apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/SchedulerRunnerTest.java +++ b/apps/optimizer-scheduler/src/test/java/com/linkedin/openhouse/scheduler/SchedulerRunnerTest.java @@ -38,7 +38,7 @@ class SchedulerRunnerTest { private static final com.linkedin.openhouse.optimizer.db.OperationStatus PENDING_DB = com.linkedin.openhouse.optimizer.db.OperationStatus.PENDING; private static final String OFD_STR = OFD.name(); - private static final String RESULTS_ENDPOINT = "http://localhost:8080/v1/table-operations"; + private static final String RESULTS_ENDPOINT = "http://localhost:8080/v1/optimizer/operations"; @Mock private TableOperationsRepository operationsRepo; @Mock private TableStatsRepository statsRepo; diff --git a/apps/optimizer-scheduler/src/test/resources/application-test.properties b/apps/optimizer-scheduler/src/test/resources/application-test.properties index 5904d71ba..35233829b 100644 --- a/apps/optimizer-scheduler/src/test/resources/application-test.properties +++ b/apps/optimizer-scheduler/src/test/resources/application-test.properties @@ -6,5 +6,5 @@ spring.sql.init.mode=always spring.sql.init.schema-locations=classpath:schema.sql jobs.base-uri=http://localhost:9999 scheduler.ofd.max-files-per-bin=1000000 -scheduler.results-endpoint=http://localhost:8080/v1/table-operations +scheduler.results-endpoint=http://localhost:8080/v1/optimizer/operations scheduler.cluster-id=test-cluster diff --git a/apps/spark/src/main/java/com/linkedin/openhouse/jobs/spark/BatchedOrphanFilesDeletionSparkApp.java b/apps/spark/src/main/java/com/linkedin/openhouse/jobs/spark/BatchedOrphanFilesDeletionSparkApp.java index 0908e7afa..e13bbca87 100644 --- a/apps/spark/src/main/java/com/linkedin/openhouse/jobs/spark/BatchedOrphanFilesDeletionSparkApp.java +++ b/apps/spark/src/main/java/com/linkedin/openhouse/jobs/spark/BatchedOrphanFilesDeletionSparkApp.java @@ -273,36 +273,30 @@ public static OrphanDeletionResult failure(String tableName, Throwable error, lo @JsonInclude(JsonInclude.Include.NON_NULL) static class OperationResult { public final String status; - public final ResultPayload result; public final Integer orphanFilesDeleted; public final Long orphanBytesDeleted; + public final String errorMessage; + public final String errorType; private OperationResult( - String status, ResultPayload result, Integer orphanFilesDeleted, Long orphanBytesDeleted) { + String status, + Integer orphanFilesDeleted, + Long orphanBytesDeleted, + String errorMessage, + String errorType) { this.status = status; - this.result = result; this.orphanFilesDeleted = orphanFilesDeleted; this.orphanBytesDeleted = orphanBytesDeleted; + this.errorMessage = errorMessage; + this.errorType = errorType; } static OperationResult success(int orphanFilesDeleted, long orphanBytesDeleted) { - return new OperationResult("SUCCESS", null, orphanFilesDeleted, orphanBytesDeleted); + return new OperationResult("SUCCESS", orphanFilesDeleted, orphanBytesDeleted, null, null); } static OperationResult failure(String errorMessage, String errorType) { - return new OperationResult("FAILED", new ResultPayload(errorMessage, errorType), null, null); - } - } - - /** Error details nested inside {@link OperationResult}. */ - @JsonInclude(JsonInclude.Include.NON_NULL) - static class ResultPayload { - public final String errorMessage; - public final String errorType; - - ResultPayload(String errorMessage, String errorType) { - this.errorMessage = errorMessage; - this.errorType = errorType; + return new OperationResult("FAILED", null, null, errorMessage, errorType); } } diff --git a/apps/spark/src/test/java/com/linkedin/openhouse/jobs/spark/BatchedOrphanFilesDeletionSparkAppTest.java b/apps/spark/src/test/java/com/linkedin/openhouse/jobs/spark/BatchedOrphanFilesDeletionSparkAppTest.java index 26d4ae460..9f057a8d7 100644 --- a/apps/spark/src/test/java/com/linkedin/openhouse/jobs/spark/BatchedOrphanFilesDeletionSparkAppTest.java +++ b/apps/spark/src/test/java/com/linkedin/openhouse/jobs/spark/BatchedOrphanFilesDeletionSparkAppTest.java @@ -239,14 +239,21 @@ public void testBatchedDeletionPartialSuccessWithEndpoint() throws Exception { Assertions.assertEquals(1, successCount); Assertions.assertEquals(1, failureCount); + // SUCCESS payload carries orphan-file/byte metrics; no error fields. String successBody = receivedBodies.stream().filter(b -> b.contains("\"SUCCESS\"")).findFirst().get(); - Assertions.assertFalse(successBody.contains("\"result\"")); + Assertions.assertTrue(successBody.contains("\"orphanFilesDeleted\"")); + Assertions.assertTrue(successBody.contains("\"orphanBytesDeleted\"")); + Assertions.assertFalse(successBody.contains("\"errorMessage\"")); + Assertions.assertFalse(successBody.contains("\"errorType\"")); + // FAILED payload carries error fields; no orphan metrics. String failureBody = receivedBodies.stream().filter(b -> b.contains("\"FAILED\"")).findFirst().get(); Assertions.assertTrue(failureBody.contains("\"errorMessage\"")); Assertions.assertTrue(failureBody.contains("\"errorType\"")); + Assertions.assertFalse(failureBody.contains("\"orphanFilesDeleted\"")); + Assertions.assertFalse(failureBody.contains("\"orphanBytesDeleted\"")); } finally { httpServer.stop(0); } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java index accf6d543..20bad278b 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableOperationsController.java @@ -29,18 +29,22 @@ public class TableOperationsController { private final OptimizerDataService service; /** - * Report that an operation has completed. The body carries the {@code operationId} the caller is - * completing along with its terminal status. The backend looks up the operation row, writes a - * history entry with the operation's table metadata, and returns 201 Created with the history - * row, or 404 if the operation does not exist. + * Report that an operation has completed. {@code id} is the operation's UUID; the body carries + * the terminal status and any per-operation metrics or error details. The backend looks up the + * operation row, writes a history entry with the operation's table metadata plus the supplied + * metrics, and returns 201 Created with the history row, or 404 if the operation does not exist. */ - @PostMapping("/complete") + @PostMapping("/{id}/complete") public ResponseEntity completeOperation( - @RequestBody CompleteOperationRequestDto request) { + @PathVariable String id, @RequestBody CompleteOperationRequestDto request) { return service .completeOperation( - request.getOperationId(), - request.getStatus() == null ? null : request.getStatus().toModel()) + id, + request.getStatus() == null ? null : request.getStatus().toModel(), + request.getOrphanFilesDeleted(), + request.getOrphanBytesDeleted(), + request.getErrorMessage(), + request.getErrorType()) .map( history -> ResponseEntity.status(HttpStatus.CREATED) diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/CompleteOperationRequestDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/CompleteOperationRequestDto.java index 9dca54a8e..c6266812d 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/CompleteOperationRequestDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/CompleteOperationRequestDto.java @@ -6,20 +6,19 @@ import lombok.NoArgsConstructor; /** - * Request body for {@code POST /v1/table-operations/complete}. + * Request body for {@code POST /v1/optimizer/operations/{id}/complete}. * - *

Reports the outcome of a single completed operation. The service looks up the operation row by - * {@link #operationId} and writes a history entry for it. + *

Reports the outcome of a single completed operation. The operation row's UUID lives in the URL + * path; the body carries the terminal status and any operation-type-specific result metrics. * *

A single Spark job typically processes N tables and yields N independent (status) outcomes — * one per operation. Callers issue one complete request per operation; the service does not * bulk-complete by job. * - *

The remaining fields ({@link #tableUuid}, {@link #databaseName}, {@link #tableName}, {@link - * #operationType}) are debug-only echo information. The server does not key off them; they are - * preserved on log lines and traces so an operator looking at a failing complete call can see which - * (db, table, operation) the caller believed it was completing without joining back to the - * operation row. + *

The debug-echo fields ({@link #tableUuid}, {@link #databaseName}, {@link #tableName}, {@link + * #operationType}) are optional. The server does not key off them; they are preserved on log lines + * and traces so an operator looking at a failing complete call can see which (db, table, operation) + * the caller believed it was completing without joining back to the operation row. */ @Data @Builder @@ -27,12 +26,21 @@ @AllArgsConstructor public class CompleteOperationRequestDto { - /** Operation row's UUID — the primary lookup key. */ - private String operationId; - /** Terminal outcome for this single operation. */ private HistoryStatusDto status; + /** OFD-specific: number of orphan files deleted. Null on failure or non-OFD operation. */ + private Long orphanFilesDeleted; + + /** OFD-specific: bytes reclaimed. Null on failure or non-OFD operation. */ + private Long orphanBytesDeleted; + + /** On failure, the exception message from the Spark-side worker. Null on success. */ + private String errorMessage; + + /** On failure, the simple name of the exception class. Null on success. */ + private String errorType; + /** Debug echo: stable table identity the caller believed it was completing. */ private String tableUuid; diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperationsHistoryDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperationsHistoryDto.java index 8b508bf36..6ee23b135 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperationsHistoryDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperationsHistoryDto.java @@ -35,6 +35,18 @@ public class TableOperationsHistoryDto { /** {@code SUCCESS} or {@code FAILED}. */ private HistoryStatusDto status; + /** OFD-specific: number of orphan files deleted; null if not OFD or on failure. */ + private Long orphanFilesDeleted; + + /** OFD-specific: bytes reclaimed by orphan file deletion; null if not OFD or on failure. */ + private Long orphanBytesDeleted; + + /** On failure, the message from the Spark-side exception. Null on success. */ + private String errorMessage; + + /** On failure, the simple name of the Spark-side exception class. Null on success. */ + private String errorType; + /** Convert to the internal-model counterpart. */ public TableOperationsHistory toModel() { return TableOperationsHistory.builder() @@ -45,6 +57,10 @@ public TableOperationsHistory toModel() { .operationType(operationType == null ? null : operationType.toModel()) .completedAt(completedAt) .status(status == null ? null : status.toModel()) + .orphanFilesDeleted(orphanFilesDeleted) + .orphanBytesDeleted(orphanBytesDeleted) + .errorMessage(errorMessage) + .errorType(errorType) .build(); } @@ -61,6 +77,10 @@ public static TableOperationsHistoryDto fromModel(TableOperationsHistory h) { .operationType(OperationTypeDto.fromModel(h.getOperationType())) .completedAt(h.getCompletedAt()) .status(HistoryStatusDto.fromModel(h.getStatus())) + .orphanFilesDeleted(h.getOrphanFilesDeleted()) + .orphanBytesDeleted(h.getOrphanBytesDeleted()) + .errorMessage(h.getErrorMessage()) + .errorType(h.getErrorType()) .build(); } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableOperationsHistoryRow.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableOperationsHistoryRow.java index 5f4a598d9..484c3e8dd 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableOperationsHistoryRow.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/db/TableOperationsHistoryRow.java @@ -72,4 +72,20 @@ public class TableOperationsHistoryRow { @Enumerated(EnumType.STRING) @Column(name = "status", nullable = false, length = 20) private HistoryStatus status; + + /** OFD-specific: number of orphan files deleted; null if not an OFD operation or on failure. */ + @Column(name = "orphan_files_deleted") + private Long orphanFilesDeleted; + + /** OFD-specific: bytes reclaimed by orphan file deletion; null if not OFD or on failure. */ + @Column(name = "orphan_bytes_deleted") + private Long orphanBytesDeleted; + + /** On failure, the message from the Spark-side exception. Null on success. */ + @Column(name = "error_message", length = 1024) + private String errorMessage; + + /** On failure, the simple name of the Spark-side exception class. Null on success. */ + @Column(name = "error_type", length = 256) + private String errorType; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java index 8cbfb6ff7..48381cbe8 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperationsHistory.java @@ -40,6 +40,18 @@ public class TableOperationsHistory { /** Terminal outcome: {@link HistoryStatus#SUCCESS} or {@link HistoryStatus#FAILED}. */ private HistoryStatus status; + /** OFD-specific: number of orphan files deleted; null if not an OFD operation or on failure. */ + private Long orphanFilesDeleted; + + /** OFD-specific: bytes reclaimed by orphan file deletion; null if not OFD or on failure. */ + private Long orphanBytesDeleted; + + /** On failure, the message from the Spark-side exception. Null on success. */ + private String errorMessage; + + /** On failure, the simple name of the Spark-side exception class. Null on success. */ + private String errorType; + /** Convert to the corresponding DB row. */ public TableOperationsHistoryRow toRow() { return TableOperationsHistoryRow.builder() @@ -50,6 +62,10 @@ public TableOperationsHistoryRow toRow() { .operationType(operationType == null ? null : operationType.toDb()) .completedAt(completedAt) .status(status == null ? null : status.toDb()) + .orphanFilesDeleted(orphanFilesDeleted) + .orphanBytesDeleted(orphanBytesDeleted) + .errorMessage(errorMessage) + .errorType(errorType) .build(); } @@ -66,6 +82,10 @@ public static TableOperationsHistory fromRow(TableOperationsHistoryRow row) { .operationType(OperationType.fromDb(row.getOperationType())) .completedAt(row.getCompletedAt()) .status(HistoryStatus.fromDb(row.getStatus())) + .orphanFilesDeleted(row.getOrphanFilesDeleted()) + .orphanBytesDeleted(row.getOrphanBytesDeleted()) + .errorMessage(row.getErrorMessage()) + .errorType(row.getErrorType()) .build(); } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java index 5d5edaee2..9e0ae3403 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataService.java @@ -36,10 +36,20 @@ List listTableOperations( /** * Complete an operation by writing a history entry. Looks up the operation row by {@code * operationId}, copies its table metadata into a new history row with the supplied terminal - * {@code status}, and saves it. Returns the history record, or empty if the operation does not - * exist. + * {@code status} and optional per-operation metrics / error details, and saves it. Returns the + * history record, or empty if the operation does not exist. + * + *

The four trailing parameters are all nullable. Successful OFD operations populate {@code + * orphanFilesDeleted} / {@code orphanBytesDeleted}; failures populate {@code errorMessage} / + * {@code errorType}. Non-OFD operations leave all four null. */ - Optional completeOperation(String operationId, HistoryStatus status); + Optional completeOperation( + String operationId, + HistoryStatus status, + Long orphanFilesDeleted, + Long orphanBytesDeleted, + String errorMessage, + String errorType); /** * Return the operation row for {@code id} regardless of status, or empty if it does not exist. diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java index 633411e98..58783d844 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImpl.java @@ -63,7 +63,12 @@ public List listTableOperations( @Override @Transactional public Optional completeOperation( - String operationId, HistoryStatus status) { + String operationId, + HistoryStatus status, + Long orphanFilesDeleted, + Long orphanBytesDeleted, + String errorMessage, + String errorType) { return operationsRepository .findById(operationId) .map( @@ -76,6 +81,10 @@ public Optional completeOperation( .operationType(OperationType.fromDb(row.getOperationType())) .completedAt(Instant.now()) .status(status) + .orphanFilesDeleted(orphanFilesDeleted) + .orphanBytesDeleted(orphanBytesDeleted) + .errorMessage(errorMessage) + .errorType(errorType) .build()) .map(history -> TableOperationsHistory.fromRow(historyRepository.save(history.toRow()))); } diff --git a/services/optimizer/src/main/resources/db/optimizer-schema.sql b/services/optimizer/src/main/resources/db/optimizer-schema.sql index 892c1c55f..24071e2e8 100644 --- a/services/optimizer/src/main/resources/db/optimizer-schema.sql +++ b/services/optimizer/src/main/resources/db/optimizer-schema.sql @@ -38,13 +38,17 @@ CREATE TABLE IF NOT EXISTS table_stats_history ( ); CREATE TABLE IF NOT EXISTS table_operations_history ( - id VARCHAR(36) NOT NULL, - table_uuid VARCHAR(36) NOT NULL, - database_name VARCHAR(128) NOT NULL, - table_name VARCHAR(128) NOT NULL, - operation_type VARCHAR(50) NOT NULL, - completed_at TIMESTAMP(6) NOT NULL, - status VARCHAR(20) NOT NULL, + id VARCHAR(36) NOT NULL, + table_uuid VARCHAR(36) NOT NULL, + database_name VARCHAR(128) NOT NULL, + table_name VARCHAR(128) NOT NULL, + operation_type VARCHAR(50) NOT NULL, + completed_at TIMESTAMP(6) NOT NULL, + status VARCHAR(20) NOT NULL, + orphan_files_deleted BIGINT, + orphan_bytes_deleted BIGINT, + error_message VARCHAR(1024), + error_type VARCHAR(256), PRIMARY KEY (id), INDEX idx_toph_db_table (database_name, table_name), -- Drives TableOperationHistoryRepository.findLatestPerTable: the correlated diff --git a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java index b329459ad..010195d2a 100644 --- a/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java +++ b/services/optimizer/src/test/java/com/linkedin/openhouse/optimizer/service/OptimizerDataServiceImplTest.java @@ -54,7 +54,7 @@ void completeOperation_writesHistoryFromOperationRow() { .build()); Optional result = - service.completeOperation(operationId, HistoryStatus.SUCCESS); + service.completeOperation(operationId, HistoryStatus.SUCCESS, 42L, 1024L, null, null); assertThat(result).isPresent(); assertThat(result.get().getStatus()).isEqualTo(HistoryStatus.SUCCESS); @@ -62,12 +62,45 @@ void completeOperation_writesHistoryFromOperationRow() { assertThat(result.get().getOperationType()).isEqualTo(OperationType.ORPHAN_FILES_DELETION); assertThat(result.get().getDatabaseName()).isEqualTo("db1"); assertThat(result.get().getCompletedAt()).isNotNull(); + assertThat(result.get().getOrphanFilesDeleted()).isEqualTo(42L); + assertThat(result.get().getOrphanBytesDeleted()).isEqualTo(1024L); + assertThat(result.get().getErrorMessage()).isNull(); + assertThat(result.get().getErrorType()).isNull(); + } + + @Test + void completeOperation_failurePersistsErrorFields() { + String operationId = UUID.randomUUID().toString(); + operationsRepository.save( + TableOperationsRow.builder() + .id(operationId) + .tableUuid(UUID.randomUUID().toString()) + .databaseName("db1") + .tableName("tbl1") + .operationType(com.linkedin.openhouse.optimizer.db.OperationType.ORPHAN_FILES_DELETION) + .status(com.linkedin.openhouse.optimizer.db.OperationStatus.SCHEDULED) + .createdAt(Instant.now()) + .scheduledAt(Instant.now()) + .jobId("spark-job-456") + .build()); + + Optional result = + service.completeOperation( + operationId, HistoryStatus.FAILED, null, null, "boom", "RuntimeException"); + + assertThat(result).isPresent(); + assertThat(result.get().getStatus()).isEqualTo(HistoryStatus.FAILED); + assertThat(result.get().getOrphanFilesDeleted()).isNull(); + assertThat(result.get().getOrphanBytesDeleted()).isNull(); + assertThat(result.get().getErrorMessage()).isEqualTo("boom"); + assertThat(result.get().getErrorType()).isEqualTo("RuntimeException"); } @Test void completeOperation_notFound_returnsEmpty() { Optional result = - service.completeOperation(UUID.randomUUID().toString(), HistoryStatus.FAILED); + service.completeOperation( + UUID.randomUUID().toString(), HistoryStatus.FAILED, null, null, null, null); assertThat(result).isEmpty(); } From d0bdd16f2e434c903e37d8369dbaa3a66ddb5b2e Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Wed, 20 May 2026 09:08:48 -0700 Subject: [PATCH 03/14] feat(tables): on-commit hook posts stats to the optimizer New OptimizerTableStatsClient PUTs /v1/optimizer/stats/{tableUuid} after every successful Iceberg commit, carrying database/table names and the current tableProperties (which the optimizer analyzer reads to decide opt-in for OFD via maintenance.optimizer.ofd.enabled). Activated by cluster.optimizer.base-uri. When the property is absent the bean is not created and IcebergSnapshotsServiceImpl skips the call (behavior unchanged for unit tests and dev clusters without an optimizer). The hook is best-effort: WebClient failures are caught and logged at WARN; the commit itself succeeds either way. The analyzer re-reads stats on every cycle so a transient post failure is recovered by the next commit. Co-Authored-By: Claude Opus 4.7 --- .../cluster/configs/ClusterProperties.java | 7 ++ .../config/OptimizerTableStatsClient.java | 95 +++++++++++++++++++ .../services/IcebergSnapshotsServiceImpl.java | 11 ++- .../service/IcebergSnapshotsServiceTest.java | 5 + 4 files changed, 117 insertions(+), 1 deletion(-) create mode 100644 services/tables/src/main/java/com/linkedin/openhouse/tables/config/OptimizerTableStatsClient.java diff --git a/cluster/configs/src/main/java/com/linkedin/openhouse/cluster/configs/ClusterProperties.java b/cluster/configs/src/main/java/com/linkedin/openhouse/cluster/configs/ClusterProperties.java index 9d0c81876..e8eb9cd71 100644 --- a/cluster/configs/src/main/java/com/linkedin/openhouse/cluster/configs/ClusterProperties.java +++ b/cluster/configs/src/main/java/com/linkedin/openhouse/cluster/configs/ClusterProperties.java @@ -96,4 +96,11 @@ public class ClusterProperties { // string @Value("${cluster.tables.allowed-client-name-values:}") private List allowedClientNameValues; + + /** + * Base URI of the optimizer service. When set, the Tables Service POSTs a stats snapshot to the + * optimizer on every successful Iceberg commit; when null, the hook is a no-op. + */ + @Value("${cluster.optimizer.base-uri:#{null}}") + private String clusterOptimizerBaseUri; } diff --git a/services/tables/src/main/java/com/linkedin/openhouse/tables/config/OptimizerTableStatsClient.java b/services/tables/src/main/java/com/linkedin/openhouse/tables/config/OptimizerTableStatsClient.java new file mode 100644 index 000000000..f5881c7a4 --- /dev/null +++ b/services/tables/src/main/java/com/linkedin/openhouse/tables/config/OptimizerTableStatsClient.java @@ -0,0 +1,95 @@ +package com.linkedin.openhouse.tables.config; + +import com.linkedin.openhouse.tables.model.TableDto; +import java.time.Duration; +import java.util.Collections; +import java.util.Map; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; +import lombok.extern.slf4j.Slf4j; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty; +import org.springframework.stereotype.Component; +import org.springframework.web.reactive.function.client.WebClient; + +/** + * Thin client that pushes a per-table stats snapshot to the optimizer service on every successful + * Iceberg commit. + * + *

Activated by {@code cluster.optimizer.base-uri}. When absent (e.g. unit tests, dev clusters + * without an optimizer), the bean is not created and {@link + * com.linkedin.openhouse.tables.services.IcebergSnapshotsServiceImpl} skips the call. + * + *

The hook is best-effort: failures are logged but do not break the commit. The optimizer + * analyzer re-reads stats on each cycle, so a transient post failure is recovered on the next + * commit. + */ +@Component +@ConditionalOnProperty(name = "cluster.optimizer.base-uri") +@Slf4j +public class OptimizerTableStatsClient { + + private static final Duration TIMEOUT = Duration.ofSeconds(5); + + private final WebClient webClient; + + public OptimizerTableStatsClient(@Value("${cluster.optimizer.base-uri}") String baseUri) { + this.webClient = WebClient.builder().baseUrl(baseUri).build(); + } + + /** + * PUT a stats row for {@code table} to the optimizer service. Returns synchronously after the + * write completes or fails; exceptions are caught and logged at WARN. + */ + public void upsertTableStats(TableDto table) { + String tableUuid = table.getTableUUID(); + if (tableUuid == null) { + log.warn( + "Skipping optimizer stats push for {}.{}: tableUUID is null", + table.getDatabaseId(), + table.getTableId()); + return; + } + Map tableProperties = + table.getTableProperties() == null ? Collections.emptyMap() : table.getTableProperties(); + UpsertStatsBody body = + UpsertStatsBody.builder() + .databaseName(table.getDatabaseId()) + .tableName(table.getTableId()) + .tableProperties(tableProperties) + .build(); + try { + webClient + .put() + .uri("/v1/optimizer/stats/{tableUuid}", tableUuid) + .bodyValue(body) + .retrieve() + .toBodilessEntity() + .block(TIMEOUT); + } catch (Exception e) { + log.warn( + "Optimizer stats push failed for tableUuid={} ({}.{}): {}", + tableUuid, + table.getDatabaseId(), + table.getTableId(), + e.getMessage()); + } + } + + /** + * Wire body matching {@link + * com.linkedin.openhouse.optimizer.api.spec.UpsertTableStatsRequestDto}. Kept as a local + * representation so this module does not need to depend on the optimizer api module. + */ + @Data + @Builder + @NoArgsConstructor + @AllArgsConstructor + static class UpsertStatsBody { + private String databaseName; + private String tableName; + private Map tableProperties; + } +} diff --git a/services/tables/src/main/java/com/linkedin/openhouse/tables/services/IcebergSnapshotsServiceImpl.java b/services/tables/src/main/java/com/linkedin/openhouse/tables/services/IcebergSnapshotsServiceImpl.java index af43a169e..efa18e174 100644 --- a/services/tables/src/main/java/com/linkedin/openhouse/tables/services/IcebergSnapshotsServiceImpl.java +++ b/services/tables/src/main/java/com/linkedin/openhouse/tables/services/IcebergSnapshotsServiceImpl.java @@ -6,6 +6,7 @@ import com.linkedin.openhouse.common.exception.UnsupportedClientOperationException; import com.linkedin.openhouse.tables.api.spec.v0.request.IcebergSnapshotsRequestBody; import com.linkedin.openhouse.tables.authorization.Privileges; +import com.linkedin.openhouse.tables.config.OptimizerTableStatsClient; import com.linkedin.openhouse.tables.dto.mapper.TablesMapper; import com.linkedin.openhouse.tables.model.TableDto; import com.linkedin.openhouse.tables.model.TableDtoPrimaryKey; @@ -32,6 +33,10 @@ public class IcebergSnapshotsServiceImpl implements IcebergSnapshotsService { @Autowired AuthorizationUtils authorizationUtils; + /** Optional — wired only when {@code cluster.optimizer.base-uri} is set. */ + @Autowired(required = false) + OptimizerTableStatsClient optimizerTableStatsClient; + @Override public Pair putIcebergSnapshots( String databaseId, @@ -83,7 +88,11 @@ public Pair putIcebergSnapshots( databaseId, tableCreatorUpdater, Privileges.CREATE_TABLE); } try { - return Pair.of(openHouseInternalRepository.save(tableDtoToSave), !tableDto.isPresent()); + TableDto saved = openHouseInternalRepository.save(tableDtoToSave); + if (optimizerTableStatsClient != null) { + optimizerTableStatsClient.upsertTableStats(saved); + } + return Pair.of(saved, !tableDto.isPresent()); } catch (BadRequestException e) { throw new RequestValidationFailureException(e.getMessage(), e); } catch (CommitFailedException ce) { diff --git a/services/tables/src/test/java/com/linkedin/openhouse/tables/mock/service/IcebergSnapshotsServiceTest.java b/services/tables/src/test/java/com/linkedin/openhouse/tables/mock/service/IcebergSnapshotsServiceTest.java index 44467b705..37b9e5367 100644 --- a/services/tables/src/test/java/com/linkedin/openhouse/tables/mock/service/IcebergSnapshotsServiceTest.java +++ b/services/tables/src/test/java/com/linkedin/openhouse/tables/mock/service/IcebergSnapshotsServiceTest.java @@ -9,6 +9,7 @@ import com.linkedin.openhouse.tables.api.spec.v0.request.IcebergSnapshotsRequestBody; import com.linkedin.openhouse.tables.api.spec.v0.request.components.LockState; import com.linkedin.openhouse.tables.api.spec.v0.request.components.Policies; +import com.linkedin.openhouse.tables.config.OptimizerTableStatsClient; import com.linkedin.openhouse.tables.dto.mapper.TablesMapper; import com.linkedin.openhouse.tables.dto.mapper.TablesMapperImpl; import com.linkedin.openhouse.tables.model.TableDto; @@ -45,6 +46,8 @@ public class IcebergSnapshotsServiceTest { @MockBean private TableUUIDGenerator tableUUIDGenerator; + @MockBean private OptimizerTableStatsClient optimizerTableStatsClient; + private OpenHouseInternalRepository mockRepository; @Captor ArgumentCaptor tableDtoArgumentCaptor; @@ -88,6 +91,8 @@ public void testTableCreated() { Assertions.assertTrue(result.getSecond(), "Table must be created"); verifyCalls(key, TEST_TABLE_CREATOR, requestBody.getCreateUpdateTableRequestBody()); + Mockito.verify(optimizerTableStatsClient, Mockito.times(1)) + .upsertTableStats(Mockito.eq(tableDto)); } @Test From f188f552820daad009935a3fcca709b5282b33be Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Wed, 20 May 2026 09:19:13 -0700 Subject: [PATCH 04/14] feat(docker): wire optimizer service/analyzer/scheduler into compose MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three new repo-root Dockerfiles: - optimizer-service.Dockerfile (Spring web app, like tables-service) - optimizer-analyzer.Dockerfile (loops the CommandLineRunner on ANALYZER_INTERVAL_SECONDS; default 30s) - optimizer-scheduler.Dockerfile (loops on SCHEDULER_INTERVAL_SECONDS; default 30s) Compose wiring: - common/oh-services.yml — three new service blocks pointing at the new Dockerfiles; optimizer-service exposes 8005:8080 - oh-hadoop-spark/docker-compose.yml — includes the three services with MySQL credentials and inter-service URLs (analyzer/scheduler point at openhouse-optimizer; scheduler points at openhouse-jobs) - oh-hadoop-spark/cluster.yaml — sets cluster.optimizer.base-uri so the on-commit hook in tables service activates Build wiring: - build.gradle dockerPrereqs depends on the analyzer/scheduler bootJar targets Renamed OPTIMIZER_DB_USERNAME → OPTIMIZER_DB_USER in the optimizer service application.properties so all three optimizer JVMs read the same env var name. Co-Authored-By: Claude Opus 4.7 --- build.gradle | 6 +++ .../docker-compose/common/oh-services.yml | 14 ++++++ .../oh-hadoop-spark/cluster.yaml | 2 + .../oh-hadoop-spark/docker-compose.yml | 47 +++++++++++++++++++ optimizer-analyzer.Dockerfile | 44 +++++++++++++++++ optimizer-scheduler.Dockerfile | 43 +++++++++++++++++ optimizer-service.Dockerfile | 47 +++++++++++++++++++ .../src/main/resources/application.properties | 2 +- 8 files changed, 204 insertions(+), 1 deletion(-) create mode 100644 optimizer-analyzer.Dockerfile create mode 100644 optimizer-scheduler.Dockerfile create mode 100644 optimizer-service.Dockerfile diff --git a/build.gradle b/build.gradle index f7cdd71a8..25be0c820 100644 --- a/build.gradle +++ b/build.gradle @@ -178,6 +178,8 @@ tasks.register('CopyGitHooksTask', Copy) { // housetables-service.Dockerfile -> :services:housetables:bootJar // jobs-service.Dockerfile -> :services:jobs:bootJar // optimizer-service.Dockerfile -> :services:optimizer:bootJar +// optimizer-analyzer.Dockerfile -> :apps:optimizer-analyzer:bootJar +// optimizer-scheduler.Dockerfile -> :apps:optimizer-scheduler:bootJar // jobs-scheduler.Dockerfile -> :apps:openhouse-spark-apps_2.12:shadowJar (uber JAR) // spark-base-hadoop2.8.dockerfile -> // :integrations:spark:spark-3.1:openhouse-spark-runtime_2.12:shadowJar (uber JAR) @@ -198,6 +200,8 @@ tasks.register('dockerPrereqs') { dependsOn ':services:housetables:bootJar' dependsOn ':services:jobs:bootJar' dependsOn ':services:optimizer:bootJar' + dependsOn ':apps:optimizer-analyzer:bootJar' + dependsOn ':apps:optimizer-scheduler:bootJar' // Spark runtime uber JARs (shadowJar) dependsOn ':integrations:spark:spark-3.1:openhouse-spark-runtime_2.12:shadowJar' @@ -222,6 +226,8 @@ tasks.register('dockerPrereqs') { println ' build/housetables/libs/housetables.jar' println ' build/jobs/libs/jobs.jar' println ' build/optimizer/libs/optimizer.jar' + println ' build/optimizer-analyzer/libs/optimizer-analyzer.jar' + println ' build/optimizer-scheduler/libs/optimizer-scheduler.jar' println ' build/openhouse-spark-runtime_2.12/libs/openhouse-spark-runtime_2.12-uber.jar' println ' build/openhouse-spark-3.5-runtime_2.12/libs/openhouse-spark-3.5-runtime_2.12-uber.jar' println ' build/openhouse-spark-apps_2.12/libs/openhouse-spark-apps_2.12-uber.jar' diff --git a/infra/recipes/docker-compose/common/oh-services.yml b/infra/recipes/docker-compose/common/oh-services.yml index 73ef5a656..71c4f889e 100644 --- a/infra/recipes/docker-compose/common/oh-services.yml +++ b/infra/recipes/docker-compose/common/oh-services.yml @@ -22,6 +22,20 @@ services: build: context: ../../../.. dockerfile: jobs-scheduler.Dockerfile + openhouse-optimizer: + build: + context: ../../../.. + dockerfile: optimizer-service.Dockerfile + ports: + - 8005:8080 + openhouse-optimizer-analyzer: + build: + context: ../../../.. + dockerfile: optimizer-analyzer.Dockerfile + openhouse-optimizer-scheduler: + build: + context: ../../../.. + dockerfile: optimizer-scheduler.Dockerfile prometheus: image: prom/prometheus:v2.21.0 ports: diff --git a/infra/recipes/docker-compose/oh-hadoop-spark/cluster.yaml b/infra/recipes/docker-compose/oh-hadoop-spark/cluster.yaml index bb72176c5..81d142815 100644 --- a/infra/recipes/docker-compose/oh-hadoop-spark/cluster.yaml +++ b/infra/recipes/docker-compose/oh-hadoop-spark/cluster.yaml @@ -23,6 +23,8 @@ cluster: database: type: "MYSQL" url: "jdbc:mysql://mysql:3306/oh_db?allowPublicKeyRetrieval=true&useSSL=false" + optimizer: + base-uri: "http://openhouse-optimizer:8080" security: token: interceptor: diff --git a/infra/recipes/docker-compose/oh-hadoop-spark/docker-compose.yml b/infra/recipes/docker-compose/oh-hadoop-spark/docker-compose.yml index 0b8df01ac..73e8e2e85 100644 --- a/infra/recipes/docker-compose/oh-hadoop-spark/docker-compose.yml +++ b/infra/recipes/docker-compose/oh-hadoop-spark/docker-compose.yml @@ -81,6 +81,53 @@ services: - OTEL_LOGS_EXPORTER=none - OTEL_TRACES_SAMPLER=always_on + openhouse-optimizer: + container_name: local.openhouse-optimizer + extends: + file: ../common/oh-services.yml + service: openhouse-optimizer + volumes: + - ./:/var/config/ + depends_on: + - mysql + environment: + - OPTIMIZER_DB_URL=jdbc:mysql://mysql:3306/oh_db?allowPublicKeyRetrieval=true&useSSL=false + - OPTIMIZER_DB_USER=oh_user + - OPTIMIZER_DB_PASSWORD=oh_password + + openhouse-optimizer-analyzer: + container_name: local.openhouse-optimizer-analyzer + extends: + file: ../common/oh-services.yml + service: openhouse-optimizer-analyzer + volumes: + - ./:/var/config/ + depends_on: + - openhouse-optimizer + environment: + - OPTIMIZER_DB_URL=jdbc:mysql://mysql:3306/oh_db?allowPublicKeyRetrieval=true&useSSL=false + - OPTIMIZER_DB_USER=oh_user + - OPTIMIZER_DB_PASSWORD=oh_password + - ANALYZER_INTERVAL_SECONDS=30 + + openhouse-optimizer-scheduler: + container_name: local.openhouse-optimizer-scheduler + extends: + file: ../common/oh-services.yml + service: openhouse-optimizer-scheduler + volumes: + - ./:/var/config/ + depends_on: + - openhouse-optimizer + - openhouse-jobs + environment: + - OPTIMIZER_DB_URL=jdbc:mysql://mysql:3306/oh_db?allowPublicKeyRetrieval=true&useSSL=false + - OPTIMIZER_DB_USER=oh_user + - OPTIMIZER_DB_PASSWORD=oh_password + - JOBS_BASE_URI=http://openhouse-jobs:8080 + - SCHEDULER_RESULTS_ENDPOINT=http://openhouse-optimizer:8080/v1/optimizer/operations + - SCHEDULER_INTERVAL_SECONDS=30 + jaeger: container_name: local.jaeger image: jaegertracing/all-in-one:1.54 diff --git a/optimizer-analyzer.Dockerfile b/optimizer-analyzer.Dockerfile new file mode 100644 index 000000000..c2374c456 --- /dev/null +++ b/optimizer-analyzer.Dockerfile @@ -0,0 +1,44 @@ +FROM openjdk:23-ea-11-slim + +ARG USER=openhouse +ARG USER_ID=1000 +ARG GROUP_ID=1000 +ENV APP_NAME=optimizer-analyzer +ENV USER_HOME=/home/$USER +ENV ANALYZER_INTERVAL_SECONDS=30 + +# Create an openhouse user as there's no reason to run as root user +RUN groupadd --force -g $GROUP_ID $USER && useradd -l -d $USER_HOME -m $USER -u $USER_ID -g $GROUP_ID + +WORKDIR $USER_HOME + +# IMAGE does not set the necessary paths by default. +ENV PATH=$PATH:/export/apps/jdk/JDK-1_8_0_172/bin/:$USER_HOME + +ARG VERSION="1.0.0-SNAPSHOT" +ARG BUILD_DIR="build/$APP_NAME/libs" +ARG JAR_FILES=$BUILD_DIR/*.jar + +COPY $JAR_FILES ./ + +# Delete unwanted JAR files +RUN find . -name "*-sources.jar" -delete +RUN find . -name "*-javadoc.jar" -delete +RUN find . -name "*-lib.jar" -delete + +# Rename the JAR file. +RUN find ./ -name "*.jar" -exec mv {} $APP_NAME.jar \; +RUN ls $APP_NAME.jar + +# Ensure that everything in $USER_HOME is owned by openhouse user +RUN chown -R openhouse:openhouse $USER_HOME + +# Setup default path for Java +RUN mkdir -p /usr/java && ln -sfn /export/apps/jdk/JDK-1_8_0_172 /usr/java/default + +USER $USER + +# Loop the analyzer on a fixed interval. Each pass is a fresh JVM, so opt-3's +# analyzer (which runs once per CommandLineRunner invocation and exits) becomes +# a continuous service in this container. +ENTRYPOINT ["sh", "-c", "while true; do echo \"Running $APP_NAME at $(date)\"; java -Xmx256M -Xms64M -XX:NativeMemoryTracking=summary -jar $APP_NAME.jar; echo \"Exited; sleeping ${ANALYZER_INTERVAL_SECONDS}s\"; sleep ${ANALYZER_INTERVAL_SECONDS}; done"] diff --git a/optimizer-scheduler.Dockerfile b/optimizer-scheduler.Dockerfile new file mode 100644 index 000000000..d83fbe9e6 --- /dev/null +++ b/optimizer-scheduler.Dockerfile @@ -0,0 +1,43 @@ +FROM openjdk:23-ea-11-slim + +ARG USER=openhouse +ARG USER_ID=1000 +ARG GROUP_ID=1000 +ENV APP_NAME=optimizer-scheduler +ENV USER_HOME=/home/$USER +ENV SCHEDULER_INTERVAL_SECONDS=30 + +# Create an openhouse user as there's no reason to run as root user +RUN groupadd --force -g $GROUP_ID $USER && useradd -l -d $USER_HOME -m $USER -u $USER_ID -g $GROUP_ID + +WORKDIR $USER_HOME + +# IMAGE does not set the necessary paths by default. +ENV PATH=$PATH:/export/apps/jdk/JDK-1_8_0_172/bin/:$USER_HOME + +ARG VERSION="1.0.0-SNAPSHOT" +ARG BUILD_DIR="build/$APP_NAME/libs" +ARG JAR_FILES=$BUILD_DIR/*.jar + +COPY $JAR_FILES ./ + +# Delete unwanted JAR files +RUN find . -name "*-sources.jar" -delete +RUN find . -name "*-javadoc.jar" -delete +RUN find . -name "*-lib.jar" -delete + +# Rename the JAR file. +RUN find ./ -name "*.jar" -exec mv {} $APP_NAME.jar \; +RUN ls $APP_NAME.jar + +# Ensure that everything in $USER_HOME is owned by openhouse user +RUN chown -R openhouse:openhouse $USER_HOME + +# Setup default path for Java +RUN mkdir -p /usr/java && ln -sfn /export/apps/jdk/JDK-1_8_0_172 /usr/java/default + +USER $USER + +# Loop the scheduler on a fixed interval; each pass is a fresh JVM (opt-4's +# scheduler runs once per CommandLineRunner invocation and exits). +ENTRYPOINT ["sh", "-c", "while true; do echo \"Running $APP_NAME at $(date)\"; java -Xmx256M -Xms64M -XX:NativeMemoryTracking=summary -jar $APP_NAME.jar; echo \"Exited; sleeping ${SCHEDULER_INTERVAL_SECONDS}s\"; sleep ${SCHEDULER_INTERVAL_SECONDS}; done"] diff --git a/optimizer-service.Dockerfile b/optimizer-service.Dockerfile new file mode 100644 index 000000000..8f2af923c --- /dev/null +++ b/optimizer-service.Dockerfile @@ -0,0 +1,47 @@ +FROM openjdk:23-ea-11-slim + +ARG USER=openhouse +ARG USER_ID=1000 +ARG GROUP_ID=1000 +ENV APP_NAME=optimizer +ENV USER_HOME=/home/$USER + +# Create an openhouse user as there's no reason to run as root user +RUN groupadd --force -g $GROUP_ID $USER && useradd -l -d $USER_HOME -m $USER -u $USER_ID -g $GROUP_ID + +WORKDIR $USER_HOME + +# IMAGE does not set the necessary paths by default. +ENV PATH=$PATH:/export/apps/jdk/JDK-1_8_0_172/bin/:$USER_HOME + +ARG VERSION="1.0.0-SNAPSHOT" +ARG BUILD_DIR="build/$APP_NAME/libs" +ARG JAR_FILES=$BUILD_DIR/*.jar + +COPY $JAR_FILES ./ + +# Delete unwanted JAR files +RUN ls ./ +RUN find . -name "*-sources.jar" -delete +RUN find . -name "*-javadoc.jar" -delete +RUN find . -name "*-lib.jar" -delete + +# Rename the JAR file. +RUN ls ./ +RUN find ./ -name "*.jar" -exec mv {} $APP_NAME.jar \; + +RUN ls $APP_NAME.jar + +COPY run.sh . + + +# Ensure that everything in $USER_HOME is owned by openhouse user +RUN chown -R openhouse:openhouse $USER_HOME + +# Setup default path for Java +RUN mkdir -p /usr/java && ln -sfn /export/apps/jdk/JDK-1_8_0_172 /usr/java/default + +USER $USER + +EXPOSE 8080 +ENTRYPOINT ["sh", "-c", "./run.sh $APP_NAME.jar $@"] diff --git a/services/optimizer/src/main/resources/application.properties b/services/optimizer/src/main/resources/application.properties index c6c3f8437..e78745d00 100644 --- a/services/optimizer/src/main/resources/application.properties +++ b/services/optimizer/src/main/resources/application.properties @@ -12,7 +12,7 @@ spring.jpa.properties.hibernate.physical_naming_strategy=org.hibernate.boot.mode spring.datasource.driver-class-name=com.mysql.cj.jdbc.Driver spring.datasource.url=${OPTIMIZER_DB_URL:jdbc:mysql://localhost:3306/oh_db} -spring.datasource.username=${OPTIMIZER_DB_USERNAME:oh_user} +spring.datasource.username=${OPTIMIZER_DB_USER:oh_user} spring.datasource.password=${OPTIMIZER_DB_PASSWORD:oh_password} spring.datasource.hikari.maximum-pool-size=20 From 37304cfe45dfbd8d21fd793cf3dd29b81aeb4be2 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Wed, 20 May 2026 09:22:36 -0700 Subject: [PATCH 05/14] fix(optimizer): point @EntityScan at the actual db package AnalyzerApplication and SchedulerApplication scanned com.linkedin.openhouse.optimizer.entity for JPA-managed entities, but the entities live in com.linkedin.openhouse.optimizer.db. Both apps crashed at startup against a real database (MySQL backend in docker) with "Not a managed type: class ...db.TableStatsRow". Unit tests didn't catch it because they configure JPA differently. Co-Authored-By: Claude Opus 4.7 --- .../com/linkedin/openhouse/analyzer/AnalyzerApplication.java | 2 +- .../com/linkedin/openhouse/scheduler/SchedulerApplication.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerApplication.java b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerApplication.java index edee9c02e..1b6250c5e 100644 --- a/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerApplication.java +++ b/apps/optimizer-analyzer/src/main/java/com/linkedin/openhouse/analyzer/AnalyzerApplication.java @@ -10,7 +10,7 @@ /** Entry point for the Optimizer Analyzer application. */ @SpringBootApplication -@EntityScan(basePackages = "com.linkedin.openhouse.optimizer.entity") +@EntityScan(basePackages = "com.linkedin.openhouse.optimizer.db") @EnableJpaRepositories(basePackages = "com.linkedin.openhouse.optimizer.repository") public class AnalyzerApplication { diff --git a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SchedulerApplication.java b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SchedulerApplication.java index b28d1b73c..0b3b836d3 100644 --- a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SchedulerApplication.java +++ b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SchedulerApplication.java @@ -11,7 +11,7 @@ /** Entry point for the Optimizer Scheduler application. */ @SpringBootApplication -@EntityScan(basePackages = "com.linkedin.openhouse.optimizer.entity") +@EntityScan(basePackages = "com.linkedin.openhouse.optimizer.db") @EnableJpaRepositories(basePackages = "com.linkedin.openhouse.optimizer.repository") public class SchedulerApplication { From 1ec3acf01bf31ca6bbd557ba4f66e568802ee8cd Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Wed, 20 May 2026 10:18:29 -0700 Subject: [PATCH 06/14] fix(optimizer): four gaps surfaced by the end-to-end docker run Discovered while wiring up the OFD flow on the live oh-hadoop-spark stack. Each fix lives in a file owned by a lower PR; consolidating on opt-5 per the delta-discovery framing, to be relocated in a follow-up. 1. FileCountBinPacker.cost() NPE on stats with null snapshot. The analyzer can legitimately schedule OFD against a table whose stats row has no snapshot (e.g. brand-new table). Treat that as cost 0 instead of crashing. 2. SchedulerRunner.schedule(OperationType) was non-@Transactional; the 3-arg overload was annotated but self-invocation bypasses the CGLIB proxy, so the @Modifying repository calls hit TransactionRequiredException. Annotate the no-arg variant too. 3. TableOperation model dropped jobId entirely (db row had it; api dto had a field but couldn't be populated). Wire jobId through model.toRow/fromRow and dto.toModel/fromModel so the SCHEDULED row exposes its Spark job id over the API. 4. jobs.yaml ORPHAN_FILES_DELETION mapping pointed at the single-table OrphanFilesDeletionSparkApp, which doesn't recognize --tableNames and crashed at arg parsing. Route to BatchedOrphanFilesDeletionSparkApp. Verified end-to-end: stats PUT -> analyzer PENDING -> scheduler SCHEDULED with jobId -> Livy -> BatchedOFD launches -> NoSuchTableException for the stub table -> POST /operations/{id}/complete with FAILED + errorMessage + errorType -> history row written. The success path is structurally the same; we'll exercise it once we wire up real Spark-table creation. Co-Authored-By: Claude Opus 4.7 --- .../com/linkedin/openhouse/scheduler/FileCountBinPacker.java | 3 +++ .../com/linkedin/openhouse/scheduler/SchedulerRunner.java | 1 + infra/recipes/docker-compose/oh-hadoop-spark/jobs.yaml | 2 +- .../openhouse/optimizer/api/spec/TableOperationsDto.java | 2 ++ .../linkedin/openhouse/optimizer/model/TableOperation.java | 5 +++++ 5 files changed, 12 insertions(+), 1 deletion(-) diff --git a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/FileCountBinPacker.java b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/FileCountBinPacker.java index 9110b0dea..060e001e8 100644 --- a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/FileCountBinPacker.java +++ b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/FileCountBinPacker.java @@ -74,6 +74,9 @@ public List pack(List pending) { } private static long cost(TableStats stats) { + if (stats == null || stats.getSnapshot() == null) { + return 0L; + } Long n = stats.getSnapshot().getNumCurrentFiles(); return n != null ? n : 0L; } diff --git a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SchedulerRunner.java b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SchedulerRunner.java index 124633aa3..2c602ba0d 100644 --- a/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SchedulerRunner.java +++ b/apps/optimizer-scheduler/src/main/java/com/linkedin/openhouse/scheduler/SchedulerRunner.java @@ -49,6 +49,7 @@ public SchedulerRunner( } /** Schedule all PENDING operations of the given type across all databases. */ + @Transactional public void schedule(OperationType operationType) { schedule(operationType, Optional.empty(), Optional.empty()); } diff --git a/infra/recipes/docker-compose/oh-hadoop-spark/jobs.yaml b/infra/recipes/docker-compose/oh-hadoop-spark/jobs.yaml index 3e795d4db..b666b45a8 100644 --- a/infra/recipes/docker-compose/oh-hadoop-spark/jobs.yaml +++ b/infra/recipes/docker-compose/oh-hadoop-spark/jobs.yaml @@ -51,7 +51,7 @@ jobs: << : *apps-defaults << : *livy-engine - type: ORPHAN_FILES_DELETION - class-name: com.linkedin.openhouse.jobs.spark.OrphanFilesDeletionSparkApp + class-name: com.linkedin.openhouse.jobs.spark.BatchedOrphanFilesDeletionSparkApp args: ["--backupDir", ".backup"] <<: *apps-defaults spark-properties: diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperationsDto.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperationsDto.java index 496f59f42..14673c7c9 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperationsDto.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/TableOperationsDto.java @@ -52,6 +52,7 @@ public TableOperation toModel() { .status(status == null ? null : status.toModel()) .createdAt(createdAt) .scheduledAt(scheduledAt) + .jobId(jobId) .build(); } @@ -69,6 +70,7 @@ public static TableOperationsDto fromModel(TableOperation op) { .status(OperationStatusDto.fromModel(op.getStatus())) .createdAt(op.getCreatedAt()) .scheduledAt(op.getScheduledAt()) + .jobId(op.getJobId()) .build(); } } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java index dcaf9aeea..6007ba0fb 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/model/TableOperation.java @@ -46,6 +46,9 @@ public class TableOperation { /** When the scheduler last submitted a job for this operation. */ private Instant scheduledAt; + /** Job ID returned by the Jobs Service after the scheduler submitted; null until SCHEDULED. */ + private String jobId; + /** Create a new PENDING operation for the given table and operation type. */ public static TableOperation pending(Table table, OperationType operationType) { return TableOperation.builder() @@ -77,6 +80,7 @@ public TableOperationsRow toRow() { .status(status == null ? null : status.toDb()) .createdAt(createdAt) .scheduledAt(scheduledAt) + .jobId(jobId) .build(); } @@ -94,6 +98,7 @@ public static TableOperation fromRow(TableOperationsRow row) { .status(OperationStatus.fromDb(row.getStatus())) .createdAt(row.getCreatedAt()) .scheduledAt(row.getScheduledAt()) + .jobId(row.getJobId()) .build(); } } From 65b72051dab8593cc9d686403bec2c800dadb067 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Wed, 27 May 2026 14:19:10 -0700 Subject: [PATCH 07/14] fix(optimizer-5): close five wiring gaps surfaced by the end-to-end OFD demo MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The SUCCESS-path demo on -5 exposed five separate gaps between the merged component layers; each was a real bug that blocked the loop from running green out-of-the-box. 1. build.gradle dockerPrereqs + Dockerfile BUILD_DIR pointed at the pre-split paths (apps/optimizer-analyzer, apps/optimizer-scheduler). After the analyzer/scheduler module split into apps/optimizer/{analyzerapp,schedulerapp} and the corresponding build outputs land at build/{analyzerapp,schedulerapp}/libs/, dockerUp failed with "Task with path ':apps:optimizer-analyzer:bootJar' not found". 2. tables-service env var CLUSTER_OPTIMIZER_BASE_URI. The OptimizerTableStatsClient @ConditionalOnProperty(name= "cluster.optimizer.base-uri") did not activate from the cluster.yaml- loaded value alone in the running container — likely a property-source ordering race between the tables scan-package and ClusterProperties' @PropertySource. Setting the env var lets Spring's relaxed binding resolve the property deterministically before the conditional fires. Verified via /actuator/beans: optimizerTableStatsClient now appears in the bean list. 3. CadenceBasedOrphanFilesDeletionAnalyzer ambiguous-constructor crash. The class has two constructors (a public @Value-injected one and a package-private test one). Without @Autowired on either, Spring couldn't pick and threw NoSuchMethodException: ...() on every analyzer cron pass. The analyzer integration test wires the policy directly via the test constructor, so the @Component-scan path was never exercised — real bug only visible in the deployable app. 4. jobs.yaml --ttl 0 for ORPHAN_FILES_DELETION. The Spark app defaults to a 7-day older-than filter on candidate files. Newly-expired snapshots in the test stack are seconds old, so OFD reported 0 files deleted every run. Adding --ttl 0 makes all orphans eligible in the test recipe. End-to-end SUCCESS verified on 2/3 tables in the docker stack: demo_ofd_b (23 files / 101,503 bytes deleted) and demo_ofd_c (11 files / 47,385 bytes deleted) both reach status=SUCCESS in history with per-table metrics. The 3rd table hits a docker-stack OOM (executor SIGKILL/137) under three concurrent OFD jobs, not a code bug. Co-Authored-By: Claude Opus 4.7 (1M context) --- build.gradle | 12 ++++++------ .../oh-hadoop-spark/docker-compose.yml | 1 + .../recipes/docker-compose/oh-hadoop-spark/jobs.yaml | 2 +- optimizer-analyzer.Dockerfile | 2 +- optimizer-scheduler.Dockerfile | 2 +- .../CadenceBasedOrphanFilesDeletionAnalyzer.java | 1 + 6 files changed, 11 insertions(+), 9 deletions(-) diff --git a/build.gradle b/build.gradle index 964b67e3a..1f951a91c 100644 --- a/build.gradle +++ b/build.gradle @@ -178,8 +178,8 @@ tasks.register('CopyGitHooksTask', Copy) { // housetables-service.Dockerfile -> :services:housetables:bootJar // jobs-service.Dockerfile -> :services:jobs:bootJar // optimizer-service.Dockerfile -> :services:optimizer:bootJar -// optimizer-analyzer.Dockerfile -> :apps:optimizer-analyzer:bootJar -// optimizer-scheduler.Dockerfile -> :apps:optimizer-scheduler:bootJar +// optimizer-analyzer.Dockerfile -> :apps:optimizer:analyzerapp:bootJar +// optimizer-scheduler.Dockerfile -> :apps:optimizer:schedulerapp:bootJar // jobs-scheduler.Dockerfile -> :apps:openhouse-spark-apps_2.12:shadowJar (uber JAR) // spark-base-hadoop2.8.dockerfile -> // :integrations:spark:spark-3.1:openhouse-spark-runtime_2.12:shadowJar (uber JAR) @@ -200,8 +200,8 @@ tasks.register('dockerPrereqs') { dependsOn ':services:housetables:bootJar' dependsOn ':services:jobs:bootJar' dependsOn ':services:optimizer:bootJar' - dependsOn ':apps:optimizer-analyzer:bootJar' - dependsOn ':apps:optimizer-scheduler:bootJar' + dependsOn ':apps:optimizer:analyzerapp:bootJar' + dependsOn ':apps:optimizer:schedulerapp:bootJar' // Spark runtime uber JARs (shadowJar) dependsOn ':integrations:spark:spark-3.1:openhouse-spark-runtime_2.12:shadowJar' @@ -226,8 +226,8 @@ tasks.register('dockerPrereqs') { println ' build/housetables/libs/housetables.jar' println ' build/jobs/libs/jobs.jar' println ' build/optimizer/libs/optimizer.jar' - println ' build/optimizer-analyzer/libs/optimizer-analyzer.jar' - println ' build/optimizer-scheduler/libs/optimizer-scheduler.jar' + println ' build/analyzerapp/libs/analyzerapp.jar' + println ' build/schedulerapp/libs/schedulerapp.jar' println ' build/openhouse-spark-runtime_2.12/libs/openhouse-spark-runtime_2.12-uber.jar' println ' build/openhouse-spark-3.5-runtime_2.12/libs/openhouse-spark-3.5-runtime_2.12-uber.jar' println ' build/openhouse-spark-apps_2.12/libs/openhouse-spark-apps_2.12-uber.jar' diff --git a/infra/recipes/docker-compose/oh-hadoop-spark/docker-compose.yml b/infra/recipes/docker-compose/oh-hadoop-spark/docker-compose.yml index 73e8e2e85..ec404ae81 100644 --- a/infra/recipes/docker-compose/oh-hadoop-spark/docker-compose.yml +++ b/infra/recipes/docker-compose/oh-hadoop-spark/docker-compose.yml @@ -22,6 +22,7 @@ services: - OTEL_METRICS_EXPORTER=none - OTEL_LOGS_EXPORTER=none - OTEL_TRACES_SAMPLER=always_on + - CLUSTER_OPTIMIZER_BASE_URI=http://openhouse-optimizer:8080 openhouse-jobs: container_name: local.openhouse-jobs diff --git a/infra/recipes/docker-compose/oh-hadoop-spark/jobs.yaml b/infra/recipes/docker-compose/oh-hadoop-spark/jobs.yaml index b666b45a8..7a0c51e01 100644 --- a/infra/recipes/docker-compose/oh-hadoop-spark/jobs.yaml +++ b/infra/recipes/docker-compose/oh-hadoop-spark/jobs.yaml @@ -52,7 +52,7 @@ jobs: << : *livy-engine - type: ORPHAN_FILES_DELETION class-name: com.linkedin.openhouse.jobs.spark.BatchedOrphanFilesDeletionSparkApp - args: ["--backupDir", ".backup"] + args: ["--backupDir", ".backup", "--ttl", "0"] <<: *apps-defaults spark-properties: <<: *spark-defaults diff --git a/optimizer-analyzer.Dockerfile b/optimizer-analyzer.Dockerfile index c2374c456..925018dc7 100644 --- a/optimizer-analyzer.Dockerfile +++ b/optimizer-analyzer.Dockerfile @@ -16,7 +16,7 @@ WORKDIR $USER_HOME ENV PATH=$PATH:/export/apps/jdk/JDK-1_8_0_172/bin/:$USER_HOME ARG VERSION="1.0.0-SNAPSHOT" -ARG BUILD_DIR="build/$APP_NAME/libs" +ARG BUILD_DIR="build/analyzerapp/libs" ARG JAR_FILES=$BUILD_DIR/*.jar COPY $JAR_FILES ./ diff --git a/optimizer-scheduler.Dockerfile b/optimizer-scheduler.Dockerfile index d83fbe9e6..9be0f3d4c 100644 --- a/optimizer-scheduler.Dockerfile +++ b/optimizer-scheduler.Dockerfile @@ -16,7 +16,7 @@ WORKDIR $USER_HOME ENV PATH=$PATH:/export/apps/jdk/JDK-1_8_0_172/bin/:$USER_HOME ARG VERSION="1.0.0-SNAPSHOT" -ARG BUILD_DIR="build/$APP_NAME/libs" +ARG BUILD_DIR="build/schedulerapp/libs" ARG JAR_FILES=$BUILD_DIR/*.jar COPY $JAR_FILES ./ diff --git a/services/optimizer/analyzer/src/main/java/com/linkedin/openhouse/optimizer/analyzer/CadenceBasedOrphanFilesDeletionAnalyzer.java b/services/optimizer/analyzer/src/main/java/com/linkedin/openhouse/optimizer/analyzer/CadenceBasedOrphanFilesDeletionAnalyzer.java index 1f4b31542..f344dcb2b 100644 --- a/services/optimizer/analyzer/src/main/java/com/linkedin/openhouse/optimizer/analyzer/CadenceBasedOrphanFilesDeletionAnalyzer.java +++ b/services/optimizer/analyzer/src/main/java/com/linkedin/openhouse/optimizer/analyzer/CadenceBasedOrphanFilesDeletionAnalyzer.java @@ -51,6 +51,7 @@ public class CadenceBasedOrphanFilesDeletionAnalyzer implements OperationAnalyze private final CadencePolicy cadencePolicy; + @org.springframework.beans.factory.annotation.Autowired public CadenceBasedOrphanFilesDeletionAnalyzer( @Value("${ofd.success-retry-hours:16}") long successRetryHours, @Value("${ofd.failure-retry-hours:1}") long failureRetryHours) { From 205f86556d1b736048199624789e876c6dfa5776 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Wed, 27 May 2026 14:19:26 -0700 Subject: [PATCH 08/14] feat(optimizer-5): port end-to-end OFD demo scripts from the integration tip MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Four scripts at the repo root that drive the optimizer loop against a local oh-hadoop-spark docker stack, adapted for the -5 API surface: - local-spark-sql.sh — Livy session-reuse shim for one-liner Spark SQL. - demo-setup.sh — create N tables with maintenance.optimizer.ofd.enabled =true, populate via INSERT OVERWRITE, expire snapshots (producing orphan data files on HDFS), then wait for the continuous analyzer + scheduler to land SCHEDULED rows. - demo-check.sh — show per-table HDFS file counts and the most recent operations-history entries per tableUuid. - demo.sh — wraps the setup, polls for SUCCESS history per table, and runs demo-check.sh at the end. URLs adapted from the tip's pre-stack surface (/v1/table-stats, /v1/table-operations, port 8003) to -5's stacked surface (/v1/optimizer/stats, /v1/optimizer/operations, /v1/optimizer/operations-history/{tableUuid}, port 8005, required ?limit on list endpoints, history lookup by tableUuid rather than by databaseName). The analyzer + scheduler run continuously on -5 (every 30s loop in the analyzer/scheduler containers), so the manual "run-analyzer"/ "run-scheduler" docker-compose profiles the tip used are dropped — the demo just waits for the next cron pass. Co-Authored-By: Claude Opus 4.7 (1M context) --- demo-check.sh | 27 +++++++++ demo-setup.sh | 140 +++++++++++++++++++++++++++++++++++++++++++++ demo.sh | 39 +++++++++++++ local-spark-sql.sh | 85 +++++++++++++++++++++++++++ 4 files changed, 291 insertions(+) create mode 100755 demo-check.sh create mode 100755 demo-setup.sh create mode 100755 demo.sh create mode 100755 local-spark-sql.sh diff --git a/demo-check.sh b/demo-check.sh new file mode 100755 index 000000000..d7c60c55d --- /dev/null +++ b/demo-check.sh @@ -0,0 +1,27 @@ +#!/usr/bin/env bash +# demo-check.sh — Run after demo-setup.sh. Verifies SUCCESS history rows and +# shows HDFS file counts per table. +set -e + +OPT_API="http://localhost:8005" + +if [ ! -f /tmp/demo_ofd_locs.txt ] || [ ! -f /tmp/demo_ofd_uuids.txt ]; then + echo "ERROR: /tmp/demo_ofd_locs.txt or /tmp/demo_ofd_uuids.txt not found — run demo-setup.sh first" + exit 1 +fi + +echo "=== HDFS data files after OFD ===" +while IFS='=' read -r TABLE TABLE_LOC; do + FILE_COUNT=$(docker exec local.namenode \ + hdfs dfs -ls -R "$TABLE_LOC/data/" 2>/dev/null \ + | grep -c "\.orc" || echo "0") + echo " $TABLE: $FILE_COUNT files remaining" +done < /tmp/demo_ofd_locs.txt + +echo "" +echo "=== Operation history per table ===" +while IFS='=' read -r TABLE TABLE_UUID; do + echo " --- $TABLE ($TABLE_UUID) ---" + curl -sf "$OPT_API/v1/optimizer/operations-history/$TABLE_UUID?limit=10" \ + | jq '.[] | {status, orphanFilesDeleted, orphanBytesDeleted, errorMessage, completedAt}' +done < /tmp/demo_ofd_uuids.txt diff --git a/demo-setup.sh b/demo-setup.sh new file mode 100755 index 000000000..b39116a4d --- /dev/null +++ b/demo-setup.sh @@ -0,0 +1,140 @@ +#!/usr/bin/env bash +# demo-setup.sh — Populate tables, expire snapshots, wait for the continuous +# analyzer + scheduler to schedule OFD jobs. +# +# Prerequisites: ./gradlew dockerUp (stack must be healthy) +# +# After this script completes, batched-OFD Spark jobs have been submitted via +# the continuous scheduler. Use demo-check.sh to watch for completion. +set -e + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +TOKEN=$(cat "$SCRIPT_DIR/services/common/src/main/resources/dummy.token") +TABLES_API="http://localhost:8000" +JOBS_API="http://localhost:8002/jobs" +OPT_API="http://localhost:8005" +LIVY_API="http://localhost:9003" + +TABLES="demo_ofd_a:5 demo_ofd_b:7 demo_ofd_c:4" +TABLE_COUNT=3 +ANALYZER_WAIT_SECS=90 +SCHEDULER_WAIT_SECS=90 + +wait_for_job() { + local JOB_ID="$1" LABEL="$2" MAX_SECS="${3:-180}" + local i=0 + while [ $i -lt "$MAX_SECS" ]; do + STATE=$(curl -sf "$JOBS_API/$JOB_ID" | jq -r '.state // empty' 2>/dev/null || echo "") + [ "$STATE" = "SUCCEEDED" ] && return 0 + [ "$STATE" = "FAILED" ] && { echo "FAIL: $LABEL job $JOB_ID FAILED"; exit 1; } + sleep 5 + i=$((i + 5)) + done + echo "FAIL: $LABEL job $JOB_ID timed out after ${MAX_SECS}s (last state: $STATE)" + exit 1 +} + +kill_idle_session() { + IDLE=$(curl -sf "$LIVY_API/sessions" \ + | jq -r '[.sessions[] | select(.state=="idle")] | first | .id // empty') + if [ -n "$IDLE" ]; then + curl -sf -X DELETE "$LIVY_API/sessions/$IDLE" > /dev/null + echo " Freed idle Livy session $IDLE" + fi +} + +wait_for_count() { + local URL="$1" EXPECTED="$2" LABEL="$3" MAX_SECS="$4" + local i=0 + while [ $i -lt "$MAX_SECS" ]; do + COUNT=$(curl -sf "$URL" | jq 'length' 2>/dev/null || echo "0") + [ "$COUNT" -ge "$EXPECTED" ] && return 0 + printf " %s: %d/%d (waited %ds)\r" "$LABEL" "$COUNT" "$EXPECTED" "$i" + sleep 5 + i=$((i + 5)) + done + echo "" + echo "FAIL: $LABEL expected $EXPECTED, got $COUNT after ${MAX_SECS}s" + exit 1 +} + +echo "=== [1/4] Create tables and populate with data ===" +rm -f /tmp/demo_ofd_locs.txt /tmp/demo_ofd_uuids.txt + +for entry in $TABLES; do + TABLE="${entry%%:*}" + WRITES="${entry##*:}" + ORPHANS=$((WRITES - 2)) + + "$SCRIPT_DIR/local-spark-sql.sh" "DROP TABLE IF EXISTS openhouse.db1.$TABLE" > /dev/null + "$SCRIPT_DIR/local-spark-sql.sh" "CREATE TABLE openhouse.db1.$TABLE ( + id STRING, val STRING + ) TBLPROPERTIES ('maintenance.optimizer.ofd.enabled'='true')" > /dev/null + + for i in $(seq 1 "$WRITES"); do + "$SCRIPT_DIR/local-spark-sql.sh" \ + "INSERT OVERWRITE openhouse.db1.$TABLE VALUES ('$i', 'row$i')" > /dev/null + printf " $TABLE: insert %d/%d\r" "$i" "$WRITES" + done + echo "" + + TBL_JSON=$(curl -sf -H "Authorization: Bearer $TOKEN" \ + "$TABLES_API/v1/databases/db1/tables/$TABLE") + TABLE_LOC=$(dirname "$(echo "$TBL_JSON" | jq -r '.tableLocation')") + TABLE_UUID=$(echo "$TBL_JSON" | jq -r '.tableUUID') + echo " $TABLE -> $TABLE_LOC ($WRITES snapshots, $ORPHANS will become orphans)" + echo " $TABLE uuid=$TABLE_UUID" + echo "$TABLE=$TABLE_LOC" >> /tmp/demo_ofd_locs.txt + echo "$TABLE=$TABLE_UUID" >> /tmp/demo_ofd_uuids.txt +done + +STATS_COUNT=$(curl -sf "$OPT_API/v1/optimizer/stats?limit=100" | jq 'length') +[ "$STATS_COUNT" -ge "$TABLE_COUNT" ] \ + || { echo "FAIL: expected $TABLE_COUNT stats rows, got $STATS_COUNT"; exit 1; } +echo "PASS: $STATS_COUNT stats rows posted by Tables Service on-commit hook" + +echo "" +echo "=== [2/4] Expire old snapshots (creates orphan data files) ===" +kill_idle_session + +EXPIRE_JOBS="" +for entry in $TABLES; do + TABLE="${entry%%:*}" + BODY=$(jq -n --arg n "demo-expire-$TABLE" --arg t "db1.$TABLE" \ + '{jobName:$n, clusterId:"LocalHadoopCluster", + jobConf:{jobType:"SNAPSHOTS_EXPIRATION", + args:["--tableName",$t,"--maxAge","1","--granularity","days","--versions","1"]}}') + JOB_ID=$(curl -sf -X POST "$JOBS_API" -H "Content-Type: application/json" -d "$BODY" \ + | jq -r '.jobId') + [ -n "$JOB_ID" ] || { echo "FAIL: could not submit expiration job for $TABLE"; exit 1; } + echo " $TABLE: submitted $JOB_ID" + EXPIRE_JOBS="$EXPIRE_JOBS $JOB_ID" +done + +for JOB_ID in $EXPIRE_JOBS; do + wait_for_job "$JOB_ID" "snapshot-expiration" 180 + echo " $JOB_ID: SUCCEEDED" +done + +while IFS='=' read -r TABLE TABLE_LOC; do + COUNT=$(docker exec local.namenode \ + hdfs dfs -ls -R "$TABLE_LOC/data/" 2>/dev/null | grep -c "\.orc" || echo "0") + [ "$COUNT" -ge 2 ] \ + || { echo "FAIL: $TABLE has $COUNT files after expiration, expected >= 2"; exit 1; } + echo " $TABLE: $COUNT files on HDFS ($((COUNT-1)) orphans + 1 live)" +done < /tmp/demo_ofd_locs.txt + +echo "" +echo "=== [3/4] Wait for analyzer → scheduler → SCHEDULED ===" +echo " (analyzer and scheduler both run every ~30s; PENDING is transient)" +wait_for_count "$OPT_API/v1/optimizer/operations?status=SCHEDULED&limit=100" \ + "$TABLE_COUNT" "SCHEDULED ops" "$(($ANALYZER_WAIT_SECS + $SCHEDULER_WAIT_SECS))" +echo "" +echo "PASS: $TABLE_COUNT operations SCHEDULED" + +echo "" +echo "=== [4/4] (skip) — SCHEDULED is the SUCCESS-readiness signal ===" + +echo "" +echo "Setup complete. Batched-OFD Spark jobs have been launched." +echo "Run ./demo-check.sh to verify SUCCESS in history + orphan files deleted." diff --git a/demo.sh b/demo.sh new file mode 100755 index 000000000..89ea74af3 --- /dev/null +++ b/demo.sh @@ -0,0 +1,39 @@ +#!/usr/bin/env bash +# demo.sh — Full end-to-end optimizer demo from a clean docker stack. +# +# Prerequisites: ./gradlew dockerUp (stack must be healthy) +set -e + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +OPT_API="http://localhost:8005" + +TABLE_COUNT=3 +SUCCESS_WAIT_SECS=300 + +bash "$SCRIPT_DIR/demo-setup.sh" + +echo "" +echo "=== [Wait] OFD Spark jobs to complete (up to 5 min) ===" +for i in $(seq 1 30); do + TOTAL_HISTORY=0 + SUCCESS_HISTORY=0 + if [ -f /tmp/demo_ofd_uuids.txt ]; then + while IFS='=' read -r _ UUID; do + LATEST=$(curl -sf "$OPT_API/v1/optimizer/operations-history/$UUID?limit=1" \ + | jq -r '.[0].status // empty' 2>/dev/null || echo "") + [ -n "$LATEST" ] && TOTAL_HISTORY=$((TOTAL_HISTORY + 1)) + [ "$LATEST" = "SUCCESS" ] && SUCCESS_HISTORY=$((SUCCESS_HISTORY + 1)) + done < /tmp/demo_ofd_uuids.txt + fi + [ "$SUCCESS_HISTORY" -ge "$TABLE_COUNT" ] && break + echo " $i/30: $SUCCESS_HISTORY SUCCESS / $TOTAL_HISTORY total history rows..." + sleep 10 +done + +[ "$SUCCESS_HISTORY" -ge "$TABLE_COUNT" ] \ + || { echo "FAIL: only $SUCCESS_HISTORY/$TABLE_COUNT operations reached SUCCESS"; bash "$SCRIPT_DIR/demo-check.sh"; exit 1; } + +echo "" +echo "PASS: all $TABLE_COUNT operations completed with SUCCESS" +echo "" +bash "$SCRIPT_DIR/demo-check.sh" diff --git a/local-spark-sql.sh b/local-spark-sql.sh new file mode 100755 index 000000000..45582f698 --- /dev/null +++ b/local-spark-sql.sh @@ -0,0 +1,85 @@ +#!/usr/bin/env bash +# local-spark-sql.sh — Execute a SQL statement against the local OpenHouse docker cluster. +# +# Reuses an existing idle Livy session if one exists. First run pays the cold start (~20s), +# subsequent runs execute immediately. +# +# Usage: ./local-spark-sql.sh "INSERT INTO openhouse.db1.smoke_tbl VALUES ('3', 'c')" +# ./local-spark-sql.sh --kill # tear down the reusable session +set -e + +LIVY="http://localhost:9003" +TOKEN=$(cat /Users/mkuchenb/code/openhouse/services/common/src/main/resources/dummy.token) + +if [ "$1" = "--kill" ]; then + SESSIONS=$(curl -sf "$LIVY/sessions" | jq -r '.sessions[].id') + for sid in $SESSIONS; do + curl -sf -X DELETE "$LIVY/sessions/$sid" > /dev/null 2>&1 + echo "Killed session $sid" + done + exit 0 +fi + +if [ -z "$1" ]; then + echo "Usage: local-spark-sql.sh \"\"" + echo " local-spark-sql.sh --kill" + exit 1 +fi + +SQL="$1" + +SESSION_ID=$(curl -sf "$LIVY/sessions" | jq -r '[.sessions[] | select(.state == "idle")] | first | .id // empty') + +if [ -z "$SESSION_ID" ]; then + SPARK_CONF=$(jq -n --arg token "$TOKEN" '{ + kind: "sql", + conf: { + "spark.jars": "local:/opt/spark/openhouse-spark-runtime_2.12-latest-all.jar", + "spark.jars.packages": "org.apache.iceberg:iceberg-spark-runtime-3.1_2.12:1.2.0", + "spark.sql.extensions": "org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions,com.linkedin.openhouse.spark.extensions.OpenhouseSparkSessionExtensions", + "spark.sql.catalog.openhouse": "org.apache.iceberg.spark.SparkCatalog", + "spark.sql.catalog.openhouse.catalog-impl": "com.linkedin.openhouse.spark.OpenHouseCatalog", + "spark.sql.catalog.openhouse.uri": "http://openhouse-tables:8080", + "spark.sql.catalog.openhouse.auth-token": $token, + "spark.sql.catalog.openhouse.cluster": "LocalHadoopCluster" + } + }') + + echo "No idle session found, creating one..." + SESSION_ID=$(curl -sf -X POST "$LIVY/sessions" \ + -H "Content-Type: application/json" \ + -d "$SPARK_CONF" | jq -r '.id') + + for i in $(seq 1 60); do + STATE=$(curl -sf "$LIVY/sessions/$SESSION_ID/state" | jq -r '.state') + [ "$STATE" = "idle" ] && break + if [ "$STATE" = "error" ] || [ "$STATE" = "dead" ]; then + echo "FAIL: session state=$STATE" + exit 1 + fi + sleep 2 + done + [ "$STATE" = "idle" ] || { echo "FAIL: session never reached idle (state=$STATE)"; exit 1; } +fi + +STMT_ID=$(curl -sf -X POST "$LIVY/sessions/$SESSION_ID/statements" \ + -H "Content-Type: application/json" \ + -d "$(jq -n --arg code "$SQL" '{code: $code}')" | jq -r '.id') + +for i in $(seq 1 60); do + STMT_STATE=$(curl -sf "$LIVY/sessions/$SESSION_ID/statements/$STMT_ID" | jq -r '.state') + [ "$STMT_STATE" = "available" ] && break + [ "$STMT_STATE" = "error" ] && break + sleep 1 +done + +RESULT=$(curl -sf "$LIVY/sessions/$SESSION_ID/statements/$STMT_ID") +STATUS=$(echo "$RESULT" | jq -r '.output.status') + +if [ "$STATUS" = "error" ]; then + echo "FAIL: SQL execution error:" + echo "$RESULT" | jq -r '.output.evalue, .output.traceback[]?' + exit 1 +fi + +echo "$RESULT" | jq -r '.output.data["text/plain"] // ""' From 6e2669ba5ca18ab88d8d5972ff7ce16240a7b8ab Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Wed, 27 May 2026 13:26:06 -0700 Subject: [PATCH 09/14] feat(tables): post-commit stats push to optimizer After a successful Iceberg snapshot commit, push the table's current snapshot summary to the optimizer's PUT /v1/optimizer/stats/{tableUuid} endpoint. Opt-in per table via the maintenance.optimizer.stats.enabled table property; globally gated by optimizer.stats.enabled in application.properties (default false). Implementation notes: - Stats sourced from the in-memory Snapshot.summary() map captured at convertToTableDto(); no extra HDFS round-trip on the commit path. - WebClient + Reactor pipeline runs fire-and-forget on the Netty event loop. The hook on the commit thread is .subscribe() and returns immediately, so a slow/down optimizer cannot impact commit latency. - Per-attempt timeout 1000 ms, total budget 2000 ms, up to 3 attempts. Retries only fire on retryable errors (network, 408, 429, 5xx, TimeoutException). All terminal errors are swallowed; Micrometer counters and a warn log preserve observability. - Bean wiring (WebClient, client component) is @ConditionalOnProperty on optimizer.stats.enabled=true. The service consumes the client as an Optional, so disabled deployments construct nothing. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../common/metrics/MetricsConstant.java | 6 + services/tables/build.gradle | 1 + .../openhouse/tables/model/TableDto.java | 13 + .../impl/InternalRepositoryUtils.java | 2 + .../services/IcebergSnapshotsServiceImpl.java | 17 +- .../optimizer/OptimizerStatsClient.java | 245 ++++++++++++++++++ .../optimizer/OptimizerStatsConfig.java | 35 +++ .../optimizer/OptimizerStatsProperties.java | 43 +++ .../optimizer/OptimizerStatsRequest.java | 70 +++++ .../src/main/resources/application.properties | 10 +- .../service/IcebergSnapshotsServiceTest.java | 37 +++ .../tables/model/TableDtoMappingTest.java | 3 +- .../optimizer/OptimizerStatsClientTest.java | 242 +++++++++++++++++ 13 files changed, 721 insertions(+), 3 deletions(-) create mode 100644 services/tables/src/main/java/com/linkedin/openhouse/tables/services/optimizer/OptimizerStatsClient.java create mode 100644 services/tables/src/main/java/com/linkedin/openhouse/tables/services/optimizer/OptimizerStatsConfig.java create mode 100644 services/tables/src/main/java/com/linkedin/openhouse/tables/services/optimizer/OptimizerStatsProperties.java create mode 100644 services/tables/src/main/java/com/linkedin/openhouse/tables/services/optimizer/OptimizerStatsRequest.java create mode 100644 services/tables/src/test/java/com/linkedin/openhouse/tables/services/optimizer/OptimizerStatsClientTest.java diff --git a/services/common/src/main/java/com/linkedin/openhouse/common/metrics/MetricsConstant.java b/services/common/src/main/java/com/linkedin/openhouse/common/metrics/MetricsConstant.java index baf65c627..7a47b790e 100644 --- a/services/common/src/main/java/com/linkedin/openhouse/common/metrics/MetricsConstant.java +++ b/services/common/src/main/java/com/linkedin/openhouse/common/metrics/MetricsConstant.java @@ -63,4 +63,10 @@ private MetricsConstant() {} public static final String HTS_LIST_TABLES_REQUEST = "hts_list_tables_request"; public static final String HTS_LIST_TABLES_TIME = "hts_list_tables_time"; public static final String HTS_SEARCH_TABLES_TIME = "hts_search_tables_time"; + + // Optimizer post-commit stats push (Tables → Optimizer) + public static final String OPTIMIZER_STATS_DURATION = "optimizer_stats_duration"; + public static final String OPTIMIZER_STATS_ATTEMPTS = "optimizer_stats_attempts"; + public static final String OPTIMIZER_STATS_SKIPPED = "optimizer_stats_skipped"; + public static final String OPTIMIZER_STATS_FAILED_FINAL = "optimizer_stats_failed_final"; } diff --git a/services/tables/build.gradle b/services/tables/build.gradle index c85a57131..da42c32ef 100644 --- a/services/tables/build.gradle +++ b/services/tables/build.gradle @@ -44,6 +44,7 @@ dependencies { testImplementation 'org.junit.jupiter:junit-jupiter-engine:' + junit_version testImplementation 'org.springframework.security:spring-security-test:5.7.3' testImplementation 'org.springframework:spring-context-support:5.3.18' + testImplementation 'com.squareup.okhttp3:mockwebserver:4.10.0' testImplementation(testFixtures(project(':services:common'))) testImplementation (project(':tables-test-fixtures:tables-test-fixtures_2.12')) { exclude group: 'com.linkedin.iceberg' diff --git a/services/tables/src/main/java/com/linkedin/openhouse/tables/model/TableDto.java b/services/tables/src/main/java/com/linkedin/openhouse/tables/model/TableDto.java index 917c632d8..24373f4a1 100644 --- a/services/tables/src/main/java/com/linkedin/openhouse/tables/model/TableDto.java +++ b/services/tables/src/main/java/com/linkedin/openhouse/tables/model/TableDto.java @@ -16,6 +16,7 @@ import javax.persistence.Entity; import javax.persistence.Id; import javax.persistence.IdClass; +import javax.persistence.Transient; import lombok.AccessLevel; import lombok.AllArgsConstructor; import lombok.Builder; @@ -83,6 +84,18 @@ public class TableDto { private boolean replaceCommit; + /** + * Iceberg {@code Snapshot.summary()} for the current snapshot at the time the {@code TableDto} + * was constructed (post-commit on save; post-load on read). Populated only when an Iceberg {@code + * Table} is available — i.e. by {@code InternalRepositoryUtils.convertToTableDto}. Not persisted, + * not part of equality. + * + *

Used downstream by the optimizer post-commit stats push (see {@code + * services.optimizer.OptimizerStatsClient}) so that the service layer can read snapshot stats + * without a separate HDFS round-trip. + */ + @Transient @EqualsAndHashCode.Exclude private Map currentSnapshotSummary; + /** * Bundling eligible string type field into a map as {@link org.mapstruct.Mapper} doesn't provide * easy interface to achieve so. diff --git a/services/tables/src/main/java/com/linkedin/openhouse/tables/repository/impl/InternalRepositoryUtils.java b/services/tables/src/main/java/com/linkedin/openhouse/tables/repository/impl/InternalRepositoryUtils.java index 0dde039c0..9ba40c2d8 100644 --- a/services/tables/src/main/java/com/linkedin/openhouse/tables/repository/impl/InternalRepositoryUtils.java +++ b/services/tables/src/main/java/com/linkedin/openhouse/tables/repository/impl/InternalRepositoryUtils.java @@ -135,6 +135,8 @@ static TableDto convertToTableDto( .jsonSnapshots(null) .tableProperties(megaProps) .sortOrder(SortOrderParser.toJson(table.sortOrder())) + .currentSnapshotSummary( + table.currentSnapshot() == null ? null : table.currentSnapshot().summary()) .build(); return tableDto; diff --git a/services/tables/src/main/java/com/linkedin/openhouse/tables/services/IcebergSnapshotsServiceImpl.java b/services/tables/src/main/java/com/linkedin/openhouse/tables/services/IcebergSnapshotsServiceImpl.java index af43a169e..bb2cbde4c 100644 --- a/services/tables/src/main/java/com/linkedin/openhouse/tables/services/IcebergSnapshotsServiceImpl.java +++ b/services/tables/src/main/java/com/linkedin/openhouse/tables/services/IcebergSnapshotsServiceImpl.java @@ -10,6 +10,7 @@ import com.linkedin.openhouse.tables.model.TableDto; import com.linkedin.openhouse.tables.model.TableDtoPrimaryKey; import com.linkedin.openhouse.tables.repository.OpenHouseInternalRepository; +import com.linkedin.openhouse.tables.services.optimizer.OptimizerStatsClient; import com.linkedin.openhouse.tables.utils.AuthorizationUtils; import com.linkedin.openhouse.tables.utils.TableUUIDGenerator; import java.util.Optional; @@ -32,6 +33,13 @@ public class IcebergSnapshotsServiceImpl implements IcebergSnapshotsService { @Autowired AuthorizationUtils authorizationUtils; + /** + * Present only when {@code optimizer.stats.enabled=true}. When absent, no post-commit push is + * attempted. + */ + @Autowired(required = false) + Optional optimizerStatsClient = Optional.empty(); + @Override public Pair putIcebergSnapshots( String databaseId, @@ -83,7 +91,14 @@ public Pair putIcebergSnapshots( databaseId, tableCreatorUpdater, Privileges.CREATE_TABLE); } try { - return Pair.of(openHouseInternalRepository.save(tableDtoToSave), !tableDto.isPresent()); + TableDto savedDto = openHouseInternalRepository.save(tableDtoToSave); + // Fire-and-forget push of the post-commit snapshot summary to the optimizer. Returns + // immediately; failures are swallowed inside the client. Skipped at the client when the + // table is not opted in via maintenance.optimizer.stats.enabled or when there is no current + // snapshot (e.g. CREATE TABLE without a data commit). + optimizerStatsClient.ifPresent( + client -> client.report(savedDto, savedDto.getCurrentSnapshotSummary())); + return Pair.of(savedDto, !tableDto.isPresent()); } catch (BadRequestException e) { throw new RequestValidationFailureException(e.getMessage(), e); } catch (CommitFailedException ce) { diff --git a/services/tables/src/main/java/com/linkedin/openhouse/tables/services/optimizer/OptimizerStatsClient.java b/services/tables/src/main/java/com/linkedin/openhouse/tables/services/optimizer/OptimizerStatsClient.java new file mode 100644 index 000000000..23c69082e --- /dev/null +++ b/services/tables/src/main/java/com/linkedin/openhouse/tables/services/optimizer/OptimizerStatsClient.java @@ -0,0 +1,245 @@ +package com.linkedin.openhouse.tables.services.optimizer; + +import com.linkedin.openhouse.common.metrics.MetricsConstant; +import com.linkedin.openhouse.tables.model.TableDto; +import io.micrometer.core.instrument.MeterRegistry; +import io.micrometer.core.instrument.Timer; +import java.time.Duration; +import java.util.Map; +import java.util.concurrent.TimeoutException; +import lombok.extern.slf4j.Slf4j; +import org.springframework.beans.factory.annotation.Qualifier; +import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty; +import org.springframework.stereotype.Component; +import org.springframework.web.reactive.function.client.WebClient; +import org.springframework.web.reactive.function.client.WebClientRequestException; +import org.springframework.web.reactive.function.client.WebClientResponseException; +import reactor.core.publisher.Mono; +import reactor.util.retry.Retry; +import reactor.util.retry.RetrySpec; + +/** + * Pushes a stats record to the optimizer's {@code PUT /v1/optimizer/stats/{tableUuid}} endpoint + * after a successful Iceberg snapshot commit. Fire-and-forget — the {@link #report} call returns + * immediately on the commit thread; the HTTP exchange runs on the WebClient's Netty event loop. + * + *

Errors are recorded as Micrometer metrics and logged at {@code warn} level, but never + * propagated. A push failure must not break the commit. + * + *

Timeout / retry shape (from {@link OptimizerStatsProperties}): + * + *

+ * + *

The bean is only wired when {@code optimizer.stats.enabled=true}. + */ +@Slf4j +@Component +@ConditionalOnProperty(prefix = "optimizer.stats", name = "enabled", havingValue = "true") +public class OptimizerStatsClient { + + /** Per-call URL path. */ + static final String PATH_TEMPLATE = "/v1/optimizer/stats/{tableUuid}"; + + /** Table-property key that opts a table in for the post-commit stats push. */ + static final String OPT_IN_PROPERTY = "maintenance.optimizer.stats.enabled"; + + /** Iceberg snapshot-summary keys we read. All values are decimal-string longs. */ + static final String SUMMARY_TOTAL_DATA_FILES = "total-data-files"; + + static final String SUMMARY_TOTAL_FILES_SIZE = "total-files-size"; + static final String SUMMARY_ADDED_DATA_FILES = "added-data-files"; + static final String SUMMARY_DELETED_DATA_FILES = "deleted-data-files"; + static final String SUMMARY_ADDED_FILES_SIZE = "added-files-size"; + static final String SUMMARY_REMOVED_FILES_SIZE = "removed-files-size"; + + private final WebClient webClient; + private final MeterRegistry meterRegistry; + private final OptimizerStatsProperties properties; + + public OptimizerStatsClient( + @Qualifier("optimizerStatsWebClient") WebClient webClient, + MeterRegistry meterRegistry, + OptimizerStatsProperties properties) { + this.webClient = webClient; + this.meterRegistry = meterRegistry; + this.properties = properties; + } + + /** + * Build and dispatch a stats push for the just-committed table. Returns immediately; the HTTP + * call runs in the background. {@code snapshotSummary} is the in-memory {@code + * Snapshot.summary()} map from the latest snapshot (no HDFS round-trip). + * + *

The call is skipped (and a {@code skipped} counter is emitted) when: + * + *

+ */ + public void report(TableDto saved, Map snapshotSummary) { + reportAsync(saved, snapshotSummary).subscribe(); + } + + /** + * Same as {@link #report} but returns the underlying {@link Mono} so callers (notably tests) can + * block on completion. Production callers use {@link #report}. + */ + Mono reportAsync(TableDto saved, Map snapshotSummary) { + if (!isOptedIn(saved)) { + meterRegistry + .counter(MetricsConstant.OPTIMIZER_STATS_SKIPPED, "reason", "opt_out") + .increment(); + return Mono.empty(); + } + if (snapshotSummary == null || snapshotSummary.isEmpty()) { + meterRegistry + .counter(MetricsConstant.OPTIMIZER_STATS_SKIPPED, "reason", "no_snapshot") + .increment(); + return Mono.empty(); + } + + OptimizerStatsRequest body = buildRequest(saved, snapshotSummary); + String tableUuid = saved.getTableUUID(); + Timer.Sample sample = Timer.start(meterRegistry); + + RetrySpec retrySpec = + Retry.max(Math.max(0, properties.getMaxAttempts() - 1)) + .filter(this::isRetryable) + .onRetryExhaustedThrow((spec, signal) -> signal.failure()); + + return webClient + .put() + .uri(PATH_TEMPLATE, tableUuid) + .bodyValue(body) + .retrieve() + .toBodilessEntity() + .timeout(Duration.ofMillis(properties.getPerAttemptTimeoutMs())) + .doOnError( + e -> + meterRegistry + .counter( + MetricsConstant.OPTIMIZER_STATS_ATTEMPTS, + "outcome", + isRetryable(e) ? "retryable_failure" : "non_retryable_failure") + .increment()) + .doOnSuccess( + ignored -> + meterRegistry + .counter(MetricsConstant.OPTIMIZER_STATS_ATTEMPTS, "outcome", "success") + .increment()) + .retryWhen(retrySpec) + .timeout(Duration.ofMillis(properties.getTotalTimeoutMs())) + .doOnSuccess( + ignored -> + sample.stop( + meterRegistry.timer( + MetricsConstant.OPTIMIZER_STATS_DURATION, "outcome", "success"))) + .onErrorResume( + e -> { + String outcome = classifyOutcome(e); + sample.stop( + meterRegistry.timer( + MetricsConstant.OPTIMIZER_STATS_DURATION, "outcome", outcome)); + meterRegistry + .counter(MetricsConstant.OPTIMIZER_STATS_FAILED_FINAL, "outcome", outcome) + .increment(); + log.warn( + "Optimizer stats push failed for table {} ({}): {}", + tableUuid, + outcome, + e.toString()); + return Mono.empty(); + }) + .then(); + } + + /** {@code true} iff the table's properties contain the literal opt-in value {@code "true"}. */ + private boolean isOptedIn(TableDto saved) { + Map props = saved.getTableProperties(); + return props != null && "true".equals(props.get(OPT_IN_PROPERTY)); + } + + /** Build the wire body. Missing summary keys default to 0L. */ + OptimizerStatsRequest buildRequest(TableDto saved, Map summary) { + OptimizerStatsRequest.Snapshot snapshot = + OptimizerStatsRequest.Snapshot.builder() + .tableVersion(saved.getTableVersion()) + .tableLocation(saved.getTableLocation()) + .tableSizeBytes(longOrZero(summary, SUMMARY_TOTAL_FILES_SIZE)) + .numCurrentFiles(longOrZero(summary, SUMMARY_TOTAL_DATA_FILES)) + .build(); + OptimizerStatsRequest.Delta delta = + OptimizerStatsRequest.Delta.builder() + .numFilesAdded(longOrZero(summary, SUMMARY_ADDED_DATA_FILES)) + .numFilesDeleted(longOrZero(summary, SUMMARY_DELETED_DATA_FILES)) + .addedSizeBytes(longOrZero(summary, SUMMARY_ADDED_FILES_SIZE)) + .deletedSizeBytes(longOrZero(summary, SUMMARY_REMOVED_FILES_SIZE)) + .build(); + return OptimizerStatsRequest.builder() + .databaseName(saved.getDatabaseId()) + .tableName(saved.getTableId()) + .stats(OptimizerStatsRequest.Stats.builder().snapshot(snapshot).delta(delta).build()) + .tableProperties(saved.getTableProperties()) + .build(); + } + + /** + * Retryable errors: per-attempt timeout, network-level failures, 5xx, 408, 429. Other 4xx are + * client errors — retrying won't fix them. + */ + boolean isRetryable(Throwable e) { + if (e instanceof TimeoutException) { + return true; + } + if (e instanceof WebClientRequestException) { + return true; + } + if (e instanceof WebClientResponseException) { + int code = ((WebClientResponseException) e).getStatusCode().value(); + return code == 408 || code == 429 || (code >= 500 && code < 600); + } + return false; + } + + /** + * Map a final-stage error to one of {@code success, timeout, network_error, server_error, + * client_error, unknown_error} for the {@code outcome} tag on duration / failed_final metrics. + */ + private String classifyOutcome(Throwable e) { + if (e instanceof TimeoutException) { + return "timeout"; + } + if (e instanceof WebClientRequestException) { + return "network_error"; + } + if (e instanceof WebClientResponseException) { + int code = ((WebClientResponseException) e).getStatusCode().value(); + if (code >= 500 && code < 600) { + return "server_error"; + } + if (code >= 400 && code < 500) { + return "client_error"; + } + } + return "unknown_error"; + } + + private static long longOrZero(Map m, String key) { + String v = m.get(key); + if (v == null) { + return 0L; + } + try { + return Long.parseLong(v); + } catch (NumberFormatException nfe) { + return 0L; + } + } +} diff --git a/services/tables/src/main/java/com/linkedin/openhouse/tables/services/optimizer/OptimizerStatsConfig.java b/services/tables/src/main/java/com/linkedin/openhouse/tables/services/optimizer/OptimizerStatsConfig.java new file mode 100644 index 000000000..bbddbce6b --- /dev/null +++ b/services/tables/src/main/java/com/linkedin/openhouse/tables/services/optimizer/OptimizerStatsConfig.java @@ -0,0 +1,35 @@ +package com.linkedin.openhouse.tables.services.optimizer; + +import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty; +import org.springframework.boot.context.properties.EnableConfigurationProperties; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; +import org.springframework.http.client.reactive.ReactorClientHttpConnector; +import org.springframework.web.reactive.function.client.WebClient; +import reactor.netty.http.client.HttpClient; + +/** + * Wiring for the post-commit Tables → Optimizer stats push. Only active when {@code + * optimizer.stats.enabled=true}; otherwise no WebClient or client bean is constructed and the + * on-commit hook in {@code IcebergSnapshotsServiceImpl} is a no-op. + */ +@Configuration +@EnableConfigurationProperties(OptimizerStatsProperties.class) +@ConditionalOnProperty(prefix = "optimizer.stats", name = "enabled", havingValue = "true") +public class OptimizerStatsConfig { + + /** + * Dedicated WebClient for the optimizer stats endpoint. Per-attempt and outer timeouts are + * applied at the call site on the Reactor chain in {@link OptimizerStatsClient} — they are not + * configured on the underlying Netty client so that the timeout always emerges as a standard + * {@link java.util.concurrent.TimeoutException} (not a Netty {@code ReadTimeoutException}), which + * keeps the client's outcome classification simple. + */ + @Bean("optimizerStatsWebClient") + public WebClient optimizerStatsWebClient(OptimizerStatsProperties properties) { + return WebClient.builder() + .baseUrl(properties.getBaseUri()) + .clientConnector(new ReactorClientHttpConnector(HttpClient.create())) + .build(); + } +} diff --git a/services/tables/src/main/java/com/linkedin/openhouse/tables/services/optimizer/OptimizerStatsProperties.java b/services/tables/src/main/java/com/linkedin/openhouse/tables/services/optimizer/OptimizerStatsProperties.java new file mode 100644 index 000000000..7d746ee93 --- /dev/null +++ b/services/tables/src/main/java/com/linkedin/openhouse/tables/services/optimizer/OptimizerStatsProperties.java @@ -0,0 +1,43 @@ +package com.linkedin.openhouse.tables.services.optimizer; + +import lombok.Getter; +import lombok.Setter; +import org.springframework.boot.context.properties.ConfigurationProperties; + +/** + * Configuration for the post-commit stats push from the Tables Service to the Optimizer's stats + * endpoint. Property prefix: {@code optimizer.stats}. + * + *

When {@code enabled} is false (the default), no client bean is constructed and the push is a + * no-op on every commit. Environments that want the feed turn it on and set {@link #baseUri}. + */ +@ConfigurationProperties("optimizer.stats") +@Getter +@Setter +public class OptimizerStatsProperties { + + /** Master switch. When {@code false}, no HTTP push is attempted and no client bean is wired. */ + private boolean enabled = false; + + /** + * Base URI of the Optimizer service. Path {@code /v1/optimizer/stats/{tableUuid}} is appended at + * call time. No default — required when {@link #enabled} is {@code true}. + */ + private String baseUri; + + /** Per-attempt request timeout in milliseconds. Default 1000. */ + private long perAttemptTimeoutMs = 1000L; + + /** + * Hard ceiling on total wall-clock time for the entire call (including retries) in milliseconds. + * Default 2000. When the outer timeout fires, the chain is cancelled and the error is swallowed. + */ + private long totalTimeoutMs = 2000L; + + /** + * Total attempt count (initial try plus retries). Default 3. With 1000 ms per attempt and a 2000 + * ms outer ceiling, only about two attempts realistically fit on a slow path; the third is + * available if attempts return quickly. + */ + private int maxAttempts = 3; +} diff --git a/services/tables/src/main/java/com/linkedin/openhouse/tables/services/optimizer/OptimizerStatsRequest.java b/services/tables/src/main/java/com/linkedin/openhouse/tables/services/optimizer/OptimizerStatsRequest.java new file mode 100644 index 000000000..68a8646da --- /dev/null +++ b/services/tables/src/main/java/com/linkedin/openhouse/tables/services/optimizer/OptimizerStatsRequest.java @@ -0,0 +1,70 @@ +package com.linkedin.openhouse.tables.services.optimizer; + +import com.fasterxml.jackson.annotation.JsonInclude; +import java.util.Map; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.NoArgsConstructor; + +/** + * Wire body for {@code PUT /v1/optimizer/stats/{tableUuid}}. Mirrors the optimizer's {@code + * UpsertTableStatsRequest} field-for-field — see {@code + * services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpsertTableStatsRequest.java}. + * + *

Tables service owns its own copy so that the wire contract is explicit at the call site and + * the optimizer client jar is not a compile-time dependency. + */ +@Data +@Builder +@NoArgsConstructor +@AllArgsConstructor +@JsonInclude(JsonInclude.Include.NON_NULL) +public class OptimizerStatsRequest { + + private String databaseName; + private String tableName; + private Stats stats; + private Map tableProperties; + + @Data + @Builder + @NoArgsConstructor + @AllArgsConstructor + @JsonInclude(JsonInclude.Include.NON_NULL) + public static class Stats { + private Snapshot snapshot; + private Delta delta; + } + + /** + * Point-in-time snapshot metrics. Map to optimizer's {@code + * TableStatsPayload.SnapshotMetricsDto}. + */ + @Data + @Builder + @NoArgsConstructor + @AllArgsConstructor + @JsonInclude(JsonInclude.Include.NON_NULL) + public static class Snapshot { + private String tableVersion; + private String tableLocation; + private Long tableSizeBytes; + private Long numCurrentFiles; + } + + /** + * Per-commit incremental counters. Map to optimizer's {@code TableStatsPayload.CommitDeltaDto}. + */ + @Data + @Builder + @NoArgsConstructor + @AllArgsConstructor + @JsonInclude(JsonInclude.Include.NON_NULL) + public static class Delta { + private Long numFilesAdded; + private Long numFilesDeleted; + private Long addedSizeBytes; + private Long deletedSizeBytes; + } +} diff --git a/services/tables/src/main/resources/application.properties b/services/tables/src/main/resources/application.properties index 4a6c82a89..ddf4c3022 100644 --- a/services/tables/src/main/resources/application.properties +++ b/services/tables/src/main/resources/application.properties @@ -23,4 +23,12 @@ management.metrics.distribution.maximum-expected-value.catalog_metadata_retrieva management.metrics.distribution.maximum-expected-value.catalog_metadata_update_latency=600s management.metrics.distribution.maximum-expected-value.http.server.requests=600s server.shutdown=graceful -spring.lifecycle.timeout-per-shutdown-phase=60s \ No newline at end of file +spring.lifecycle.timeout-per-shutdown-phase=60s + +# Post-commit Tables -> Optimizer stats push. +# Disabled by default; environments that run the optimizer opt in via env vars. +optimizer.stats.enabled=${OPTIMIZER_STATS_ENABLED:false} +optimizer.stats.base-uri=${OPTIMIZER_STATS_BASE_URI:} +optimizer.stats.per-attempt-timeout-ms=${OPTIMIZER_STATS_PER_ATTEMPT_TIMEOUT_MS:1000} +optimizer.stats.total-timeout-ms=${OPTIMIZER_STATS_TOTAL_TIMEOUT_MS:2000} +optimizer.stats.max-attempts=${OPTIMIZER_STATS_MAX_ATTEMPTS:3} \ No newline at end of file diff --git a/services/tables/src/test/java/com/linkedin/openhouse/tables/mock/service/IcebergSnapshotsServiceTest.java b/services/tables/src/test/java/com/linkedin/openhouse/tables/mock/service/IcebergSnapshotsServiceTest.java index 44467b705..80f9233af 100644 --- a/services/tables/src/test/java/com/linkedin/openhouse/tables/mock/service/IcebergSnapshotsServiceTest.java +++ b/services/tables/src/test/java/com/linkedin/openhouse/tables/mock/service/IcebergSnapshotsServiceTest.java @@ -15,7 +15,10 @@ import com.linkedin.openhouse.tables.model.TableDtoPrimaryKey; import com.linkedin.openhouse.tables.repository.OpenHouseInternalRepository; import com.linkedin.openhouse.tables.services.IcebergSnapshotsService; +import com.linkedin.openhouse.tables.services.optimizer.OptimizerStatsClient; import com.linkedin.openhouse.tables.utils.TableUUIDGenerator; +import java.util.Collections; +import java.util.Map; import java.util.Optional; import java.util.UUID; import org.apache.iceberg.exceptions.BadRequestException; @@ -45,6 +48,14 @@ public class IcebergSnapshotsServiceTest { @MockBean private TableUUIDGenerator tableUUIDGenerator; + /** + * Forces the optional optimizer-stats client into the service so that we can verify the + * post-commit push is invoked. Production wiring is conditional on {@code + * optimizer.stats.enabled=true}; this {@code @MockBean} bypasses that condition for the one test + * that exercises the hook. + */ + @MockBean private OptimizerStatsClient optimizerStatsClient; + private OpenHouseInternalRepository mockRepository; @Captor ArgumentCaptor tableDtoArgumentCaptor; @@ -90,6 +101,32 @@ public void testTableCreated() { verifyCalls(key, TEST_TABLE_CREATOR, requestBody.getCreateUpdateTableRequestBody()); } + @Test + public void testOptimizerStatsClientInvokedAfterSuccessfulCommit() { + final IcebergSnapshotsRequestBody requestBody = + TEST_ICEBERG_SNAPSHOTS_INITIAL_VERSION_REQUEST_BODY; + final String dbId = requestBody.getCreateUpdateTableRequestBody().getDatabaseId(); + final String tableId = requestBody.getCreateUpdateTableRequestBody().getTableId(); + final TableDtoPrimaryKey key = + TableDtoPrimaryKey.builder().databaseId(dbId).tableId(tableId).build(); + final Map summary = Collections.singletonMap("total-data-files", "7"); + final TableDto savedDto = + TableDto.builder() + .databaseId(dbId) + .tableId(tableId) + .currentSnapshotSummary(summary) + .build(); + + Mockito.when(tableUUIDGenerator.generateUUID(Mockito.any(IcebergSnapshotsRequestBody.class))) + .thenReturn(UUID.randomUUID()); + Mockito.when(mockRepository.findById(key)).thenReturn(Optional.empty()); + Mockito.when(mockRepository.save(Mockito.any(TableDto.class))).thenReturn(savedDto); + + service.putIcebergSnapshots(dbId, tableId, requestBody, TEST_TABLE_CREATOR); + + Mockito.verify(optimizerStatsClient, Mockito.times(1)).report(savedDto, summary); + } + @Test public void testPutTableExceptionHandling() { final IcebergSnapshotsRequestBody requestBody = diff --git a/services/tables/src/test/java/com/linkedin/openhouse/tables/model/TableDtoMappingTest.java b/services/tables/src/test/java/com/linkedin/openhouse/tables/model/TableDtoMappingTest.java index e3e19a7d8..64aefd19b 100644 --- a/services/tables/src/test/java/com/linkedin/openhouse/tables/model/TableDtoMappingTest.java +++ b/services/tables/src/test/java/com/linkedin/openhouse/tables/model/TableDtoMappingTest.java @@ -33,7 +33,8 @@ public class TableDtoMappingTest { "jsonSnapshots", "snapshotRefs", "policies", - "tableType"); + "tableType", + "currentSnapshotSummary"); /** Making all fields making it to map is expected, and all expected field are making it there. */ @Test diff --git a/services/tables/src/test/java/com/linkedin/openhouse/tables/services/optimizer/OptimizerStatsClientTest.java b/services/tables/src/test/java/com/linkedin/openhouse/tables/services/optimizer/OptimizerStatsClientTest.java new file mode 100644 index 000000000..33ea0d197 --- /dev/null +++ b/services/tables/src/test/java/com/linkedin/openhouse/tables/services/optimizer/OptimizerStatsClientTest.java @@ -0,0 +1,242 @@ +package com.linkedin.openhouse.tables.services.optimizer; + +import static org.assertj.core.api.Assertions.assertThat; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.linkedin.openhouse.common.metrics.MetricsConstant; +import com.linkedin.openhouse.tables.model.TableDto; +import io.micrometer.core.instrument.MeterRegistry; +import io.micrometer.core.instrument.simple.SimpleMeterRegistry; +import java.io.IOException; +import java.time.Duration; +import java.util.HashMap; +import java.util.Map; +import java.util.concurrent.TimeUnit; +import okhttp3.mockwebserver.MockResponse; +import okhttp3.mockwebserver.MockWebServer; +import okhttp3.mockwebserver.RecordedRequest; +import okhttp3.mockwebserver.SocketPolicy; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.springframework.http.client.reactive.ReactorClientHttpConnector; +import org.springframework.web.reactive.function.client.WebClient; +import reactor.netty.http.client.HttpClient; + +class OptimizerStatsClientTest { + + private static final String TABLE_UUID = "uuid-1"; + private static final String DB = "db1"; + private static final String TABLE = "tbl1"; + private static final Duration BLOCK_MAX = Duration.ofSeconds(5); + + private MockWebServer server; + private MeterRegistry meterRegistry; + private OptimizerStatsProperties properties; + private OptimizerStatsClient client; + private final ObjectMapper mapper = new ObjectMapper(); + + @BeforeEach + void setUp() throws IOException { + server = new MockWebServer(); + server.start(); + meterRegistry = new SimpleMeterRegistry(); + properties = new OptimizerStatsProperties(); + properties.setEnabled(true); + properties.setBaseUri(server.url("/").toString()); + properties.setPerAttemptTimeoutMs(1000L); + properties.setTotalTimeoutMs(2000L); + properties.setMaxAttempts(3); + client = new OptimizerStatsClient(buildWebClient(), meterRegistry, properties); + } + + @AfterEach + void tearDown() throws IOException { + server.shutdown(); + } + + private WebClient buildWebClient() { + HttpClient httpClient = HttpClient.create(); + return WebClient.builder() + .baseUrl(properties.getBaseUri()) + .clientConnector(new ReactorClientHttpConnector(httpClient)) + .build(); + } + + @Test + void report_optedIn_sendsRequestWithPayloadFieldsFromSummary() throws Exception { + server.enqueue(new MockResponse().setResponseCode(200)); + + TableDto saved = + optedInBuilder() + .tableVersion("v3") + .tableLocation("/data/tables/db1/tbl1") + .currentSnapshotSummary(summary()) + .build(); + + client.reportAsync(saved, saved.getCurrentSnapshotSummary()).block(BLOCK_MAX); + + RecordedRequest req = server.takeRequest(2, TimeUnit.SECONDS); + assertThat(req).isNotNull(); + assertThat(req.getMethod()).isEqualTo("PUT"); + assertThat(req.getPath()).isEqualTo("/v1/optimizer/stats/" + TABLE_UUID); + + @SuppressWarnings("unchecked") + Map body = mapper.readValue(req.getBody().readUtf8(), Map.class); + assertThat(body).containsEntry("databaseName", DB).containsEntry("tableName", TABLE); + @SuppressWarnings("unchecked") + Map stats = (Map) body.get("stats"); + @SuppressWarnings("unchecked") + Map snapshot = (Map) stats.get("snapshot"); + @SuppressWarnings("unchecked") + Map delta = (Map) stats.get("delta"); + assertThat(snapshot) + .containsEntry("tableVersion", "v3") + .containsEntry("tableLocation", "/data/tables/db1/tbl1") + .containsEntry("tableSizeBytes", 4096) + .containsEntry("numCurrentFiles", 12); + assertThat(delta) + .containsEntry("numFilesAdded", 5) + .containsEntry("numFilesDeleted", 2) + .containsEntry("addedSizeBytes", 2048) + .containsEntry("deletedSizeBytes", 1024); + + assertThat( + meterRegistry + .counter(MetricsConstant.OPTIMIZER_STATS_ATTEMPTS, "outcome", "success") + .count()) + .isEqualTo(1.0); + } + + @Test + void report_notOptedIn_skipsCallAndIncrementsSkippedCounter() { + TableDto saved = builderWithoutOptIn().currentSnapshotSummary(summary()).build(); + + client.reportAsync(saved, saved.getCurrentSnapshotSummary()).block(BLOCK_MAX); + + assertThat(server.getRequestCount()).isZero(); + assertThat( + meterRegistry + .counter(MetricsConstant.OPTIMIZER_STATS_SKIPPED, "reason", "opt_out") + .count()) + .isEqualTo(1.0); + } + + @Test + void report_noSnapshot_skipsCallAndIncrementsSkippedCounter() { + TableDto saved = optedInBuilder().currentSnapshotSummary(null).build(); + + client.reportAsync(saved, null).block(BLOCK_MAX); + + assertThat(server.getRequestCount()).isZero(); + assertThat( + meterRegistry + .counter(MetricsConstant.OPTIMIZER_STATS_SKIPPED, "reason", "no_snapshot") + .count()) + .isEqualTo(1.0); + } + + @Test + void report_5xxThenSuccess_retriesAndSucceeds() { + server.enqueue(new MockResponse().setResponseCode(503)); + server.enqueue(new MockResponse().setResponseCode(200)); + + TableDto saved = optedInBuilder().currentSnapshotSummary(summary()).build(); + client.reportAsync(saved, saved.getCurrentSnapshotSummary()).block(BLOCK_MAX); + + assertThat(server.getRequestCount()).isEqualTo(2); + assertThat( + meterRegistry + .counter(MetricsConstant.OPTIMIZER_STATS_ATTEMPTS, "outcome", "success") + .count()) + .isEqualTo(1.0); + assertThat( + meterRegistry + .counter(MetricsConstant.OPTIMIZER_STATS_ATTEMPTS, "outcome", "retryable_failure") + .count()) + .isEqualTo(1.0); + } + + @Test + void report_allAttemptsFail_swallowsAndIncrementsFailedFinal() { + properties.setMaxAttempts(2); + server.enqueue(new MockResponse().setResponseCode(503)); + server.enqueue(new MockResponse().setResponseCode(503)); + + TableDto saved = optedInBuilder().currentSnapshotSummary(summary()).build(); + client.reportAsync(saved, saved.getCurrentSnapshotSummary()).block(BLOCK_MAX); + + assertThat(server.getRequestCount()).isEqualTo(2); + assertThat( + meterRegistry + .counter(MetricsConstant.OPTIMIZER_STATS_FAILED_FINAL, "outcome", "server_error") + .count()) + .isEqualTo(1.0); + } + + @Test + void report_4xxNonRetryable_doesNotRetry() { + server.enqueue(new MockResponse().setResponseCode(400)); + + TableDto saved = optedInBuilder().currentSnapshotSummary(summary()).build(); + client.reportAsync(saved, saved.getCurrentSnapshotSummary()).block(BLOCK_MAX); + + assertThat(server.getRequestCount()).isEqualTo(1); + assertThat( + meterRegistry + .counter(MetricsConstant.OPTIMIZER_STATS_FAILED_FINAL, "outcome", "client_error") + .count()) + .isEqualTo(1.0); + } + + @Test + void report_perAttemptTimeoutTrips_recordsTimeout() { + properties.setPerAttemptTimeoutMs(150L); + properties.setTotalTimeoutMs(600L); + properties.setMaxAttempts(1); + client = new OptimizerStatsClient(buildWebClient(), meterRegistry, properties); + // Connection opens but the server never sends any response — forces Reactor's per-attempt + // .timeout(150ms) to fire with TimeoutException. + server.enqueue(new MockResponse().setSocketPolicy(SocketPolicy.NO_RESPONSE)); + + TableDto saved = optedInBuilder().currentSnapshotSummary(summary()).build(); + client.reportAsync(saved, saved.getCurrentSnapshotSummary()).block(BLOCK_MAX); + + assertThat( + meterRegistry + .counter(MetricsConstant.OPTIMIZER_STATS_FAILED_FINAL, "outcome", "timeout") + .count()) + .isEqualTo(1.0); + } + + // ---- helpers ---- + + private static Map summary() { + Map s = new HashMap<>(); + s.put("total-data-files", "12"); + s.put("total-files-size", "4096"); + s.put("added-data-files", "5"); + s.put("deleted-data-files", "2"); + s.put("added-files-size", "2048"); + s.put("removed-files-size", "1024"); + return s; + } + + private static TableDto.TableDtoBuilder optedInBuilder() { + Map props = new HashMap<>(); + props.put(OptimizerStatsClient.OPT_IN_PROPERTY, "true"); + return TableDto.builder() + .tableUUID(TABLE_UUID) + .databaseId(DB) + .tableId(TABLE) + .tableProperties(props); + } + + private static TableDto.TableDtoBuilder builderWithoutOptIn() { + return TableDto.builder() + .tableUUID(TABLE_UUID) + .databaseId(DB) + .tableId(TABLE) + .tableProperties(new HashMap<>()); + } +} From af025aef96153de3852408e78bc0f2d8a43c5aef Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Wed, 27 May 2026 14:16:30 -0700 Subject: [PATCH 10/14] refactor(tables): generic PostCommitOperation framework; address review Restructures the post-commit stats push around a small generic dispatcher so async/timeout/swallow plumbing lives in one place. Addresses the review comments on PR #608. Framework (new): - PostCommitOperation interface: String name() + Optional> prepare(TableDto). Implementations describe what to do; the dispatcher owns how it runs. - PostCommitDispatcher (@ConditionalOnProperty tables.postcommit.enabled): per-op wall-clock timeout, error swallow, metric emission, fire-and- forget subscribe. Exposes package-private decorate() for synchronous testing. - PostCommitProperties: enabled (default false), per-op-timeout-ms (default 3000). - Metric keys renamed: POSTCOMMIT_OP_DURATION / POSTCOMMIT_OP_SKIPPED / POSTCOMMIT_OP_FAILED, tagged op={name} and outcome={success, timeout, network_error, server_error, client_error, prepare_threw, unknown_error}. Why async: a synchronous post-commit push converts an optimizer outage into a tables write outage. Stats are a best-effort scheduling signal and must not block the write path. Crash-loss is bounded because state is cumulative: the next commit re-pushes the current state. Operation (refactor): - OptimizerStatsClient -> OptimizerStatsPostCommitOperation implementing PostCommitOperation. Drops outer timeout, onErrorResume, .subscribe() (dispatcher owns these). Keeps per-attempt timeout + bounded retry on retryable errors only. - Adds snapshotId to OptimizerStatsRequest.Snapshot; tracked for server-side idempotency wiring in BDP-102985. - Path constant comment now cites TableStatsController.TABLE_PATH_TEMPLATE as the canonical source. TableDto: - currentSnapshotSummary (Map) replaced by Optional (snapshotId + summary). Stored nullable internally; consumers read through getCurrentSnapshot() which returns Optional. Javadoc rewritten to describe the semantic condition (presence ~ table has a committed snapshot), not the private impl that populates it. - New CurrentSnapshotInfo value class. - InternalRepositoryUtils populates the new field with both snapshotId and summary; no extra I/O (still purely in-memory). Service: - IcebergSnapshotsServiceImpl: Optional replaced by Optional; on-commit hook is one line. Optimizer side: - TableStatsController publishes BASE_PATH and TABLE_PATH_TEMPLATE as public constants; @RequestMapping refactored to use BASE_PATH. Config: - application.properties: drop optimizer.stats.total-timeout-ms; add tables.postcommit.enabled + tables.postcommit.per-op-timeout-ms. Tests: - New PostCommitDispatcherTest (6 cases, no sleeps; uses decorate() to block synchronously). - OptimizerStatsClientTest renamed -> OptimizerStatsPostCommitOperationTest (6 cases adapted to the new prepare() contract; timeout case moved to PostCommitDispatcherTest). - IcebergSnapshotsServiceTest swaps @MockBean OptimizerStatsClient for @MockBean PostCommitDispatcher and verifies dispatch(savedDto) once. - TableDtoMappingTest updated for the renamed field. - All :services:tables and :services:optimizer tests pass; spotless clean. Filed: BDP-102985 (Optimizer stats: use snapshot ID as idempotency token on upsert) under epic BDP-102026. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../common/metrics/MetricsConstant.java | 12 +- .../api/controller/TableStatsController.java | 14 +- .../tables/dto/mapper/TablesMapper.java | 4 +- .../tables/model/CurrentSnapshotInfo.java | 33 +++ .../openhouse/tables/model/TableDto.java | 24 +- .../impl/InternalRepositoryUtils.java | 10 +- .../services/IcebergSnapshotsServiceImpl.java | 15 +- .../optimizer/OptimizerStatsClient.java | 245 ------------------ .../optimizer/OptimizerStatsConfig.java | 9 +- .../OptimizerStatsPostCommitOperation.java | 173 +++++++++++++ .../optimizer/OptimizerStatsProperties.java | 15 +- .../optimizer/OptimizerStatsRequest.java | 6 + .../postcommit/PostCommitDispatcher.java | 147 +++++++++++ .../postcommit/PostCommitOperation.java | 39 +++ .../postcommit/PostCommitProperties.java | 31 +++ .../src/main/resources/application.properties | 10 +- .../service/IcebergSnapshotsServiceTest.java | 29 +-- .../tables/model/TableDtoMappingTest.java | 2 +- ...ptimizerStatsPostCommitOperationTest.java} | 145 ++++------- .../postcommit/PostCommitDispatcherTest.java | 177 +++++++++++++ 20 files changed, 747 insertions(+), 393 deletions(-) create mode 100644 services/tables/src/main/java/com/linkedin/openhouse/tables/model/CurrentSnapshotInfo.java delete mode 100644 services/tables/src/main/java/com/linkedin/openhouse/tables/services/optimizer/OptimizerStatsClient.java create mode 100644 services/tables/src/main/java/com/linkedin/openhouse/tables/services/optimizer/OptimizerStatsPostCommitOperation.java create mode 100644 services/tables/src/main/java/com/linkedin/openhouse/tables/services/postcommit/PostCommitDispatcher.java create mode 100644 services/tables/src/main/java/com/linkedin/openhouse/tables/services/postcommit/PostCommitOperation.java create mode 100644 services/tables/src/main/java/com/linkedin/openhouse/tables/services/postcommit/PostCommitProperties.java rename services/tables/src/test/java/com/linkedin/openhouse/tables/services/optimizer/{OptimizerStatsClientTest.java => OptimizerStatsPostCommitOperationTest.java} (53%) create mode 100644 services/tables/src/test/java/com/linkedin/openhouse/tables/services/postcommit/PostCommitDispatcherTest.java diff --git a/services/common/src/main/java/com/linkedin/openhouse/common/metrics/MetricsConstant.java b/services/common/src/main/java/com/linkedin/openhouse/common/metrics/MetricsConstant.java index 7a47b790e..1c7945f9c 100644 --- a/services/common/src/main/java/com/linkedin/openhouse/common/metrics/MetricsConstant.java +++ b/services/common/src/main/java/com/linkedin/openhouse/common/metrics/MetricsConstant.java @@ -64,9 +64,11 @@ private MetricsConstant() {} public static final String HTS_LIST_TABLES_TIME = "hts_list_tables_time"; public static final String HTS_SEARCH_TABLES_TIME = "hts_search_tables_time"; - // Optimizer post-commit stats push (Tables → Optimizer) - public static final String OPTIMIZER_STATS_DURATION = "optimizer_stats_duration"; - public static final String OPTIMIZER_STATS_ATTEMPTS = "optimizer_stats_attempts"; - public static final String OPTIMIZER_STATS_SKIPPED = "optimizer_stats_skipped"; - public static final String OPTIMIZER_STATS_FAILED_FINAL = "optimizer_stats_failed_final"; + // Tables post-commit operation framework (bounded, best-effort actions after commit). + // Tagged with op={operation name}; on failure additionally tagged with outcome={timeout, + // network_error, server_error, client_error, prepare_threw, unknown_error}. + // The duration timer is bounded by tables.postcommit.per-op-timeout-ms. + public static final String POSTCOMMIT_OP_DURATION = "postcommit_op_duration"; + public static final String POSTCOMMIT_OP_SKIPPED = "postcommit_op_skipped"; + public static final String POSTCOMMIT_OP_FAILED = "postcommit_op_failed"; } diff --git a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java index b119dd1c7..87fbfa709 100644 --- a/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java +++ b/services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/controller/TableStatsController.java @@ -24,10 +24,22 @@ /** REST controller for managing per-table stats in the optimizer DB. */ @RestController -@RequestMapping("/v1/optimizer/stats") +@RequestMapping(TableStatsController.BASE_PATH) @RequiredArgsConstructor public class TableStatsController { + /** + * Base path for the stats endpoints. Single source of truth — external callers must reference. + */ + public static final String BASE_PATH = "/v1/optimizer/stats"; + + /** + * URI template (with {@code {tableUuid}} placeholder) for the per-table stats upsert/fetch + * endpoint. Use this from any client that calls the optimizer over HTTP, rather than rebuilding + * the path inline. + */ + public static final String TABLE_PATH_TEMPLATE = BASE_PATH + "/{tableUuid}"; + private final OptimizerDataService service; /** diff --git a/services/tables/src/main/java/com/linkedin/openhouse/tables/dto/mapper/TablesMapper.java b/services/tables/src/main/java/com/linkedin/openhouse/tables/dto/mapper/TablesMapper.java index 08111cd82..9315df9d8 100644 --- a/services/tables/src/main/java/com/linkedin/openhouse/tables/dto/mapper/TablesMapper.java +++ b/services/tables/src/main/java/com/linkedin/openhouse/tables/dto/mapper/TablesMapper.java @@ -73,6 +73,7 @@ public interface TablesMapper { @Mapping(source = "requestBody.sortOrder", target = "sortOrder"), @Mapping(target = "lastModifiedTime", ignore = true), @Mapping(target = "creationTime", ignore = true), + @Mapping(target = "currentSnapshot", ignore = true) }) TableDto toTableDto(TableDto tableDto, CreateUpdateTableRequestBody requestBody); @@ -120,7 +121,8 @@ public interface TablesMapper { defaultExpression = "java(TableType.PRIMARY_TABLE)"), @Mapping(source = "requestBody.createUpdateTableRequestBody.sortOrder", target = "sortOrder"), @Mapping(target = "lastModifiedTime", ignore = true), - @Mapping(target = "creationTime", ignore = true) + @Mapping(target = "creationTime", ignore = true), + @Mapping(target = "currentSnapshot", ignore = true) }) TableDto toTableDto(TableDto tableDto, IcebergSnapshotsRequestBody requestBody); diff --git a/services/tables/src/main/java/com/linkedin/openhouse/tables/model/CurrentSnapshotInfo.java b/services/tables/src/main/java/com/linkedin/openhouse/tables/model/CurrentSnapshotInfo.java new file mode 100644 index 000000000..9f363b4ec --- /dev/null +++ b/services/tables/src/main/java/com/linkedin/openhouse/tables/model/CurrentSnapshotInfo.java @@ -0,0 +1,33 @@ +package com.linkedin.openhouse.tables.model; + +import java.util.Map; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.EqualsAndHashCode; +import lombok.Getter; + +/** + * In-memory snapshot of the Iceberg current-snapshot metadata that was loaded alongside a {@link + * TableDto}. Carries only fields already materialized by the catalog client — never triggers a + * separate HDFS / object-store read. + * + *

Present whenever the table has at least one committed snapshot at construction time; absent + * (modeled as {@link java.util.Optional#empty()} on {@link TableDto}) when the table has no + * committed data — e.g. a {@code CREATE TABLE} with no rows yet. + */ +@Getter +@Builder +@AllArgsConstructor +@EqualsAndHashCode +public class CurrentSnapshotInfo { + + /** Iceberg snapshot ID (decimal long). Stable per commit; usable as an idempotency token. */ + private final long snapshotId; + + /** + * Iceberg {@code Snapshot.summary()} map, unmodified. Keys include {@code total-data-files}, + * {@code total-files-size}, {@code added-data-files}, {@code deleted-data-files}, {@code + * added-files-size}, {@code removed-files-size}. + */ + private final Map summary; +} diff --git a/services/tables/src/main/java/com/linkedin/openhouse/tables/model/TableDto.java b/services/tables/src/main/java/com/linkedin/openhouse/tables/model/TableDto.java index 24373f4a1..16608f286 100644 --- a/services/tables/src/main/java/com/linkedin/openhouse/tables/model/TableDto.java +++ b/services/tables/src/main/java/com/linkedin/openhouse/tables/model/TableDto.java @@ -11,6 +11,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Optional; import javax.persistence.Convert; import javax.persistence.ElementCollection; import javax.persistence.Entity; @@ -85,16 +86,21 @@ public class TableDto { private boolean replaceCommit; /** - * Iceberg {@code Snapshot.summary()} for the current snapshot at the time the {@code TableDto} - * was constructed (post-commit on save; post-load on read). Populated only when an Iceberg {@code - * Table} is available — i.e. by {@code InternalRepositoryUtils.convertToTableDto}. Not persisted, - * not part of equality. - * - *

Used downstream by the optimizer post-commit stats push (see {@code - * services.optimizer.OptimizerStatsClient}) so that the service layer can read snapshot stats - * without a separate HDFS round-trip. + * In-memory current-snapshot metadata captured when this {@code TableDto} was built from an + * Iceberg {@code Table}. Present whenever the underlying table has at least one committed + * snapshot at that point; absent for tables with no committed data (e.g. {@code CREATE TABLE} + * with no rows). Not persisted, not part of equality. Stored nullable internally; consumers must + * read through {@link #getCurrentSnapshot()} to get the {@link Optional}. */ - @Transient @EqualsAndHashCode.Exclude private Map currentSnapshotSummary; + @Getter(AccessLevel.NONE) + @Transient + @EqualsAndHashCode.Exclude + private CurrentSnapshotInfo currentSnapshot; + + /** Returns the current-snapshot metadata if any, else {@link Optional#empty()}. */ + public Optional getCurrentSnapshot() { + return Optional.ofNullable(currentSnapshot); + } /** * Bundling eligible string type field into a map as {@link org.mapstruct.Mapper} doesn't provide diff --git a/services/tables/src/main/java/com/linkedin/openhouse/tables/repository/impl/InternalRepositoryUtils.java b/services/tables/src/main/java/com/linkedin/openhouse/tables/repository/impl/InternalRepositoryUtils.java index 9ba40c2d8..d3ced5458 100644 --- a/services/tables/src/main/java/com/linkedin/openhouse/tables/repository/impl/InternalRepositoryUtils.java +++ b/services/tables/src/main/java/com/linkedin/openhouse/tables/repository/impl/InternalRepositoryUtils.java @@ -9,6 +9,7 @@ import com.linkedin.openhouse.tables.dto.mapper.iceberg.PartitionSpecMapper; import com.linkedin.openhouse.tables.dto.mapper.iceberg.PoliciesSpecMapper; import com.linkedin.openhouse.tables.dto.mapper.iceberg.TableTypeMapper; +import com.linkedin.openhouse.tables.model.CurrentSnapshotInfo; import com.linkedin.openhouse.tables.model.TableDto; import com.linkedin.openhouse.tables.repository.PreservedKeyChecker; import java.net.URI; @@ -135,8 +136,13 @@ static TableDto convertToTableDto( .jsonSnapshots(null) .tableProperties(megaProps) .sortOrder(SortOrderParser.toJson(table.sortOrder())) - .currentSnapshotSummary( - table.currentSnapshot() == null ? null : table.currentSnapshot().summary()) + .currentSnapshot( + table.currentSnapshot() == null + ? null + : CurrentSnapshotInfo.builder() + .snapshotId(table.currentSnapshot().snapshotId()) + .summary(table.currentSnapshot().summary()) + .build()) .build(); return tableDto; diff --git a/services/tables/src/main/java/com/linkedin/openhouse/tables/services/IcebergSnapshotsServiceImpl.java b/services/tables/src/main/java/com/linkedin/openhouse/tables/services/IcebergSnapshotsServiceImpl.java index bb2cbde4c..6623b5f37 100644 --- a/services/tables/src/main/java/com/linkedin/openhouse/tables/services/IcebergSnapshotsServiceImpl.java +++ b/services/tables/src/main/java/com/linkedin/openhouse/tables/services/IcebergSnapshotsServiceImpl.java @@ -10,7 +10,7 @@ import com.linkedin.openhouse.tables.model.TableDto; import com.linkedin.openhouse.tables.model.TableDtoPrimaryKey; import com.linkedin.openhouse.tables.repository.OpenHouseInternalRepository; -import com.linkedin.openhouse.tables.services.optimizer.OptimizerStatsClient; +import com.linkedin.openhouse.tables.services.postcommit.PostCommitDispatcher; import com.linkedin.openhouse.tables.utils.AuthorizationUtils; import com.linkedin.openhouse.tables.utils.TableUUIDGenerator; import java.util.Optional; @@ -34,11 +34,11 @@ public class IcebergSnapshotsServiceImpl implements IcebergSnapshotsService { @Autowired AuthorizationUtils authorizationUtils; /** - * Present only when {@code optimizer.stats.enabled=true}. When absent, no post-commit push is - * attempted. + * Present only when {@code tables.postcommit.enabled=true}. When absent, the on-commit hook is a + * literal no-op and no post-commit operations run. */ @Autowired(required = false) - Optional optimizerStatsClient = Optional.empty(); + Optional postCommitDispatcher = Optional.empty(); @Override public Pair putIcebergSnapshots( @@ -92,12 +92,7 @@ public Pair putIcebergSnapshots( } try { TableDto savedDto = openHouseInternalRepository.save(tableDtoToSave); - // Fire-and-forget push of the post-commit snapshot summary to the optimizer. Returns - // immediately; failures are swallowed inside the client. Skipped at the client when the - // table is not opted in via maintenance.optimizer.stats.enabled or when there is no current - // snapshot (e.g. CREATE TABLE without a data commit). - optimizerStatsClient.ifPresent( - client -> client.report(savedDto, savedDto.getCurrentSnapshotSummary())); + postCommitDispatcher.ifPresent(d -> d.dispatch(savedDto)); return Pair.of(savedDto, !tableDto.isPresent()); } catch (BadRequestException e) { throw new RequestValidationFailureException(e.getMessage(), e); diff --git a/services/tables/src/main/java/com/linkedin/openhouse/tables/services/optimizer/OptimizerStatsClient.java b/services/tables/src/main/java/com/linkedin/openhouse/tables/services/optimizer/OptimizerStatsClient.java deleted file mode 100644 index 23c69082e..000000000 --- a/services/tables/src/main/java/com/linkedin/openhouse/tables/services/optimizer/OptimizerStatsClient.java +++ /dev/null @@ -1,245 +0,0 @@ -package com.linkedin.openhouse.tables.services.optimizer; - -import com.linkedin.openhouse.common.metrics.MetricsConstant; -import com.linkedin.openhouse.tables.model.TableDto; -import io.micrometer.core.instrument.MeterRegistry; -import io.micrometer.core.instrument.Timer; -import java.time.Duration; -import java.util.Map; -import java.util.concurrent.TimeoutException; -import lombok.extern.slf4j.Slf4j; -import org.springframework.beans.factory.annotation.Qualifier; -import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty; -import org.springframework.stereotype.Component; -import org.springframework.web.reactive.function.client.WebClient; -import org.springframework.web.reactive.function.client.WebClientRequestException; -import org.springframework.web.reactive.function.client.WebClientResponseException; -import reactor.core.publisher.Mono; -import reactor.util.retry.Retry; -import reactor.util.retry.RetrySpec; - -/** - * Pushes a stats record to the optimizer's {@code PUT /v1/optimizer/stats/{tableUuid}} endpoint - * after a successful Iceberg snapshot commit. Fire-and-forget — the {@link #report} call returns - * immediately on the commit thread; the HTTP exchange runs on the WebClient's Netty event loop. - * - *

Errors are recorded as Micrometer metrics and logged at {@code warn} level, but never - * propagated. A push failure must not break the commit. - * - *

Timeout / retry shape (from {@link OptimizerStatsProperties}): - * - *

    - *
  • {@code perAttemptTimeoutMs} bounds each HTTP attempt (1000 ms default). - *
  • {@code maxAttempts} caps the total number of attempts (3 default — one initial + two - * retries). Retries only fire on retryable errors (network, 5xx, 408, 429, timeout). - *
  • {@code totalTimeoutMs} is the outer wall-clock ceiling (2000 ms default). Hard cancel. - *
- * - *

The bean is only wired when {@code optimizer.stats.enabled=true}. - */ -@Slf4j -@Component -@ConditionalOnProperty(prefix = "optimizer.stats", name = "enabled", havingValue = "true") -public class OptimizerStatsClient { - - /** Per-call URL path. */ - static final String PATH_TEMPLATE = "/v1/optimizer/stats/{tableUuid}"; - - /** Table-property key that opts a table in for the post-commit stats push. */ - static final String OPT_IN_PROPERTY = "maintenance.optimizer.stats.enabled"; - - /** Iceberg snapshot-summary keys we read. All values are decimal-string longs. */ - static final String SUMMARY_TOTAL_DATA_FILES = "total-data-files"; - - static final String SUMMARY_TOTAL_FILES_SIZE = "total-files-size"; - static final String SUMMARY_ADDED_DATA_FILES = "added-data-files"; - static final String SUMMARY_DELETED_DATA_FILES = "deleted-data-files"; - static final String SUMMARY_ADDED_FILES_SIZE = "added-files-size"; - static final String SUMMARY_REMOVED_FILES_SIZE = "removed-files-size"; - - private final WebClient webClient; - private final MeterRegistry meterRegistry; - private final OptimizerStatsProperties properties; - - public OptimizerStatsClient( - @Qualifier("optimizerStatsWebClient") WebClient webClient, - MeterRegistry meterRegistry, - OptimizerStatsProperties properties) { - this.webClient = webClient; - this.meterRegistry = meterRegistry; - this.properties = properties; - } - - /** - * Build and dispatch a stats push for the just-committed table. Returns immediately; the HTTP - * call runs in the background. {@code snapshotSummary} is the in-memory {@code - * Snapshot.summary()} map from the latest snapshot (no HDFS round-trip). - * - *

The call is skipped (and a {@code skipped} counter is emitted) when: - * - *

    - *
  • The table is not opted in via {@link #OPT_IN_PROPERTY}. - *
  • {@code snapshotSummary} is null or empty (no committed snapshot yet — e.g. CREATE TABLE - * with no rows). - *
- */ - public void report(TableDto saved, Map snapshotSummary) { - reportAsync(saved, snapshotSummary).subscribe(); - } - - /** - * Same as {@link #report} but returns the underlying {@link Mono} so callers (notably tests) can - * block on completion. Production callers use {@link #report}. - */ - Mono reportAsync(TableDto saved, Map snapshotSummary) { - if (!isOptedIn(saved)) { - meterRegistry - .counter(MetricsConstant.OPTIMIZER_STATS_SKIPPED, "reason", "opt_out") - .increment(); - return Mono.empty(); - } - if (snapshotSummary == null || snapshotSummary.isEmpty()) { - meterRegistry - .counter(MetricsConstant.OPTIMIZER_STATS_SKIPPED, "reason", "no_snapshot") - .increment(); - return Mono.empty(); - } - - OptimizerStatsRequest body = buildRequest(saved, snapshotSummary); - String tableUuid = saved.getTableUUID(); - Timer.Sample sample = Timer.start(meterRegistry); - - RetrySpec retrySpec = - Retry.max(Math.max(0, properties.getMaxAttempts() - 1)) - .filter(this::isRetryable) - .onRetryExhaustedThrow((spec, signal) -> signal.failure()); - - return webClient - .put() - .uri(PATH_TEMPLATE, tableUuid) - .bodyValue(body) - .retrieve() - .toBodilessEntity() - .timeout(Duration.ofMillis(properties.getPerAttemptTimeoutMs())) - .doOnError( - e -> - meterRegistry - .counter( - MetricsConstant.OPTIMIZER_STATS_ATTEMPTS, - "outcome", - isRetryable(e) ? "retryable_failure" : "non_retryable_failure") - .increment()) - .doOnSuccess( - ignored -> - meterRegistry - .counter(MetricsConstant.OPTIMIZER_STATS_ATTEMPTS, "outcome", "success") - .increment()) - .retryWhen(retrySpec) - .timeout(Duration.ofMillis(properties.getTotalTimeoutMs())) - .doOnSuccess( - ignored -> - sample.stop( - meterRegistry.timer( - MetricsConstant.OPTIMIZER_STATS_DURATION, "outcome", "success"))) - .onErrorResume( - e -> { - String outcome = classifyOutcome(e); - sample.stop( - meterRegistry.timer( - MetricsConstant.OPTIMIZER_STATS_DURATION, "outcome", outcome)); - meterRegistry - .counter(MetricsConstant.OPTIMIZER_STATS_FAILED_FINAL, "outcome", outcome) - .increment(); - log.warn( - "Optimizer stats push failed for table {} ({}): {}", - tableUuid, - outcome, - e.toString()); - return Mono.empty(); - }) - .then(); - } - - /** {@code true} iff the table's properties contain the literal opt-in value {@code "true"}. */ - private boolean isOptedIn(TableDto saved) { - Map props = saved.getTableProperties(); - return props != null && "true".equals(props.get(OPT_IN_PROPERTY)); - } - - /** Build the wire body. Missing summary keys default to 0L. */ - OptimizerStatsRequest buildRequest(TableDto saved, Map summary) { - OptimizerStatsRequest.Snapshot snapshot = - OptimizerStatsRequest.Snapshot.builder() - .tableVersion(saved.getTableVersion()) - .tableLocation(saved.getTableLocation()) - .tableSizeBytes(longOrZero(summary, SUMMARY_TOTAL_FILES_SIZE)) - .numCurrentFiles(longOrZero(summary, SUMMARY_TOTAL_DATA_FILES)) - .build(); - OptimizerStatsRequest.Delta delta = - OptimizerStatsRequest.Delta.builder() - .numFilesAdded(longOrZero(summary, SUMMARY_ADDED_DATA_FILES)) - .numFilesDeleted(longOrZero(summary, SUMMARY_DELETED_DATA_FILES)) - .addedSizeBytes(longOrZero(summary, SUMMARY_ADDED_FILES_SIZE)) - .deletedSizeBytes(longOrZero(summary, SUMMARY_REMOVED_FILES_SIZE)) - .build(); - return OptimizerStatsRequest.builder() - .databaseName(saved.getDatabaseId()) - .tableName(saved.getTableId()) - .stats(OptimizerStatsRequest.Stats.builder().snapshot(snapshot).delta(delta).build()) - .tableProperties(saved.getTableProperties()) - .build(); - } - - /** - * Retryable errors: per-attempt timeout, network-level failures, 5xx, 408, 429. Other 4xx are - * client errors — retrying won't fix them. - */ - boolean isRetryable(Throwable e) { - if (e instanceof TimeoutException) { - return true; - } - if (e instanceof WebClientRequestException) { - return true; - } - if (e instanceof WebClientResponseException) { - int code = ((WebClientResponseException) e).getStatusCode().value(); - return code == 408 || code == 429 || (code >= 500 && code < 600); - } - return false; - } - - /** - * Map a final-stage error to one of {@code success, timeout, network_error, server_error, - * client_error, unknown_error} for the {@code outcome} tag on duration / failed_final metrics. - */ - private String classifyOutcome(Throwable e) { - if (e instanceof TimeoutException) { - return "timeout"; - } - if (e instanceof WebClientRequestException) { - return "network_error"; - } - if (e instanceof WebClientResponseException) { - int code = ((WebClientResponseException) e).getStatusCode().value(); - if (code >= 500 && code < 600) { - return "server_error"; - } - if (code >= 400 && code < 500) { - return "client_error"; - } - } - return "unknown_error"; - } - - private static long longOrZero(Map m, String key) { - String v = m.get(key); - if (v == null) { - return 0L; - } - try { - return Long.parseLong(v); - } catch (NumberFormatException nfe) { - return 0L; - } - } -} diff --git a/services/tables/src/main/java/com/linkedin/openhouse/tables/services/optimizer/OptimizerStatsConfig.java b/services/tables/src/main/java/com/linkedin/openhouse/tables/services/optimizer/OptimizerStatsConfig.java index bbddbce6b..8061fb644 100644 --- a/services/tables/src/main/java/com/linkedin/openhouse/tables/services/optimizer/OptimizerStatsConfig.java +++ b/services/tables/src/main/java/com/linkedin/openhouse/tables/services/optimizer/OptimizerStatsConfig.java @@ -20,10 +20,11 @@ public class OptimizerStatsConfig { /** * Dedicated WebClient for the optimizer stats endpoint. Per-attempt and outer timeouts are - * applied at the call site on the Reactor chain in {@link OptimizerStatsClient} — they are not - * configured on the underlying Netty client so that the timeout always emerges as a standard - * {@link java.util.concurrent.TimeoutException} (not a Netty {@code ReadTimeoutException}), which - * keeps the client's outcome classification simple. + * applied at the call site on the Reactor chain (per-attempt in {@link + * OptimizerStatsPostCommitOperation}, outer per-op in the {@code PostCommitDispatcher}) — they + * are not configured on the underlying Netty client so that the timeout always emerges as a + * standard {@link java.util.concurrent.TimeoutException} (not a Netty {@code + * ReadTimeoutException}), which keeps the dispatcher's outcome classification simple. */ @Bean("optimizerStatsWebClient") public WebClient optimizerStatsWebClient(OptimizerStatsProperties properties) { diff --git a/services/tables/src/main/java/com/linkedin/openhouse/tables/services/optimizer/OptimizerStatsPostCommitOperation.java b/services/tables/src/main/java/com/linkedin/openhouse/tables/services/optimizer/OptimizerStatsPostCommitOperation.java new file mode 100644 index 000000000..0a82d8598 --- /dev/null +++ b/services/tables/src/main/java/com/linkedin/openhouse/tables/services/optimizer/OptimizerStatsPostCommitOperation.java @@ -0,0 +1,173 @@ +package com.linkedin.openhouse.tables.services.optimizer; + +import com.linkedin.openhouse.tables.model.CurrentSnapshotInfo; +import com.linkedin.openhouse.tables.model.TableDto; +import com.linkedin.openhouse.tables.services.postcommit.PostCommitOperation; +import java.time.Duration; +import java.util.Map; +import java.util.Optional; +import java.util.concurrent.TimeoutException; +import lombok.extern.slf4j.Slf4j; +import org.springframework.beans.factory.annotation.Qualifier; +import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty; +import org.springframework.stereotype.Component; +import org.springframework.web.reactive.function.client.WebClient; +import org.springframework.web.reactive.function.client.WebClientRequestException; +import org.springframework.web.reactive.function.client.WebClientResponseException; +import reactor.core.publisher.Mono; +import reactor.util.retry.Retry; +import reactor.util.retry.RetrySpec; + +/** + * {@link PostCommitOperation} that PUTs a snapshot-stats record to the optimizer's per-table stats + * endpoint. {@link #prepare(TableDto)} returns a {@link Mono} that completes on HTTP 2xx and + * signals an error otherwise; the dispatcher owns timeout, subscription, error swallowing, and + * metric emission. + * + *

Skipped (operation returns {@link Optional#empty()}) when the table is not opted in via the + * {@link #OPT_IN_PROPERTY} table property, or when no current snapshot is present (e.g. {@code + * CREATE TABLE} with no rows yet). + * + *

Internal retry: bounded by {@link OptimizerStatsProperties#getMaxAttempts()}, fires only on + * retryable errors (network, {@link TimeoutException}, HTTP 408 / 429 / 5xx). The dispatcher's + * outer per-op timeout is the hard ceiling on the whole chain. + * + *

Bean is only wired when {@code optimizer.stats.enabled=true}. The path constant is + * intentionally duplicated from {@code TableStatsController.TABLE_PATH_TEMPLATE}; keep in sync. + * Tables service does not take a compile-time dependency on the optimizer service jar. + */ +@Slf4j +@Component +@ConditionalOnProperty(prefix = "optimizer.stats", name = "enabled", havingValue = "true") +public class OptimizerStatsPostCommitOperation implements PostCommitOperation { + + /** Metric tag value for {@code op}. */ + static final String OP_NAME = "optimizer_stats"; + + /** + * Per-call URL path. Intentionally duplicated from {@code + * TableStatsController.TABLE_PATH_TEMPLATE}; keep in sync. + */ + static final String PATH_TEMPLATE = "/v1/optimizer/stats/{tableUuid}"; + + /** Table-property key that opts a table in for the post-commit stats push. */ + static final String OPT_IN_PROPERTY = "maintenance.optimizer.stats.enabled"; + + /** Iceberg snapshot-summary keys we read. All values are decimal-string longs. */ + static final String SUMMARY_TOTAL_DATA_FILES = "total-data-files"; + + static final String SUMMARY_TOTAL_FILES_SIZE = "total-files-size"; + static final String SUMMARY_ADDED_DATA_FILES = "added-data-files"; + static final String SUMMARY_DELETED_DATA_FILES = "deleted-data-files"; + static final String SUMMARY_ADDED_FILES_SIZE = "added-files-size"; + static final String SUMMARY_REMOVED_FILES_SIZE = "removed-files-size"; + + private final WebClient webClient; + private final OptimizerStatsProperties properties; + + public OptimizerStatsPostCommitOperation( + @Qualifier("optimizerStatsWebClient") WebClient webClient, + OptimizerStatsProperties properties) { + this.webClient = webClient; + this.properties = properties; + } + + @Override + public String name() { + return OP_NAME; + } + + @Override + public Optional> prepare(TableDto savedDto) { + if (!isOptedIn(savedDto)) { + return Optional.empty(); + } + Optional snapshot = savedDto.getCurrentSnapshot(); + if (!snapshot.isPresent()) { + return Optional.empty(); + } + + OptimizerStatsRequest body = buildRequest(savedDto, snapshot.get()); + String tableUuid = savedDto.getTableUUID(); + + RetrySpec retrySpec = + Retry.max(Math.max(0, properties.getMaxAttempts() - 1)).filter(this::isRetryable); + + Mono chain = + webClient + .put() + .uri(PATH_TEMPLATE, tableUuid) + .bodyValue(body) + .retrieve() + .toBodilessEntity() + .timeout(Duration.ofMillis(properties.getPerAttemptTimeoutMs())) + .retryWhen(retrySpec.onRetryExhaustedThrow((spec, signal) -> signal.failure())) + .then(); + return Optional.of(chain); + } + + /** {@code true} iff the table's properties contain the literal opt-in value {@code "true"}. */ + private boolean isOptedIn(TableDto saved) { + Map props = saved.getTableProperties(); + return props != null && "true".equals(props.get(OPT_IN_PROPERTY)); + } + + /** Build the wire body. Missing summary keys default to 0L. */ + OptimizerStatsRequest buildRequest(TableDto saved, CurrentSnapshotInfo snapshot) { + Map summary = snapshot.getSummary(); + OptimizerStatsRequest.Snapshot snapshotPayload = + OptimizerStatsRequest.Snapshot.builder() + .snapshotId(snapshot.getSnapshotId()) + .tableVersion(saved.getTableVersion()) + .tableLocation(saved.getTableLocation()) + .tableSizeBytes(longOrZero(summary, SUMMARY_TOTAL_FILES_SIZE)) + .numCurrentFiles(longOrZero(summary, SUMMARY_TOTAL_DATA_FILES)) + .build(); + OptimizerStatsRequest.Delta delta = + OptimizerStatsRequest.Delta.builder() + .numFilesAdded(longOrZero(summary, SUMMARY_ADDED_DATA_FILES)) + .numFilesDeleted(longOrZero(summary, SUMMARY_DELETED_DATA_FILES)) + .addedSizeBytes(longOrZero(summary, SUMMARY_ADDED_FILES_SIZE)) + .deletedSizeBytes(longOrZero(summary, SUMMARY_REMOVED_FILES_SIZE)) + .build(); + return OptimizerStatsRequest.builder() + .databaseName(saved.getDatabaseId()) + .tableName(saved.getTableId()) + .stats(OptimizerStatsRequest.Stats.builder().snapshot(snapshotPayload).delta(delta).build()) + .tableProperties(saved.getTableProperties()) + .build(); + } + + /** + * Retryable errors: per-attempt timeout, network-level failures, 5xx, 408, 429. Other 4xx are + * client errors — retrying won't fix them. + */ + boolean isRetryable(Throwable e) { + if (e instanceof TimeoutException) { + return true; + } + if (e instanceof WebClientRequestException) { + return true; + } + if (e instanceof WebClientResponseException) { + int code = ((WebClientResponseException) e).getStatusCode().value(); + return code == 408 || code == 429 || (code >= 500 && code < 600); + } + return false; + } + + private static long longOrZero(Map m, String key) { + if (m == null) { + return 0L; + } + String v = m.get(key); + if (v == null) { + return 0L; + } + try { + return Long.parseLong(v); + } catch (NumberFormatException nfe) { + return 0L; + } + } +} diff --git a/services/tables/src/main/java/com/linkedin/openhouse/tables/services/optimizer/OptimizerStatsProperties.java b/services/tables/src/main/java/com/linkedin/openhouse/tables/services/optimizer/OptimizerStatsProperties.java index 7d746ee93..53b1844be 100644 --- a/services/tables/src/main/java/com/linkedin/openhouse/tables/services/optimizer/OptimizerStatsProperties.java +++ b/services/tables/src/main/java/com/linkedin/openhouse/tables/services/optimizer/OptimizerStatsProperties.java @@ -25,19 +25,16 @@ public class OptimizerStatsProperties { */ private String baseUri; - /** Per-attempt request timeout in milliseconds. Default 1000. */ - private long perAttemptTimeoutMs = 1000L; - /** - * Hard ceiling on total wall-clock time for the entire call (including retries) in milliseconds. - * Default 2000. When the outer timeout fires, the chain is cancelled and the error is swallowed. + * Per-attempt HTTP timeout in milliseconds. Bounds each individual attempt so retries can fit + * inside the dispatcher's outer per-op budget ({@code tables.postcommit.per-op-timeout-ms}, + * default 3000 ms). Default 1000 ms. */ - private long totalTimeoutMs = 2000L; + private long perAttemptTimeoutMs = 1000L; /** - * Total attempt count (initial try plus retries). Default 3. With 1000 ms per attempt and a 2000 - * ms outer ceiling, only about two attempts realistically fit on a slow path; the third is - * available if attempts return quickly. + * Total attempt count (initial try plus retries). Default 3. Retries fire only on retryable + * errors (network, timeout, 408/429/5xx); other 4xx fail fast without retry. */ private int maxAttempts = 3; } diff --git a/services/tables/src/main/java/com/linkedin/openhouse/tables/services/optimizer/OptimizerStatsRequest.java b/services/tables/src/main/java/com/linkedin/openhouse/tables/services/optimizer/OptimizerStatsRequest.java index 68a8646da..df929f666 100644 --- a/services/tables/src/main/java/com/linkedin/openhouse/tables/services/optimizer/OptimizerStatsRequest.java +++ b/services/tables/src/main/java/com/linkedin/openhouse/tables/services/optimizer/OptimizerStatsRequest.java @@ -47,6 +47,12 @@ public static class Stats { @AllArgsConstructor @JsonInclude(JsonInclude.Include.NON_NULL) public static class Snapshot { + /** + * Iceberg snapshot ID. Sent so the optimizer can use it as an idempotency token on upsert and + * reject out-of-order replays — server-side wiring tracked in BDP-102985. + */ + private Long snapshotId; + private String tableVersion; private String tableLocation; private Long tableSizeBytes; diff --git a/services/tables/src/main/java/com/linkedin/openhouse/tables/services/postcommit/PostCommitDispatcher.java b/services/tables/src/main/java/com/linkedin/openhouse/tables/services/postcommit/PostCommitDispatcher.java new file mode 100644 index 000000000..d205aee12 --- /dev/null +++ b/services/tables/src/main/java/com/linkedin/openhouse/tables/services/postcommit/PostCommitDispatcher.java @@ -0,0 +1,147 @@ +package com.linkedin.openhouse.tables.services.postcommit; + +import com.linkedin.openhouse.common.metrics.MetricsConstant; +import com.linkedin.openhouse.tables.model.TableDto; +import io.micrometer.core.instrument.MeterRegistry; +import io.micrometer.core.instrument.Timer; +import java.time.Duration; +import java.util.List; +import java.util.Optional; +import java.util.concurrent.TimeoutException; +import lombok.extern.slf4j.Slf4j; +import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty; +import org.springframework.boot.context.properties.EnableConfigurationProperties; +import org.springframework.stereotype.Component; +import org.springframework.web.reactive.function.client.WebClientRequestException; +import org.springframework.web.reactive.function.client.WebClientResponseException; +import reactor.core.publisher.Mono; + +/** + * Runs {@link PostCommitOperation}s after a successful Iceberg commit. Best-effort and bounded: + * each operation gets a wall-clock timeout, errors are swallowed after metric/log, and dispatch + * itself is fire-and-forget so the commit thread is never blocked on operation work. + * + *

Why async. A synchronous post-commit hook converts a downstream outage (the optimizer + * being slow or unavailable, a network glitch) into a Tables-Service write outage — the post-commit + * push is a best-effort scheduling signal, not a write-correctness step, and its blast radius must + * not include the write path. The crash-loss window (a JVM dying after commit and before the HTTP + * push completes) is acceptable because operations are designed to be cumulative: the next commit + * carries the same state forward and the consumer self-corrects from missing data. + * + *

Why the dispatcher owns timeouts. Operations only describe payload + endpoint. The + * timeout/swallow/subscribe machinery lives here once so individual operations stay small and so + * the contract across operations is uniform (one knob: {@code + * tables.postcommit.per-op-timeout-ms}). + * + *

Bean is only wired when {@code tables.postcommit.enabled=true}. + */ +@Slf4j +@Component +@EnableConfigurationProperties(PostCommitProperties.class) +@ConditionalOnProperty(prefix = "tables.postcommit", name = "enabled", havingValue = "true") +public class PostCommitDispatcher { + + private final List operations; + private final PostCommitProperties properties; + private final MeterRegistry meterRegistry; + + public PostCommitDispatcher( + List operations, + PostCommitProperties properties, + MeterRegistry meterRegistry) { + this.operations = operations; + this.properties = properties; + this.meterRegistry = meterRegistry; + } + + /** + * Dispatch all registered operations for {@code savedDto}. Returns immediately on the calling + * thread; each operation runs on its underlying reactive scheduler. Never throws. + */ + public void dispatch(TableDto savedDto) { + for (PostCommitOperation op : operations) { + decorate(op, savedDto).ifPresent(Mono::subscribe); + } + } + + /** + * Returns the fully-decorated {@link Mono} for {@code op} (per-op timeout, success / error metric + * emission, error swallow) without subscribing. Emits the {@code skipped} or {@code + * prepare_threw} metric synchronously and returns {@link Optional#empty()} in those cases. + * + *

Package-private so tests can {@code .block()} on the decorated chain rather than polling for + * metric emission after a fire-and-forget {@code subscribe()}. + */ + Optional> decorate(PostCommitOperation op, TableDto savedDto) { + Optional> work; + try { + work = op.prepare(savedDto); + } catch (RuntimeException e) { + // Defensive: a prepare() that throws synchronously must not break dispatch of later ops. + meterRegistry + .counter( + MetricsConstant.POSTCOMMIT_OP_FAILED, "op", op.name(), "outcome", "prepare_threw") + .increment(); + log.warn("Post-commit op {} prepare() threw {}", op.name(), e.toString()); + return Optional.empty(); + } + if (!work.isPresent()) { + meterRegistry.counter(MetricsConstant.POSTCOMMIT_OP_SKIPPED, "op", op.name()).increment(); + return Optional.empty(); + } + Timer.Sample sample = Timer.start(meterRegistry); + Mono decorated = + work.get() + .timeout(Duration.ofMillis(properties.getPerOpTimeoutMs())) + .doOnSuccess( + ignored -> + sample.stop( + meterRegistry.timer( + MetricsConstant.POSTCOMMIT_OP_DURATION, + "op", + op.name(), + "outcome", + "success"))) + .onErrorResume( + e -> { + String outcome = classifyOutcome(e); + sample.stop( + meterRegistry.timer( + MetricsConstant.POSTCOMMIT_OP_DURATION, + "op", + op.name(), + "outcome", + outcome)); + meterRegistry + .counter( + MetricsConstant.POSTCOMMIT_OP_FAILED, "op", op.name(), "outcome", outcome) + .increment(); + log.warn("Post-commit op {} failed ({}): {}", op.name(), outcome, e.toString()); + return Mono.empty(); + }); + return Optional.of(decorated); + } + + /** + * Map a terminal error to a small set of outcome tags. Kept here so all operations share the same + * taxonomy. + */ + private static String classifyOutcome(Throwable e) { + if (e instanceof TimeoutException) { + return "timeout"; + } + if (e instanceof WebClientRequestException) { + return "network_error"; + } + if (e instanceof WebClientResponseException) { + int code = ((WebClientResponseException) e).getStatusCode().value(); + if (code >= 500 && code < 600) { + return "server_error"; + } + if (code >= 400 && code < 500) { + return "client_error"; + } + } + return "unknown_error"; + } +} diff --git a/services/tables/src/main/java/com/linkedin/openhouse/tables/services/postcommit/PostCommitOperation.java b/services/tables/src/main/java/com/linkedin/openhouse/tables/services/postcommit/PostCommitOperation.java new file mode 100644 index 000000000..5ba49b371 --- /dev/null +++ b/services/tables/src/main/java/com/linkedin/openhouse/tables/services/postcommit/PostCommitOperation.java @@ -0,0 +1,39 @@ +package com.linkedin.openhouse.tables.services.postcommit; + +import com.linkedin.openhouse.tables.model.TableDto; +import java.util.Optional; +import reactor.core.publisher.Mono; + +/** + * A best-effort, bounded, asynchronous action run by the Tables Service after a successful Iceberg + * commit. Operations are invoked by {@link PostCommitDispatcher} and are subject to a single per-op + * wall-clock timeout owned by the dispatcher. Operations never affect commit correctness: any error + * they signal is recorded as a metric and a log line, then swallowed. + * + *

Implementations describe what to do; the dispatcher owns how it runs + * (subscription, timeout, error handling, metric emission). Each operation contributes: + * + *

    + *
  • {@link #name()} — short identifier used as a metric tag. + *
  • {@link #prepare(TableDto)} — returns the work to perform, or {@link Optional#empty()} when + * the operation does not apply to this commit (e.g. table not opted in, no committed + * snapshot). Returning empty is a normal outcome; the dispatcher emits a {@code skipped} + * metric and moves on. + *
+ * + *

The returned {@link Mono} must not subscribe itself, apply its own outer timeout, or swallow + * errors — those concerns belong to the dispatcher. Implementations may apply internal retries with + * bounded budgets. + */ +public interface PostCommitOperation { + + /** Short, stable identifier for this operation. Used as the {@code op} metric tag. */ + String name(); + + /** + * Build the work to run for {@code savedDto}, or return {@link Optional#empty()} if this + * operation does not apply. The dispatcher only invokes the returned {@link Mono} after applying + * its own timeout / error-swallow plumbing. + */ + Optional> prepare(TableDto savedDto); +} diff --git a/services/tables/src/main/java/com/linkedin/openhouse/tables/services/postcommit/PostCommitProperties.java b/services/tables/src/main/java/com/linkedin/openhouse/tables/services/postcommit/PostCommitProperties.java new file mode 100644 index 000000000..8a153cd79 --- /dev/null +++ b/services/tables/src/main/java/com/linkedin/openhouse/tables/services/postcommit/PostCommitProperties.java @@ -0,0 +1,31 @@ +package com.linkedin.openhouse.tables.services.postcommit; + +import lombok.Getter; +import lombok.Setter; +import org.springframework.boot.context.properties.ConfigurationProperties; + +/** + * Configuration for the Tables Service post-commit operation framework. Property prefix: {@code + * tables.postcommit}. + * + *

When {@code enabled} is {@code false} (the default), no dispatcher bean is constructed and + * registered {@link PostCommitOperation} beans, if any, are never invoked. Per-OH-instance roll-out + * flips a single flag. + */ +@ConfigurationProperties("tables.postcommit") +@Getter +@Setter +public class PostCommitProperties { + + /** Master switch. When {@code false}, no operations are dispatched on commit. */ + private boolean enabled = false; + + /** + * Wall-clock ceiling applied by the dispatcher to each operation's {@link + * PostCommitOperation#prepare(com.linkedin.openhouse.tables.model.TableDto)} {@code Mono}. Bounds + * how long a misbehaving operation can keep resources (connections, threads) tied up. Does not + * bound commit-thread latency — operations run on the executing reactive scheduler, not on the + * commit thread. Default 3000 ms. + */ + private long perOpTimeoutMs = 3000L; +} diff --git a/services/tables/src/main/resources/application.properties b/services/tables/src/main/resources/application.properties index ddf4c3022..24f5554c5 100644 --- a/services/tables/src/main/resources/application.properties +++ b/services/tables/src/main/resources/application.properties @@ -25,10 +25,14 @@ management.metrics.distribution.maximum-expected-value.http.server.requests=600s server.shutdown=graceful spring.lifecycle.timeout-per-shutdown-phase=60s -# Post-commit Tables -> Optimizer stats push. -# Disabled by default; environments that run the optimizer opt in via env vars. +# Post-commit operation framework. Disabled by default; flip per OH instance. +# Outer per-op wall-clock budget is enforced by PostCommitDispatcher. +tables.postcommit.enabled=${TABLES_POSTCOMMIT_ENABLED:false} +tables.postcommit.per-op-timeout-ms=${TABLES_POSTCOMMIT_PER_OP_TIMEOUT_MS:3000} + +# Optimizer stats post-commit operation (one impl of PostCommitOperation). +# Both flags must be true for the push to be wired and dispatched. optimizer.stats.enabled=${OPTIMIZER_STATS_ENABLED:false} optimizer.stats.base-uri=${OPTIMIZER_STATS_BASE_URI:} optimizer.stats.per-attempt-timeout-ms=${OPTIMIZER_STATS_PER_ATTEMPT_TIMEOUT_MS:1000} -optimizer.stats.total-timeout-ms=${OPTIMIZER_STATS_TOTAL_TIMEOUT_MS:2000} optimizer.stats.max-attempts=${OPTIMIZER_STATS_MAX_ATTEMPTS:3} \ No newline at end of file diff --git a/services/tables/src/test/java/com/linkedin/openhouse/tables/mock/service/IcebergSnapshotsServiceTest.java b/services/tables/src/test/java/com/linkedin/openhouse/tables/mock/service/IcebergSnapshotsServiceTest.java index 80f9233af..648ae1871 100644 --- a/services/tables/src/test/java/com/linkedin/openhouse/tables/mock/service/IcebergSnapshotsServiceTest.java +++ b/services/tables/src/test/java/com/linkedin/openhouse/tables/mock/service/IcebergSnapshotsServiceTest.java @@ -11,14 +11,14 @@ import com.linkedin.openhouse.tables.api.spec.v0.request.components.Policies; import com.linkedin.openhouse.tables.dto.mapper.TablesMapper; import com.linkedin.openhouse.tables.dto.mapper.TablesMapperImpl; +import com.linkedin.openhouse.tables.model.CurrentSnapshotInfo; import com.linkedin.openhouse.tables.model.TableDto; import com.linkedin.openhouse.tables.model.TableDtoPrimaryKey; import com.linkedin.openhouse.tables.repository.OpenHouseInternalRepository; import com.linkedin.openhouse.tables.services.IcebergSnapshotsService; -import com.linkedin.openhouse.tables.services.optimizer.OptimizerStatsClient; +import com.linkedin.openhouse.tables.services.postcommit.PostCommitDispatcher; import com.linkedin.openhouse.tables.utils.TableUUIDGenerator; import java.util.Collections; -import java.util.Map; import java.util.Optional; import java.util.UUID; import org.apache.iceberg.exceptions.BadRequestException; @@ -49,12 +49,11 @@ public class IcebergSnapshotsServiceTest { @MockBean private TableUUIDGenerator tableUUIDGenerator; /** - * Forces the optional optimizer-stats client into the service so that we can verify the - * post-commit push is invoked. Production wiring is conditional on {@code - * optimizer.stats.enabled=true}; this {@code @MockBean} bypasses that condition for the one test - * that exercises the hook. + * Forces the optional post-commit dispatcher into the service so that we can verify it is invoked + * after a successful save. Production wiring is conditional on {@code + * tables.postcommit.enabled=true}; this {@code @MockBean} bypasses that condition. */ - @MockBean private OptimizerStatsClient optimizerStatsClient; + @MockBean private PostCommitDispatcher postCommitDispatcher; private OpenHouseInternalRepository mockRepository; @@ -102,20 +101,20 @@ public void testTableCreated() { } @Test - public void testOptimizerStatsClientInvokedAfterSuccessfulCommit() { + public void testPostCommitDispatcherInvokedAfterSuccessfulCommit() { final IcebergSnapshotsRequestBody requestBody = TEST_ICEBERG_SNAPSHOTS_INITIAL_VERSION_REQUEST_BODY; final String dbId = requestBody.getCreateUpdateTableRequestBody().getDatabaseId(); final String tableId = requestBody.getCreateUpdateTableRequestBody().getTableId(); final TableDtoPrimaryKey key = TableDtoPrimaryKey.builder().databaseId(dbId).tableId(tableId).build(); - final Map summary = Collections.singletonMap("total-data-files", "7"); - final TableDto savedDto = - TableDto.builder() - .databaseId(dbId) - .tableId(tableId) - .currentSnapshotSummary(summary) + final CurrentSnapshotInfo snapshot = + CurrentSnapshotInfo.builder() + .snapshotId(42L) + .summary(Collections.singletonMap("total-data-files", "7")) .build(); + final TableDto savedDto = + TableDto.builder().databaseId(dbId).tableId(tableId).currentSnapshot(snapshot).build(); Mockito.when(tableUUIDGenerator.generateUUID(Mockito.any(IcebergSnapshotsRequestBody.class))) .thenReturn(UUID.randomUUID()); @@ -124,7 +123,7 @@ public void testOptimizerStatsClientInvokedAfterSuccessfulCommit() { service.putIcebergSnapshots(dbId, tableId, requestBody, TEST_TABLE_CREATOR); - Mockito.verify(optimizerStatsClient, Mockito.times(1)).report(savedDto, summary); + Mockito.verify(postCommitDispatcher, Mockito.times(1)).dispatch(savedDto); } @Test diff --git a/services/tables/src/test/java/com/linkedin/openhouse/tables/model/TableDtoMappingTest.java b/services/tables/src/test/java/com/linkedin/openhouse/tables/model/TableDtoMappingTest.java index 64aefd19b..53fb633e3 100644 --- a/services/tables/src/test/java/com/linkedin/openhouse/tables/model/TableDtoMappingTest.java +++ b/services/tables/src/test/java/com/linkedin/openhouse/tables/model/TableDtoMappingTest.java @@ -34,7 +34,7 @@ public class TableDtoMappingTest { "snapshotRefs", "policies", "tableType", - "currentSnapshotSummary"); + "currentSnapshot"); /** Making all fields making it to map is expected, and all expected field are making it there. */ @Test diff --git a/services/tables/src/test/java/com/linkedin/openhouse/tables/services/optimizer/OptimizerStatsClientTest.java b/services/tables/src/test/java/com/linkedin/openhouse/tables/services/optimizer/OptimizerStatsPostCommitOperationTest.java similarity index 53% rename from services/tables/src/test/java/com/linkedin/openhouse/tables/services/optimizer/OptimizerStatsClientTest.java rename to services/tables/src/test/java/com/linkedin/openhouse/tables/services/optimizer/OptimizerStatsPostCommitOperationTest.java index 33ea0d197..417db2abf 100644 --- a/services/tables/src/test/java/com/linkedin/openhouse/tables/services/optimizer/OptimizerStatsClientTest.java +++ b/services/tables/src/test/java/com/linkedin/openhouse/tables/services/optimizer/OptimizerStatsPostCommitOperationTest.java @@ -3,51 +3,49 @@ import static org.assertj.core.api.Assertions.assertThat; import com.fasterxml.jackson.databind.ObjectMapper; -import com.linkedin.openhouse.common.metrics.MetricsConstant; +import com.linkedin.openhouse.tables.model.CurrentSnapshotInfo; import com.linkedin.openhouse.tables.model.TableDto; -import io.micrometer.core.instrument.MeterRegistry; -import io.micrometer.core.instrument.simple.SimpleMeterRegistry; import java.io.IOException; import java.time.Duration; import java.util.HashMap; import java.util.Map; +import java.util.Optional; import java.util.concurrent.TimeUnit; import okhttp3.mockwebserver.MockResponse; import okhttp3.mockwebserver.MockWebServer; import okhttp3.mockwebserver.RecordedRequest; -import okhttp3.mockwebserver.SocketPolicy; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.springframework.http.client.reactive.ReactorClientHttpConnector; import org.springframework.web.reactive.function.client.WebClient; +import org.springframework.web.reactive.function.client.WebClientResponseException; +import reactor.core.publisher.Mono; import reactor.netty.http.client.HttpClient; -class OptimizerStatsClientTest { +class OptimizerStatsPostCommitOperationTest { private static final String TABLE_UUID = "uuid-1"; private static final String DB = "db1"; private static final String TABLE = "tbl1"; + private static final long SNAPSHOT_ID = 9876543210L; private static final Duration BLOCK_MAX = Duration.ofSeconds(5); private MockWebServer server; - private MeterRegistry meterRegistry; private OptimizerStatsProperties properties; - private OptimizerStatsClient client; + private OptimizerStatsPostCommitOperation op; private final ObjectMapper mapper = new ObjectMapper(); @BeforeEach void setUp() throws IOException { server = new MockWebServer(); server.start(); - meterRegistry = new SimpleMeterRegistry(); properties = new OptimizerStatsProperties(); properties.setEnabled(true); properties.setBaseUri(server.url("/").toString()); properties.setPerAttemptTimeoutMs(1000L); - properties.setTotalTimeoutMs(2000L); properties.setMaxAttempts(3); - client = new OptimizerStatsClient(buildWebClient(), meterRegistry, properties); + op = new OptimizerStatsPostCommitOperation(buildWebClient(), properties); } @AfterEach @@ -56,25 +54,31 @@ void tearDown() throws IOException { } private WebClient buildWebClient() { - HttpClient httpClient = HttpClient.create(); return WebClient.builder() .baseUrl(properties.getBaseUri()) - .clientConnector(new ReactorClientHttpConnector(httpClient)) + .clientConnector(new ReactorClientHttpConnector(HttpClient.create())) .build(); } @Test - void report_optedIn_sendsRequestWithPayloadFieldsFromSummary() throws Exception { + void name_isStableTagValue() { + assertThat(op.name()).isEqualTo("optimizer_stats"); + } + + @Test + void prepare_optedIn_emitsRequestWithFullPayload() throws Exception { server.enqueue(new MockResponse().setResponseCode(200)); TableDto saved = optedInBuilder() .tableVersion("v3") .tableLocation("/data/tables/db1/tbl1") - .currentSnapshotSummary(summary()) + .currentSnapshot(snapshot()) .build(); - client.reportAsync(saved, saved.getCurrentSnapshotSummary()).block(BLOCK_MAX); + Optional> work = op.prepare(saved); + assertThat(work).isPresent(); + work.get().block(BLOCK_MAX); RecordedRequest req = server.takeRequest(2, TimeUnit.SECONDS); assertThat(req).isNotNull(); @@ -91,6 +95,7 @@ void report_optedIn_sendsRequestWithPayloadFieldsFromSummary() throws Exception @SuppressWarnings("unchecked") Map delta = (Map) stats.get("delta"); assertThat(snapshot) + .containsEntry("snapshotId", SNAPSHOT_ID) .containsEntry("tableVersion", "v3") .containsEntry("tableLocation", "/data/tables/db1/tbl1") .containsEntry("tableSizeBytes", 4096) @@ -100,116 +105,80 @@ void report_optedIn_sendsRequestWithPayloadFieldsFromSummary() throws Exception .containsEntry("numFilesDeleted", 2) .containsEntry("addedSizeBytes", 2048) .containsEntry("deletedSizeBytes", 1024); - - assertThat( - meterRegistry - .counter(MetricsConstant.OPTIMIZER_STATS_ATTEMPTS, "outcome", "success") - .count()) - .isEqualTo(1.0); } @Test - void report_notOptedIn_skipsCallAndIncrementsSkippedCounter() { - TableDto saved = builderWithoutOptIn().currentSnapshotSummary(summary()).build(); - - client.reportAsync(saved, saved.getCurrentSnapshotSummary()).block(BLOCK_MAX); - + void prepare_notOptedIn_returnsEmpty() { + TableDto saved = builderWithoutOptIn().currentSnapshot(snapshot()).build(); + assertThat(op.prepare(saved)).isEmpty(); assertThat(server.getRequestCount()).isZero(); - assertThat( - meterRegistry - .counter(MetricsConstant.OPTIMIZER_STATS_SKIPPED, "reason", "opt_out") - .count()) - .isEqualTo(1.0); } @Test - void report_noSnapshot_skipsCallAndIncrementsSkippedCounter() { - TableDto saved = optedInBuilder().currentSnapshotSummary(null).build(); - - client.reportAsync(saved, null).block(BLOCK_MAX); - + void prepare_noSnapshot_returnsEmpty() { + TableDto saved = optedInBuilder().currentSnapshot(null).build(); + assertThat(op.prepare(saved)).isEmpty(); assertThat(server.getRequestCount()).isZero(); - assertThat( - meterRegistry - .counter(MetricsConstant.OPTIMIZER_STATS_SKIPPED, "reason", "no_snapshot") - .count()) - .isEqualTo(1.0); } @Test - void report_5xxThenSuccess_retriesAndSucceeds() { + void prepare_5xxThenSuccess_retriesAndCompletes() { server.enqueue(new MockResponse().setResponseCode(503)); server.enqueue(new MockResponse().setResponseCode(200)); - TableDto saved = optedInBuilder().currentSnapshotSummary(summary()).build(); - client.reportAsync(saved, saved.getCurrentSnapshotSummary()).block(BLOCK_MAX); + TableDto saved = optedInBuilder().currentSnapshot(snapshot()).build(); + op.prepare(saved).orElseThrow(AssertionError::new).block(BLOCK_MAX); assertThat(server.getRequestCount()).isEqualTo(2); - assertThat( - meterRegistry - .counter(MetricsConstant.OPTIMIZER_STATS_ATTEMPTS, "outcome", "success") - .count()) - .isEqualTo(1.0); - assertThat( - meterRegistry - .counter(MetricsConstant.OPTIMIZER_STATS_ATTEMPTS, "outcome", "retryable_failure") - .count()) - .isEqualTo(1.0); } @Test - void report_allAttemptsFail_swallowsAndIncrementsFailedFinal() { + void prepare_allAttemptsFail_propagatesUnderlyingError() { properties.setMaxAttempts(2); server.enqueue(new MockResponse().setResponseCode(503)); server.enqueue(new MockResponse().setResponseCode(503)); - TableDto saved = optedInBuilder().currentSnapshotSummary(summary()).build(); - client.reportAsync(saved, saved.getCurrentSnapshotSummary()).block(BLOCK_MAX); + TableDto saved = optedInBuilder().currentSnapshot(snapshot()).build(); + Mono chain = op.prepare(saved).orElseThrow(AssertionError::new); + assertThatChainErrors(chain, WebClientResponseException.class); assertThat(server.getRequestCount()).isEqualTo(2); - assertThat( - meterRegistry - .counter(MetricsConstant.OPTIMIZER_STATS_FAILED_FINAL, "outcome", "server_error") - .count()) - .isEqualTo(1.0); } @Test - void report_4xxNonRetryable_doesNotRetry() { + void prepare_4xxNonRetryable_doesNotRetry() { server.enqueue(new MockResponse().setResponseCode(400)); - TableDto saved = optedInBuilder().currentSnapshotSummary(summary()).build(); - client.reportAsync(saved, saved.getCurrentSnapshotSummary()).block(BLOCK_MAX); + TableDto saved = optedInBuilder().currentSnapshot(snapshot()).build(); + Mono chain = op.prepare(saved).orElseThrow(AssertionError::new); + assertThatChainErrors(chain, WebClientResponseException.class); assertThat(server.getRequestCount()).isEqualTo(1); - assertThat( - meterRegistry - .counter(MetricsConstant.OPTIMIZER_STATS_FAILED_FINAL, "outcome", "client_error") - .count()) - .isEqualTo(1.0); } - @Test - void report_perAttemptTimeoutTrips_recordsTimeout() { - properties.setPerAttemptTimeoutMs(150L); - properties.setTotalTimeoutMs(600L); - properties.setMaxAttempts(1); - client = new OptimizerStatsClient(buildWebClient(), meterRegistry, properties); - // Connection opens but the server never sends any response — forces Reactor's per-attempt - // .timeout(150ms) to fire with TimeoutException. - server.enqueue(new MockResponse().setSocketPolicy(SocketPolicy.NO_RESPONSE)); + // ---- helpers ---- - TableDto saved = optedInBuilder().currentSnapshotSummary(summary()).build(); - client.reportAsync(saved, saved.getCurrentSnapshotSummary()).block(BLOCK_MAX); + private void assertThatChainErrors(Mono chain, Class errorType) { + try { + chain.block(BLOCK_MAX); + throw new AssertionError("expected chain to signal an error of type " + errorType); + } catch (RuntimeException e) { + Throwable cause = unwrap(e); + assertThat(cause).isInstanceOf(errorType); + } + } - assertThat( - meterRegistry - .counter(MetricsConstant.OPTIMIZER_STATS_FAILED_FINAL, "outcome", "timeout") - .count()) - .isEqualTo(1.0); + private static Throwable unwrap(Throwable t) { + Throwable cur = t; + while (cur.getCause() != null && cur.getCause() != cur) { + cur = cur.getCause(); + } + return cur; } - // ---- helpers ---- + private static CurrentSnapshotInfo snapshot() { + return CurrentSnapshotInfo.builder().snapshotId(SNAPSHOT_ID).summary(summary()).build(); + } private static Map summary() { Map s = new HashMap<>(); @@ -224,7 +193,7 @@ private static Map summary() { private static TableDto.TableDtoBuilder optedInBuilder() { Map props = new HashMap<>(); - props.put(OptimizerStatsClient.OPT_IN_PROPERTY, "true"); + props.put(OptimizerStatsPostCommitOperation.OPT_IN_PROPERTY, "true"); return TableDto.builder() .tableUUID(TABLE_UUID) .databaseId(DB) diff --git a/services/tables/src/test/java/com/linkedin/openhouse/tables/services/postcommit/PostCommitDispatcherTest.java b/services/tables/src/test/java/com/linkedin/openhouse/tables/services/postcommit/PostCommitDispatcherTest.java new file mode 100644 index 000000000..14b6f2dde --- /dev/null +++ b/services/tables/src/test/java/com/linkedin/openhouse/tables/services/postcommit/PostCommitDispatcherTest.java @@ -0,0 +1,177 @@ +package com.linkedin.openhouse.tables.services.postcommit; + +import static org.assertj.core.api.Assertions.assertThat; + +import com.linkedin.openhouse.common.metrics.MetricsConstant; +import com.linkedin.openhouse.tables.model.TableDto; +import io.micrometer.core.instrument.MeterRegistry; +import io.micrometer.core.instrument.simple.SimpleMeterRegistry; +import java.time.Duration; +import java.util.Arrays; +import java.util.Optional; +import java.util.concurrent.atomic.AtomicInteger; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import reactor.core.publisher.Mono; + +class PostCommitDispatcherTest { + + private static final Duration BLOCK_MAX = Duration.ofSeconds(5); + + private MeterRegistry meterRegistry; + private PostCommitProperties properties; + private final TableDto savedDto = TableDto.builder().databaseId("db").tableId("t").build(); + + @BeforeEach + void setUp() { + meterRegistry = new SimpleMeterRegistry(); + properties = new PostCommitProperties(); + properties.setEnabled(true); + properties.setPerOpTimeoutMs(500L); + } + + private PostCommitDispatcher dispatcherWith(PostCommitOperation... ops) { + return new PostCommitDispatcher(Arrays.asList(ops), properties, meterRegistry); + } + + @Test + void dispatch_invokesEveryRegisteredOperationsPrepare() { + AtomicInteger aCount = new AtomicInteger(); + AtomicInteger bCount = new AtomicInteger(); + dispatcherWith(new EmptyOp("a", aCount), new EmptyOp("b", bCount)).dispatch(savedDto); + + assertThat(aCount.get()).isEqualTo(1); + assertThat(bCount.get()).isEqualTo(1); + } + + @Test + void decorate_emptyPrepare_incrementsSkippedAndReturnsEmpty() { + PostCommitOperation op = new EmptyOp("opx", new AtomicInteger()); + + Optional> decorated = dispatcherWith(op).decorate(op, savedDto); + + assertThat(decorated).isEmpty(); + assertThat(meterRegistry.counter(MetricsConstant.POSTCOMMIT_OP_SKIPPED, "op", "opx").count()) + .isEqualTo(1.0); + } + + @Test + void decorate_successfulWork_recordsDurationWithSuccessOutcome() { + PostCommitOperation op = new SimpleOp("ok", Mono::empty); + + dispatcherWith(op).decorate(op, savedDto).orElseThrow(AssertionError::new).block(BLOCK_MAX); + + assertThat( + meterRegistry + .timer(MetricsConstant.POSTCOMMIT_OP_DURATION, "op", "ok", "outcome", "success") + .count()) + .isEqualTo(1L); + } + + @Test + void decorate_workSignalsError_incrementsFailedWithClassifiedOutcomeAndSwallowsError() { + RuntimeException boom = new RuntimeException("boom"); + PostCommitOperation op = new SimpleOp("broke", () -> Mono.error(boom)); + + // Dispatcher swallows the error — block() returns normally. + dispatcherWith(op).decorate(op, savedDto).orElseThrow(AssertionError::new).block(BLOCK_MAX); + + assertThat( + meterRegistry + .counter( + MetricsConstant.POSTCOMMIT_OP_FAILED, "op", "broke", "outcome", "unknown_error") + .count()) + .isEqualTo(1.0); + } + + @Test + void decorate_prepareThrowsSynchronously_incrementsFailedAndDispatchContinuesToLaterOps() { + AtomicInteger laterCount = new AtomicInteger(); + PostCommitOperation thrower = + new PostCommitOperation() { + @Override + public String name() { + return "thrower"; + } + + @Override + public Optional> prepare(TableDto dto) { + throw new IllegalStateException("nope"); + } + }; + PostCommitOperation later = new EmptyOp("later", laterCount); + + dispatcherWith(thrower, later).dispatch(savedDto); + + assertThat( + meterRegistry + .counter( + MetricsConstant.POSTCOMMIT_OP_FAILED, + "op", + "thrower", + "outcome", + "prepare_threw") + .count()) + .isEqualTo(1.0); + assertThat(laterCount.get()) + .as("later operations must still run after a synchronous prepare() throw") + .isEqualTo(1); + } + + @Test + void decorate_workExceedsPerOpTimeout_incrementsFailedWithTimeoutOutcome() { + properties.setPerOpTimeoutMs(50L); + PostCommitOperation op = new SimpleOp("slow", Mono::never); + + dispatcherWith(op).decorate(op, savedDto).orElseThrow(AssertionError::new).block(BLOCK_MAX); + + assertThat( + meterRegistry + .counter(MetricsConstant.POSTCOMMIT_OP_FAILED, "op", "slow", "outcome", "timeout") + .count()) + .isEqualTo(1.0); + } + + // ---- helpers ---- + + private static class EmptyOp implements PostCommitOperation { + private final String name; + private final AtomicInteger calls; + + EmptyOp(String name, AtomicInteger calls) { + this.name = name; + this.calls = calls; + } + + @Override + public String name() { + return name; + } + + @Override + public Optional> prepare(TableDto dto) { + calls.incrementAndGet(); + return Optional.empty(); + } + } + + private static class SimpleOp implements PostCommitOperation { + private final String name; + private final java.util.function.Supplier> work; + + SimpleOp(String name, java.util.function.Supplier> work) { + this.name = name; + this.work = work; + } + + @Override + public String name() { + return name; + } + + @Override + public Optional> prepare(TableDto dto) { + return Optional.of(work.get()); + } + } +} From 119c609f9cd36a07bdcd8a2be2992d03e3e332dc Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Wed, 27 May 2026 14:49:25 -0700 Subject: [PATCH 11/14] docs: collapse

/

    // noise in postcommit javadoc Per review feedback that the HTML tags made the diff ugly. Drop paragraph breaks where the comment can be one sentence/paragraph, drop bullet lists in favor of inline prose, drop / emphasis entirely. No behavior change. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../tables/model/CurrentSnapshotInfo.java | 12 +++---- .../openhouse/tables/model/TableDto.java | 3 +- .../optimizer/OptimizerStatsConfig.java | 17 +++++----- .../OptimizerStatsPostCommitOperation.java | 20 ++++------- .../optimizer/OptimizerStatsProperties.java | 7 ++-- .../optimizer/OptimizerStatsRequest.java | 7 ++-- .../postcommit/PostCommitDispatcher.java | 33 ++++++------------- .../postcommit/PostCommitOperation.java | 28 ++++------------ .../postcommit/PostCommitProperties.java | 19 ++++------- 9 files changed, 50 insertions(+), 96 deletions(-) diff --git a/services/tables/src/main/java/com/linkedin/openhouse/tables/model/CurrentSnapshotInfo.java b/services/tables/src/main/java/com/linkedin/openhouse/tables/model/CurrentSnapshotInfo.java index 9f363b4ec..74644ac3f 100644 --- a/services/tables/src/main/java/com/linkedin/openhouse/tables/model/CurrentSnapshotInfo.java +++ b/services/tables/src/main/java/com/linkedin/openhouse/tables/model/CurrentSnapshotInfo.java @@ -7,13 +7,11 @@ import lombok.Getter; /** - * In-memory snapshot of the Iceberg current-snapshot metadata that was loaded alongside a {@link - * TableDto}. Carries only fields already materialized by the catalog client — never triggers a - * separate HDFS / object-store read. - * - *

    Present whenever the table has at least one committed snapshot at construction time; absent - * (modeled as {@link java.util.Optional#empty()} on {@link TableDto}) when the table has no - * committed data — e.g. a {@code CREATE TABLE} with no rows yet. + * In-memory snapshot of the Iceberg current-snapshot metadata loaded alongside a {@link TableDto}. + * Carries only fields already materialized by the catalog client — never triggers a separate HDFS + * or object-store read. Present whenever the table has at least one committed snapshot at + * construction time; absent (modeled as {@link java.util.Optional#empty()} on {@link TableDto}) for + * tables with no committed data, e.g. a {@code CREATE TABLE} with no rows yet. */ @Getter @Builder diff --git a/services/tables/src/main/java/com/linkedin/openhouse/tables/model/TableDto.java b/services/tables/src/main/java/com/linkedin/openhouse/tables/model/TableDto.java index 16608f286..bd44653e7 100644 --- a/services/tables/src/main/java/com/linkedin/openhouse/tables/model/TableDto.java +++ b/services/tables/src/main/java/com/linkedin/openhouse/tables/model/TableDto.java @@ -89,8 +89,7 @@ public class TableDto { * In-memory current-snapshot metadata captured when this {@code TableDto} was built from an * Iceberg {@code Table}. Present whenever the underlying table has at least one committed * snapshot at that point; absent for tables with no committed data (e.g. {@code CREATE TABLE} - * with no rows). Not persisted, not part of equality. Stored nullable internally; consumers must - * read through {@link #getCurrentSnapshot()} to get the {@link Optional}. + * with no rows). Not persisted, not part of equality. Read through {@link #getCurrentSnapshot()}. */ @Getter(AccessLevel.NONE) @Transient diff --git a/services/tables/src/main/java/com/linkedin/openhouse/tables/services/optimizer/OptimizerStatsConfig.java b/services/tables/src/main/java/com/linkedin/openhouse/tables/services/optimizer/OptimizerStatsConfig.java index 8061fb644..351ac604d 100644 --- a/services/tables/src/main/java/com/linkedin/openhouse/tables/services/optimizer/OptimizerStatsConfig.java +++ b/services/tables/src/main/java/com/linkedin/openhouse/tables/services/optimizer/OptimizerStatsConfig.java @@ -9,9 +9,9 @@ import reactor.netty.http.client.HttpClient; /** - * Wiring for the post-commit Tables → Optimizer stats push. Only active when {@code - * optimizer.stats.enabled=true}; otherwise no WebClient or client bean is constructed and the - * on-commit hook in {@code IcebergSnapshotsServiceImpl} is a no-op. + * Wiring for the post-commit Tables → Optimizer stats push. Active only when {@code + * optimizer.stats.enabled=true}; otherwise no WebClient bean is constructed and the dispatcher sees + * no optimizer-stats operation. */ @Configuration @EnableConfigurationProperties(OptimizerStatsProperties.class) @@ -19,12 +19,11 @@ public class OptimizerStatsConfig { /** - * Dedicated WebClient for the optimizer stats endpoint. Per-attempt and outer timeouts are - * applied at the call site on the Reactor chain (per-attempt in {@link - * OptimizerStatsPostCommitOperation}, outer per-op in the {@code PostCommitDispatcher}) — they - * are not configured on the underlying Netty client so that the timeout always emerges as a - * standard {@link java.util.concurrent.TimeoutException} (not a Netty {@code - * ReadTimeoutException}), which keeps the dispatcher's outcome classification simple. + * Dedicated WebClient for the optimizer stats endpoint. Per-attempt timeout is applied on the + * Reactor chain in {@link OptimizerStatsPostCommitOperation} and the outer per-op timeout in + * {@code PostCommitDispatcher}; neither is configured on the Netty client so the timeout always + * emerges as a standard {@link java.util.concurrent.TimeoutException} rather than a Netty {@code + * ReadTimeoutException}, keeping the dispatcher's outcome classification simple. */ @Bean("optimizerStatsWebClient") public WebClient optimizerStatsWebClient(OptimizerStatsProperties properties) { diff --git a/services/tables/src/main/java/com/linkedin/openhouse/tables/services/optimizer/OptimizerStatsPostCommitOperation.java b/services/tables/src/main/java/com/linkedin/openhouse/tables/services/optimizer/OptimizerStatsPostCommitOperation.java index 0a82d8598..eb858dece 100644 --- a/services/tables/src/main/java/com/linkedin/openhouse/tables/services/optimizer/OptimizerStatsPostCommitOperation.java +++ b/services/tables/src/main/java/com/linkedin/openhouse/tables/services/optimizer/OptimizerStatsPostCommitOperation.java @@ -22,19 +22,13 @@ * {@link PostCommitOperation} that PUTs a snapshot-stats record to the optimizer's per-table stats * endpoint. {@link #prepare(TableDto)} returns a {@link Mono} that completes on HTTP 2xx and * signals an error otherwise; the dispatcher owns timeout, subscription, error swallowing, and - * metric emission. - * - *

    Skipped (operation returns {@link Optional#empty()}) when the table is not opted in via the - * {@link #OPT_IN_PROPERTY} table property, or when no current snapshot is present (e.g. {@code - * CREATE TABLE} with no rows yet). - * - *

    Internal retry: bounded by {@link OptimizerStatsProperties#getMaxAttempts()}, fires only on - * retryable errors (network, {@link TimeoutException}, HTTP 408 / 429 / 5xx). The dispatcher's - * outer per-op timeout is the hard ceiling on the whole chain. - * - *

    Bean is only wired when {@code optimizer.stats.enabled=true}. The path constant is - * intentionally duplicated from {@code TableStatsController.TABLE_PATH_TEMPLATE}; keep in sync. - * Tables service does not take a compile-time dependency on the optimizer service jar. + * metric emission. Returns {@link Optional#empty()} when the table is not opted in via {@link + * #OPT_IN_PROPERTY} or has no current snapshot. Internal retry is bounded by {@link + * OptimizerStatsProperties#getMaxAttempts()} and fires only on retryable errors (network, {@link + * TimeoutException}, HTTP 408 / 429 / 5xx); the dispatcher's per-op timeout is the hard ceiling. + * Bean wired only when {@code optimizer.stats.enabled=true}. Path constant is intentionally + * duplicated from {@code TableStatsController.TABLE_PATH_TEMPLATE} (keep in sync) so the tables + * service does not take a compile-time dependency on the optimizer jar. */ @Slf4j @Component diff --git a/services/tables/src/main/java/com/linkedin/openhouse/tables/services/optimizer/OptimizerStatsProperties.java b/services/tables/src/main/java/com/linkedin/openhouse/tables/services/optimizer/OptimizerStatsProperties.java index 53b1844be..ca2b78a2d 100644 --- a/services/tables/src/main/java/com/linkedin/openhouse/tables/services/optimizer/OptimizerStatsProperties.java +++ b/services/tables/src/main/java/com/linkedin/openhouse/tables/services/optimizer/OptimizerStatsProperties.java @@ -6,10 +6,9 @@ /** * Configuration for the post-commit stats push from the Tables Service to the Optimizer's stats - * endpoint. Property prefix: {@code optimizer.stats}. - * - *

    When {@code enabled} is false (the default), no client bean is constructed and the push is a - * no-op on every commit. Environments that want the feed turn it on and set {@link #baseUri}. + * endpoint. Property prefix {@code optimizer.stats}. When {@code enabled} is false (the default), + * no client bean is constructed and the operation is absent from the dispatcher. Environments that + * want the feed turn it on and set {@link #baseUri}. */ @ConfigurationProperties("optimizer.stats") @Getter diff --git a/services/tables/src/main/java/com/linkedin/openhouse/tables/services/optimizer/OptimizerStatsRequest.java b/services/tables/src/main/java/com/linkedin/openhouse/tables/services/optimizer/OptimizerStatsRequest.java index df929f666..2496436d3 100644 --- a/services/tables/src/main/java/com/linkedin/openhouse/tables/services/optimizer/OptimizerStatsRequest.java +++ b/services/tables/src/main/java/com/linkedin/openhouse/tables/services/optimizer/OptimizerStatsRequest.java @@ -9,11 +9,8 @@ /** * Wire body for {@code PUT /v1/optimizer/stats/{tableUuid}}. Mirrors the optimizer's {@code - * UpsertTableStatsRequest} field-for-field — see {@code - * services/optimizer/src/main/java/com/linkedin/openhouse/optimizer/api/spec/UpsertTableStatsRequest.java}. - * - *

    Tables service owns its own copy so that the wire contract is explicit at the call site and - * the optimizer client jar is not a compile-time dependency. + * UpsertTableStatsRequest} field-for-field. Duplicated here so the tables service does not take a + * compile-time dependency on the optimizer jar; keep in sync with that type. */ @Data @Builder diff --git a/services/tables/src/main/java/com/linkedin/openhouse/tables/services/postcommit/PostCommitDispatcher.java b/services/tables/src/main/java/com/linkedin/openhouse/tables/services/postcommit/PostCommitDispatcher.java index d205aee12..44c8a166b 100644 --- a/services/tables/src/main/java/com/linkedin/openhouse/tables/services/postcommit/PostCommitDispatcher.java +++ b/services/tables/src/main/java/com/linkedin/openhouse/tables/services/postcommit/PostCommitDispatcher.java @@ -17,23 +17,12 @@ import reactor.core.publisher.Mono; /** - * Runs {@link PostCommitOperation}s after a successful Iceberg commit. Best-effort and bounded: - * each operation gets a wall-clock timeout, errors are swallowed after metric/log, and dispatch - * itself is fire-and-forget so the commit thread is never blocked on operation work. - * - *

    Why async. A synchronous post-commit hook converts a downstream outage (the optimizer - * being slow or unavailable, a network glitch) into a Tables-Service write outage — the post-commit - * push is a best-effort scheduling signal, not a write-correctness step, and its blast radius must - * not include the write path. The crash-loss window (a JVM dying after commit and before the HTTP - * push completes) is acceptable because operations are designed to be cumulative: the next commit - * carries the same state forward and the consumer self-corrects from missing data. - * - *

    Why the dispatcher owns timeouts. Operations only describe payload + endpoint. The - * timeout/swallow/subscribe machinery lives here once so individual operations stay small and so - * the contract across operations is uniform (one knob: {@code - * tables.postcommit.per-op-timeout-ms}). - * - *

    Bean is only wired when {@code tables.postcommit.enabled=true}. + * Runs {@link PostCommitOperation}s after a successful Iceberg commit. Each operation gets a + * wall-clock timeout ({@code tables.postcommit.per-op-timeout-ms}), errors are swallowed after + * metric/log, and dispatch is fire-and-forget so the commit thread is never blocked. Operations + * describe payload and endpoint only — the timeout/swallow/subscribe machinery lives here so the + * contract across operations is uniform. Bean wired only when {@code + * tables.postcommit.enabled=true}. */ @Slf4j @Component @@ -65,12 +54,10 @@ public void dispatch(TableDto savedDto) { } /** - * Returns the fully-decorated {@link Mono} for {@code op} (per-op timeout, success / error metric - * emission, error swallow) without subscribing. Emits the {@code skipped} or {@code - * prepare_threw} metric synchronously and returns {@link Optional#empty()} in those cases. - * - *

    Package-private so tests can {@code .block()} on the decorated chain rather than polling for - * metric emission after a fire-and-forget {@code subscribe()}. + * Returns the fully-decorated {@link Mono} for {@code op} (per-op timeout, success/error metric + * emission, error swallow) without subscribing. Emits {@code skipped} or {@code prepare_threw} + * synchronously and returns {@link Optional#empty()} in those cases. Package-private so tests can + * {@code .block()} on the chain rather than poll for metric emission. */ Optional> decorate(PostCommitOperation op, TableDto savedDto) { Optional> work; diff --git a/services/tables/src/main/java/com/linkedin/openhouse/tables/services/postcommit/PostCommitOperation.java b/services/tables/src/main/java/com/linkedin/openhouse/tables/services/postcommit/PostCommitOperation.java index 5ba49b371..b94804b34 100644 --- a/services/tables/src/main/java/com/linkedin/openhouse/tables/services/postcommit/PostCommitOperation.java +++ b/services/tables/src/main/java/com/linkedin/openhouse/tables/services/postcommit/PostCommitOperation.java @@ -6,24 +6,10 @@ /** * A best-effort, bounded, asynchronous action run by the Tables Service after a successful Iceberg - * commit. Operations are invoked by {@link PostCommitDispatcher} and are subject to a single per-op - * wall-clock timeout owned by the dispatcher. Operations never affect commit correctness: any error - * they signal is recorded as a metric and a log line, then swallowed. - * - *

    Implementations describe what to do; the dispatcher owns how it runs - * (subscription, timeout, error handling, metric emission). Each operation contributes: - * - *

      - *
    • {@link #name()} — short identifier used as a metric tag. - *
    • {@link #prepare(TableDto)} — returns the work to perform, or {@link Optional#empty()} when - * the operation does not apply to this commit (e.g. table not opted in, no committed - * snapshot). Returning empty is a normal outcome; the dispatcher emits a {@code skipped} - * metric and moves on. - *
    - * - *

    The returned {@link Mono} must not subscribe itself, apply its own outer timeout, or swallow - * errors — those concerns belong to the dispatcher. Implementations may apply internal retries with - * bounded budgets. + * commit. Operations are invoked by {@link PostCommitDispatcher}, which owns the per-op timeout, + * subscription, error swallowing, and metric emission. An error from prepare() is recorded and + * dropped — it never affects commit correctness. Implementations may apply internal retries with + * bounded budgets but must not subscribe themselves or apply an outer timeout. */ public interface PostCommitOperation { @@ -31,9 +17,9 @@ public interface PostCommitOperation { String name(); /** - * Build the work to run for {@code savedDto}, or return {@link Optional#empty()} if this - * operation does not apply. The dispatcher only invokes the returned {@link Mono} after applying - * its own timeout / error-swallow plumbing. + * Build the work to run for {@code savedDto}, or {@link Optional#empty()} when the operation does + * not apply (e.g. table not opted in, no committed snapshot). The dispatcher records empty as a + * {@code skipped} metric. */ Optional> prepare(TableDto savedDto); } diff --git a/services/tables/src/main/java/com/linkedin/openhouse/tables/services/postcommit/PostCommitProperties.java b/services/tables/src/main/java/com/linkedin/openhouse/tables/services/postcommit/PostCommitProperties.java index 8a153cd79..bc757d37f 100644 --- a/services/tables/src/main/java/com/linkedin/openhouse/tables/services/postcommit/PostCommitProperties.java +++ b/services/tables/src/main/java/com/linkedin/openhouse/tables/services/postcommit/PostCommitProperties.java @@ -5,27 +5,22 @@ import org.springframework.boot.context.properties.ConfigurationProperties; /** - * Configuration for the Tables Service post-commit operation framework. Property prefix: {@code - * tables.postcommit}. - * - *

    When {@code enabled} is {@code false} (the default), no dispatcher bean is constructed and - * registered {@link PostCommitOperation} beans, if any, are never invoked. Per-OH-instance roll-out - * flips a single flag. + * Configuration for the Tables Service post-commit operation framework. Property prefix {@code + * tables.postcommit}. When {@code enabled} is false (the default), no dispatcher bean is + * constructed and registered {@link PostCommitOperation} beans are never invoked. */ @ConfigurationProperties("tables.postcommit") @Getter @Setter public class PostCommitProperties { - /** Master switch. When {@code false}, no operations are dispatched on commit. */ + /** Master switch. When false, no operations are dispatched on commit. */ private boolean enabled = false; /** - * Wall-clock ceiling applied by the dispatcher to each operation's {@link - * PostCommitOperation#prepare(com.linkedin.openhouse.tables.model.TableDto)} {@code Mono}. Bounds - * how long a misbehaving operation can keep resources (connections, threads) tied up. Does not - * bound commit-thread latency — operations run on the executing reactive scheduler, not on the - * commit thread. Default 3000 ms. + * Wall-clock ceiling applied by the dispatcher to each operation's prepared {@code Mono}. Bounds + * resource occupancy (connections, threads) but does not block the commit thread. Default 3000 + * ms. */ private long perOpTimeoutMs = 3000L; } From 1c7fbd79cdf28c511ac90d8122f7a78ba3241725 Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Wed, 27 May 2026 14:57:49 -0700 Subject: [PATCH 12/14] docs: switch to // comments with line breaks, drop block javadoc Spotless's googlejavaformat rewrites multi-paragraph block javadoc to re-insert

    tags. // comments are left untouched. Per review feedback that the source should read like prose with paragraph breaks, not like a webpage, swap all multi-paragraph descriptions on the new files to // blocks. Single-line descriptors that were javadoc become a single // line above the declaration. Complete sentences throughout. No behavior change. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../tables/model/CurrentSnapshotInfo.java | 25 +++++---- .../openhouse/tables/model/TableDto.java | 15 +++--- .../optimizer/OptimizerStatsConfig.java | 25 ++++----- .../OptimizerStatsPostCommitOperation.java | 53 ++++++++++--------- .../optimizer/OptimizerStatsProperties.java | 36 ++++++------- .../optimizer/OptimizerStatsRequest.java | 26 ++++----- .../postcommit/PostCommitDispatcher.java | 46 ++++++++-------- .../postcommit/PostCommitOperation.java | 27 +++++----- .../postcommit/PostCommitProperties.java | 23 ++++---- 9 files changed, 136 insertions(+), 140 deletions(-) diff --git a/services/tables/src/main/java/com/linkedin/openhouse/tables/model/CurrentSnapshotInfo.java b/services/tables/src/main/java/com/linkedin/openhouse/tables/model/CurrentSnapshotInfo.java index 74644ac3f..90437d418 100644 --- a/services/tables/src/main/java/com/linkedin/openhouse/tables/model/CurrentSnapshotInfo.java +++ b/services/tables/src/main/java/com/linkedin/openhouse/tables/model/CurrentSnapshotInfo.java @@ -6,26 +6,25 @@ import lombok.EqualsAndHashCode; import lombok.Getter; -/** - * In-memory snapshot of the Iceberg current-snapshot metadata loaded alongside a {@link TableDto}. - * Carries only fields already materialized by the catalog client — never triggers a separate HDFS - * or object-store read. Present whenever the table has at least one committed snapshot at - * construction time; absent (modeled as {@link java.util.Optional#empty()} on {@link TableDto}) for - * tables with no committed data, e.g. a {@code CREATE TABLE} with no rows yet. - */ +// In-memory snapshot of the Iceberg current-snapshot metadata that was loaded alongside a +// TableDto. +// +// This carries only fields already materialized by the catalog client. Constructing it never +// triggers a separate HDFS or object-store read. +// +// The value is present whenever the underlying table has at least one committed snapshot at +// construction time. It is absent (modeled as Optional.empty() on TableDto) for tables with no +// committed data, such as a CREATE TABLE with no rows yet. @Getter @Builder @AllArgsConstructor @EqualsAndHashCode public class CurrentSnapshotInfo { - /** Iceberg snapshot ID (decimal long). Stable per commit; usable as an idempotency token. */ + // Iceberg snapshot ID. Stable per commit; usable as an idempotency token. private final long snapshotId; - /** - * Iceberg {@code Snapshot.summary()} map, unmodified. Keys include {@code total-data-files}, - * {@code total-files-size}, {@code added-data-files}, {@code deleted-data-files}, {@code - * added-files-size}, {@code removed-files-size}. - */ + // Iceberg Snapshot.summary() map, unmodified. Keys include total-data-files, total-files-size, + // added-data-files, deleted-data-files, added-files-size, removed-files-size. private final Map summary; } diff --git a/services/tables/src/main/java/com/linkedin/openhouse/tables/model/TableDto.java b/services/tables/src/main/java/com/linkedin/openhouse/tables/model/TableDto.java index bd44653e7..59dc2e80f 100644 --- a/services/tables/src/main/java/com/linkedin/openhouse/tables/model/TableDto.java +++ b/services/tables/src/main/java/com/linkedin/openhouse/tables/model/TableDto.java @@ -85,18 +85,19 @@ public class TableDto { private boolean replaceCommit; - /** - * In-memory current-snapshot metadata captured when this {@code TableDto} was built from an - * Iceberg {@code Table}. Present whenever the underlying table has at least one committed - * snapshot at that point; absent for tables with no committed data (e.g. {@code CREATE TABLE} - * with no rows). Not persisted, not part of equality. Read through {@link #getCurrentSnapshot()}. - */ + // In-memory current-snapshot metadata captured when this TableDto was built from an Iceberg + // Table. + // + // Present whenever the underlying table has at least one committed snapshot at that point. + // Absent for tables with no committed data, such as a CREATE TABLE with no rows yet. + // + // Not persisted and not part of equality. Read through getCurrentSnapshot(). @Getter(AccessLevel.NONE) @Transient @EqualsAndHashCode.Exclude private CurrentSnapshotInfo currentSnapshot; - /** Returns the current-snapshot metadata if any, else {@link Optional#empty()}. */ + // Returns the current-snapshot metadata if any, else Optional.empty(). public Optional getCurrentSnapshot() { return Optional.ofNullable(currentSnapshot); } diff --git a/services/tables/src/main/java/com/linkedin/openhouse/tables/services/optimizer/OptimizerStatsConfig.java b/services/tables/src/main/java/com/linkedin/openhouse/tables/services/optimizer/OptimizerStatsConfig.java index 351ac604d..47613f98c 100644 --- a/services/tables/src/main/java/com/linkedin/openhouse/tables/services/optimizer/OptimizerStatsConfig.java +++ b/services/tables/src/main/java/com/linkedin/openhouse/tables/services/optimizer/OptimizerStatsConfig.java @@ -8,23 +8,24 @@ import org.springframework.web.reactive.function.client.WebClient; import reactor.netty.http.client.HttpClient; -/** - * Wiring for the post-commit Tables → Optimizer stats push. Active only when {@code - * optimizer.stats.enabled=true}; otherwise no WebClient bean is constructed and the dispatcher sees - * no optimizer-stats operation. - */ +// Wiring for the post-commit Tables-to-Optimizer stats push. +// +// This configuration is active only when optimizer.stats.enabled=true. When the flag is false, +// no WebClient bean is constructed, and the dispatcher sees no optimizer-stats operation. @Configuration @EnableConfigurationProperties(OptimizerStatsProperties.class) @ConditionalOnProperty(prefix = "optimizer.stats", name = "enabled", havingValue = "true") public class OptimizerStatsConfig { - /** - * Dedicated WebClient for the optimizer stats endpoint. Per-attempt timeout is applied on the - * Reactor chain in {@link OptimizerStatsPostCommitOperation} and the outer per-op timeout in - * {@code PostCommitDispatcher}; neither is configured on the Netty client so the timeout always - * emerges as a standard {@link java.util.concurrent.TimeoutException} rather than a Netty {@code - * ReadTimeoutException}, keeping the dispatcher's outcome classification simple. - */ + // Dedicated WebClient for the optimizer stats endpoint. + // + // The per-attempt timeout is applied on the Reactor chain in OptimizerStatsPostCommitOperation, + // and the outer per-op timeout is applied by PostCommitDispatcher. Neither timeout is + // configured on the underlying Netty client. + // + // This arrangement ensures that any timeout always emerges as a standard + // java.util.concurrent.TimeoutException rather than a Netty ReadTimeoutException, which keeps + // the dispatcher's outcome classification simple. @Bean("optimizerStatsWebClient") public WebClient optimizerStatsWebClient(OptimizerStatsProperties properties) { return WebClient.builder() diff --git a/services/tables/src/main/java/com/linkedin/openhouse/tables/services/optimizer/OptimizerStatsPostCommitOperation.java b/services/tables/src/main/java/com/linkedin/openhouse/tables/services/optimizer/OptimizerStatsPostCommitOperation.java index eb858dece..756da2d10 100644 --- a/services/tables/src/main/java/com/linkedin/openhouse/tables/services/optimizer/OptimizerStatsPostCommitOperation.java +++ b/services/tables/src/main/java/com/linkedin/openhouse/tables/services/optimizer/OptimizerStatsPostCommitOperation.java @@ -18,38 +18,38 @@ import reactor.util.retry.Retry; import reactor.util.retry.RetrySpec; -/** - * {@link PostCommitOperation} that PUTs a snapshot-stats record to the optimizer's per-table stats - * endpoint. {@link #prepare(TableDto)} returns a {@link Mono} that completes on HTTP 2xx and - * signals an error otherwise; the dispatcher owns timeout, subscription, error swallowing, and - * metric emission. Returns {@link Optional#empty()} when the table is not opted in via {@link - * #OPT_IN_PROPERTY} or has no current snapshot. Internal retry is bounded by {@link - * OptimizerStatsProperties#getMaxAttempts()} and fires only on retryable errors (network, {@link - * TimeoutException}, HTTP 408 / 429 / 5xx); the dispatcher's per-op timeout is the hard ceiling. - * Bean wired only when {@code optimizer.stats.enabled=true}. Path constant is intentionally - * duplicated from {@code TableStatsController.TABLE_PATH_TEMPLATE} (keep in sync) so the tables - * service does not take a compile-time dependency on the optimizer jar. - */ +// A PostCommitOperation that PUTs a snapshot-stats record to the optimizer's per-table stats +// endpoint. +// +// prepare() returns a Mono that completes on HTTP 2xx and signals an error otherwise. The +// dispatcher owns the timeout, the subscription, error swallowing, and metric emission. The +// returned value is Optional.empty() when the table is not opted in via OPT_IN_PROPERTY or has +// no current snapshot. +// +// Internal retry is bounded by OptimizerStatsProperties.maxAttempts and fires only on retryable +// errors: network failures, a TimeoutException, or HTTP 408 / 429 / 5xx responses. The +// dispatcher's per-op timeout is the hard ceiling on the whole chain. +// +// The bean is wired only when optimizer.stats.enabled=true. PATH_TEMPLATE is intentionally +// duplicated from TableStatsController.TABLE_PATH_TEMPLATE so the tables service does not take a +// compile-time dependency on the optimizer jar. Keep both copies in sync. @Slf4j @Component @ConditionalOnProperty(prefix = "optimizer.stats", name = "enabled", havingValue = "true") public class OptimizerStatsPostCommitOperation implements PostCommitOperation { - /** Metric tag value for {@code op}. */ + // Metric tag value for the "op" tag. static final String OP_NAME = "optimizer_stats"; - /** - * Per-call URL path. Intentionally duplicated from {@code - * TableStatsController.TABLE_PATH_TEMPLATE}; keep in sync. - */ + // Per-call URL path. Intentionally duplicated from TableStatsController.TABLE_PATH_TEMPLATE so + // that we avoid a compile-time dependency on the optimizer jar. Keep both copies in sync. static final String PATH_TEMPLATE = "/v1/optimizer/stats/{tableUuid}"; - /** Table-property key that opts a table in for the post-commit stats push. */ + // Table-property key that opts a table in for the post-commit stats push. static final String OPT_IN_PROPERTY = "maintenance.optimizer.stats.enabled"; - /** Iceberg snapshot-summary keys we read. All values are decimal-string longs. */ + // Iceberg snapshot-summary keys we read. All values are decimal-string longs. static final String SUMMARY_TOTAL_DATA_FILES = "total-data-files"; - static final String SUMMARY_TOTAL_FILES_SIZE = "total-files-size"; static final String SUMMARY_ADDED_DATA_FILES = "added-data-files"; static final String SUMMARY_DELETED_DATA_FILES = "deleted-data-files"; @@ -100,13 +100,14 @@ public Optional> prepare(TableDto savedDto) { return Optional.of(chain); } - /** {@code true} iff the table's properties contain the literal opt-in value {@code "true"}. */ + // Returns true when the table's properties contain the literal opt-in value "true" for the + // configured OPT_IN_PROPERTY. private boolean isOptedIn(TableDto saved) { Map props = saved.getTableProperties(); return props != null && "true".equals(props.get(OPT_IN_PROPERTY)); } - /** Build the wire body. Missing summary keys default to 0L. */ + // Builds the wire body. Missing summary keys default to 0L. OptimizerStatsRequest buildRequest(TableDto saved, CurrentSnapshotInfo snapshot) { Map summary = snapshot.getSummary(); OptimizerStatsRequest.Snapshot snapshotPayload = @@ -132,10 +133,10 @@ OptimizerStatsRequest buildRequest(TableDto saved, CurrentSnapshotInfo snapshot) .build(); } - /** - * Retryable errors: per-attempt timeout, network-level failures, 5xx, 408, 429. Other 4xx are - * client errors — retrying won't fix them. - */ + // Returns true for errors that a later attempt could plausibly succeed against: a per-attempt + // timeout, a network-level failure, or an HTTP 5xx / 408 / 429 response. + // + // Other 4xx responses are client errors that retries cannot fix, so we fail fast on them. boolean isRetryable(Throwable e) { if (e instanceof TimeoutException) { return true; diff --git a/services/tables/src/main/java/com/linkedin/openhouse/tables/services/optimizer/OptimizerStatsProperties.java b/services/tables/src/main/java/com/linkedin/openhouse/tables/services/optimizer/OptimizerStatsProperties.java index ca2b78a2d..877ce3423 100644 --- a/services/tables/src/main/java/com/linkedin/openhouse/tables/services/optimizer/OptimizerStatsProperties.java +++ b/services/tables/src/main/java/com/linkedin/openhouse/tables/services/optimizer/OptimizerStatsProperties.java @@ -4,36 +4,32 @@ import lombok.Setter; import org.springframework.boot.context.properties.ConfigurationProperties; -/** - * Configuration for the post-commit stats push from the Tables Service to the Optimizer's stats - * endpoint. Property prefix {@code optimizer.stats}. When {@code enabled} is false (the default), - * no client bean is constructed and the operation is absent from the dispatcher. Environments that - * want the feed turn it on and set {@link #baseUri}. - */ +// Configuration for the post-commit stats push from the Tables Service to the Optimizer's stats +// endpoint. Property prefix is optimizer.stats. +// +// When enabled is false (the default), no client bean is constructed, and the operation is +// absent from the dispatcher. Environments that want the feed turn it on and set baseUri. @ConfigurationProperties("optimizer.stats") @Getter @Setter public class OptimizerStatsProperties { - /** Master switch. When {@code false}, no HTTP push is attempted and no client bean is wired. */ + // Master switch. When false, no HTTP push is attempted and no client bean is wired. private boolean enabled = false; - /** - * Base URI of the Optimizer service. Path {@code /v1/optimizer/stats/{tableUuid}} is appended at - * call time. No default — required when {@link #enabled} is {@code true}. - */ + // Base URI of the Optimizer service. The path /v1/optimizer/stats/{tableUuid} is appended at + // call time. No default; required when enabled is true. private String baseUri; - /** - * Per-attempt HTTP timeout in milliseconds. Bounds each individual attempt so retries can fit - * inside the dispatcher's outer per-op budget ({@code tables.postcommit.per-op-timeout-ms}, - * default 3000 ms). Default 1000 ms. - */ + // Per-attempt HTTP timeout in milliseconds. + // + // Bounds each individual attempt so retries can fit inside the dispatcher's outer per-op budget + // (tables.postcommit.per-op-timeout-ms, default 3000 ms). Default is 1000 ms. private long perAttemptTimeoutMs = 1000L; - /** - * Total attempt count (initial try plus retries). Default 3. Retries fire only on retryable - * errors (network, timeout, 408/429/5xx); other 4xx fail fast without retry. - */ + // Total attempt count (the initial try plus retries). Default is 3. + // + // Retries fire only on retryable errors: network failures, a timeout, or an HTTP 408 / 429 / + // 5xx response. Other 4xx responses fail fast without retry. private int maxAttempts = 3; } diff --git a/services/tables/src/main/java/com/linkedin/openhouse/tables/services/optimizer/OptimizerStatsRequest.java b/services/tables/src/main/java/com/linkedin/openhouse/tables/services/optimizer/OptimizerStatsRequest.java index 2496436d3..8239184ec 100644 --- a/services/tables/src/main/java/com/linkedin/openhouse/tables/services/optimizer/OptimizerStatsRequest.java +++ b/services/tables/src/main/java/com/linkedin/openhouse/tables/services/optimizer/OptimizerStatsRequest.java @@ -7,11 +7,11 @@ import lombok.Data; import lombok.NoArgsConstructor; -/** - * Wire body for {@code PUT /v1/optimizer/stats/{tableUuid}}. Mirrors the optimizer's {@code - * UpsertTableStatsRequest} field-for-field. Duplicated here so the tables service does not take a - * compile-time dependency on the optimizer jar; keep in sync with that type. - */ +// Wire body for PUT /v1/optimizer/stats/{tableUuid}. +// +// This mirrors the optimizer's UpsertTableStatsRequest field-for-field. The type is duplicated +// here so that the tables service does not take a compile-time dependency on the optimizer jar. +// Keep both copies in sync. @Data @Builder @NoArgsConstructor @@ -34,20 +34,16 @@ public static class Stats { private Delta delta; } - /** - * Point-in-time snapshot metrics. Map to optimizer's {@code - * TableStatsPayload.SnapshotMetricsDto}. - */ + // Point-in-time snapshot metrics. Maps to the optimizer's + // TableStatsPayload.SnapshotMetricsDto. @Data @Builder @NoArgsConstructor @AllArgsConstructor @JsonInclude(JsonInclude.Include.NON_NULL) public static class Snapshot { - /** - * Iceberg snapshot ID. Sent so the optimizer can use it as an idempotency token on upsert and - * reject out-of-order replays — server-side wiring tracked in BDP-102985. - */ + // Iceberg snapshot ID. Sent so the optimizer can use it as an idempotency token on upsert and + // reject out-of-order replays. Server-side wiring is tracked in BDP-102985. private Long snapshotId; private String tableVersion; @@ -56,9 +52,7 @@ public static class Snapshot { private Long numCurrentFiles; } - /** - * Per-commit incremental counters. Map to optimizer's {@code TableStatsPayload.CommitDeltaDto}. - */ + // Per-commit incremental counters. Maps to the optimizer's TableStatsPayload.CommitDeltaDto. @Data @Builder @NoArgsConstructor diff --git a/services/tables/src/main/java/com/linkedin/openhouse/tables/services/postcommit/PostCommitDispatcher.java b/services/tables/src/main/java/com/linkedin/openhouse/tables/services/postcommit/PostCommitDispatcher.java index 44c8a166b..5a04e2df7 100644 --- a/services/tables/src/main/java/com/linkedin/openhouse/tables/services/postcommit/PostCommitDispatcher.java +++ b/services/tables/src/main/java/com/linkedin/openhouse/tables/services/postcommit/PostCommitDispatcher.java @@ -16,14 +16,16 @@ import org.springframework.web.reactive.function.client.WebClientResponseException; import reactor.core.publisher.Mono; -/** - * Runs {@link PostCommitOperation}s after a successful Iceberg commit. Each operation gets a - * wall-clock timeout ({@code tables.postcommit.per-op-timeout-ms}), errors are swallowed after - * metric/log, and dispatch is fire-and-forget so the commit thread is never blocked. Operations - * describe payload and endpoint only — the timeout/swallow/subscribe machinery lives here so the - * contract across operations is uniform. Bean wired only when {@code - * tables.postcommit.enabled=true}. - */ +// Runs PostCommitOperations after a successful Iceberg commit. +// +// Each operation receives a wall-clock timeout from tables.postcommit.per-op-timeout-ms. Any +// error the operation signals is recorded as a metric and a log line, then swallowed. Dispatch +// is fire-and-forget, so the commit thread is never blocked on operation work. +// +// Operations describe payload and endpoint only. The timeout, error swallowing, subscription, +// and metric emission all live here, so the contract across operations stays uniform. +// +// The bean is constructed only when tables.postcommit.enabled=true. @Slf4j @Component @EnableConfigurationProperties(PostCommitProperties.class) @@ -43,22 +45,24 @@ public PostCommitDispatcher( this.meterRegistry = meterRegistry; } - /** - * Dispatch all registered operations for {@code savedDto}. Returns immediately on the calling - * thread; each operation runs on its underlying reactive scheduler. Never throws. - */ + // Dispatches all registered operations for savedDto. + // + // Returns immediately on the calling thread. Each operation runs on its underlying reactive + // scheduler. This method never throws. public void dispatch(TableDto savedDto) { for (PostCommitOperation op : operations) { decorate(op, savedDto).ifPresent(Mono::subscribe); } } - /** - * Returns the fully-decorated {@link Mono} for {@code op} (per-op timeout, success/error metric - * emission, error swallow) without subscribing. Emits {@code skipped} or {@code prepare_threw} - * synchronously and returns {@link Optional#empty()} in those cases. Package-private so tests can - * {@code .block()} on the chain rather than poll for metric emission. - */ + // Returns the fully-decorated Mono for op without subscribing to it. + // + // The decoration applies the per-op timeout, records the success or error metric, and swallows + // any error. When the operation does not apply (or its prepare() throws synchronously), this + // method emits the "skipped" or "prepare_threw" metric and returns Optional.empty(). + // + // Package-private so that tests can .block() on the chain rather than poll for metric emission + // after a fire-and-forget subscription. Optional> decorate(PostCommitOperation op, TableDto savedDto) { Optional> work; try { @@ -109,10 +113,8 @@ Optional> decorate(PostCommitOperation op, TableDto savedDto) { return Optional.of(decorated); } - /** - * Map a terminal error to a small set of outcome tags. Kept here so all operations share the same - * taxonomy. - */ + // Maps a terminal error to a small set of outcome tags. The classifier lives here so that all + // operations share the same taxonomy. private static String classifyOutcome(Throwable e) { if (e instanceof TimeoutException) { return "timeout"; diff --git a/services/tables/src/main/java/com/linkedin/openhouse/tables/services/postcommit/PostCommitOperation.java b/services/tables/src/main/java/com/linkedin/openhouse/tables/services/postcommit/PostCommitOperation.java index b94804b34..6cf99cd7e 100644 --- a/services/tables/src/main/java/com/linkedin/openhouse/tables/services/postcommit/PostCommitOperation.java +++ b/services/tables/src/main/java/com/linkedin/openhouse/tables/services/postcommit/PostCommitOperation.java @@ -4,22 +4,23 @@ import java.util.Optional; import reactor.core.publisher.Mono; -/** - * A best-effort, bounded, asynchronous action run by the Tables Service after a successful Iceberg - * commit. Operations are invoked by {@link PostCommitDispatcher}, which owns the per-op timeout, - * subscription, error swallowing, and metric emission. An error from prepare() is recorded and - * dropped — it never affects commit correctness. Implementations may apply internal retries with - * bounded budgets but must not subscribe themselves or apply an outer timeout. - */ +// A best-effort, bounded, asynchronous action that the Tables Service runs after a successful +// Iceberg commit. +// +// Operations are invoked by PostCommitDispatcher, which owns the per-op timeout, the +// subscription, error swallowing, and metric emission. An error signaled from prepare() is +// recorded and dropped, so it never affects commit correctness. +// +// Implementations describe what to push and where. They may apply internal retries within a +// bounded budget. They must not subscribe themselves or apply an outer timeout, because the +// dispatcher already owns both concerns. public interface PostCommitOperation { - /** Short, stable identifier for this operation. Used as the {@code op} metric tag. */ + // Returns a short, stable identifier for this operation. Used as the "op" metric tag. String name(); - /** - * Build the work to run for {@code savedDto}, or {@link Optional#empty()} when the operation does - * not apply (e.g. table not opted in, no committed snapshot). The dispatcher records empty as a - * {@code skipped} metric. - */ + // Builds the work to run for savedDto, or returns Optional.empty() when the operation does not + // apply to this commit (for example, when the table is not opted in or has no committed + // snapshot). The dispatcher records an empty return as a "skipped" metric. Optional> prepare(TableDto savedDto); } diff --git a/services/tables/src/main/java/com/linkedin/openhouse/tables/services/postcommit/PostCommitProperties.java b/services/tables/src/main/java/com/linkedin/openhouse/tables/services/postcommit/PostCommitProperties.java index bc757d37f..98e8b02eb 100644 --- a/services/tables/src/main/java/com/linkedin/openhouse/tables/services/postcommit/PostCommitProperties.java +++ b/services/tables/src/main/java/com/linkedin/openhouse/tables/services/postcommit/PostCommitProperties.java @@ -4,23 +4,24 @@ import lombok.Setter; import org.springframework.boot.context.properties.ConfigurationProperties; -/** - * Configuration for the Tables Service post-commit operation framework. Property prefix {@code - * tables.postcommit}. When {@code enabled} is false (the default), no dispatcher bean is - * constructed and registered {@link PostCommitOperation} beans are never invoked. - */ +// Configuration for the Tables Service post-commit operation framework. Property prefix is +// tables.postcommit. +// +// When enabled is false (the default), the dispatcher bean is not constructed, and any +// registered PostCommitOperation beans are never invoked. @ConfigurationProperties("tables.postcommit") @Getter @Setter public class PostCommitProperties { - /** Master switch. When false, no operations are dispatched on commit. */ + // Master switch. When false, no operations are dispatched on commit. private boolean enabled = false; - /** - * Wall-clock ceiling applied by the dispatcher to each operation's prepared {@code Mono}. Bounds - * resource occupancy (connections, threads) but does not block the commit thread. Default 3000 - * ms. - */ + // Wall-clock ceiling that the dispatcher applies to each operation's prepared Mono. + // + // This bounds resource occupancy (connections, threads) for a misbehaving operation. It does + // not block the commit thread, because operations run on the underlying reactive scheduler. + // + // Default is 3000 ms. private long perOpTimeoutMs = 3000L; } From ecc65f8ff9bed21165f9f10002138af752bcee7e Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Wed, 27 May 2026 15:43:21 -0700 Subject: [PATCH 13/14] docs: drop internal ticket reference from OptimizerStatsRequest comment Server-side idempotency wiring is a follow-up; the comment now states that without naming an internal tracker. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../tables/services/optimizer/OptimizerStatsRequest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/tables/src/main/java/com/linkedin/openhouse/tables/services/optimizer/OptimizerStatsRequest.java b/services/tables/src/main/java/com/linkedin/openhouse/tables/services/optimizer/OptimizerStatsRequest.java index 8239184ec..2cb2b2873 100644 --- a/services/tables/src/main/java/com/linkedin/openhouse/tables/services/optimizer/OptimizerStatsRequest.java +++ b/services/tables/src/main/java/com/linkedin/openhouse/tables/services/optimizer/OptimizerStatsRequest.java @@ -43,7 +43,7 @@ public static class Stats { @JsonInclude(JsonInclude.Include.NON_NULL) public static class Snapshot { // Iceberg snapshot ID. Sent so the optimizer can use it as an idempotency token on upsert and - // reject out-of-order replays. Server-side wiring is tracked in BDP-102985. + // reject out-of-order replays. private Long snapshotId; private String tableVersion; From f31fea01b6ada5855baba88b449ff85723b2603b Mon Sep 17 00:00:00 2001 From: mkuchenbecker Date: Wed, 27 May 2026 17:53:34 -0700 Subject: [PATCH 14/14] test(optimizer-5): wire end-to-end demo to the new post-commit framework The on-commit stats hook used to be gated by cluster.optimizer.base-uri and pushed a minimal payload (databaseName/tableName/properties only). The merge replaces it with the PostCommitOperation framework, which is opt-in per table and pushes the full snapshot summary + snapshot id. Update the demo to exercise the new path: * docker-compose: set TABLES_POSTCOMMIT_ENABLED=true, OPTIMIZER_STATS_ENABLED=true, OPTIMIZER_STATS_BASE_URI (replaces CLUSTER_OPTIMIZER_BASE_URI, which is no longer read by anything). * CREATE TABLE in demo-setup.sh now sets maintenance.optimizer.stats.enabled=true alongside the OFD flag, so each demo table is opted in to the stats push. * Stats verification is now a bounded poll (30s) because the push is async fire-and-forget. After settling, the check also asserts the new payload shape: a sample table's stats row carries non-zero numCurrentFiles, non-zero tableSizeBytes, and the snapshotId. End result: the demo still passes if and only if every committed table generated a stats row, and the row actually contains snapshot stats (not just identity fields). Co-Authored-By: Claude Opus 4.7 (1M context) --- demo-setup.sh | 40 +++++++++++++++++-- .../oh-hadoop-spark/docker-compose.yml | 5 ++- 2 files changed, 41 insertions(+), 4 deletions(-) diff --git a/demo-setup.sh b/demo-setup.sh index b39116a4d..6fc5491f7 100755 --- a/demo-setup.sh +++ b/demo-setup.sh @@ -69,7 +69,10 @@ for entry in $TABLES; do "$SCRIPT_DIR/local-spark-sql.sh" "DROP TABLE IF EXISTS openhouse.db1.$TABLE" > /dev/null "$SCRIPT_DIR/local-spark-sql.sh" "CREATE TABLE openhouse.db1.$TABLE ( id STRING, val STRING - ) TBLPROPERTIES ('maintenance.optimizer.ofd.enabled'='true')" > /dev/null + ) TBLPROPERTIES ( + 'maintenance.optimizer.ofd.enabled'='true', + 'maintenance.optimizer.stats.enabled'='true' + )" > /dev/null for i in $(seq 1 "$WRITES"); do "$SCRIPT_DIR/local-spark-sql.sh" \ @@ -88,11 +91,42 @@ for entry in $TABLES; do echo "$TABLE=$TABLE_UUID" >> /tmp/demo_ofd_uuids.txt done -STATS_COUNT=$(curl -sf "$OPT_API/v1/optimizer/stats?limit=100" | jq 'length') +# Stats push is async fire-and-forget; the dispatcher subscribes on the Netty event +# loop after the commit thread returns. Poll briefly for the rows to settle so we +# don't false-fail before the loop wakes up. +echo "" +echo "=== [Wait] Tables on-commit stats push to land in optimizer DB ===" +STATS_WAIT_SECS=30 +i=0 +STATS_COUNT=0 +while [ $i -lt "$STATS_WAIT_SECS" ]; do + STATS_COUNT=$(curl -sf "$OPT_API/v1/optimizer/stats?limit=100" | jq 'length') + [ "$STATS_COUNT" -ge "$TABLE_COUNT" ] && break + printf " stats rows: %d/%d (waited %ds)\r" "$STATS_COUNT" "$TABLE_COUNT" "$i" + sleep 2 + i=$((i + 2)) +done +echo "" [ "$STATS_COUNT" -ge "$TABLE_COUNT" ] \ - || { echo "FAIL: expected $TABLE_COUNT stats rows, got $STATS_COUNT"; exit 1; } + || { echo "FAIL: expected $TABLE_COUNT stats rows, got $STATS_COUNT after ${STATS_WAIT_SECS}s"; exit 1; } echo "PASS: $STATS_COUNT stats rows posted by Tables Service on-commit hook" +# Verify the new payload shape carries snapshot stats, not just identity fields. +# Pick one table and confirm the optimizer recorded a non-zero numCurrentFiles, a +# non-zero tableSizeBytes, and the snapshot ID we sent. +SAMPLE_UUID=$(head -n1 /tmp/demo_ofd_uuids.txt | cut -d= -f2) +SAMPLE_ROW=$(curl -sf "$OPT_API/v1/optimizer/stats/$SAMPLE_UUID") +SAMPLE_NUM_FILES=$(echo "$SAMPLE_ROW" | jq -r '.stats.snapshot.numCurrentFiles // 0') +SAMPLE_SIZE=$(echo "$SAMPLE_ROW" | jq -r '.stats.snapshot.tableSizeBytes // 0') +SAMPLE_SNAPSHOT_ID=$(echo "$SAMPLE_ROW" | jq -r '.stats.snapshot.snapshotId // empty') +[ "$SAMPLE_NUM_FILES" -gt 0 ] \ + || { echo "FAIL: sample stats row has numCurrentFiles=$SAMPLE_NUM_FILES, expected > 0"; exit 1; } +[ "$SAMPLE_SIZE" -gt 0 ] \ + || { echo "FAIL: sample stats row has tableSizeBytes=$SAMPLE_SIZE, expected > 0"; exit 1; } +[ -n "$SAMPLE_SNAPSHOT_ID" ] \ + || { echo "FAIL: sample stats row missing snapshotId"; exit 1; } +echo "PASS: sample stats row carries snapshot payload (files=$SAMPLE_NUM_FILES, bytes=$SAMPLE_SIZE, snapshotId=$SAMPLE_SNAPSHOT_ID)" + echo "" echo "=== [2/4] Expire old snapshots (creates orphan data files) ===" kill_idle_session diff --git a/infra/recipes/docker-compose/oh-hadoop-spark/docker-compose.yml b/infra/recipes/docker-compose/oh-hadoop-spark/docker-compose.yml index ec404ae81..bcf4787bf 100644 --- a/infra/recipes/docker-compose/oh-hadoop-spark/docker-compose.yml +++ b/infra/recipes/docker-compose/oh-hadoop-spark/docker-compose.yml @@ -22,7 +22,10 @@ services: - OTEL_METRICS_EXPORTER=none - OTEL_LOGS_EXPORTER=none - OTEL_TRACES_SAMPLER=always_on - - CLUSTER_OPTIMIZER_BASE_URI=http://openhouse-optimizer:8080 + # Post-commit operation framework: enable the dispatcher and the optimizer-stats op. + - TABLES_POSTCOMMIT_ENABLED=true + - OPTIMIZER_STATS_ENABLED=true + - OPTIMIZER_STATS_BASE_URI=http://openhouse-optimizer:8080 openhouse-jobs: container_name: local.openhouse-jobs