diff --git a/apps/spark/src/main/java/com/linkedin/openhouse/jobs/spark/BatchedOrphanFilesDeletionSparkApp.java b/apps/spark/src/main/java/com/linkedin/openhouse/jobs/spark/BatchedOrphanFilesDeletionSparkApp.java
new file mode 100644
index 000000000..e602cf709
--- /dev/null
+++ b/apps/spark/src/main/java/com/linkedin/openhouse/jobs/spark/BatchedOrphanFilesDeletionSparkApp.java
@@ -0,0 +1,446 @@
+package com.linkedin.openhouse.jobs.spark;
+
+import com.google.common.collect.Iterables;
+import com.linkedin.openhouse.common.metrics.DefaultOtelConfig;
+import com.linkedin.openhouse.common.metrics.OtelEmitter;
+import com.linkedin.openhouse.jobs.exception.TableValidationException;
+import com.linkedin.openhouse.jobs.spark.optimizer.OperationUpdateRequest;
+import com.linkedin.openhouse.jobs.spark.optimizer.OptimizerServiceClient;
+import com.linkedin.openhouse.jobs.spark.state.StateManager;
+import com.linkedin.openhouse.jobs.util.AppConstants;
+import com.linkedin.openhouse.jobs.util.AppsOtelEmitter;
+import com.linkedin.openhouse.jobs.util.TableStateValidator;
+import io.opentelemetry.api.common.AttributeKey;
+import io.opentelemetry.api.common.Attributes;
+import java.io.IOException;
+import java.time.Duration;
+import java.util.AbstractMap;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+import java.util.concurrent.TimeUnit;
+import java.util.stream.Collectors;
+import lombok.extern.slf4j.Slf4j;
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.Option;
+import org.apache.commons.lang3.math.NumberUtils;
+import org.apache.iceberg.Table;
+import org.apache.iceberg.actions.DeleteOrphanFiles;
+
+/**
+ * Batched orphan-files-deletion Spark app. One Spark job processes a list of {@code (table,
+ * operationId)} pairs that the optimizer scheduler bin-packed into a single batch. Each table is
+ * handled by a worker thread; per-table failures are caught and reported back independently — the
+ * job continues for the remaining tables and exits 0 if at least one table succeeds.
+ *
+ * <p>This is the multi-table counterpart of {@link OrphanFilesDeletionSparkApp}. The single-table
+ * app remains the deployment unit when bin size is 1, and stays the canonical reference for the
+ * actual deletion logic.
+ *
+ * <p>Example invocation:
+ *
+ * <pre>{@code
+ * com.linkedin.openhouse.jobs.spark.BatchedOrphanFilesDeletionSparkApp \
+ *   --tableNames db.t1,db.t2,db.t3 \
+ *   --operationIds op-uuid-1,op-uuid-2,op-uuid-3 \
+ *   --tableUuids tab-uuid-1,tab-uuid-2,tab-uuid-3 \
+ *   --resultsEndpoint http://optimizer.svc:8080 \
+ *   --driverParallelism 4
+ * }</pre>
+ */
+@Slf4j
+public class BatchedOrphanFilesDeletionSparkApp extends BaseSparkApp {
+
+  private static final String OPERATION_TYPE = "ORPHAN_FILES_DELETION"; // sent as operationType
+  private static final String STATUS_SUCCESS = "SUCCESS"; // status posted when a table succeeds
+  private static final String STATUS_FAILED = "FAILED"; // status posted when a table fails
+  private static final int DEFAULT_MAX_ORPHAN_FILE_SAMPLE_SIZE = 20000; // CLI default
+  private static final int DEFAULT_MIN_OFD_TTL_IN_DAYS = 3; // TTL floor for replica tables
+
+  private final List<BatchEntry> entries; // one per table/operation pair in this batch
+  private final String resultsEndpoint; // Optimizer Service base URL
+  private final int driverParallelism; // worker-thread count, clamped to >= 1
+  private final long ttlSeconds; // job-level TTL; table properties may override it
+  private final String backupDir; // passed through to deleteOrphanFiles
+  private final int concurrentDeletes; // passed through to deleteOrphanFiles
+  private final boolean streamResults; // passed through to deleteOrphanFiles
+  private final int maxOrphanFileSampleSize; // passed through to deleteOrphanFiles
+
+  public BatchedOrphanFilesDeletionSparkApp(
+      String jobId,
+      StateManager stateManager,
+      OtelEmitter otelEmitter,
+      List<BatchEntry> entries,
+      String resultsEndpoint,
+      int driverParallelism,
+      long ttlSeconds,
+      String backupDir,
+      int concurrentDeletes,
+      boolean streamResults,
+      int maxOrphanFileSampleSize) {
+    super(jobId, stateManager, otelEmitter);
+    this.entries = entries; // kept by reference, not copied
+    this.resultsEndpoint = resultsEndpoint;
+    this.driverParallelism = Math.max(1, driverParallelism); // fixed pool needs >= 1 thread
+    this.ttlSeconds = ttlSeconds;
+    this.backupDir = backupDir;
+    this.concurrentDeletes = concurrentDeletes;
+    this.streamResults = streamResults;
+    this.maxOrphanFileSampleSize = maxOrphanFileSampleSize;
+  }
+
+  @Override
+  protected void runInner(Operations ops) {
+    final int total = entries.size();
+    log.info(
+        "Batched OFD start: entries={} driverParallelism={} resultsEndpoint={}",
+        total,
+        driverParallelism,
+        resultsEndpoint);
+
+    if (total == 0) {
+      log.warn("Batched OFD invoked with no entries; nothing to do");
+      return;
+    }
+
+    // One client serves the whole batch; it is closed as soon as runBatch returns or throws.
+    final int succeeded;
+    try (OptimizerServiceClient client = newOptimizerClient()) {
+      succeeded = runBatch(ops, client);
+    }
+
+    log.info(
+        "Batched OFD finished: total={} success={} failed={}",
+        total,
+        succeeded,
+        total - succeeded);
+
+    if (succeeded == 0) {
+      throw new RuntimeException(String.format("All %d operations in batch failed", total));
+    }
+  }
+
+  private int runBatch(Operations ops, OptimizerServiceClient client) {
+    ExecutorService pool = Executors.newFixedThreadPool(driverParallelism);
+    try {
+      // Two-phase pipeline: collect() submits every worker before the first await, so tables run
+      // concurrently (bounded by the pool size); each Future stays paired with its BatchEntry.
+      List<Map.Entry<BatchEntry, Future<Boolean>>> submissions =
+          entries.stream()
+              .map(
+                  entry ->
+                      new AbstractMap.SimpleImmutableEntry<>(
+                          entry, pool.submit(new TableWorker(ops, entry, client))))
+              .collect(Collectors.toList());
+      return submissions.stream()
+          .mapToInt(submission -> awaitOne(submission.getKey(), submission.getValue(), client))
+          .sum();
+    } finally {
+      shutdownPool(pool); // runs even if submit or await throws
+    }
+  }
+
+  private int awaitOne(BatchEntry entry, Future<Boolean> future, OptimizerServiceClient client) {
+    try {
+      Boolean outcome = future.get();
+      return Boolean.TRUE.equals(outcome) ? 1 : 0;
+    } catch (ExecutionException e) {
+      // TableWorker traps Throwable and reports its own result, so landing here means something
+      // escaped it anyway. Post FAILED so the row doesn't sit SCHEDULED until the stale-timeout.
+      log.error(
+          "Worker threw outside its own catch for fqtn={} — reporting FAILED",
+          entry.getFqtn(),
+          e.getCause());
+      reportResult(entry, false, client);
+      return 0;
+    } catch (InterruptedException e) {
+      Thread.currentThread().interrupt();
+      log.error("Worker interrupted (likely job cancellation): fqtn={}", entry.getFqtn(), e);
+      otelEmitter.count(
+          METRICS_SCOPE,
+          "optimizer_batch_interrupted",
+          1,
+          Attributes.of(AttributeKey.stringKey(AppConstants.TABLE_NAME), entry.getFqtn()));
+      return 0;
+    }
+  }
+
+  private void shutdownPool(ExecutorService pool) {
+    pool.shutdown();
+    try {
+      if (pool.awaitTermination(30, TimeUnit.SECONDS)) {
+        return; // every worker finished within the grace period
+      }
+    } catch (InterruptedException e) {
+      Thread.currentThread().interrupt();
+    }
+    pool.shutdownNow(); // timed out or interrupted: cancel whatever is still running
+  }
+
+  protected OptimizerServiceClient newOptimizerClient() {
+    return new OptimizerServiceClient(resultsEndpoint); // client with the default timeouts
+  }
+
+  /** POSTs one operation's outcome to the Optimizer Service; failures are logged and counted. */
+  private void reportResult(BatchEntry entry, boolean success, OptimizerServiceClient client) {
+    OperationUpdateRequest body =
+        OperationUpdateRequest.builder()
+            .operationId(entry.getOperationId())
+            .status(success ? STATUS_SUCCESS : STATUS_FAILED)
+            .tableUuid(entry.getTableUuid())
+            .databaseName(entry.getDatabaseName())
+            .tableName(entry.getTableName())
+            .operationType(OPERATION_TYPE)
+            .build();
+    try {
+      client.updateOperation(body);
+    } catch (IOException | RuntimeException e) { // OkHttp throws IAE on a malformed URL
+      log.error(
+          "Failed to report operation result; row will stay SCHEDULED until stale-timeout: operationId={} fqtn={}",
+          entry.getOperationId(),
+          entry.getFqtn(),
+          e);
+      otelEmitter.count(
+          METRICS_SCOPE,
+          "optimizer_update_failed",
+          1,
+          Attributes.of(AttributeKey.stringKey(AppConstants.TABLE_NAME), entry.getFqtn()));
+    }
+  }
+
+  /** One unit of work in a batched OFD job. */
+  private final class TableWorker implements Callable<Boolean> {
+    private final Operations ops;
+    private final BatchEntry entry;
+    private final OptimizerServiceClient client;
+
+    TableWorker(Operations ops, BatchEntry entry, OptimizerServiceClient client) {
+      this.ops = ops;
+      this.entry = entry;
+      this.client = client;
+    }
+
+    @Override
+    public Boolean call() {
+      String fqtn = entry.getFqtn();
+      boolean success = false; // flipped only after validate() returns normally
+      try {
+        log.info("OFD start: fqtn={} operationId={}", fqtn, entry.getOperationId());
+        Table table = ops.getTable(fqtn);
+        long olderThanTimestampMillis =
+            System.currentTimeMillis() - TimeUnit.SECONDS.toMillis(resolveTtlSeconds(table));
+        DeleteOrphanFiles.Result result =
+            ops.deleteOrphanFiles(
+                table,
+                olderThanTimestampMillis,
+                Boolean.parseBoolean(
+                    table.properties().getOrDefault(AppConstants.BACKUP_ENABLED_KEY, "false")),
+                backupDir,
+                concurrentDeletes,
+                streamResults,
+                maxOrphanFileSampleSize);
+        // Count via iteration rather than materializing the full path list: a table with millions
+        // of orphan files would otherwise OOM the driver, and that risk multiplies with
+        // driverParallelism workers running concurrently.
+        int orphanCount = Iterables.size(result.orphanFileLocations());
+        otelEmitter.count(
+            METRICS_SCOPE,
+            AppConstants.ORPHAN_FILE_COUNT,
+            orphanCount,
+            Attributes.of(AttributeKey.stringKey(AppConstants.TABLE_NAME), fqtn));
+        validate(fqtn);
+        success = true;
+        log.info("OFD success: fqtn={} orphansDetected={}", fqtn, orphanCount);
+      } catch (Throwable t) { // broad on purpose: one table's failure must not end the batch
+        log.error("OFD failed: fqtn={} operationId={}", fqtn, entry.getOperationId(), t);
+      } finally { // report on every path, success or failure
+        reportResult(entry, success, client);
+      }
+      return success;
+    }
+
+    /**
+     * Re-runs {@link TableStateValidator} — the same post-job consistency check the single-table
+     * {@link OrphanFilesDeletionSparkApp} uses — to confirm the table's manifests and metadata are
+     * intact after deletion. A failure here is treated as a failed operation: it's logged, counted,
+     * and re-thrown so the outer {@link #call()} marks {@code success=false}.
+     */
+    private void validate(String fqtn) {
+      try {
+        TableStateValidator.run(ops.spark(), fqtn);
+      } catch (TableValidationException e) {
+        log.error("Post-job validation failed: fqtn={}", fqtn, e);
+        // The counter carries both the table name and this job's simple class name.
+        Attributes tags =
+            Attributes.of(
+                AttributeKey.stringKey(AppConstants.TABLE_NAME),
+                fqtn,
+                AttributeKey.stringKey(AppConstants.JOB_NAME),
+                BatchedOrphanFilesDeletionSparkApp.class.getSimpleName());
+        otelEmitter.count(METRICS_SCOPE, "post_run_validation_error", 1, tags);
+        // Propagate so call() leaves success=false and reports FAILED.
+        throw e;
+      }
+    }
+
+    /**
+     * Picks the orphan-file age threshold for one table: the job-level TTL, or one day when the
+     * table opts in via its properties, raised to the replica minimum for replica tables.
+     */
+    private long resolveTtlSeconds(Table table) {
+      Map<String, String> props = table.properties();
+      String oneDayFlag = props.getOrDefault(AppConstants.OFD_ONE_DAY_TTL_ENABLED_KEY, "false");
+      long ttl = Boolean.parseBoolean(oneDayFlag) ? TimeUnit.DAYS.toSeconds(1) : ttlSeconds;
+      String tableType =
+          props.getOrDefault(
+              AppConstants.OPENHOUSE_TABLE_TYPE_KEY, AppConstants.TABLE_TYPE_PRIMARY);
+      if (!AppConstants.TABLE_TYPE_REPLICA.equals(tableType)) {
+        return ttl;
+      }
+      // Replica tables get a floor; toDays() truncates, so a partial day counts as the lower day.
+      long floorSeconds = TimeUnit.DAYS.toSeconds(DEFAULT_MIN_OFD_TTL_IN_DAYS);
+      return Duration.ofSeconds(ttl).toDays() < DEFAULT_MIN_OFD_TTL_IN_DAYS ? floorSeconds : ttl;
+    }
+  }
+
+  /** Per-table inputs for one operation row inside a bin. */
+  @lombok.AllArgsConstructor
+  @lombok.Builder
+  @lombok.Getter
+  @lombok.ToString
+  public static class BatchEntry {
+    private final String fqtn; // trimmed db.table token from --tableNames
+    private final String operationId; // echoed back in the result update
+    private final String tableUuid; // echoed back in the result update
+    private final String databaseName; // fqtn up to the first dot
+    private final String tableName; // fqtn after the first dot
+  }
+
+  public static void main(String[] args) {
+    OtelEmitter otelEmitter =
+        new AppsOtelEmitter(Collections.singletonList(DefaultOtelConfig.getOpenTelemetry()));
+    createApp(args, otelEmitter).run(); // run() is inherited; presumably it invokes runInner
+  }
+
+  public static BatchedOrphanFilesDeletionSparkApp createApp(
+      String[] args, OtelEmitter otelEmitter) {
+    List<Option> extraOptions = new ArrayList<>();
+    extraOptions.add(
+        new Option(
+            null, "tableNames", true, "Comma-separated list of fully-qualified table names"));
+    extraOptions.add(
+        new Option(
+            null, "operationIds", true, "Comma-separated operation UUIDs, parallel to tableNames"));
+    extraOptions.add(
+        new Option(
+            null, "tableUuids", true, "Comma-separated table UUIDs, parallel to tableNames"));
+    extraOptions.add(
+        new Option(null, "resultsEndpoint", true, "Base URL of the Optimizer Service"));
+    extraOptions.add(
+        new Option(null, "driverParallelism", true, "Worker threads in this batch (default 1)"));
+    extraOptions.add(
+        new Option("tr", "trashDir", true, "Orphan files staging dir before deletion"));
+    extraOptions.add(
+        new Option(
+            "r",
+            "ttl",
+            true,
+            "How old files should be to be considered orphaned in seconds, minimum 1d is enforced"));
+    extraOptions.add(new Option("b", "backupDir", true, "Backup directory for deleted data"));
+    extraOptions.add(
+        new Option("c", "concurrentDeletes", true, "Number of concurrent deletes per table"));
+    extraOptions.add(
+        new Option(
+            null, "streamResults", false, "Stream orphan file deletions instead of collecting"));
+    extraOptions.add(
+        new Option(null, "maxOrphanFileSampleSize", true, "Max orphan file sample paths returned"));
+
+    CommandLine cmdLine = createCommandLine(args, extraOptions); // NOTE: trashDir is never read
+
+    List<BatchEntry> entries =
+        buildEntries(
+            cmdLine.getOptionValue("tableNames"),
+            cmdLine.getOptionValue("operationIds"),
+            cmdLine.getOptionValue("tableUuids"));
+
+    return new BatchedOrphanFilesDeletionSparkApp(
+        getJobId(cmdLine),
+        createStateManager(cmdLine, otelEmitter),
+        otelEmitter,
+        entries,
+        requireOption(cmdLine, "resultsEndpoint"),
+        Integer.parseInt(cmdLine.getOptionValue("driverParallelism", "1")), // ctor clamps to >= 1
+        Math.max(
+            NumberUtils.toLong(cmdLine.getOptionValue("ttl"), TimeUnit.DAYS.toSeconds(7)),
+            TimeUnit.DAYS.toSeconds(1)), // 7-day default, 1-day floor
+        cmdLine.getOptionValue("backupDir", ".backup"),
+        Integer.parseInt(cmdLine.getOptionValue("concurrentDeletes", "10")),
+        cmdLine.hasOption("streamResults"), // flag option: presence means true
+        Integer.parseInt(
+            cmdLine.getOptionValue(
+                "maxOrphanFileSampleSize", String.valueOf(DEFAULT_MAX_ORPHAN_FILE_SAMPLE_SIZE))));
+  }
+
+  static List<BatchEntry> buildEntries(String tableNames, String operationIds, String tableUuids) {
+    for (String arg : new String[] {tableNames, operationIds, tableUuids}) { // all three required
+      if (arg == null || arg.isEmpty()) {
+        throw new IllegalArgumentException(
+            "--tableNames, --operationIds, and --tableUuids are all required and must be non-empty");
+      }
+    }
+    String[] tables = tableNames.split(",");
+    String[] ops = operationIds.split(",");
+    String[] uuids = tableUuids.split(",");
+    if (tables.length != ops.length || tables.length != uuids.length) {
+      throw new IllegalArgumentException(
+          String.format(
+              "Parallel-list length mismatch: tableNames=%d operationIds=%d tableUuids=%d",
+              tables.length, ops.length, uuids.length));
+    }
+    List<BatchEntry> entries = new ArrayList<>(tables.length); // lists are zipped by index
+    for (int i = 0; i < tables.length; i++) {
+      String fqtn = tables[i].trim();
+      String[] dbAndTable = fqtn.split("\\.", 2);
+      if (dbAndTable.length != 2 || dbAndTable[0].isEmpty() || dbAndTable[1].isEmpty()) {
+        throw new IllegalArgumentException(
+            "tableNames entries must be fully-qualified (db.table): " + fqtn);
+      }
+      if (ops[i].trim().isEmpty() || uuids[i].trim().isEmpty()) { // e.g. "op-1,,op-3"
+        throw new IllegalArgumentException("Blank operationId or tableUuid for table: " + fqtn);
+      }
+      entries.add(
+          BatchEntry.builder()
+              .fqtn(fqtn)
+              .operationId(ops[i].trim())
+              .tableUuid(uuids[i].trim())
+              .databaseName(dbAndTable[0])
+              .tableName(dbAndTable[1])
+              .build());
+    }
+    return entries;
+  }
+
+  private static String requireOption(CommandLine cmdLine, String name) {
+    String value = cmdLine.getOptionValue(name);
+    if (value != null && !value.isEmpty()) {
+      return value;
+    }
+    throw new IllegalArgumentException("--" + name + " is required"); // absent or empty
+  }
+
+  /** Visible for tests; returns a read-only view of the batch entries. */
+  List<BatchEntry> getEntries() {
+    return Collections.unmodifiableList(entries);
+  }
+
+  /** Visible for tests; returns the worker-thread count after clamping to at least 1. */
+  int getDriverParallelism() {
+    return driverParallelism;
+  }
+}
diff --git a/apps/spark/src/main/java/com/linkedin/openhouse/jobs/spark/optimizer/OperationUpdateRequest.java b/apps/spark/src/main/java/com/linkedin/openhouse/jobs/spark/optimizer/OperationUpdateRequest.java
new file mode 100644
index 000000000..715873aaa
--- /dev/null
+++ b/apps/spark/src/main/java/com/linkedin/openhouse/jobs/spark/optimizer/OperationUpdateRequest.java
@@ -0,0 +1,26 @@
+package com.linkedin.openhouse.jobs.spark.optimizer;
+
+import lombok.AllArgsConstructor;
+import lombok.Builder;
+import lombok.Data;
+import lombok.NoArgsConstructor;
+
+/**
+ * Wire-compatible body for {@code POST /v1/optimizer/operations/update} on the Optimizer Service.
+ *
+ * <p>Mirrors {@code com.linkedin.openhouse.optimizer.api.spec.UpdateOperationRequest} from the
+ * optimizer service module so this app can be built before that module merges. Keep the two in
+ * sync.
+ */
+@Data
+@Builder
+@NoArgsConstructor
+@AllArgsConstructor
+public class OperationUpdateRequest {
+  private String operationId; // id of the operation row being updated
+  private String status; // the batched OFD app sends "SUCCESS" or "FAILED"
+  private String tableUuid;
+  private String databaseName;
+  private String tableName;
+  private String operationType; // the batched OFD app sends "ORPHAN_FILES_DELETION"
+}
diff --git a/apps/spark/src/main/java/com/linkedin/openhouse/jobs/spark/optimizer/OptimizerServiceClient.java b/apps/spark/src/main/java/com/linkedin/openhouse/jobs/spark/optimizer/OptimizerServiceClient.java
new file mode 100644
index 000000000..50cc20b02
--- /dev/null
+++ b/apps/spark/src/main/java/com/linkedin/openhouse/jobs/spark/optimizer/OptimizerServiceClient.java
@@ -0,0 +1,96 @@
+package com.linkedin.openhouse.jobs.spark.optimizer;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import java.io.IOException;
+import java.util.Objects;
+import java.util.concurrent.TimeUnit;
+import lombok.Builder;
+import lombok.extern.slf4j.Slf4j;
+import okhttp3.MediaType;
+import okhttp3.OkHttpClient;
+import okhttp3.Request;
+import okhttp3.RequestBody;
+import okhttp3.Response;
+
+/**
+ * Thin OkHttp client for the Optimizer Service. The batched Spark app calls {@link
+ * #updateOperation(OperationUpdateRequest)} once per finished operation to record SUCCESS or
+ * FAILED.
+ *
+ * <p>Transport failures and non-2xx responses surface as {@link IOException}; the caller decides
+ * whether to retry. Per the design, a missed update is recoverable — the operation row stays
+ * SCHEDULED and the Analyzer's stale-timeout will re-queue it.
+ *
+ * <p>Construct with the {@link Config} builder to override the default timeouts.
+ */
+@Slf4j
+public class OptimizerServiceClient implements AutoCloseable {
+
+  private static final MediaType JSON = MediaType.parse("application/json; charset=utf-8");
+  private static final String UPDATE_PATH = "/v1/optimizer/operations/update";
+
+  private final String baseUrl;
+  private final OkHttpClient httpClient;
+  private final ObjectMapper objectMapper;
+
+  public OptimizerServiceClient(String baseUrl) {
+    this(Config.builder().baseUrl(baseUrl).build()); // default timeouts from Config
+  }
+
+  public OptimizerServiceClient(Config config) {
+    this(config.getBaseUrl(), clientFor(config), new ObjectMapper());
+  }
+
+  OptimizerServiceClient(String baseUrl, OkHttpClient httpClient, ObjectMapper objectMapper) {
+    this.baseUrl = stripTrailingSlash(Objects.requireNonNull(baseUrl, "baseUrl")); // NPE if null
+    this.httpClient = httpClient; // used as given; close() shuts it down
+    this.objectMapper = objectMapper;
+  }
+
+  public void updateOperation(OperationUpdateRequest body) throws IOException {
+    String url = baseUrl + UPDATE_PATH;
+    RequestBody payload = RequestBody.create(objectMapper.writeValueAsString(body), JSON);
+    Request request = new Request.Builder().url(url).post(payload).build();
+    try (Response response = httpClient.newCall(request).execute()) { // closes the response body
+      if (!response.isSuccessful()) { // any non-2xx status
+        throw new IOException(
+            String.format(
+                "Optimizer Service update failed: url=%s status=%d operationId=%s",
+                url, response.code(), body.getOperationId()));
+      }
+      log.info(
+          "Reported operation update: operationId={} status={} httpStatus={}",
+          body.getOperationId(),
+          body.getStatus(),
+          response.code());
+    }
+  }
+
+  @Override
+  public void close() {
+    httpClient.dispatcher().executorService().shutdown(); // stop OkHttp's dispatcher executor
+    httpClient.connectionPool().evictAll(); // close idle pooled connections
+  }
+
+  private static OkHttpClient clientFor(Config config) {
+    return new OkHttpClient.Builder() // every timeout comes from Config, in seconds
+        .connectTimeout(config.getConnectTimeoutSeconds(), TimeUnit.SECONDS)
+        .readTimeout(config.getReadTimeoutSeconds(), TimeUnit.SECONDS)
+        .writeTimeout(config.getWriteTimeoutSeconds(), TimeUnit.SECONDS)
+        .build();
+  }
+
+  private static String stripTrailingSlash(String url) {
+    return url.endsWith("/") ? url.substring(0, url.length() - 1) : url; // at most one slash
+  }
+
+  /** Tunable transport settings. Defaults match the previous hardcoded values. */
+  @lombok.Getter
+  @Builder
+  public static class Config {
+    private final String baseUrl; // required: the client constructor rejects null
+    @Builder.Default private final long connectTimeoutSeconds = 10L;
+    @Builder.Default private final long readTimeoutSeconds = 30L;
+    @Builder.Default private final long writeTimeoutSeconds = 30L;
+  }
+}
diff --git a/apps/spark/src/main/java/com/linkedin/openhouse/jobs/util/binpack/Bin.java b/apps/spark/src/main/java/com/linkedin/openhouse/jobs/util/binpack/Bin.java
new file mode 100644
index 000000000..0b40b4958
--- /dev/null
+++ b/apps/spark/src/main/java/com/linkedin/openhouse/jobs/util/binpack/Bin.java
@@ -0,0 +1,49 @@
+package com.linkedin.openhouse.jobs.util.binpack;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import lombok.Getter;
+import lombok.ToString;
+
+/**
+ * Mutable accumulator used by {@link FirstFitDecreasingBinPacker}. After packing completes the
+ * caller treats the returned bins as immutable; {@link #items()} returns an unmodifiable view.
+ */
+@ToString
+public class Bin {
+  private final List<BinItem> items = new ArrayList<>();
+  // Running sums over items; only add() changes them.
+  @Getter private long totalWeight;
+  @Getter private long totalSizeBytes;
+
+  /**
+   * Returns true iff adding {@code item} would keep this bin at or below all three caps. A cap of
+   * {@code <= 0} disables that dimension.
+   */
+  boolean fits(BinItem item, long maxWeight, long maxSizeBytes, int maxItems) {
+    // Each clause reads "cap disabled, or still within cap". The && chain short-circuits, so item
+    // is not touched once the item-count cap has already ruled the bin out.
+    return (maxItems <= 0 || items.size() < maxItems)
+        && (maxWeight <= 0 || totalWeight + item.getWeight() <= maxWeight)
+        && (maxSizeBytes <= 0 || totalSizeBytes + item.getSizeBytes() <= maxSizeBytes);
+  }
+
+  /** Appends {@code item} and adds its weight and size to the totals; performs no cap check. */
+  void add(BinItem item) {
+    items.add(item);
+    totalWeight += item.getWeight();
+    totalSizeBytes += item.getSizeBytes();
+  }
+
+  /** Returns the items in insertion order as a read-only view over the live list. */
+  public List<BinItem> items() {
+    // A view, not a copy: later add() calls show through to holders of this list.
+    return Collections.unmodifiableList(items);
+  }
+
+  /** Returns how many items the bin currently holds. */
+  public int size() {
+    return items.size();
+  }
+}
diff --git a/apps/spark/src/main/java/com/linkedin/openhouse/jobs/util/binpack/BinItem.java b/apps/spark/src/main/java/com/linkedin/openhouse/jobs/util/binpack/BinItem.java
new file mode 100644
index 000000000..68bcb16e2
--- /dev/null
+++ b/apps/spark/src/main/java/com/linkedin/openhouse/jobs/util/binpack/BinItem.java
@@ -0,0 +1,29 @@
+package com.linkedin.openhouse.jobs.util.binpack;
+
+import lombok.Builder;
+import lombok.Getter;
+import lombok.NonNull;
+import lombok.ToString;
+
+/**
+ * A single packable unit for {@link FirstFitDecreasingBinPacker}. Carries everything the batched
+ * Spark app needs both to do the work ({@link #fqtn}) and to report the result back to the
+ * Optimizer Service ({@link #operationId}, {@link #tableUuid}, {@link #databaseName}, {@link
+ * #tableName}).
+ *
+ * <p>{@link #weight} is the bin-packing dimension (for OFD: number of current files in the table).
+ * {@link #sizeBytes} is a secondary capacity dimension that lets the packer cap the total on-disk
+ * footprint of a bin independently of file count.
+ */
+@Getter
+@Builder
+@ToString
+public class BinItem {
+  @NonNull private final String fqtn;
+  @NonNull private final String operationId;
+  @NonNull private final String tableUuid;
+  @NonNull private final String databaseName;
+  @NonNull private final String tableName;
+  private final long weight; // the packer sorts on this, heaviest first
+  private final long sizeBytes; // checked against the size cap only; not used for ordering
+}
diff --git a/apps/spark/src/main/java/com/linkedin/openhouse/jobs/util/binpack/FirstFitDecreasingBinPacker.java b/apps/spark/src/main/java/com/linkedin/openhouse/jobs/util/binpack/FirstFitDecreasingBinPacker.java
new file mode 100644
index 000000000..71009d3ff
--- /dev/null
+++ b/apps/spark/src/main/java/com/linkedin/openhouse/jobs/util/binpack/FirstFitDecreasingBinPacker.java
@@ -0,0 +1,70 @@
+package com.linkedin.openhouse.jobs.util.binpack;
+
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.List;
+import java.util.stream.Collectors;
+import lombok.Builder;
+import lombok.extern.slf4j.Slf4j;
+
+/**
+ * First-fit-decreasing bin packer used by the optimizer scheduler to group table operations into
+ * batches before launching a single Spark job per batch.
+ *
+ * <p>Each bin has three independent caps:
+ *
+ * <ul>
+ *   <li>{@code maxWeightPerBin} — total {@link BinItem#getWeight()} (for OFD: number of files)
+ *   <li>{@code maxSizeBytesPerBin} — total on-disk size of all tables in the bin
+ *   <li>{@code maxItemsPerBin} — number of tables per bin
+ * </ul>
+ *
+ * <p>An item that exceeds any single cap on its own is placed into a bin by itself rather than
+ * dropped — we never silently skip maintenance work for an oversized table.
+ *
+ * <p>Pass {@code 0} or a negative value for any cap to disable that dimension.
+ */
+@Slf4j
+@Builder
+public class FirstFitDecreasingBinPacker {
+
+  @Builder.Default private final long maxWeightPerBin = 1_000_000L;
+  @Builder.Default private final long maxSizeBytesPerBin = 5L * 1024L * 1024L * 1024L * 1024L;
+  @Builder.Default private final int maxItemsPerBin = 50;
+
+  /** Packs items first-fit-decreasing by weight; null or empty input yields an empty list. */
+  public List<Bin> pack(List<BinItem> items) {
+    List<Bin> bins = new ArrayList<>();
+    if (items == null || items.isEmpty()) {
+      return bins;
+    }
+
+    List<BinItem> heaviestFirst =
+        items.stream()
+            .sorted(Comparator.comparingLong(BinItem::getWeight).reversed())
+            .collect(Collectors.toList());
+
+    for (BinItem item : heaviestFirst) {
+      // First fit: take the earliest-opened bin that still has room on every enabled cap.
+      Bin target =
+          bins.stream()
+              .filter(bin -> bin.fits(item, maxWeightPerBin, maxSizeBytesPerBin, maxItemsPerBin))
+              .findFirst()
+              .orElse(null);
+      if (target == null) {
+        target = new Bin();
+        if (!target.fits(item, maxWeightPerBin, maxSizeBytesPerBin, maxItemsPerBin)) {
+          log.warn(
+              "Item exceeds per-bin caps on its own; placing in dedicated bin: fqtn={} weight={} sizeBytes={}",
+              item.getFqtn(),
+              item.getWeight(),
+              item.getSizeBytes());
+        }
+        bins.add(target);
+      }
+      target.add(item);
+    }
+    log.info("Packed {} items into {} bins", items.size(), bins.size());
+    return bins;
+  }
+}
diff --git a/apps/spark/src/test/java/com/linkedin/openhouse/jobs/spark/BatchedOrphanFilesDeletionSparkAppArgsTest.java b/apps/spark/src/test/java/com/linkedin/openhouse/jobs/spark/BatchedOrphanFilesDeletionSparkAppArgsTest.java
new file mode 100644
index 000000000..7a32e503f
--- /dev/null
+++ b/apps/spark/src/test/java/com/linkedin/openhouse/jobs/spark/BatchedOrphanFilesDeletionSparkAppArgsTest.java
@@ -0,0 +1,74 @@
+package com.linkedin.openhouse.jobs.spark;
+
+import java.util.List;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+/**
+ * Pure-Java unit tests for {@link BatchedOrphanFilesDeletionSparkApp#buildEntries}. No Spark
+ * session, no HTTP — exercises the CLI-parsing edges that decide whether the app can even start.
+ */
+public class BatchedOrphanFilesDeletionSparkAppArgsTest {
+
+  @Test
+  public void buildEntriesParsesParallelLists() {
+    List<BatchedOrphanFilesDeletionSparkApp.BatchEntry> entries =
+        BatchedOrphanFilesDeletionSparkApp.buildEntries(
+            "db1.t1,db2.t2", "op-1,op-2", "uuid-1,uuid-2");
+
+    Assertions.assertEquals(2, entries.size());
+    Assertions.assertEquals("db1.t1", entries.get(0).getFqtn());
+    Assertions.assertEquals("db1", entries.get(0).getDatabaseName());
+    Assertions.assertEquals("t1", entries.get(0).getTableName());
+    Assertions.assertEquals("op-1", entries.get(0).getOperationId());
+    Assertions.assertEquals("uuid-1", entries.get(0).getTableUuid());
+    Assertions.assertEquals("db2.t2", entries.get(1).getFqtn());
+    Assertions.assertEquals("op-2", entries.get(1).getOperationId());
+  }
+
+  @Test
+  public void buildEntriesTrimsWhitespaceInEachEntry() {
+    List<BatchedOrphanFilesDeletionSparkApp.BatchEntry> entries =
+        BatchedOrphanFilesDeletionSparkApp.buildEntries(
+            " db1.t1 , db2.t2 ", " op-1 , op-2 ", " uuid-1 , uuid-2 ");
+
+    Assertions.assertEquals("db1.t1", entries.get(0).getFqtn()); // padding around tokens is gone
+    Assertions.assertEquals("op-1", entries.get(0).getOperationId());
+    Assertions.assertEquals("uuid-1", entries.get(0).getTableUuid());
+  }
+
+  @Test
+  public void buildEntriesRejectsMismatchedLengths() {
+    Assertions.assertThrows(
+        IllegalArgumentException.class, // two tables and two uuids, but only one operationId
+        () ->
+            BatchedOrphanFilesDeletionSparkApp.buildEntries("db.a,db.b", "op-1", "uuid-1,uuid-2"));
+  }
+
+  @Test
+  public void buildEntriesRejectsNullArguments() {
+    Assertions.assertThrows(
+        IllegalArgumentException.class, // each of the three arguments is nulled in turn
+        () -> BatchedOrphanFilesDeletionSparkApp.buildEntries(null, "op-1", "uuid-1"));
+    Assertions.assertThrows(
+        IllegalArgumentException.class,
+        () -> BatchedOrphanFilesDeletionSparkApp.buildEntries("db.a", null, "uuid-1"));
+    Assertions.assertThrows(
+        IllegalArgumentException.class,
+        () -> BatchedOrphanFilesDeletionSparkApp.buildEntries("db.a", "op-1", null));
+  }
+
+  @Test
+  public void buildEntriesRejectsEmptyStrings() {
+    Assertions.assertThrows(
+        IllegalArgumentException.class, // only the empty tableNames case is exercised here
+        () -> BatchedOrphanFilesDeletionSparkApp.buildEntries("", "op-1", "uuid-1"));
+  }
+
+  @Test
+  public void buildEntriesRejectsNonFqtn() {
+    Assertions.assertThrows(
+        IllegalArgumentException.class, // no dot, so there is no database part
+        () -> BatchedOrphanFilesDeletionSparkApp.buildEntries("just_a_table", "op-1", "uuid-1"));
+  }
+}
diff --git a/apps/spark/src/test/java/com/linkedin/openhouse/jobs/util/binpack/FirstFitDecreasingBinPackerTest.java b/apps/spark/src/test/java/com/linkedin/openhouse/jobs/util/binpack/FirstFitDecreasingBinPackerTest.java
new file mode 100644
index 000000000..d77944772
--- /dev/null
+++ b/apps/spark/src/test/java/com/linkedin/openhouse/jobs/util/binpack/FirstFitDecreasingBinPackerTest.java
@@ -0,0 +1,150 @@
+package com.linkedin.openhouse.jobs.util.binpack;
+
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+public class FirstFitDecreasingBinPackerTest {
+
+  @Test
+  public void emptyInputProducesEmptyOutput() {
+    List<Bin> bins = packer(100, 0, 50).pack(Collections.emptyList());
+    Assertions.assertTrue(bins.isEmpty());
+  }
+
+  @Test
+  public void nullInputProducesEmptyOutput() {
+    List<Bin> bins = packer(100, 0, 50).pack(null);
+    Assertions.assertTrue(bins.isEmpty());
+  }
+
+  @Test
+  public void itemsSortDescendingByWeightBeforePacking() {
+    List<BinItem> items =
+        Arrays.asList(item("db.t_small", 10), item("db.t_big", 100), item("db.t_mid", 50));
+
+    List<Bin> bins = packer(1000, 0, 50).pack(items);
+
+    // Everything fits in one bin since capacity is huge; order inside the bin must be descending.
+    Assertions.assertEquals(1, bins.size());
+    Bin only = bins.get(0);
+    Assertions.assertEquals(3, only.size());
+    Assertions.assertEquals("db.t_big", only.items().get(0).getFqtn());
+    Assertions.assertEquals("db.t_mid", only.items().get(1).getFqtn());
+    Assertions.assertEquals("db.t_small", only.items().get(2).getFqtn());
+    Assertions.assertEquals(160, only.getTotalWeight());
+  }
+
+  @Test
+  public void weightCapForcesMultipleBins() {
+    List<BinItem> items =
+        Arrays.asList(item("db.a", 60), item("db.b", 50), item("db.c", 40), item("db.d", 30));
+
+    List<Bin> bins = packer(100, 0, 50).pack(items);
+
+    // FFD on [60, 50, 40, 30] with cap 100:
+    //   bin0: 60          -> remaining 40
+    //   bin0 tries 50 -> doesn't fit, new bin1: 50
+    //   bin0 tries 40 -> fits, bin0: 60+40=100
+    //   bin1 tries 30 -> fits, bin1: 50+30=80
+    Assertions.assertEquals(2, bins.size());
+    Assertions.assertEquals(100, bins.get(0).getTotalWeight());
+    Assertions.assertEquals(80, bins.get(1).getTotalWeight());
+  }
+
+  @Test
+  public void maxItemsPerBinCapHonored() {
+    List<BinItem> items =
+        IntStream.range(0, 5).mapToObj(i -> item("db.t" + i, 1)).collect(Collectors.toList());
+
+    List<Bin> bins = packer(1000, 0, 2).pack(items);
+
+    Assertions.assertEquals(3, bins.size());
+    Assertions.assertEquals(2, bins.get(0).size());
+    Assertions.assertEquals(2, bins.get(1).size());
+    Assertions.assertEquals(1, bins.get(2).size());
+  }
+
+  @Test
+  public void maxSizeBytesCapHonored() {
+    List<BinItem> items =
+        Arrays.asList(
+            BinItem.builder()
+                .fqtn("db.a")
+                .operationId("op-a")
+                .tableUuid("uuid-a")
+                .databaseName("db")
+                .tableName("a")
+                .weight(1)
+                .sizeBytes(800L)
+                .build(),
+            BinItem.builder()
+                .fqtn("db.b")
+                .operationId("op-b")
+                .tableUuid("uuid-b")
+                .databaseName("db")
+                .tableName("b")
+                .weight(1)
+                .sizeBytes(800L)
+                .build());
+
+    List<Bin> bins = packer(1000, 1000L, 50).pack(items);
+
+    Assertions.assertEquals(2, bins.size());
+    Assertions.assertEquals(800L, bins.get(0).getTotalSizeBytes());
+    Assertions.assertEquals(800L, bins.get(1).getTotalSizeBytes());
+  }
+
+  @Test
+  public void oversizedItemGetsItsOwnBinRatherThanBeingDropped() {
+    List<BinItem> items =
+        Arrays.asList(item("db.tiny1", 10), item("db.giant", 500), item("db.tiny2", 10));
+
+    List<Bin> bins = packer(100, 0, 50).pack(items);
+
+    // Giant exceeds the cap on its own — must still appear in some bin.
+    long total = bins.stream().mapToLong(Bin::getTotalWeight).sum();
+    Assertions.assertEquals(520, total);
+    boolean giantPresent =
+        bins.stream()
+            .flatMap(b -> b.items().stream())
+            .anyMatch(i -> i.getFqtn().equals("db.giant"));
+    Assertions.assertTrue(giantPresent, "oversized item must not be dropped");
+  }
+
+  @Test
+  public void disabledCapsLetEverythingShareOneBin() {
+    List<BinItem> items =
+        IntStream.range(0, 20).mapToObj(i -> item("db.t" + i, 100)).collect(Collectors.toList());
+
+    List<Bin> bins = packer(0, 0, 0).pack(items);
+
+    Assertions.assertEquals(1, bins.size());
+    Assertions.assertEquals(20, bins.get(0).size());
+  }
+
+  private static FirstFitDecreasingBinPacker packer(long maxWeight, long maxSize, int maxItems) {
+    return FirstFitDecreasingBinPacker.builder()
+        .maxWeightPerBin(maxWeight)
+        .maxSizeBytesPerBin(maxSize)
+        .maxItemsPerBin(maxItems)
+        .build();
+  }
+
+  private static BinItem item(String fqtn, long weight) {
+    String[] parts = fqtn.split("\\.", 2);
+    return BinItem.builder()
+        .fqtn(fqtn)
+        .operationId("op-" + parts[1])
+        .tableUuid("uuid-" + parts[1])
+        .databaseName(parts[0])
+        .tableName(parts[1])
+        .weight(weight)
+        .sizeBytes(0L)
+        .build();
+  }
+}